Diffstat (limited to 'drivers/gpu')
1510 files changed, 64113 insertions, 29652 deletions
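[Editorial note, not part of the patch] Several hunks below (drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c and amdgpu_cs.c) replace open-coded kvmalloc_array()/kmalloc_array() + copy_from_user() sequences with memdup_array_user()/vmemdup_array_user(). A minimal sketch of that pattern, assuming a hypothetical helper (demo_entry and demo_copy_entries are illustrative names, not from this series):

#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/err.h>

struct demo_entry {
	u64 handle;
	u32 flags;
};

/* Copy a user-space array in one call; returns ERR_PTR() on failure. */
static struct demo_entry *demo_copy_entries(u64 user_ptr, u32 count)
{
	/*
	 * vmemdup_array_user() checks count * size for overflow, allocates
	 * with kvmalloc() and copies from user space; the caller frees the
	 * result with kvfree(), mirroring the converted amdgpu code.
	 */
	return vmemdup_array_user(u64_to_user_ptr(user_ptr), count,
				  sizeof(struct demo_entry));
}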
diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig index f7ea8e895c0c..7e6bc0b3a589 100644 --- a/drivers/gpu/drm/Kconfig +++ b/drivers/gpu/drm/Kconfig @@ -396,9 +396,11 @@ source "drivers/gpu/drm/sprd/Kconfig" source "drivers/gpu/drm/imagination/Kconfig" +source "drivers/gpu/drm/tyr/Kconfig" + config DRM_HYPERV tristate "DRM Support for Hyper-V synthetic video device" - depends on DRM && PCI && HYPERV + depends on DRM && PCI && HYPERV_VMBUS select DRM_CLIENT_SELECTION select DRM_KMS_HELPER select DRM_GEM_SHMEM_HELPER diff --git a/drivers/gpu/drm/Makefile b/drivers/gpu/drm/Makefile index 5050ac32bba2..4b2f7d794275 100644 --- a/drivers/gpu/drm/Makefile +++ b/drivers/gpu/drm/Makefile @@ -104,7 +104,11 @@ obj-$(CONFIG_DRM_PANEL_BACKLIGHT_QUIRKS) += drm_panel_backlight_quirks.o # obj-$(CONFIG_DRM_EXEC) += drm_exec.o obj-$(CONFIG_DRM_GPUVM) += drm_gpuvm.o -obj-$(CONFIG_DRM_GPUSVM) += drm_gpusvm.o + +drm_gpusvm_helper-y := \ + drm_gpusvm.o\ + drm_pagemap.o +obj-$(CONFIG_DRM_GPUSVM) += drm_gpusvm_helper.o obj-$(CONFIG_DRM_BUDDY) += drm_buddy.o @@ -216,6 +220,7 @@ obj-$(CONFIG_DRM_VBOXVIDEO) += vboxvideo/ obj-$(CONFIG_DRM_LIMA) += lima/ obj-$(CONFIG_DRM_PANFROST) += panfrost/ obj-$(CONFIG_DRM_PANTHOR) += panthor/ +obj-$(CONFIG_DRM_TYR) += tyr/ obj-$(CONFIG_DRM_ASPEED_GFX) += aspeed/ obj-$(CONFIG_DRM_MCDE) += mcde/ obj-$(CONFIG_DRM_TIDSS) += tidss/ diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile index 87080c06e5fc..64e7acff8f18 100644 --- a/drivers/gpu/drm/amd/amdgpu/Makefile +++ b/drivers/gpu/drm/amd/amdgpu/Makefile @@ -66,7 +66,7 @@ amdgpu-y += amdgpu_device.o amdgpu_doorbell_mgr.o amdgpu_kms.o \ amdgpu_fw_attestation.o amdgpu_securedisplay.o \ amdgpu_eeprom.o amdgpu_mca.o amdgpu_psp_ta.o amdgpu_lsdma.o \ amdgpu_ring_mux.o amdgpu_xcp.o amdgpu_seq64.o amdgpu_aca.o amdgpu_dev_coredump.o \ - amdgpu_cper.o amdgpu_userq_fence.o amdgpu_eviction_fence.o + amdgpu_cper.o amdgpu_userq_fence.o amdgpu_eviction_fence.o amdgpu_ip.o amdgpu-$(CONFIG_PROC_FS) += amdgpu_fdinfo.o @@ -84,7 +84,8 @@ amdgpu-y += \ vega20_reg_init.o nbio_v7_4.o nbio_v2_3.o nv.o arct_reg_init.o mxgpu_nv.o \ nbio_v7_2.o hdp_v4_0.o hdp_v5_0.o aldebaran_reg_init.o aldebaran.o soc21.o soc24.o \ sienna_cichlid.o smu_v13_0_10.o nbio_v4_3.o hdp_v6_0.o nbio_v7_7.o hdp_v5_2.o lsdma_v6_0.o \ - nbio_v7_9.o aqua_vanjaram.o nbio_v7_11.o lsdma_v7_0.o hdp_v7_0.o nbif_v6_3_1.o + nbio_v7_9.o aqua_vanjaram.o nbio_v7_11.o lsdma_v7_0.o hdp_v7_0.o nbif_v6_3_1.o \ + cyan_skillfish_reg_init.o # add DF block amdgpu-y += \ @@ -137,7 +138,6 @@ amdgpu-y += \ # add DCE block amdgpu-y += \ dce_v10_0.o \ - dce_v11_0.o \ amdgpu_vkms.o # add GFX block diff --git a/drivers/gpu/drm/amd/amdgpu/aldebaran.c b/drivers/gpu/drm/amd/amdgpu/aldebaran.c index e13fbd974141..9569dc16dd3d 100644 --- a/drivers/gpu/drm/amd/amdgpu/aldebaran.c +++ b/drivers/gpu/drm/amd/amdgpu/aldebaran.c @@ -71,18 +71,29 @@ aldebaran_get_reset_handler(struct amdgpu_reset_control *reset_ctl, return NULL; } +static inline uint32_t aldebaran_get_ip_block_mask(struct amdgpu_device *adev) +{ + uint32_t ip_block_mask = BIT(AMD_IP_BLOCK_TYPE_GFX) | + BIT(AMD_IP_BLOCK_TYPE_SDMA); + + if (adev->aid_mask) + ip_block_mask |= BIT(AMD_IP_BLOCK_TYPE_IH); + + return ip_block_mask; +} + static int aldebaran_mode2_suspend_ip(struct amdgpu_device *adev) { + uint32_t ip_block_mask = aldebaran_get_ip_block_mask(adev); + uint32_t ip_block; int r, i; amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE); amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE); for (i = 
adev->num_ip_blocks - 1; i >= 0; i--) { - if (!(adev->ip_blocks[i].version->type == - AMD_IP_BLOCK_TYPE_GFX || - adev->ip_blocks[i].version->type == - AMD_IP_BLOCK_TYPE_SDMA)) + ip_block = BIT(adev->ip_blocks[i].version->type); + if (!(ip_block_mask & ip_block)) continue; r = amdgpu_ip_block_suspend(&adev->ip_blocks[i]); @@ -200,8 +211,10 @@ aldebaran_mode2_perform_reset(struct amdgpu_reset_control *reset_ctl, static int aldebaran_mode2_restore_ip(struct amdgpu_device *adev) { struct amdgpu_firmware_info *ucode_list[AMDGPU_UCODE_ID_MAXIMUM]; + uint32_t ip_block_mask = aldebaran_get_ip_block_mask(adev); struct amdgpu_firmware_info *ucode; struct amdgpu_ip_block *cmn_block; + struct amdgpu_ip_block *ih_block; int ucode_count = 0; int i, r; @@ -243,6 +256,18 @@ static int aldebaran_mode2_restore_ip(struct amdgpu_device *adev) if (r) return r; + if (ip_block_mask & BIT(AMD_IP_BLOCK_TYPE_IH)) { + ih_block = amdgpu_device_ip_get_ip_block(adev, + AMD_IP_BLOCK_TYPE_IH); + if (unlikely(!ih_block)) { + dev_err(adev->dev, "Failed to get IH handle\n"); + return -EINVAL; + } + r = amdgpu_ip_block_resume(ih_block); + if (r) + return r; + } + /* Reinit GFXHUB */ adev->gfxhub.funcs->init(adev); r = adev->gfxhub.funcs->gart_enable(adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index a5ccd0ada16a..6f5b4a0e0a34 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -63,6 +63,7 @@ #include "kgd_pp_interface.h" #include "amd_shared.h" +#include "amdgpu_utils.h" #include "amdgpu_mode.h" #include "amdgpu_ih.h" #include "amdgpu_irq.h" @@ -434,7 +435,6 @@ struct amdgpu_clock { uint32_t default_mclk; uint32_t default_sclk; uint32_t default_dispclk; - uint32_t current_dispclk; uint32_t dp_extclk; uint32_t max_pixel_clock; }; @@ -470,9 +470,6 @@ struct amdgpu_sa_manager { void *cpu_ptr; }; -int amdgpu_fence_slab_init(void); -void amdgpu_fence_slab_fini(void); - /* * IRQS. */ @@ -548,7 +545,7 @@ struct amdgpu_wb { * this value can be accessed directly by using the offset as an index. * For the GPU address, it is necessary to use gpu_addr and the offset. 
*/ - volatile uint32_t *wb; + uint32_t *wb; /** * @gpu_addr: @@ -724,7 +721,7 @@ int amdgpu_cs_wait_fences_ioctl(struct drm_device *dev, void *data, /* VRAM scratch page for HDP bug, default vram page */ struct amdgpu_mem_scratch { struct amdgpu_bo *robj; - volatile uint32_t *ptr; + uint32_t *ptr; u64 gpu_addr; }; @@ -755,6 +752,7 @@ typedef void (*amdgpu_block_wreg_t)(struct amdgpu_device*, uint32_t, uint32_t, u struct amdgpu_mmio_remap { u32 reg_offset; resource_size_t bus_addr; + struct amdgpu_bo *bo; }; /* Define the HW IP blocks will be used in driver , add more if necessary */ @@ -822,6 +820,20 @@ struct amdgpu_ip_map_info { uint32_t mask); }; +enum amdgpu_uid_type { + AMDGPU_UID_TYPE_XCD, + AMDGPU_UID_TYPE_AID, + AMDGPU_UID_TYPE_SOC, + AMDGPU_UID_TYPE_MAX +}; + +#define AMDGPU_UID_INST_MAX 8 /* max number of instances for each UID type */ + +struct amdgpu_uid { + uint64_t uid[AMDGPU_UID_TYPE_MAX][AMDGPU_UID_INST_MAX]; + struct amdgpu_device *adev; +}; + struct amd_powerplay { void *pp_handle; const struct amd_pm_funcs *pp_funcs; @@ -886,6 +898,7 @@ struct amdgpu_mqd_prop { uint64_t csa_addr; uint64_t fence_address; bool tmz_queue; + bool kernel_queue; }; struct amdgpu_mqd { @@ -898,6 +911,9 @@ struct amdgpu_pcie_reset_ctx { bool in_link_reset; bool occurs_dpc; bool audio_suspended; + struct pci_dev *swus; + struct pci_saved_state *swus_pcistate; + struct pci_saved_state *swds_pcistate; }; /* @@ -931,12 +947,6 @@ enum amdgpu_enforce_isolation_mode { AMDGPU_ENFORCE_ISOLATION_NO_CLEANER_SHADER = 3, }; - -/* - * Non-zero (true) if the GPU has VRAM. Zero (false) otherwise. - */ -#define AMDGPU_HAS_VRAM(_adev) ((_adev)->gmc.real_vram_size) - struct amdgpu_device { struct device *dev; struct pci_dev *pdev; @@ -1140,9 +1150,6 @@ struct amdgpu_device { /* for userq and VM fences */ struct amdgpu_seq64 seq64; - /* KFD */ - struct amdgpu_kfd_dev kfd; - /* UMC */ struct amdgpu_umc umc; @@ -1282,6 +1289,8 @@ struct amdgpu_device { bool debug_exp_resets; bool debug_disable_gpu_ring_reset; bool debug_vm_userptr; + bool debug_disable_ce_logs; + bool debug_enable_ce_cs; /* Protection for the following isolation structure */ struct mutex enforce_isolation_mutex; @@ -1303,6 +1312,12 @@ struct amdgpu_device { struct list_head userq_mgr_list; struct mutex userq_mutex; bool userq_halt_for_enforce_isolation; + struct amdgpu_uid *uid_info; + + /* KFD + * Must be last --ends in a flexible-array member. 
+ */ + struct amdgpu_kfd_dev kfd; }; static inline uint32_t amdgpu_ip_version(const struct amdgpu_device *adev, @@ -1336,6 +1351,11 @@ static inline struct amdgpu_device *amdgpu_ttm_adev(struct ttm_device *bdev) return container_of(bdev, struct amdgpu_device, mman.bdev); } +static inline bool amdgpu_is_multi_aid(struct amdgpu_device *adev) +{ + return !!adev->aid_mask; +} + int amdgpu_device_init(struct amdgpu_device *adev, uint32_t flags); void amdgpu_device_fini_hw(struct amdgpu_device *adev); @@ -1387,7 +1407,8 @@ void amdgpu_device_indirect_wreg64(struct amdgpu_device *adev, void amdgpu_device_indirect_wreg64_ext(struct amdgpu_device *adev, u64 reg_addr, u64 reg_data); u32 amdgpu_device_get_rev_id(struct amdgpu_device *adev); -bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type); +bool amdgpu_device_asic_has_dc_support(struct pci_dev *pdev, + enum amd_asic_type asic_type); bool amdgpu_device_has_dc_support(struct amdgpu_device *adev); void amdgpu_device_set_sriov_virtual_display(struct amdgpu_device *adev); @@ -1558,16 +1579,16 @@ void amdgpu_device_program_register_sequence(struct amdgpu_device *adev, int amdgpu_device_mode1_reset(struct amdgpu_device *adev); int amdgpu_device_link_reset(struct amdgpu_device *adev); -bool amdgpu_device_supports_atpx(struct drm_device *dev); -bool amdgpu_device_supports_px(struct drm_device *dev); -bool amdgpu_device_supports_boco(struct drm_device *dev); -bool amdgpu_device_supports_smart_shift(struct drm_device *dev); -int amdgpu_device_supports_baco(struct drm_device *dev); +bool amdgpu_device_supports_atpx(struct amdgpu_device *adev); +bool amdgpu_device_supports_px(struct amdgpu_device *adev); +bool amdgpu_device_supports_boco(struct amdgpu_device *adev); +bool amdgpu_device_supports_smart_shift(struct amdgpu_device *adev); +int amdgpu_device_supports_baco(struct amdgpu_device *adev); void amdgpu_device_detect_runtime_pm_mode(struct amdgpu_device *adev); bool amdgpu_device_is_peer_accessible(struct amdgpu_device *adev, struct amdgpu_device *peer_adev); -int amdgpu_device_baco_enter(struct drm_device *dev); -int amdgpu_device_baco_exit(struct drm_device *dev); +int amdgpu_device_baco_enter(struct amdgpu_device *adev); +int amdgpu_device_baco_exit(struct amdgpu_device *adev); void amdgpu_device_flush_hdp(struct amdgpu_device *adev, struct amdgpu_ring *ring); @@ -1619,6 +1640,7 @@ void amdgpu_driver_release_kms(struct drm_device *dev); int amdgpu_device_ip_suspend(struct amdgpu_device *adev); int amdgpu_device_prepare(struct drm_device *dev); +void amdgpu_device_complete(struct drm_device *dev); int amdgpu_device_suspend(struct drm_device *dev, bool fbcon); int amdgpu_device_resume(struct drm_device *dev, bool fbcon); u32 amdgpu_get_vblank_counter_kms(struct drm_crtc *crtc); @@ -1669,7 +1691,8 @@ int amdgpu_acpi_pcie_performance_request(struct amdgpu_device *adev, u8 perf_req, bool advertise); int amdgpu_acpi_power_shift_control(struct amdgpu_device *adev, u8 dev_state, bool drv_state); -int amdgpu_acpi_smart_shift_update(struct drm_device *dev, enum amdgpu_ss ss_state); +int amdgpu_acpi_smart_shift_update(struct amdgpu_device *adev, + enum amdgpu_ss ss_state); int amdgpu_acpi_pcie_notify_device_ready(struct amdgpu_device *adev); int amdgpu_acpi_get_tmr_info(struct amdgpu_device *adev, u64 *tmr_offset, u64 *tmr_size); @@ -1700,8 +1723,11 @@ static inline void amdgpu_acpi_release(void) { } static inline bool amdgpu_acpi_is_power_shift_control_supported(void) { return false; } static inline int amdgpu_acpi_power_shift_control(struct 
amdgpu_device *adev, u8 dev_state, bool drv_state) { return 0; } -static inline int amdgpu_acpi_smart_shift_update(struct drm_device *dev, - enum amdgpu_ss ss_state) { return 0; } +static inline int amdgpu_acpi_smart_shift_update(struct amdgpu_device *adev, + enum amdgpu_ss ss_state) +{ + return 0; +} static inline void amdgpu_acpi_get_backlight_caps(struct amdgpu_dm_backlight_caps *caps) { } #endif @@ -1714,7 +1740,7 @@ static inline bool amdgpu_acpi_is_s3_active(struct amdgpu_device *adev) { return #endif #if defined(CONFIG_DRM_AMD_ISP) -int amdgpu_acpi_get_isp4_dev_hid(u8 (*hid)[ACPI_ID_LEN]); +int amdgpu_acpi_get_isp4_dev(struct acpi_device **dev); #endif void amdgpu_register_gpu_instance(struct amdgpu_device *adev); @@ -1760,4 +1786,24 @@ extern const struct attribute_group amdgpu_flash_attr_group; void amdgpu_set_init_level(struct amdgpu_device *adev, enum amdgpu_init_lvl_id lvl); + +static inline int amdgpu_device_bus_status_check(struct amdgpu_device *adev) +{ + u32 status; + int r; + + r = pci_read_config_dword(adev->pdev, PCI_COMMAND, &status); + if (r || PCI_POSSIBLE_ERROR(status)) { + dev_err(adev->dev, "device lost from bus!"); + return -ENODEV; + } + + return 0; +} + +void amdgpu_device_set_uid(struct amdgpu_uid *uid_info, + enum amdgpu_uid_type type, uint8_t inst, + uint64_t uid); +uint64_t amdgpu_device_get_uid(struct amdgpu_uid *uid_info, + enum amdgpu_uid_type type, uint8_t inst); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c index 3835f2592914..9b3180449150 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c @@ -76,6 +76,7 @@ static void aca_banks_release(struct aca_banks *banks) list_for_each_entry_safe(node, tmp, &banks->list, node) { list_del(&node->node); kvfree(node); + banks->nr_banks--; } } @@ -115,6 +116,11 @@ static void aca_smu_bank_dump(struct amdgpu_device *adev, int idx, int total, st u64 event_id = qctx ? qctx->evid.event_id : RAS_EVENT_INVALID_ID; int i; + if (adev->debug_disable_ce_logs && + bank->smu_err_type == ACA_SMU_TYPE_CE && + !ACA_BANK_ERR_IS_DEFFERED(bank)) + return; + RAS_EVENT_LOG(adev, event_id, HW_ERR "Accelerator Check Architecture events logged\n"); /* plus 1 for output format, e.g: ACA[08/08]: xxxx */ for (i = 0; i < ARRAY_SIZE(aca_regs); i++) @@ -125,6 +131,27 @@ static void aca_smu_bank_dump(struct amdgpu_device *adev, int idx, int total, st RAS_EVENT_LOG(adev, event_id, HW_ERR "hardware error logged by the scrubber\n"); } +static bool aca_bank_hwip_is_matched(struct aca_bank *bank, enum aca_hwip_type type) +{ + + struct aca_hwip *hwip; + int hwid, mcatype; + u64 ipid; + + if (!bank || type == ACA_HWIP_TYPE_UNKNOW) + return false; + + hwip = &aca_hwid_mcatypes[type]; + if (!hwip->hwid) + return false; + + ipid = bank->regs[ACA_REG_IDX_IPID]; + hwid = ACA_REG__IPID__HARDWAREID(ipid); + mcatype = ACA_REG__IPID__MCATYPE(ipid); + + return hwip->hwid == hwid && hwip->mcatype == mcatype; +} + static int aca_smu_get_valid_aca_banks(struct amdgpu_device *adev, enum aca_smu_type type, int start, int count, struct aca_banks *banks, struct ras_query_context *qctx) @@ -163,6 +190,15 @@ static int aca_smu_get_valid_aca_banks(struct amdgpu_device *adev, enum aca_smu_ bank.smu_err_type = type; + /* + * Poison being consumed when injecting a UE while running background workloads, + * which are unexpected. 
+ */ + if (type == ACA_SMU_TYPE_UE && + ACA_REG__STATUS__POISON(bank.regs[ACA_REG_IDX_STATUS]) && + !aca_bank_hwip_is_matched(&bank, ACA_HWIP_TYPE_UMC)) + continue; + aca_smu_bank_dump(adev, i, count, &bank, qctx); ret = aca_banks_add_bank(banks, &bank); @@ -173,27 +209,6 @@ static int aca_smu_get_valid_aca_banks(struct amdgpu_device *adev, enum aca_smu_ return 0; } -static bool aca_bank_hwip_is_matched(struct aca_bank *bank, enum aca_hwip_type type) -{ - - struct aca_hwip *hwip; - int hwid, mcatype; - u64 ipid; - - if (!bank || type == ACA_HWIP_TYPE_UNKNOW) - return false; - - hwip = &aca_hwid_mcatypes[type]; - if (!hwip->hwid) - return false; - - ipid = bank->regs[ACA_REG_IDX_IPID]; - hwid = ACA_REG__IPID__HARDWAREID(ipid); - mcatype = ACA_REG__IPID__MCATYPE(ipid); - - return hwip->hwid == hwid && hwip->mcatype == mcatype; -} - static bool aca_bank_is_valid(struct aca_handle *handle, struct aca_bank *bank, enum aca_smu_type type) { const struct aca_bank_ops *bank_ops = handle->bank_ops; @@ -224,6 +239,7 @@ static struct aca_bank_error *new_bank_error(struct aca_error *aerr, struct aca_ mutex_lock(&aerr->lock); list_add_tail(&bank_error->node, &aerr->list); + aerr->nr_errors++; mutex_unlock(&aerr->lock); return bank_error; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c index f5466c592d94..6c62e27b9800 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c @@ -811,18 +811,18 @@ int amdgpu_acpi_power_shift_control(struct amdgpu_device *adev, /** * amdgpu_acpi_smart_shift_update - update dGPU device state to SBIOS * - * @dev: drm_device pointer + * @adev: amdgpu device pointer * @ss_state: current smart shift event * * returns 0 on success, * otherwise return error number. */ -int amdgpu_acpi_smart_shift_update(struct drm_device *dev, enum amdgpu_ss ss_state) +int amdgpu_acpi_smart_shift_update(struct amdgpu_device *adev, + enum amdgpu_ss ss_state) { - struct amdgpu_device *adev = drm_to_adev(dev); int r; - if (!amdgpu_device_supports_smart_shift(dev)) + if (!amdgpu_device_supports_smart_shift(adev)) return 0; switch (ss_state) { @@ -1545,7 +1545,7 @@ static int isp_match_acpi_device_ids(struct device *dev, const void *data) return acpi_match_device(data, dev) ? 
1 : 0; } -int amdgpu_acpi_get_isp4_dev_hid(u8 (*hid)[ACPI_ID_LEN]) +int amdgpu_acpi_get_isp4_dev(struct acpi_device **dev) { struct device *pdev __free(put_device) = NULL; struct acpi_device *acpi_pdev; @@ -1559,7 +1559,7 @@ int amdgpu_acpi_get_isp4_dev_hid(u8 (*hid)[ACPI_ID_LEN]) if (!acpi_pdev) return -ENODEV; - strscpy(*hid, acpi_device_hid(acpi_pdev)); + *dev = acpi_pdev; return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c index 55161e5cdc30..a2879d2b7c8e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c @@ -248,18 +248,42 @@ void amdgpu_amdkfd_interrupt(struct amdgpu_device *adev, kgd2kfd_interrupt(adev->kfd.dev, ih_ring_entry); } -void amdgpu_amdkfd_suspend(struct amdgpu_device *adev, bool run_pm) +void amdgpu_amdkfd_suspend(struct amdgpu_device *adev, bool suspend_proc) +{ + if (adev->kfd.dev) { + if (adev->in_s0ix) + kgd2kfd_stop_sched_all_nodes(adev->kfd.dev); + else + kgd2kfd_suspend(adev->kfd.dev, suspend_proc); + } +} + +int amdgpu_amdkfd_resume(struct amdgpu_device *adev, bool resume_proc) +{ + int r = 0; + + if (adev->kfd.dev) { + if (adev->in_s0ix) + r = kgd2kfd_start_sched_all_nodes(adev->kfd.dev); + else + r = kgd2kfd_resume(adev->kfd.dev, resume_proc); + } + + return r; +} + +void amdgpu_amdkfd_suspend_process(struct amdgpu_device *adev) { if (adev->kfd.dev) - kgd2kfd_suspend(adev->kfd.dev, run_pm); + kgd2kfd_suspend_process(adev->kfd.dev); } -int amdgpu_amdkfd_resume(struct amdgpu_device *adev, bool run_pm) +int amdgpu_amdkfd_resume_process(struct amdgpu_device *adev) { int r = 0; if (adev->kfd.dev) - r = kgd2kfd_resume(adev->kfd.dev, run_pm); + r = kgd2kfd_resume_process(adev->kfd.dev); return r; } @@ -749,12 +773,12 @@ int amdgpu_amdkfd_send_close_event_drain_irq(struct amdgpu_device *adev, int amdgpu_amdkfd_check_and_lock_kfd(struct amdgpu_device *adev) { - return kgd2kfd_check_and_lock_kfd(); + return kgd2kfd_check_and_lock_kfd(adev->kfd.dev); } void amdgpu_amdkfd_unlock_kfd(struct amdgpu_device *adev) { - kgd2kfd_unlock_kfd(); + kgd2kfd_unlock_kfd(adev->kfd.dev); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h index b6ca41859b53..9e120c934cc1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h @@ -107,11 +107,13 @@ struct amdgpu_kfd_dev { bool init_complete; struct work_struct reset_work; - /* HMM page migration MEMORY_DEVICE_PRIVATE mapping */ - struct dev_pagemap pgmap; - /* Client for KFD BO GEM handle allocations */ struct drm_client_dev client; + + /* HMM page migration MEMORY_DEVICE_PRIVATE mapping + * Must be last --ends in a flexible-array member. 
+ */ + struct dev_pagemap pgmap; }; enum kgd_engine_type { @@ -154,8 +156,10 @@ struct amdkfd_process_info { int amdgpu_amdkfd_init(void); void amdgpu_amdkfd_fini(void); -void amdgpu_amdkfd_suspend(struct amdgpu_device *adev, bool run_pm); -int amdgpu_amdkfd_resume(struct amdgpu_device *adev, bool run_pm); +void amdgpu_amdkfd_suspend(struct amdgpu_device *adev, bool suspend_proc); +int amdgpu_amdkfd_resume(struct amdgpu_device *adev, bool resume_proc); +void amdgpu_amdkfd_suspend_process(struct amdgpu_device *adev); +int amdgpu_amdkfd_resume_process(struct amdgpu_device *adev); void amdgpu_amdkfd_interrupt(struct amdgpu_device *adev, const void *ih_ring_entry); void amdgpu_amdkfd_device_probe(struct amdgpu_device *adev); @@ -411,18 +415,22 @@ struct kfd_dev *kgd2kfd_probe(struct amdgpu_device *adev, bool vf); bool kgd2kfd_device_init(struct kfd_dev *kfd, const struct kgd2kfd_shared_resources *gpu_resources); void kgd2kfd_device_exit(struct kfd_dev *kfd); -void kgd2kfd_suspend(struct kfd_dev *kfd, bool run_pm); -int kgd2kfd_resume(struct kfd_dev *kfd, bool run_pm); +void kgd2kfd_suspend(struct kfd_dev *kfd, bool suspend_proc); +int kgd2kfd_resume(struct kfd_dev *kfd, bool resume_proc); +void kgd2kfd_suspend_process(struct kfd_dev *kfd); +int kgd2kfd_resume_process(struct kfd_dev *kfd); int kgd2kfd_pre_reset(struct kfd_dev *kfd, struct amdgpu_reset_context *reset_context); int kgd2kfd_post_reset(struct kfd_dev *kfd); void kgd2kfd_interrupt(struct kfd_dev *kfd, const void *ih_ring_entry); void kgd2kfd_set_sram_ecc_flag(struct kfd_dev *kfd); void kgd2kfd_smi_event_throttle(struct kfd_dev *kfd, uint64_t throttle_bitmask); -int kgd2kfd_check_and_lock_kfd(void); -void kgd2kfd_unlock_kfd(void); +int kgd2kfd_check_and_lock_kfd(struct kfd_dev *kfd); +void kgd2kfd_unlock_kfd(struct kfd_dev *kfd); int kgd2kfd_start_sched(struct kfd_dev *kfd, uint32_t node_id); +int kgd2kfd_start_sched_all_nodes(struct kfd_dev *kfd); int kgd2kfd_stop_sched(struct kfd_dev *kfd, uint32_t node_id); +int kgd2kfd_stop_sched_all_nodes(struct kfd_dev *kfd); bool kgd2kfd_compute_active(struct kfd_dev *kfd, uint32_t node_id); bool kgd2kfd_vmfault_fast_path(struct amdgpu_device *adev, struct amdgpu_iv_entry *entry, bool retry_fault); @@ -454,11 +462,20 @@ static inline void kgd2kfd_device_exit(struct kfd_dev *kfd) { } -static inline void kgd2kfd_suspend(struct kfd_dev *kfd, bool run_pm) +static inline void kgd2kfd_suspend(struct kfd_dev *kfd, bool suspend_proc) { } -static inline int kgd2kfd_resume(struct kfd_dev *kfd, bool run_pm) +static inline int kgd2kfd_resume(struct kfd_dev *kfd, bool resume_proc) +{ + return 0; +} + +static inline void kgd2kfd_suspend_process(struct kfd_dev *kfd) +{ +} + +static inline int kgd2kfd_resume_process(struct kfd_dev *kfd) { return 0; } @@ -489,12 +506,12 @@ void kgd2kfd_smi_event_throttle(struct kfd_dev *kfd, uint64_t throttle_bitmask) { } -static inline int kgd2kfd_check_and_lock_kfd(void) +static inline int kgd2kfd_check_and_lock_kfd(struct kfd_dev *kfd) { return 0; } -static inline void kgd2kfd_unlock_kfd(void) +static inline void kgd2kfd_unlock_kfd(struct kfd_dev *kfd) { } @@ -503,11 +520,21 @@ static inline int kgd2kfd_start_sched(struct kfd_dev *kfd, uint32_t node_id) return 0; } +static inline int kgd2kfd_start_sched_all_nodes(struct kfd_dev *kfd) +{ + return 0; +} + static inline int kgd2kfd_stop_sched(struct kfd_dev *kfd, uint32_t node_id) { return 0; } +static inline int kgd2kfd_stop_sched_all_nodes(struct kfd_dev *kfd) +{ + return 0; +} + static inline bool 
kgd2kfd_compute_active(struct kfd_dev *kfd, uint32_t node_id) { return false; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c index ffbaa8bc5eea..1105a09e55dc 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c @@ -320,7 +320,7 @@ static void set_barrier_auto_waitcnt(struct amdgpu_device *adev, bool enable_wai if (!down_read_trylock(&adev->reset_domain->sem)) return; - amdgpu_amdkfd_suspend(adev, false); + amdgpu_amdkfd_suspend(adev, true); if (suspend_resume_compute_scheduler(adev, true)) goto out; @@ -333,7 +333,7 @@ static void set_barrier_auto_waitcnt(struct amdgpu_device *adev, bool enable_wai out: suspend_resume_compute_scheduler(adev, false); - amdgpu_amdkfd_resume(adev, false); + amdgpu_amdkfd_resume(adev, true); up_read(&adev->reset_domain->sem); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c index 04ef0ca10541..0239114fb6c4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c @@ -352,7 +352,7 @@ static int kgd_hqd_dump(struct amdgpu_device *adev, (*dump)[i++][1] = RREG32_SOC15_IP(GC, addr); \ } while (0) - *dump = kmalloc(HQD_N_REGS*2*sizeof(uint32_t), GFP_KERNEL); + *dump = kmalloc_array(HQD_N_REGS, sizeof(**dump), GFP_KERNEL); if (*dump == NULL) return -ENOMEM; @@ -449,7 +449,7 @@ static int kgd_hqd_sdma_dump(struct amdgpu_device *adev, #undef HQD_N_REGS #define HQD_N_REGS (19+6+7+10) - *dump = kmalloc(HQD_N_REGS*2*sizeof(uint32_t), GFP_KERNEL); + *dump = kmalloc_array(HQD_N_REGS, sizeof(**dump), GFP_KERNEL); if (*dump == NULL) return -ENOMEM; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10_3.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10_3.c index 6d08bc2781a3..f2278a0937ff 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10_3.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10_3.c @@ -338,7 +338,7 @@ static int hqd_dump_v10_3(struct amdgpu_device *adev, (*dump)[i++][1] = RREG32_SOC15_IP(GC, addr); \ } while (0) - *dump = kmalloc(HQD_N_REGS*2*sizeof(uint32_t), GFP_KERNEL); + *dump = kmalloc_array(HQD_N_REGS, sizeof(**dump), GFP_KERNEL); if (*dump == NULL) return -ENOMEM; @@ -435,7 +435,7 @@ static int hqd_sdma_dump_v10_3(struct amdgpu_device *adev, #undef HQD_N_REGS #define HQD_N_REGS (19+6+7+12) - *dump = kmalloc(HQD_N_REGS*2*sizeof(uint32_t), GFP_KERNEL); + *dump = kmalloc_array(HQD_N_REGS, sizeof(**dump), GFP_KERNEL); if (*dump == NULL) return -ENOMEM; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v11.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v11.c index e0e6a6a49d90..aaccf0b9947d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v11.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v11.c @@ -323,7 +323,7 @@ static int hqd_dump_v11(struct amdgpu_device *adev, (*dump)[i++][1] = RREG32(addr); \ } while (0) - *dump = kmalloc(HQD_N_REGS*2*sizeof(uint32_t), GFP_KERNEL); + *dump = kmalloc_array(HQD_N_REGS, sizeof(**dump), GFP_KERNEL); if (*dump == NULL) return -ENOMEM; @@ -420,7 +420,7 @@ static int hqd_sdma_dump_v11(struct amdgpu_device *adev, #undef HQD_N_REGS #define HQD_N_REGS (7+11+1+12+12) - *dump = kmalloc(HQD_N_REGS*2*sizeof(uint32_t), GFP_KERNEL); + *dump = kmalloc_array(HQD_N_REGS, sizeof(**dump), GFP_KERNEL); if (*dump == NULL) return -ENOMEM; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v12.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v12.c index 6f0dc23c901b..e0ceab400b2d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v12.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v12.c @@ -115,7 +115,7 @@ static int hqd_dump_v12(struct amdgpu_device *adev, (*dump)[i++][1] = RREG32(addr); \ } while (0) - *dump = kmalloc(HQD_N_REGS*2*sizeof(uint32_t), GFP_KERNEL); + *dump = kmalloc_array(HQD_N_REGS, sizeof(**dump), GFP_KERNEL); if (*dump == NULL) return -ENOMEM; @@ -146,7 +146,7 @@ static int hqd_sdma_dump_v12(struct amdgpu_device *adev, #undef HQD_N_REGS #define HQD_N_REGS (last_reg - first_reg + 1) - *dump = kmalloc(HQD_N_REGS*2*sizeof(uint32_t), GFP_KERNEL); + *dump = kmalloc_array(HQD_N_REGS, sizeof(**dump), GFP_KERNEL); if (*dump == NULL) return -ENOMEM; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c index ca4a6b82817f..df77558e03ef 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c @@ -561,6 +561,13 @@ static uint32_t read_vmid_from_vmfault_reg(struct amdgpu_device *adev) return REG_GET_FIELD(status, VM_CONTEXT1_PROTECTION_FAULT_STATUS, VMID); } +static uint32_t kgd_hqd_sdma_get_doorbell(struct amdgpu_device *adev, + int engine, int queue) + +{ + return 0; +} + const struct kfd2kgd_calls gfx_v7_kfd2kgd = { .program_sh_mem_settings = kgd_program_sh_mem_settings, .set_pasid_vmid_mapping = kgd_set_pasid_vmid_mapping, @@ -578,4 +585,5 @@ const struct kfd2kgd_calls gfx_v7_kfd2kgd = { .set_scratch_backing_va = set_scratch_backing_va, .set_vm_context_page_table_base = set_vm_context_page_table_base, .read_vmid_from_vmfault_reg = read_vmid_from_vmfault_reg, + .hqd_sdma_get_doorbell = kgd_hqd_sdma_get_doorbell, }; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c index 0f3e2944edd7..e68c0fa8d751 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c @@ -582,6 +582,13 @@ static void set_vm_context_page_table_base(struct amdgpu_device *adev, lower_32_bits(page_table_base)); } +static uint32_t kgd_hqd_sdma_get_doorbell(struct amdgpu_device *adev, + int engine, int queue) + +{ + return 0; +} + const struct kfd2kgd_calls gfx_v8_kfd2kgd = { .program_sh_mem_settings = kgd_program_sh_mem_settings, .set_pasid_vmid_mapping = kgd_set_pasid_vmid_mapping, @@ -599,4 +606,5 @@ const struct kfd2kgd_calls gfx_v8_kfd2kgd = { get_atc_vmid_pasid_mapping_info, .set_scratch_backing_va = set_scratch_backing_va, .set_vm_context_page_table_base = set_vm_context_page_table_base, + .hqd_sdma_get_doorbell = kgd_hqd_sdma_get_doorbell, }; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index 260165bbe373..a2ca9acf8c4e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -213,19 +213,35 @@ int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev, spin_lock(&kfd_mem_limit.mem_limit_lock); if (kfd_mem_limit.system_mem_used + system_mem_needed > - kfd_mem_limit.max_system_mem_limit) + kfd_mem_limit.max_system_mem_limit) { pr_debug("Set no_system_mem_limit=1 if using shared memory\n"); + if (!no_system_mem_limit) { + ret = -ENOMEM; + goto release; + } + } - if ((kfd_mem_limit.system_mem_used + system_mem_needed > - kfd_mem_limit.max_system_mem_limit && !no_system_mem_limit) || - (kfd_mem_limit.ttm_mem_used + 
ttm_mem_needed > - kfd_mem_limit.max_ttm_mem_limit) || - (adev && xcp_id >= 0 && adev->kfd.vram_used[xcp_id] + vram_needed > - vram_size - reserved_for_pt - reserved_for_ras - atomic64_read(&adev->vram_pin_size))) { + if (kfd_mem_limit.ttm_mem_used + ttm_mem_needed > + kfd_mem_limit.max_ttm_mem_limit) { ret = -ENOMEM; goto release; } + /*if is_app_apu is false and apu_prefer_gtt is true, it is an APU with + * carve out < gtt. In that case, VRAM allocation will go to gtt domain, skip + * VRAM check since ttm_mem_limit check already cover this allocation + */ + + if (adev && xcp_id >= 0 && (!adev->apu_prefer_gtt || adev->gmc.is_app_apu)) { + uint64_t vram_available = + vram_size - reserved_for_pt - reserved_for_ras - + atomic64_read(&adev->vram_pin_size); + if (adev->kfd.vram_used[xcp_id] + vram_needed > vram_available) { + ret = -ENOMEM; + goto release; + } + } + /* Update memory accounting by decreasing available system * memory, TTM memory and GPU memory as computed above */ @@ -494,7 +510,8 @@ static int vm_update_pds(struct amdgpu_vm *vm, struct amdgpu_sync *sync) return amdgpu_sync_fence(sync, vm->last_update, GFP_KERNEL); } -static uint64_t get_pte_flags(struct amdgpu_device *adev, struct kgd_mem *mem) +static uint64_t get_pte_flags(struct amdgpu_device *adev, struct amdgpu_vm *vm, + struct kgd_mem *mem) { uint32_t mapping_flags = AMDGPU_VM_PAGE_READABLE | AMDGPU_VM_MTYPE_DEFAULT; @@ -504,7 +521,7 @@ static uint64_t get_pte_flags(struct amdgpu_device *adev, struct kgd_mem *mem) if (mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_EXECUTABLE) mapping_flags |= AMDGPU_VM_PAGE_EXECUTABLE; - return amdgpu_gem_va_map_flags(adev, mapping_flags); + return mapping_flags; } /** @@ -961,7 +978,7 @@ static int kfd_mem_attach(struct amdgpu_device *adev, struct kgd_mem *mem, goto unwind; } attachment[i]->va = va; - attachment[i]->pte_flags = get_pte_flags(adev, mem); + attachment[i]->pte_flags = get_pte_flags(adev, vm, mem); attachment[i]->adev = adev; list_add(&attachment[i]->list, &mem->attachments); @@ -1072,7 +1089,7 @@ static int init_user_pages(struct kgd_mem *mem, uint64_t user_addr, return 0; } - ret = amdgpu_ttm_tt_get_user_pages(bo, bo->tbo.ttm->pages, &range); + ret = amdgpu_ttm_tt_get_user_pages(bo, &range); if (ret) { if (ret == -EAGAIN) pr_debug("Failed to get user pages, try again\n"); @@ -1086,6 +1103,9 @@ static int init_user_pages(struct kgd_mem *mem, uint64_t user_addr, pr_err("%s: Failed to reserve BO\n", __func__); goto release_out; } + + amdgpu_ttm_tt_set_user_pages(bo->tbo.ttm, range); + amdgpu_bo_placement_from_domain(bo, mem->domain); ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx); if (ret) @@ -1626,11 +1646,15 @@ size_t amdgpu_amdkfd_get_available_memory(struct amdgpu_device *adev, uint64_t vram_available, system_mem_available, ttm_mem_available; spin_lock(&kfd_mem_limit.mem_limit_lock); - vram_available = KFD_XCP_MEMORY_SIZE(adev, xcp_id) - - adev->kfd.vram_used_aligned[xcp_id] - - atomic64_read(&adev->vram_pin_size) - - reserved_for_pt - - reserved_for_ras; + if (adev->apu_prefer_gtt && !adev->gmc.is_app_apu) + vram_available = KFD_XCP_MEMORY_SIZE(adev, xcp_id) + - adev->kfd.vram_used_aligned[xcp_id]; + else + vram_available = KFD_XCP_MEMORY_SIZE(adev, xcp_id) + - adev->kfd.vram_used_aligned[xcp_id] + - atomic64_read(&adev->vram_pin_size) + - reserved_for_pt + - reserved_for_ras; if (adev->apu_prefer_gtt) { system_mem_available = no_system_mem_limit ? 
@@ -2305,10 +2329,9 @@ void amdgpu_amdkfd_gpuvm_unmap_gtt_bo_from_kernel(struct kgd_mem *mem) int amdgpu_amdkfd_gpuvm_get_vm_fault_info(struct amdgpu_device *adev, struct kfd_vm_fault_info *mem) { - if (atomic_read(&adev->gmc.vm_fault_info_updated) == 1) { + if (atomic_read_acquire(&adev->gmc.vm_fault_info_updated) == 1) { *mem = *adev->gmc.vm_fault_info; - mb(); /* make sure read happened */ - atomic_set(&adev->gmc.vm_fault_info_updated, 0); + atomic_set_release(&adev->gmc.vm_fault_info_updated, 0); } return 0; } @@ -2544,8 +2567,7 @@ static int update_invalid_user_pages(struct amdkfd_process_info *process_info, } /* Get updated user pages */ - ret = amdgpu_ttm_tt_get_user_pages(bo, bo->tbo.ttm->pages, - &mem->range); + ret = amdgpu_ttm_tt_get_user_pages(bo, &mem->range); if (ret) { pr_debug("Failed %d to get user pages\n", ret); @@ -2563,17 +2585,24 @@ static int update_invalid_user_pages(struct amdkfd_process_info *process_info, * from the KFD, trigger a segmentation fault in VM debug mode. */ if (amdgpu_ttm_adev(bo->tbo.bdev)->debug_vm_userptr) { + struct kfd_process *p; + pr_err("Pid %d unmapped memory before destroying userptr at GPU addr 0x%llx\n", pid_nr(process_info->pid), mem->va); // Send GPU VM fault to user space - kfd_signal_vm_fault_event_with_userptr(kfd_lookup_process_by_pid(process_info->pid), - mem->va); + p = kfd_lookup_process_by_pid(process_info->pid); + if (p) { + kfd_signal_vm_fault_event_with_userptr(p, mem->va); + kfd_unref_process(p); + } } ret = 0; } + amdgpu_ttm_tt_set_user_pages(bo->tbo.ttm, mem->range); + mutex_lock(&process_info->notifier_lock); /* Mark the BO as valid unless it was invalidated @@ -2969,9 +2998,22 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence __rcu * struct amdgpu_device *adev = amdgpu_ttm_adev( peer_vm->root.bo->tbo.bdev); + struct amdgpu_fpriv *fpriv = + container_of(peer_vm, struct amdgpu_fpriv, vm); + + ret = amdgpu_vm_bo_update(adev, fpriv->prt_va, false); + if (ret) { + dev_dbg(adev->dev, + "Memory eviction: handle PRT moved failed, pid %8d. Try again.\n", + pid_nr(process_info->pid)); + goto validate_map_fail; + } + ret = amdgpu_vm_handle_moved(adev, peer_vm, &exec.ticket); if (ret) { - pr_debug("Memory eviction: handle moved failed. Try again\n"); + dev_dbg(adev->dev, + "Memory eviction: handle moved failed, pid %8d. 
Try again.\n", + pid_nr(process_info->pid)); goto validate_map_fail; } } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c index e476e45b996a..763f2b8dcf13 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c @@ -706,7 +706,6 @@ int amdgpu_atombios_get_clock_info(struct amdgpu_device *adev) } adev->clock.dp_extclk = le16_to_cpu(firmware_info->info_21.usUniphyDPModeExtClkFreq); - adev->clock.current_dispclk = adev->clock.default_dispclk; adev->clock.max_pixel_clock = le16_to_cpu(firmware_info->info.usMaxPixelClock); if (adev->clock.max_pixel_clock == 0) @@ -1816,16 +1815,43 @@ static ssize_t amdgpu_atombios_get_vbios_version(struct device *dev, return sysfs_emit(buf, "%s\n", ctx->vbios_pn); } +static ssize_t amdgpu_atombios_get_vbios_build(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct drm_device *ddev = dev_get_drvdata(dev); + struct amdgpu_device *adev = drm_to_adev(ddev); + struct atom_context *ctx = adev->mode_info.atom_context; + + return sysfs_emit(buf, "%s\n", ctx->build_num); +} + static DEVICE_ATTR(vbios_version, 0444, amdgpu_atombios_get_vbios_version, NULL); +static DEVICE_ATTR(vbios_build, 0444, amdgpu_atombios_get_vbios_build, NULL); static struct attribute *amdgpu_vbios_version_attrs[] = { - &dev_attr_vbios_version.attr, - NULL + &dev_attr_vbios_version.attr, &dev_attr_vbios_build.attr, NULL }; +static umode_t amdgpu_vbios_version_attrs_is_visible(struct kobject *kobj, + struct attribute *attr, + int index) +{ + struct device *dev = kobj_to_dev(kobj); + struct drm_device *ddev = dev_get_drvdata(dev); + struct amdgpu_device *adev = drm_to_adev(ddev); + struct atom_context *ctx = adev->mode_info.atom_context; + + if (attr == &dev_attr_vbios_build.attr && !strlen(ctx->build_num)) + return 0; + + return attr->mode; +} + const struct attribute_group amdgpu_vbios_version_attr_group = { - .attrs = amdgpu_vbios_version_attrs + .attrs = amdgpu_vbios_version_attrs, + .is_visible = amdgpu_vbios_version_attrs_is_visible, }; int amdgpu_atombios_sysfs_init(struct amdgpu_device *adev) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c index 702f6610d024..66fb37b64388 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c @@ -184,43 +184,36 @@ void amdgpu_bo_list_put(struct amdgpu_bo_list *list) int amdgpu_bo_create_list_entry_array(struct drm_amdgpu_bo_list_in *in, struct drm_amdgpu_bo_list_entry **info_param) { - const void __user *uptr = u64_to_user_ptr(in->bo_info_ptr); const uint32_t info_size = sizeof(struct drm_amdgpu_bo_list_entry); + const void __user *uptr = u64_to_user_ptr(in->bo_info_ptr); + const uint32_t bo_info_size = in->bo_info_size; + const uint32_t bo_number = in->bo_number; struct drm_amdgpu_bo_list_entry *info; - int r; - - info = kvmalloc_array(in->bo_number, info_size, GFP_KERNEL); - if (!info) - return -ENOMEM; /* copy the handle array from userspace to a kernel buffer */ - r = -EFAULT; - if (likely(info_size == in->bo_info_size)) { - unsigned long bytes = in->bo_number * - in->bo_info_size; - - if (copy_from_user(info, uptr, bytes)) - goto error_free; - + if (likely(info_size == bo_info_size)) { + info = vmemdup_array_user(uptr, bo_number, info_size); + if (IS_ERR(info)) + return PTR_ERR(info); } else { - unsigned long bytes = min(in->bo_info_size, info_size); + const uint32_t bytes = min(bo_info_size, info_size); unsigned i; 
- memset(info, 0, in->bo_number * info_size); - for (i = 0; i < in->bo_number; ++i) { - if (copy_from_user(&info[i], uptr, bytes)) - goto error_free; + info = kvmalloc_array(bo_number, info_size, GFP_KERNEL); + if (!info) + return -ENOMEM; - uptr += in->bo_info_size; + memset(info, 0, bo_number * info_size); + for (i = 0; i < bo_number; ++i, uptr += bo_info_size) { + if (copy_from_user(&info[i], uptr, bytes)) { + kvfree(info); + return -EFAULT; + } } } *info_param = info; return 0; - -error_free: - kvfree(info); - return r; } int amdgpu_bo_list_ioctl(struct drm_device *dev, void *data, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h index 555cd6d877c3..a716c9886c74 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h @@ -38,7 +38,6 @@ struct amdgpu_bo_list_entry { struct amdgpu_bo *bo; struct amdgpu_bo_va *bo_va; uint32_t priority; - struct page **user_pages; struct hmm_range *range; bool user_invalidated; }; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c index 5e375e9c4f5d..47e9bfba0642 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c @@ -398,30 +398,28 @@ static void amdgpu_connector_add_common_modes(struct drm_encoder *encoder, struct drm_display_mode *mode = NULL; struct drm_display_mode *native_mode = &amdgpu_encoder->native_mode; int i; - static const struct mode_size { + int n; + struct mode_size { + char name[DRM_DISPLAY_MODE_LEN]; int w; int h; - } common_modes[17] = { - { 640, 480}, - { 720, 480}, - { 800, 600}, - { 848, 480}, - {1024, 768}, - {1152, 768}, - {1280, 720}, - {1280, 800}, - {1280, 854}, - {1280, 960}, - {1280, 1024}, - {1440, 900}, - {1400, 1050}, - {1680, 1050}, - {1600, 1200}, - {1920, 1080}, - {1920, 1200} + } common_modes[] = { + { "640x480", 640, 480}, + { "800x600", 800, 600}, + { "1024x768", 1024, 768}, + { "1280x720", 1280, 720}, + { "1280x800", 1280, 800}, + {"1280x1024", 1280, 1024}, + { "1440x900", 1440, 900}, + {"1680x1050", 1680, 1050}, + {"1600x1200", 1600, 1200}, + {"1920x1080", 1920, 1080}, + {"1920x1200", 1920, 1200} }; - for (i = 0; i < 17; i++) { + n = ARRAY_SIZE(common_modes); + + for (i = 0; i < n; i++) { if (amdgpu_encoder->devices & (ATOM_DEVICE_TV_SUPPORT)) { if (common_modes[i].w > 1024 || common_modes[i].h > 768) @@ -434,12 +432,11 @@ static void amdgpu_connector_add_common_modes(struct drm_encoder *encoder, common_modes[i].h == native_mode->vdisplay)) continue; } - if (common_modes[i].w < 320 || common_modes[i].h < 200) - continue; mode = drm_cvt_mode(dev, common_modes[i].w, common_modes[i].h, 60, false, false, false); if (!mode) return; + strscpy(mode->name, common_modes[i].name, DRM_DISPLAY_MODE_LEN); drm_mode_probed_add(connector, mode); } @@ -1195,29 +1192,69 @@ static void amdgpu_connector_dvi_force(struct drm_connector *connector) amdgpu_connector->use_digital = true; } +/** + * amdgpu_max_hdmi_pixel_clock - Return max supported HDMI (TMDS) pixel clock + * @adev: pointer to amdgpu_device + * + * Return: maximum supported HDMI (TMDS) pixel clock in KHz. 
+ */ +static int amdgpu_max_hdmi_pixel_clock(const struct amdgpu_device *adev) +{ + if (adev->asic_type >= CHIP_POLARIS10) + return 600000; + else if (adev->asic_type >= CHIP_TONGA) + return 300000; + else + return 297000; +} + +/** + * amdgpu_connector_dvi_mode_valid - Validate a mode on DVI/HDMI connectors + * @connector: DRM connector to validate the mode on + * @mode: display mode to validate + * + * Validate the given display mode on DVI and HDMI connectors, including + * analog signals on DVI-I. + * + * Return: drm_mode_status indicating whether the mode is valid. + */ static enum drm_mode_status amdgpu_connector_dvi_mode_valid(struct drm_connector *connector, const struct drm_display_mode *mode) { struct drm_device *dev = connector->dev; struct amdgpu_device *adev = drm_to_adev(dev); struct amdgpu_connector *amdgpu_connector = to_amdgpu_connector(connector); + const int max_hdmi_pixel_clock = amdgpu_max_hdmi_pixel_clock(adev); + const int max_dvi_single_link_pixel_clock = 165000; + int max_digital_pixel_clock_khz; /* XXX check mode bandwidth */ - if (amdgpu_connector->use_digital && (mode->clock > 165000)) { - if ((amdgpu_connector->connector_object_id == CONNECTOR_OBJECT_ID_DUAL_LINK_DVI_I) || - (amdgpu_connector->connector_object_id == CONNECTOR_OBJECT_ID_DUAL_LINK_DVI_D) || - (amdgpu_connector->connector_object_id == CONNECTOR_OBJECT_ID_HDMI_TYPE_B)) { - return MODE_OK; - } else if (connector->display_info.is_hdmi) { - /* HDMI 1.3+ supports max clock of 340 Mhz */ - if (mode->clock > 340000) - return MODE_CLOCK_HIGH; - else - return MODE_OK; - } else { - return MODE_CLOCK_HIGH; + if (amdgpu_connector->use_digital) { + switch (amdgpu_connector->connector_object_id) { + case CONNECTOR_OBJECT_ID_HDMI_TYPE_A: + max_digital_pixel_clock_khz = max_hdmi_pixel_clock; + break; + case CONNECTOR_OBJECT_ID_SINGLE_LINK_DVI_I: + case CONNECTOR_OBJECT_ID_SINGLE_LINK_DVI_D: + max_digital_pixel_clock_khz = max_dvi_single_link_pixel_clock; + break; + case CONNECTOR_OBJECT_ID_DUAL_LINK_DVI_I: + case CONNECTOR_OBJECT_ID_DUAL_LINK_DVI_D: + case CONNECTOR_OBJECT_ID_HDMI_TYPE_B: + max_digital_pixel_clock_khz = max_dvi_single_link_pixel_clock * 2; + break; } + + /* When the display EDID claims that it's an HDMI display, + * we use the HDMI encoder mode of the display HW, + * so we should verify against the max HDMI clock here. + */ + if (connector->display_info.is_hdmi) + max_digital_pixel_clock_khz = max_hdmi_pixel_clock; + + if (mode->clock > max_digital_pixel_clock_khz) + return MODE_CLOCK_HIGH; } /* check against the max pixel clock */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cper.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cper.c index 5a234eadae8b..425a3e564360 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cper.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cper.c @@ -1,4 +1,4 @@ -// SPDX-License-Identifier: GPL-2.0 +// SPDX-License-Identifier: MIT /* * Copyright 2025 Advanced Micro Devices, Inc. * @@ -68,7 +68,6 @@ void amdgpu_cper_entry_fill_hdr(struct amdgpu_device *adev, hdr->error_severity = sev; hdr->valid_bits.platform_id = 1; - hdr->valid_bits.partition_id = 1; hdr->valid_bits.timestamp = 1; amdgpu_cper_get_timestamp(&hdr->timestamp); @@ -174,7 +173,7 @@ int amdgpu_cper_entry_fill_runtime_section(struct amdgpu_device *adev, struct cper_sec_nonstd_err *section; bool poison; - poison = (sev == CPER_SEV_NON_FATAL_CORRECTED) ? 
false : true; + poison = sev != CPER_SEV_NON_FATAL_CORRECTED; section_desc = (struct cper_sec_desc *)((uint8_t *)hdr + SEC_DESC_OFFSET(idx)); section = (struct cper_sec_nonstd_err *)((uint8_t *)hdr + NONSTD_SEC_OFFSET(hdr->sec_cnt, idx)); @@ -206,24 +205,31 @@ int amdgpu_cper_entry_fill_bad_page_threshold_section(struct amdgpu_device *adev { struct cper_sec_desc *section_desc; struct cper_sec_nonstd_err *section; + uint32_t socket_id; section_desc = (struct cper_sec_desc *)((uint8_t *)hdr + SEC_DESC_OFFSET(idx)); section = (struct cper_sec_nonstd_err *)((uint8_t *)hdr + NONSTD_SEC_OFFSET(hdr->sec_cnt, idx)); amdgpu_cper_entry_fill_section_desc(adev, section_desc, true, false, - CPER_SEV_NUM, RUNTIME, NONSTD_SEC_LEN, + CPER_SEV_FATAL, RUNTIME, NONSTD_SEC_LEN, NONSTD_SEC_OFFSET(hdr->sec_cnt, idx)); section->hdr.valid_bits.err_info_cnt = 1; section->hdr.valid_bits.err_context_cnt = 1; section->info.error_type = RUNTIME; + section->info.valid_bits.ms_chk = 1; section->info.ms_chk_bits.err_type_valid = 1; + section->info.ms_chk_bits.err_type = 1; + section->info.ms_chk_bits.pcc = 1; section->ctx.reg_ctx_type = CPER_CTX_TYPE_CRASH; section->ctx.reg_arr_size = sizeof(section->ctx.reg_dump); /* Hardcoded Reg dump for bad page threshold CPER */ + socket_id = (adev->smuio.funcs && adev->smuio.funcs->get_socket_id) ? + adev->smuio.funcs->get_socket_id(adev) : + 0; section->ctx.reg_dump[CPER_ACA_REG_CTL_LO] = 0x1; section->ctx.reg_dump[CPER_ACA_REG_CTL_HI] = 0x0; section->ctx.reg_dump[CPER_ACA_REG_STATUS_LO] = 0x137; @@ -234,8 +240,8 @@ int amdgpu_cper_entry_fill_bad_page_threshold_section(struct amdgpu_device *adev section->ctx.reg_dump[CPER_ACA_REG_MISC0_HI] = 0x0; section->ctx.reg_dump[CPER_ACA_REG_CONFIG_LO] = 0x2; section->ctx.reg_dump[CPER_ACA_REG_CONFIG_HI] = 0x1ff; - section->ctx.reg_dump[CPER_ACA_REG_IPID_LO] = 0x0; - section->ctx.reg_dump[CPER_ACA_REG_IPID_HI] = 0x96; + section->ctx.reg_dump[CPER_ACA_REG_IPID_LO] = (socket_id / 4) & 0x01; + section->ctx.reg_dump[CPER_ACA_REG_IPID_HI] = 0x096 | (((socket_id % 4) & 0x3) << 12); section->ctx.reg_dump[CPER_ACA_REG_SYND_LO] = 0x0; section->ctx.reg_dump[CPER_ACA_REG_SYND_HI] = 0x0; @@ -326,7 +332,9 @@ int amdgpu_cper_generate_bp_threshold_record(struct amdgpu_device *adev) return -ENOMEM; } - amdgpu_cper_entry_fill_hdr(adev, bp_threshold, AMDGPU_CPER_TYPE_BP_THRESHOLD, CPER_SEV_NUM); + amdgpu_cper_entry_fill_hdr(adev, bp_threshold, + AMDGPU_CPER_TYPE_BP_THRESHOLD, + CPER_SEV_FATAL); ret = amdgpu_cper_entry_fill_bad_page_threshold_section(adev, bp_threshold, 0); if (ret) return ret; @@ -457,7 +465,7 @@ calc: void amdgpu_cper_ring_write(struct amdgpu_ring *ring, void *src, int count) { - u64 pos, wptr_old, rptr = *ring->rptr_cpu_addr & ring->ptr_mask; + u64 pos, wptr_old, rptr; int rec_cnt_dw = count >> 2; u32 chunk, ent_sz; u8 *s = (u8 *)src; @@ -470,9 +478,11 @@ void amdgpu_cper_ring_write(struct amdgpu_ring *ring, void *src, int count) return; } + mutex_lock(&ring->adev->cper.ring_lock); + wptr_old = ring->wptr; + rptr = *ring->rptr_cpu_addr & ring->ptr_mask; - mutex_lock(&ring->adev->cper.ring_lock); while (count) { ent_sz = amdgpu_cper_ring_get_ent_sz(ring, ring->wptr); chunk = umin(ent_sz, count); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cper.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_cper.h index bcb97d245673..353421807387 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cper.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cper.h @@ -1,4 +1,4 @@ -/* SPDX-License-Identifier: GPL-2.0 */ +/* SPDX-License-Identifier: MIT */ /* * Copyright 2025 Advanced 
Micro Devices, Inc. * diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index a2adaacf6adb..2f6a96af7fb1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -29,6 +29,7 @@ #include <linux/pagemap.h> #include <linux/sync_file.h> #include <linux/dma-buf.h> +#include <linux/hmm.h> #include <drm/amdgpu_drm.h> #include <drm/drm_syncobj.h> @@ -178,25 +179,17 @@ static int amdgpu_cs_pass1(struct amdgpu_cs_parser *p, struct amdgpu_fpriv *fpriv = p->filp->driver_priv; unsigned int num_ibs[AMDGPU_CS_GANG_SIZE] = { }; struct amdgpu_vm *vm = &fpriv->vm; - uint64_t *chunk_array_user; uint64_t *chunk_array; uint32_t uf_offset = 0; size_t size; int ret; int i; - chunk_array = kvmalloc_array(cs->in.num_chunks, sizeof(uint64_t), - GFP_KERNEL); - if (!chunk_array) - return -ENOMEM; - - /* get chunks */ - chunk_array_user = u64_to_user_ptr(cs->in.chunks); - if (copy_from_user(chunk_array, chunk_array_user, - sizeof(uint64_t)*cs->in.num_chunks)) { - ret = -EFAULT; - goto free_chunk; - } + chunk_array = memdup_array_user(u64_to_user_ptr(cs->in.chunks), + cs->in.num_chunks, + sizeof(uint64_t)); + if (IS_ERR(chunk_array)) + return PTR_ERR(chunk_array); p->nchunks = cs->in.num_chunks; p->chunks = kvmalloc_array(p->nchunks, sizeof(struct amdgpu_cs_chunk), @@ -209,7 +202,6 @@ static int amdgpu_cs_pass1(struct amdgpu_cs_parser *p, for (i = 0; i < p->nchunks; i++) { struct drm_amdgpu_cs_chunk __user *chunk_ptr = NULL; struct drm_amdgpu_cs_chunk user_chunk; - uint32_t __user *cdata; chunk_ptr = u64_to_user_ptr(chunk_array[i]); if (copy_from_user(&user_chunk, chunk_ptr, @@ -222,20 +214,16 @@ static int amdgpu_cs_pass1(struct amdgpu_cs_parser *p, p->chunks[i].length_dw = user_chunk.length_dw; size = p->chunks[i].length_dw; - cdata = u64_to_user_ptr(user_chunk.chunk_data); - p->chunks[i].kdata = kvmalloc_array(size, sizeof(uint32_t), - GFP_KERNEL); - if (p->chunks[i].kdata == NULL) { - ret = -ENOMEM; + p->chunks[i].kdata = vmemdup_array_user(u64_to_user_ptr(user_chunk.chunk_data), + size, + sizeof(uint32_t)); + if (IS_ERR(p->chunks[i].kdata)) { + ret = PTR_ERR(p->chunks[i].kdata); i--; goto free_partial_kdata; } size *= sizeof(uint32_t); - if (copy_from_user(p->chunks[i].kdata, cdata, size)) { - ret = -EFAULT; - goto free_partial_kdata; - } /* Assume the worst on the following checks */ ret = -EINVAL; @@ -286,7 +274,7 @@ static int amdgpu_cs_pass1(struct amdgpu_cs_parser *p, } } - if (!p->gang_size) { + if (!p->gang_size || (amdgpu_sriov_vf(p->adev) && p->gang_size > 1)) { ret = -EINVAL; goto free_all_kdata; } @@ -376,6 +364,12 @@ static int amdgpu_cs_p2_ib(struct amdgpu_cs_parser *p, if (p->uf_bo && ring->funcs->no_user_fence) return -EINVAL; + if (!p->adev->debug_enable_ce_cs && + chunk_ib->flags & AMDGPU_IB_FLAG_CE) { + dev_err_ratelimited(p->adev->dev, "CE CS is blocked, use debug=0x400 to override\n"); + return -EINVAL; + } + if (chunk_ib->ip_type == AMDGPU_HW_IP_GFX && chunk_ib->flags & AMDGPU_IB_FLAG_PREEMPT) { if (chunk_ib->flags & AMDGPU_IB_FLAG_CE) @@ -396,7 +390,7 @@ static int amdgpu_cs_p2_ib(struct amdgpu_cs_parser *p, chunk_ib->ib_bytes : 0, AMDGPU_IB_POOL_DELAYED, ib); if (r) { - DRM_ERROR("Failed to get ib !\n"); + drm_err(adev_to_drm(p->adev), "Failed to get ib !\n"); return r; } @@ -468,7 +462,7 @@ static int amdgpu_syncobj_lookup_and_add(struct amdgpu_cs_parser *p, r = drm_syncobj_find_fence(p->filp, handle, point, flags, &fence); if (r) { - DRM_ERROR("syncobj %u failed to find fence @ %llu (%d)!\n", + 
drm_err(adev_to_drm(p->adev), "syncobj %u failed to find fence @ %llu (%d)!\n", handle, point, r); return r; } @@ -714,7 +708,7 @@ static void amdgpu_cs_get_threshold_for_moves(struct amdgpu_device *adev, */ const s64 us_upper_bound = 200000; - if (!adev->mm_stats.log2_max_MBps) { + if ((!adev->mm_stats.log2_max_MBps) || !ttm_resource_manager_used(&adev->mman.vram_mgr.manager)) { *max_bytes = 0; *max_vis_bytes = 0; return; @@ -896,26 +890,13 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p, amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) { bool userpage_invalidated = false; struct amdgpu_bo *bo = e->bo; - int i; - - e->user_pages = kvcalloc(bo->tbo.ttm->num_pages, - sizeof(struct page *), - GFP_KERNEL); - if (!e->user_pages) { - DRM_ERROR("kvmalloc_array failure\n"); - r = -ENOMEM; - goto out_free_user_pages; - } - r = amdgpu_ttm_tt_get_user_pages(bo, e->user_pages, &e->range); - if (r) { - kvfree(e->user_pages); - e->user_pages = NULL; + r = amdgpu_ttm_tt_get_user_pages(bo, &e->range); + if (r) goto out_free_user_pages; - } for (i = 0; i < bo->tbo.ttm->num_pages; i++) { - if (bo->tbo.ttm->pages[i] != e->user_pages[i]) { + if (bo->tbo.ttm->pages[i] != hmm_pfn_to_page(e->range->hmm_pfns[i])) { userpage_invalidated = true; break; } @@ -959,7 +940,7 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p, } if (amdgpu_ttm_tt_is_userptr(e->bo->tbo.ttm) && - e->user_invalidated && e->user_pages) { + e->user_invalidated) { amdgpu_bo_placement_from_domain(e->bo, AMDGPU_GEM_DOMAIN_CPU); r = ttm_bo_validate(&e->bo->tbo, &e->bo->placement, @@ -968,11 +949,8 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p, goto out_free_user_pages; amdgpu_ttm_tt_set_user_pages(e->bo->tbo.ttm, - e->user_pages); + e->range); } - - kvfree(e->user_pages); - e->user_pages = NULL; } amdgpu_cs_get_threshold_for_moves(p->adev, &p->bytes_moved_threshold, @@ -983,7 +961,7 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p, r = amdgpu_vm_validate(p->adev, &fpriv->vm, NULL, amdgpu_cs_bo_validate, p); if (r) { - DRM_ERROR("amdgpu_vm_validate() failed.\n"); + drm_err(adev_to_drm(p->adev), "amdgpu_vm_validate() failed.\n"); goto out_free_user_pages; } @@ -1014,11 +992,7 @@ out_free_user_pages: amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) { struct amdgpu_bo *bo = e->bo; - if (!e->user_pages) - continue; amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm, e->range); - kvfree(e->user_pages); - e->user_pages = NULL; e->range = NULL; } mutex_unlock(&p->bo_list->bo_list_mutex); @@ -1061,13 +1035,13 @@ static int amdgpu_cs_patch_ibs(struct amdgpu_cs_parser *p, va_start = ib->gpu_addr & AMDGPU_GMC_HOLE_MASK; r = amdgpu_cs_find_mapping(p, va_start, &aobj, &m); if (r) { - DRM_ERROR("IB va_start is invalid\n"); + drm_err(adev_to_drm(p->adev), "IB va_start is invalid\n"); return r; } if ((va_start + ib->length_dw * 4) > (m->last + 1) * AMDGPU_GPU_PAGE_SIZE) { - DRM_ERROR("IB va_start+ib_bytes is invalid\n"); + drm_err(adev_to_drm(p->adev), "IB va_start+ib_bytes is invalid\n"); return -EINVAL; } @@ -1139,6 +1113,9 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p) } } + if (!amdgpu_vm_ready(vm)) + return -EINVAL; + r = amdgpu_vm_clear_freed(adev, vm, NULL); if (r) return r; @@ -1235,7 +1212,7 @@ static int amdgpu_cs_sync_rings(struct amdgpu_cs_parser *p) r = amdgpu_ctx_wait_prev_fence(p->ctx, p->entities[p->gang_leader_idx]); if (r) { if (r != -ERESTARTSYS) - DRM_ERROR("amdgpu_ctx_wait_prev_fence failed.\n"); + drm_err(adev_to_drm(p->adev), "amdgpu_ctx_wait_prev_fence failed.\n"); 
return r; } @@ -1448,7 +1425,7 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) r = amdgpu_cs_parser_init(&parser, adev, filp, data); if (r) { - DRM_ERROR_RATELIMITED("Failed to initialize parser %d!\n", r); + drm_err_ratelimited(dev, "Failed to initialize parser %d!\n", r); return r; } @@ -1463,9 +1440,9 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) r = amdgpu_cs_parser_bos(&parser, data); if (r) { if (r == -ENOMEM) - DRM_ERROR("Not enough memory for command submission!\n"); + drm_err(dev, "Not enough memory for command submission!\n"); else if (r != -ERESTARTSYS && r != -EAGAIN) - DRM_DEBUG("Failed to process the buffer list %d!\n", r); + drm_dbg(dev, "Failed to process the buffer list %d!\n", r); goto error_fini; } @@ -1764,30 +1741,21 @@ int amdgpu_cs_wait_fences_ioctl(struct drm_device *dev, void *data, { struct amdgpu_device *adev = drm_to_adev(dev); union drm_amdgpu_wait_fences *wait = data; - uint32_t fence_count = wait->in.fence_count; - struct drm_amdgpu_fence *fences_user; struct drm_amdgpu_fence *fences; int r; /* Get the fences from userspace */ - fences = kmalloc_array(fence_count, sizeof(struct drm_amdgpu_fence), - GFP_KERNEL); - if (fences == NULL) - return -ENOMEM; - - fences_user = u64_to_user_ptr(wait->in.fences); - if (copy_from_user(fences, fences_user, - sizeof(struct drm_amdgpu_fence) * fence_count)) { - r = -EFAULT; - goto err_free_fences; - } + fences = memdup_array_user(u64_to_user_ptr(wait->in.fences), + wait->in.fence_count, + sizeof(struct drm_amdgpu_fence)); + if (IS_ERR(fences)) + return PTR_ERR(fences); if (wait->in.wait_all) r = amdgpu_cs_wait_all_fences(adev, filp, wait, fences); else r = amdgpu_cs_wait_any_fence(adev, filp, wait, fences); -err_free_fences: kfree(fences); return r; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c index 85567d0d9545..f5d5c45ddc0d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c @@ -944,6 +944,7 @@ static void amdgpu_ctx_mgr_entity_fini(struct amdgpu_ctx_mgr *mgr) drm_sched_entity_fini(entity); } } + kref_put(&ctx->refcount, amdgpu_ctx_fini); } } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c index dac4b926e7be..a70651050acf 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c @@ -1902,7 +1902,7 @@ no_preempt: continue; } job = to_amdgpu_job(s_job); - if (preempted && (&job->hw_fence) == fence) + if (preempted && (&job->hw_fence.base) == fence) /* mark the job as preempted */ job->preemption_status |= AMDGPU_IB_PREEMPTED; } @@ -2131,6 +2131,61 @@ int amdgpu_debugfs_init(struct amdgpu_device *adev) return 0; } +static int amdgpu_pt_info_read(struct seq_file *m, void *unused) +{ + struct drm_file *file; + struct amdgpu_fpriv *fpriv; + struct amdgpu_bo *root_bo; + struct amdgpu_device *adev; + int r; + + file = m->private; + if (!file) + return -EINVAL; + + adev = drm_to_adev(file->minor->dev); + fpriv = file->driver_priv; + if (!fpriv || !fpriv->vm.root.bo) + return -ENODEV; + + root_bo = amdgpu_bo_ref(fpriv->vm.root.bo); + r = amdgpu_bo_reserve(root_bo, true); + if (r) { + amdgpu_bo_unref(&root_bo); + return -EINVAL; + } + + seq_printf(m, "pd_address: 0x%llx\n", amdgpu_gmc_pd_addr(fpriv->vm.root.bo)); + seq_printf(m, "max_pfn: 0x%llx\n", adev->vm_manager.max_pfn); + seq_printf(m, "num_level: 0x%x\n", adev->vm_manager.num_level); + seq_printf(m, 
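
/*
 * A minimal sketch of the pattern amdgpu_cs_wait_fences_ioctl() above now
 * uses: memdup_array_user() from <linux/string.h> folds the kmalloc_array()
 * + copy_from_user() + unwind sequence into one call and adds an overflow
 * check on count * element size. The wrapper below is hypothetical; only
 * the helper and its error conventions are real.
 */
#include <linux/err.h>
#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/string.h>
#include <linux/types.h>
#include <drm/amdgpu_drm.h>

static int example_copy_fence_array(u64 user_addr, u32 count,
				    struct drm_amdgpu_fence **out)
{
	struct drm_amdgpu_fence *fences;

	fences = memdup_array_user(u64_to_user_ptr(user_addr),
				   count, sizeof(*fences));
	if (IS_ERR(fences))
		return PTR_ERR(fences);	/* -EFAULT, -ENOMEM or -EOVERFLOW */

	*out = fences;	/* caller kfree()s the array when done */
	return 0;
}
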
"block_size: 0x%x\n", adev->vm_manager.block_size); + seq_printf(m, "fragment_size: 0x%x\n", adev->vm_manager.fragment_size); + + amdgpu_bo_unreserve(root_bo); + amdgpu_bo_unref(&root_bo); + + return 0; +} + +static int amdgpu_pt_info_open(struct inode *inode, struct file *file) +{ + return single_open(file, amdgpu_pt_info_read, inode->i_private); +} + +static const struct file_operations amdgpu_pt_info_fops = { + .owner = THIS_MODULE, + .open = amdgpu_pt_info_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +void amdgpu_debugfs_vm_init(struct drm_file *file) +{ + debugfs_create_file("vm_pagetable_info", 0444, file->debugfs_client, file, + &amdgpu_pt_info_fops); +} + #else int amdgpu_debugfs_init(struct amdgpu_device *adev) { @@ -2140,4 +2195,7 @@ int amdgpu_debugfs_regs_init(struct amdgpu_device *adev) { return 0; } +void amdgpu_debugfs_vm_init(struct drm_file *file) +{ +} #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.h index 0425432d8659..e7b3c38e5186 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.h @@ -33,4 +33,5 @@ void amdgpu_debugfs_fence_init(struct amdgpu_device *adev); void amdgpu_debugfs_firmware_init(struct amdgpu_device *adev); void amdgpu_debugfs_gem_init(struct amdgpu_device *adev); void amdgpu_debugfs_mes_event_log_init(struct amdgpu_device *adev); +void amdgpu_debugfs_vm_init(struct drm_file *file); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index a59f194e3360..3d032c4e2dce 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -95,6 +95,7 @@ MODULE_FIRMWARE("amdgpu/picasso_gpu_info.bin"); MODULE_FIRMWARE("amdgpu/raven2_gpu_info.bin"); MODULE_FIRMWARE("amdgpu/arcturus_gpu_info.bin"); MODULE_FIRMWARE("amdgpu/navi12_gpu_info.bin"); +MODULE_FIRMWARE("amdgpu/cyan_skillfish_gpu_info.bin"); #define AMDGPU_RESUME_MS 2000 #define AMDGPU_MAX_RETRY_LIMIT 2 @@ -178,6 +179,8 @@ struct amdgpu_init_level amdgpu_init_minimal_xgmi = { BIT(AMD_IP_BLOCK_TYPE_PSP) }; +static void amdgpu_device_load_switch_state(struct amdgpu_device *adev); + static inline bool amdgpu_ip_member_of_hwini(struct amdgpu_device *adev, enum amd_ip_block_type block) { @@ -232,7 +235,7 @@ static int amdgpu_device_attr_sysfs_init(struct amdgpu_device *adev) { int ret = 0; - if (!amdgpu_sriov_vf(adev)) + if (amdgpu_nbio_is_replay_cnt_supported(adev)) ret = sysfs_create_file(&adev->dev->kobj, &dev_attr_pcie_replay_count.attr); @@ -241,7 +244,7 @@ static int amdgpu_device_attr_sysfs_init(struct amdgpu_device *adev) static void amdgpu_device_attr_sysfs_fini(struct amdgpu_device *adev) { - if (!amdgpu_sriov_vf(adev)) + if (amdgpu_nbio_is_replay_cnt_supported(adev)) sysfs_remove_file(&adev->dev->kobj, &dev_attr_pcie_replay_count.attr); } @@ -411,19 +414,16 @@ static const struct attribute_group amdgpu_board_attrs_group = { static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev); - /** * amdgpu_device_supports_px - Is the device a dGPU with ATPX power control * - * @dev: drm_device pointer + * @adev: amdgpu device pointer * * Returns true if the device is a dGPU with ATPX power control, * otherwise return false. 
*/ -bool amdgpu_device_supports_px(struct drm_device *dev) +bool amdgpu_device_supports_px(struct amdgpu_device *adev) { - struct amdgpu_device *adev = drm_to_adev(dev); - if ((adev->flags & AMD_IS_PX) && !amdgpu_is_atpx_hybrid()) return true; return false; @@ -432,15 +432,13 @@ bool amdgpu_device_supports_px(struct drm_device *dev) /** * amdgpu_device_supports_boco - Is the device a dGPU with ACPI power resources * - * @dev: drm_device pointer + * @adev: amdgpu device pointer * * Returns true if the device is a dGPU with ACPI power control, * otherwise return false. */ -bool amdgpu_device_supports_boco(struct drm_device *dev) +bool amdgpu_device_supports_boco(struct amdgpu_device *adev) { - struct amdgpu_device *adev = drm_to_adev(dev); - if (!IS_ENABLED(CONFIG_HOTPLUG_PCI_PCIE)) return false; @@ -453,29 +451,24 @@ bool amdgpu_device_supports_boco(struct drm_device *dev) /** * amdgpu_device_supports_baco - Does the device support BACO * - * @dev: drm_device pointer + * @adev: amdgpu device pointer * * Return: * 1 if the device supports BACO; * 3 if the device supports MACO (only works if BACO is supported) * otherwise return 0. */ -int amdgpu_device_supports_baco(struct drm_device *dev) +int amdgpu_device_supports_baco(struct amdgpu_device *adev) { - struct amdgpu_device *adev = drm_to_adev(dev); - return amdgpu_asic_supports_baco(adev); } void amdgpu_device_detect_runtime_pm_mode(struct amdgpu_device *adev) { - struct drm_device *dev; int bamaco_support; - dev = adev_to_drm(adev); - adev->pm.rpm_mode = AMDGPU_RUNPM_NONE; - bamaco_support = amdgpu_device_supports_baco(dev); + bamaco_support = amdgpu_device_supports_baco(adev); switch (amdgpu_runtime_pm) { case 2: @@ -495,10 +488,12 @@ void amdgpu_device_detect_runtime_pm_mode(struct amdgpu_device *adev) break; case -1: case -2: - if (amdgpu_device_supports_px(dev)) { /* enable PX as runtime mode */ + if (amdgpu_device_supports_px(adev)) { + /* enable PX as runtime mode */ adev->pm.rpm_mode = AMDGPU_RUNPM_PX; dev_info(adev->dev, "Using ATPX for runtime pm\n"); - } else if (amdgpu_device_supports_boco(dev)) { /* enable boco as runtime mode */ + } else if (amdgpu_device_supports_boco(adev)) { + /* enable boco as runtime mode */ adev->pm.rpm_mode = AMDGPU_RUNPM_BOCO; dev_info(adev->dev, "Using BOCO for runtime pm\n"); } else { @@ -547,14 +542,14 @@ no_runtime_pm: * amdgpu_device_supports_smart_shift - Is the device dGPU with * smart shift support * - * @dev: drm_device pointer + * @adev: amdgpu device pointer * * Returns true if the device is a dGPU with Smart Shift support, * otherwise returns false. 
*/ -bool amdgpu_device_supports_smart_shift(struct drm_device *dev) +bool amdgpu_device_supports_smart_shift(struct amdgpu_device *adev) { - return (amdgpu_device_supports_boco(dev) && + return (amdgpu_device_supports_boco(adev) && amdgpu_acpi_is_power_shift_control_supported()); } @@ -1288,14 +1283,14 @@ u32 amdgpu_device_get_rev_id(struct amdgpu_device *adev) */ static uint32_t amdgpu_invalid_rreg(struct amdgpu_device *adev, uint32_t reg) { - DRM_ERROR("Invalid callback to read register 0x%04X\n", reg); + dev_err(adev->dev, "Invalid callback to read register 0x%04X\n", reg); BUG(); return 0; } static uint32_t amdgpu_invalid_rreg_ext(struct amdgpu_device *adev, uint64_t reg) { - DRM_ERROR("Invalid callback to read register 0x%llX\n", reg); + dev_err(adev->dev, "Invalid callback to read register 0x%llX\n", reg); BUG(); return 0; } @@ -1312,15 +1307,17 @@ static uint32_t amdgpu_invalid_rreg_ext(struct amdgpu_device *adev, uint64_t reg */ static void amdgpu_invalid_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v) { - DRM_ERROR("Invalid callback to write register 0x%04X with 0x%08X\n", - reg, v); + dev_err(adev->dev, + "Invalid callback to write register 0x%04X with 0x%08X\n", reg, + v); BUG(); } static void amdgpu_invalid_wreg_ext(struct amdgpu_device *adev, uint64_t reg, uint32_t v) { - DRM_ERROR("Invalid callback to write register 0x%llX with 0x%08X\n", - reg, v); + dev_err(adev->dev, + "Invalid callback to write register 0x%llX with 0x%08X\n", reg, + v); BUG(); } @@ -1336,14 +1333,15 @@ static void amdgpu_invalid_wreg_ext(struct amdgpu_device *adev, uint64_t reg, ui */ static uint64_t amdgpu_invalid_rreg64(struct amdgpu_device *adev, uint32_t reg) { - DRM_ERROR("Invalid callback to read 64 bit register 0x%04X\n", reg); + dev_err(adev->dev, "Invalid callback to read 64 bit register 0x%04X\n", + reg); BUG(); return 0; } static uint64_t amdgpu_invalid_rreg64_ext(struct amdgpu_device *adev, uint64_t reg) { - DRM_ERROR("Invalid callback to read register 0x%llX\n", reg); + dev_err(adev->dev, "Invalid callback to read register 0x%llX\n", reg); BUG(); return 0; } @@ -1360,15 +1358,17 @@ static uint64_t amdgpu_invalid_rreg64_ext(struct amdgpu_device *adev, uint64_t r */ static void amdgpu_invalid_wreg64(struct amdgpu_device *adev, uint32_t reg, uint64_t v) { - DRM_ERROR("Invalid callback to write 64 bit register 0x%04X with 0x%08llX\n", - reg, v); + dev_err(adev->dev, + "Invalid callback to write 64 bit register 0x%04X with 0x%08llX\n", + reg, v); BUG(); } static void amdgpu_invalid_wreg64_ext(struct amdgpu_device *adev, uint64_t reg, uint64_t v) { - DRM_ERROR("Invalid callback to write 64 bit register 0x%llX with 0x%08llX\n", - reg, v); + dev_err(adev->dev, + "Invalid callback to write 64 bit register 0x%llX with 0x%08llX\n", + reg, v); BUG(); } @@ -1386,8 +1386,9 @@ static void amdgpu_invalid_wreg64_ext(struct amdgpu_device *adev, uint64_t reg, static uint32_t amdgpu_block_invalid_rreg(struct amdgpu_device *adev, uint32_t block, uint32_t reg) { - DRM_ERROR("Invalid callback to read register 0x%04X in block 0x%04X\n", - reg, block); + dev_err(adev->dev, + "Invalid callback to read register 0x%04X in block 0x%04X\n", + reg, block); BUG(); return 0; } @@ -1407,8 +1408,9 @@ static void amdgpu_block_invalid_wreg(struct amdgpu_device *adev, uint32_t block, uint32_t reg, uint32_t v) { - DRM_ERROR("Invalid block callback to write register 0x%04X in block 0x%04X with 0x%08X\n", - reg, block, v); + dev_err(adev->dev, + "Invalid block callback to write register 0x%04X in block 0x%04X with 
0x%08X\n", + reg, block, v); BUG(); } @@ -1694,7 +1696,9 @@ int amdgpu_device_resize_fb_bar(struct amdgpu_device *adev) /* PCI_EXT_CAP_ID_VNDR extended capability is located at 0x100 */ if (!pci_find_ext_capability(adev->pdev, PCI_EXT_CAP_ID_VNDR)) - DRM_WARN("System can't access extended configuration space, please check!!\n"); + dev_warn( + adev->dev, + "System can't access extended configuration space, please check!!\n"); /* skip if the bios has already enabled large BAR */ if (adev->gmc.real_vram_size && @@ -1734,9 +1738,10 @@ int amdgpu_device_resize_fb_bar(struct amdgpu_device *adev) r = pci_resize_resource(adev->pdev, 0, rbar_size); if (r == -ENOSPC) - DRM_INFO("Not enough PCI address space for a large BAR."); + dev_info(adev->dev, + "Not enough PCI address space for a large BAR."); else if (r && r != -ENOTSUPP) - DRM_ERROR("Problem resizing BAR0 (%d).", r); + dev_err(adev->dev, "Problem resizing BAR0 (%d).", r); pci_assign_unassigned_bus_resources(adev->pdev->bus); @@ -1838,8 +1843,8 @@ bool amdgpu_device_seamless_boot_supported(struct amdgpu_device *adev) case 0: return false; default: - DRM_ERROR("Invalid value for amdgpu.seamless: %d\n", - amdgpu_seamless); + dev_err(adev->dev, "Invalid value for amdgpu.seamless: %d\n", + amdgpu_seamless); return false; } @@ -1877,6 +1882,13 @@ static bool amdgpu_device_pcie_dynamic_switching_supported(struct amdgpu_device static bool amdgpu_device_aspm_support_quirk(struct amdgpu_device *adev) { + /* Enabling ASPM causes randoms hangs on Tahiti and Oland on Zen4. + * It's unclear if this is a platform-specific or GPU-specific issue. + * Disable ASPM on SI for the time being. + */ + if (adev->family == AMDGPU_FAMILY_SI) + return true; + #if IS_ENABLED(CONFIG_X86) struct cpuinfo_x86 *c = &cpu_data(0); @@ -2015,7 +2027,7 @@ static void amdgpu_device_check_smu_prv_buffer_size(struct amdgpu_device *adev) return; if (!is_os_64) { - DRM_WARN("Not 64-bit OS, feature not supported\n"); + dev_warn(adev->dev, "Not 64-bit OS, feature not supported\n"); goto def_value; } si_meminfo(&si); @@ -2030,7 +2042,7 @@ static void amdgpu_device_check_smu_prv_buffer_size(struct amdgpu_device *adev) if (total_memory < dram_size_seven_GB) goto def_value1; } else { - DRM_WARN("Smu memory pool size not supported\n"); + dev_warn(adev->dev, "Smu memory pool size not supported\n"); goto def_value; } adev->pm.smu_prv_buffer_size = amdgpu_smu_memory_pool_size << 28; @@ -2038,7 +2050,7 @@ static void amdgpu_device_check_smu_prv_buffer_size(struct amdgpu_device *adev) return; def_value1: - DRM_WARN("No enough system memory\n"); + dev_warn(adev->dev, "No enough system memory\n"); def_value: adev->pm.smu_prv_buffer_size = 0; } @@ -2190,7 +2202,8 @@ static void amdgpu_switcheroo_set_state(struct pci_dev *pdev, struct drm_device *dev = pci_get_drvdata(pdev); int r; - if (amdgpu_device_supports_px(dev) && state == VGA_SWITCHEROO_OFF) + if (amdgpu_device_supports_px(drm_to_adev(dev)) && + state == VGA_SWITCHEROO_OFF) return; if (state == VGA_SWITCHEROO_ON) { @@ -2202,12 +2215,13 @@ static void amdgpu_switcheroo_set_state(struct pci_dev *pdev, amdgpu_device_load_pci_state(pdev); r = pci_enable_device(pdev); if (r) - DRM_WARN("pci_enable_device failed (%d)\n", r); + dev_warn(&pdev->dev, "pci_enable_device failed (%d)\n", + r); amdgpu_device_resume(dev, true); dev->switch_power_state = DRM_SWITCH_POWER_ON; } else { - pr_info("switched off\n"); + dev_info(&pdev->dev, "switched off\n"); dev->switch_power_state = DRM_SWITCH_POWER_CHANGING; amdgpu_device_prepare(dev); 
amdgpu_device_suspend(dev, true); @@ -2274,8 +2288,9 @@ int amdgpu_device_ip_set_clockgating_state(void *dev, r = adev->ip_blocks[i].version->funcs->set_clockgating_state( &adev->ip_blocks[i], state); if (r) - DRM_ERROR("set_clockgating_state of IP block <%s> failed %d\n", - adev->ip_blocks[i].version->funcs->name, r); + dev_err(adev->dev, + "set_clockgating_state of IP block <%s> failed %d\n", + adev->ip_blocks[i].version->funcs->name, r); } return r; } @@ -2308,8 +2323,9 @@ int amdgpu_device_ip_set_powergating_state(void *dev, r = adev->ip_blocks[i].version->funcs->set_powergating_state( &adev->ip_blocks[i], state); if (r) - DRM_ERROR("set_powergating_state of IP block <%s> failed %d\n", - adev->ip_blocks[i].version->funcs->name, r); + dev_err(adev->dev, + "set_powergating_state of IP block <%s> failed %d\n", + adev->ip_blocks[i].version->funcs->name, r); } return r; } @@ -2439,6 +2455,33 @@ int amdgpu_device_ip_block_version_cmp(struct amdgpu_device *adev, return 1; } +static const char *ip_block_names[] = { + [AMD_IP_BLOCK_TYPE_COMMON] = "common", + [AMD_IP_BLOCK_TYPE_GMC] = "gmc", + [AMD_IP_BLOCK_TYPE_IH] = "ih", + [AMD_IP_BLOCK_TYPE_SMC] = "smu", + [AMD_IP_BLOCK_TYPE_PSP] = "psp", + [AMD_IP_BLOCK_TYPE_DCE] = "dce", + [AMD_IP_BLOCK_TYPE_GFX] = "gfx", + [AMD_IP_BLOCK_TYPE_SDMA] = "sdma", + [AMD_IP_BLOCK_TYPE_UVD] = "uvd", + [AMD_IP_BLOCK_TYPE_VCE] = "vce", + [AMD_IP_BLOCK_TYPE_ACP] = "acp", + [AMD_IP_BLOCK_TYPE_VCN] = "vcn", + [AMD_IP_BLOCK_TYPE_MES] = "mes", + [AMD_IP_BLOCK_TYPE_JPEG] = "jpeg", + [AMD_IP_BLOCK_TYPE_VPE] = "vpe", + [AMD_IP_BLOCK_TYPE_UMSCH_MM] = "umsch_mm", + [AMD_IP_BLOCK_TYPE_ISP] = "isp", +}; + +static const char *ip_block_name(struct amdgpu_device *adev, enum amd_ip_block_type type) +{ + int idx = (int)type; + + return idx < ARRAY_SIZE(ip_block_names) ? 
ip_block_names[idx] : "unknown"; +} + /** * amdgpu_device_ip_block_add * @@ -2467,8 +2510,13 @@ int amdgpu_device_ip_block_add(struct amdgpu_device *adev, break; } - dev_info(adev->dev, "detected ip block number %d <%s>\n", - adev->num_ip_blocks, ip_block_version->funcs->name); + dev_info(adev->dev, "detected ip block number %d <%s_v%d_%d_%d> (%s)\n", + adev->num_ip_blocks, + ip_block_name(adev, ip_block_version->type), + ip_block_version->major, + ip_block_version->minor, + ip_block_version->rev, + ip_block_version->funcs->name); adev->ip_blocks[adev->num_ip_blocks].adev = adev; @@ -2525,9 +2573,11 @@ static void amdgpu_device_enable_virtual_display(struct amdgpu_device *adev) } } - DRM_INFO("virtual display string:%s, %s:virtual_display:%d, num_crtc:%d\n", - amdgpu_virtual_display, pci_address_name, - adev->enable_virtual_display, adev->mode_info.num_crtc); + dev_info( + adev->dev, + "virtual display string:%s, %s:virtual_display:%d, num_crtc:%d\n", + amdgpu_virtual_display, pci_address_name, + adev->enable_virtual_display, adev->mode_info.num_crtc); kfree(pciaddstr); } @@ -2538,8 +2588,9 @@ void amdgpu_device_set_sriov_virtual_display(struct amdgpu_device *adev) if (amdgpu_sriov_vf(adev) && !adev->enable_virtual_display) { adev->mode_info.num_crtc = 1; adev->enable_virtual_display = true; - DRM_INFO("virtual_display:%d, num_crtc:%d\n", - adev->enable_virtual_display, adev->mode_info.num_crtc); + dev_info(adev->dev, "virtual_display:%d, num_crtc:%d\n", + adev->enable_virtual_display, + adev->mode_info.num_crtc); } } @@ -2561,9 +2612,6 @@ static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev) adev->firmware.gpu_info_fw = NULL; - if (adev->mman.discovery_bin) - return 0; - switch (adev->asic_type) { default: return 0; @@ -2585,8 +2633,13 @@ static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev) chip_name = "arcturus"; break; case CHIP_NAVI12: + if (adev->mman.discovery_bin) + return 0; chip_name = "navi12"; break; + case CHIP_CYAN_SKILLFISH: + chip_name = "cyan_skillfish"; + break; } err = amdgpu_ucode_request(adev, &adev->firmware.gpu_info_fw, @@ -2666,6 +2719,24 @@ out: return err; } +static void amdgpu_uid_init(struct amdgpu_device *adev) +{ + /* Initialize the UID for the device */ + adev->uid_info = kzalloc(sizeof(struct amdgpu_uid), GFP_KERNEL); + if (!adev->uid_info) { + dev_warn(adev->dev, "Failed to allocate memory for UID\n"); + return; + } + adev->uid_info->adev = adev; +} + +static void amdgpu_uid_fini(struct amdgpu_device *adev) +{ + /* Free the UID memory */ + kfree(adev->uid_info); + adev->uid_info = NULL; +} + /** * amdgpu_device_ip_early_init - run early init for hardware IPs * @@ -2773,21 +2844,29 @@ static int amdgpu_device_ip_early_init(struct amdgpu_device *adev) if (!amdgpu_device_pcie_dynamic_switching_supported(adev)) adev->pm.pp_feature &= ~PP_PCIE_DPM_MASK; + adev->virt.is_xgmi_node_migrate_enabled = false; + if (amdgpu_sriov_vf(adev)) { + adev->virt.is_xgmi_node_migrate_enabled = + amdgpu_ip_version((adev), GC_HWIP, 0) == IP_VERSION(9, 4, 4); + } + total = true; for (i = 0; i < adev->num_ip_blocks; i++) { ip_block = &adev->ip_blocks[i]; if ((amdgpu_ip_block_mask & (1 << i)) == 0) { - DRM_WARN("disabled ip block: %d <%s>\n", - i, adev->ip_blocks[i].version->funcs->name); + dev_warn(adev->dev, "disabled ip block: %d <%s>\n", i, + adev->ip_blocks[i].version->funcs->name); adev->ip_blocks[i].status.valid = false; } else if (ip_block->version->funcs->early_init) { r = ip_block->version->funcs->early_init(ip_block); if (r == -ENOENT) { 
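
/*
 * Sketch of where chip_name ends up (not a literal excerpt): the string
 * chosen in the switch above is formatted into the firmware path, so the
 * new case resolves to "amdgpu/cyan_skillfish_gpu_info.bin", matching the
 * MODULE_FIRMWARE() entry added earlier in this patch. The call below
 * assumes the current amdgpu_ucode_request() signature with an
 * AMDGPU_UCODE_REQUIRED argument.
 */
static int example_request_gpu_info(struct amdgpu_device *adev,
				    const char *chip_name)
{
	return amdgpu_ucode_request(adev, &adev->firmware.gpu_info_fw,
				    AMDGPU_UCODE_REQUIRED,
				    "amdgpu/%s_gpu_info.bin", chip_name);
}
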
adev->ip_blocks[i].status.valid = false; } else if (r) { - DRM_ERROR("early_init of IP block <%s> failed %d\n", - adev->ip_blocks[i].version->funcs->name, r); + dev_err(adev->dev, + "early_init of IP block <%s> failed %d\n", + adev->ip_blocks[i].version->funcs->name, + r); total = false; } else { adev->ip_blocks[i].status.valid = true; @@ -2841,6 +2920,8 @@ static int amdgpu_device_ip_early_init(struct amdgpu_device *adev) if (adev->gmc.xgmi.supported) amdgpu_xgmi_early_init(adev); + if (amdgpu_is_multi_aid(adev)) + amdgpu_uid_init(adev); ip_block = amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_GFX); if (ip_block->status.valid != false) amdgpu_amdkfd_device_probe(adev); @@ -2868,8 +2949,10 @@ static int amdgpu_device_ip_hw_init_phase1(struct amdgpu_device *adev) adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH) { r = adev->ip_blocks[i].version->funcs->hw_init(&adev->ip_blocks[i]); if (r) { - DRM_ERROR("hw_init of IP block <%s> failed %d\n", - adev->ip_blocks[i].version->funcs->name, r); + dev_err(adev->dev, + "hw_init of IP block <%s> failed %d\n", + adev->ip_blocks[i].version->funcs->name, + r); return r; } adev->ip_blocks[i].status.hw = true; @@ -2893,8 +2976,9 @@ static int amdgpu_device_ip_hw_init_phase2(struct amdgpu_device *adev) continue; r = adev->ip_blocks[i].version->funcs->hw_init(&adev->ip_blocks[i]); if (r) { - DRM_ERROR("hw_init of IP block <%s> failed %d\n", - adev->ip_blocks[i].version->funcs->name, r); + dev_err(adev->dev, + "hw_init of IP block <%s> failed %d\n", + adev->ip_blocks[i].version->funcs->name, r); return r; } adev->ip_blocks[i].status.hw = true; @@ -2932,8 +3016,11 @@ static int amdgpu_device_fw_loading(struct amdgpu_device *adev) } else { r = adev->ip_blocks[i].version->funcs->hw_init(&adev->ip_blocks[i]); if (r) { - DRM_ERROR("hw_init of IP block <%s> failed %d\n", - adev->ip_blocks[i].version->funcs->name, r); + dev_err(adev->dev, + "hw_init of IP block <%s> failed %d\n", + adev->ip_blocks[i] + .version->funcs->name, + r); return r; } adev->ip_blocks[i].status.hw = true; @@ -2988,25 +3075,29 @@ static int amdgpu_device_init_schedulers(struct amdgpu_device *adev) r = drm_sched_init(&ring->sched, &args); if (r) { - DRM_ERROR("Failed to create scheduler on ring %s.\n", - ring->name); + dev_err(adev->dev, + "Failed to create scheduler on ring %s.\n", + ring->name); return r; } r = amdgpu_uvd_entity_init(adev, ring); if (r) { - DRM_ERROR("Failed to create UVD scheduling entity on ring %s.\n", - ring->name); + dev_err(adev->dev, + "Failed to create UVD scheduling entity on ring %s.\n", + ring->name); return r; } r = amdgpu_vce_entity_init(adev, ring); if (r) { - DRM_ERROR("Failed to create VCE scheduling entity on ring %s.\n", - ring->name); + dev_err(adev->dev, + "Failed to create VCE scheduling entity on ring %s.\n", + ring->name); return r; } } - amdgpu_xcp_update_partition_sched_list(adev); + if (adev->xcp_mgr) + amdgpu_xcp_update_partition_sched_list(adev); return 0; } @@ -3038,8 +3129,10 @@ static int amdgpu_device_ip_init(struct amdgpu_device *adev) if (adev->ip_blocks[i].version->funcs->sw_init) { r = adev->ip_blocks[i].version->funcs->sw_init(&adev->ip_blocks[i]); if (r) { - DRM_ERROR("sw_init of IP block <%s> failed %d\n", - adev->ip_blocks[i].version->funcs->name, r); + dev_err(adev->dev, + "sw_init of IP block <%s> failed %d\n", + adev->ip_blocks[i].version->funcs->name, + r); goto init_failed; } } @@ -3053,7 +3146,8 @@ static int amdgpu_device_ip_init(struct amdgpu_device *adev) /* need to do common hw init early so everything is set 
up for gmc */ r = adev->ip_blocks[i].version->funcs->hw_init(&adev->ip_blocks[i]); if (r) { - DRM_ERROR("hw_init %d failed %d\n", i, r); + dev_err(adev->dev, "hw_init %d failed %d\n", i, + r); goto init_failed; } adev->ip_blocks[i].status.hw = true; @@ -3065,17 +3159,21 @@ static int amdgpu_device_ip_init(struct amdgpu_device *adev) r = amdgpu_device_mem_scratch_init(adev); if (r) { - DRM_ERROR("amdgpu_mem_scratch_init failed %d\n", r); + dev_err(adev->dev, + "amdgpu_mem_scratch_init failed %d\n", + r); goto init_failed; } r = adev->ip_blocks[i].version->funcs->hw_init(&adev->ip_blocks[i]); if (r) { - DRM_ERROR("hw_init %d failed %d\n", i, r); + dev_err(adev->dev, "hw_init %d failed %d\n", i, + r); goto init_failed; } r = amdgpu_device_wb_init(adev); if (r) { - DRM_ERROR("amdgpu_device_wb_init failed %d\n", r); + dev_err(adev->dev, + "amdgpu_device_wb_init failed %d\n", r); goto init_failed; } adev->ip_blocks[i].status.hw = true; @@ -3087,14 +3185,16 @@ static int amdgpu_device_ip_init(struct amdgpu_device *adev) AMDGPU_GEM_DOMAIN_GTT, AMDGPU_CSA_SIZE); if (r) { - DRM_ERROR("allocate CSA failed %d\n", r); + dev_err(adev->dev, + "allocate CSA failed %d\n", r); goto init_failed; } } r = amdgpu_seq64_init(adev); if (r) { - DRM_ERROR("allocate seq64 failed %d\n", r); + dev_err(adev->dev, "allocate seq64 failed %d\n", + r); goto init_failed; } } @@ -3235,6 +3335,7 @@ static bool amdgpu_device_check_vram_lost(struct amdgpu_device *adev) * always assumed to be lost. */ switch (amdgpu_asic_reset_method(adev)) { + case AMD_RESET_METHOD_LEGACY: case AMD_RESET_METHOD_LINK: case AMD_RESET_METHOD_BACO: case AMD_RESET_METHOD_MODE1: @@ -3284,8 +3385,10 @@ int amdgpu_device_set_cg_state(struct amdgpu_device *adev, r = adev->ip_blocks[i].version->funcs->set_clockgating_state(&adev->ip_blocks[i], state); if (r) { - DRM_ERROR("set_clockgating_state(gate) of IP block <%s> failed %d\n", - adev->ip_blocks[i].version->funcs->name, r); + dev_err(adev->dev, + "set_clockgating_state(gate) of IP block <%s> failed %d\n", + adev->ip_blocks[i].version->funcs->name, + r); return r; } } @@ -3321,8 +3424,10 @@ int amdgpu_device_set_pg_state(struct amdgpu_device *adev, r = adev->ip_blocks[i].version->funcs->set_powergating_state(&adev->ip_blocks[i], state); if (r) { - DRM_ERROR("set_powergating_state(gate) of IP block <%s> failed %d\n", - adev->ip_blocks[i].version->funcs->name, r); + dev_err(adev->dev, + "set_powergating_state(gate) of IP block <%s> failed %d\n", + adev->ip_blocks[i].version->funcs->name, + r); return r; } } @@ -3349,7 +3454,7 @@ static int amdgpu_device_enable_mgpu_fan_boost(void) for (i = 0; i < mgpu_info.num_dgpu; i++) { gpu_ins = &(mgpu_info.gpu_ins[i]); adev = gpu_ins->adev; - if (!(adev->flags & AMD_IS_APU) && + if (!(adev->flags & AMD_IS_APU || amdgpu_sriov_multi_vf_mode(adev)) && !gpu_ins->mgpu_fan_enabled) { ret = amdgpu_dpm_enable_mgpu_fan_boost(adev); if (ret) @@ -3388,8 +3493,10 @@ static int amdgpu_device_ip_late_init(struct amdgpu_device *adev) if (adev->ip_blocks[i].version->funcs->late_init) { r = adev->ip_blocks[i].version->funcs->late_init(&adev->ip_blocks[i]); if (r) { - DRM_ERROR("late_init of IP block <%s> failed %d\n", - adev->ip_blocks[i].version->funcs->name, r); + dev_err(adev->dev, + "late_init of IP block <%s> failed %d\n", + adev->ip_blocks[i].version->funcs->name, + r); return r; } } @@ -3398,7 +3505,7 @@ static int amdgpu_device_ip_late_init(struct amdgpu_device *adev) r = amdgpu_ras_late_init(adev); if (r) { - DRM_ERROR("amdgpu_ras_late_init failed %d", r); + 
dev_err(adev->dev, "amdgpu_ras_late_init failed %d", r); return r; } @@ -3412,7 +3519,7 @@ static int amdgpu_device_ip_late_init(struct amdgpu_device *adev) r = amdgpu_device_enable_mgpu_fan_boost(); if (r) - DRM_ERROR("enable mgpu fan boost failed (%d).\n", r); + dev_err(adev->dev, "enable mgpu fan boost failed (%d).\n", r); /* For passthrough configuration on arcturus and aldebaran, enable special handling SBR */ if (amdgpu_passthrough(adev) && @@ -3445,7 +3552,9 @@ static int amdgpu_device_ip_late_init(struct amdgpu_device *adev) r = amdgpu_xgmi_set_pstate(gpu_instance->adev, AMDGPU_XGMI_PSTATE_MIN); if (r) { - DRM_ERROR("pstate setting failed (%d).\n", r); + dev_err(adev->dev, + "pstate setting failed (%d).\n", + r); break; } } @@ -3459,17 +3568,19 @@ static int amdgpu_device_ip_late_init(struct amdgpu_device *adev) static void amdgpu_ip_block_hw_fini(struct amdgpu_ip_block *ip_block) { + struct amdgpu_device *adev = ip_block->adev; int r; if (!ip_block->version->funcs->hw_fini) { - DRM_ERROR("hw_fini of IP block <%s> not defined\n", - ip_block->version->funcs->name); + dev_err(adev->dev, "hw_fini of IP block <%s> not defined\n", + ip_block->version->funcs->name); } else { r = ip_block->version->funcs->hw_fini(ip_block); /* XXX handle errors */ if (r) { - DRM_DEBUG("hw_fini of IP block <%s> failed %d\n", - ip_block->version->funcs->name, r); + dev_dbg(adev->dev, + "hw_fini of IP block <%s> failed %d\n", + ip_block->version->funcs->name, r); } } @@ -3510,15 +3621,16 @@ static int amdgpu_device_ip_fini_early(struct amdgpu_device *adev) r = adev->ip_blocks[i].version->funcs->early_fini(&adev->ip_blocks[i]); if (r) { - DRM_DEBUG("early_fini of IP block <%s> failed %d\n", - adev->ip_blocks[i].version->funcs->name, r); + dev_dbg(adev->dev, + "early_fini of IP block <%s> failed %d\n", + adev->ip_blocks[i].version->funcs->name, r); } } amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE); amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE); - amdgpu_amdkfd_suspend(adev, false); + amdgpu_amdkfd_suspend(adev, true); amdgpu_userq_suspend(adev); /* Workaround for ASICs need to disable SMC first */ @@ -3533,7 +3645,8 @@ static int amdgpu_device_ip_fini_early(struct amdgpu_device *adev) if (amdgpu_sriov_vf(adev)) { if (amdgpu_virt_release_full_gpu(adev, false)) - DRM_ERROR("failed to release exclusive mode on fini\n"); + dev_err(adev->dev, + "failed to release exclusive mode on fini\n"); } return 0; @@ -3581,8 +3694,10 @@ static int amdgpu_device_ip_fini(struct amdgpu_device *adev) r = adev->ip_blocks[i].version->funcs->sw_fini(&adev->ip_blocks[i]); /* XXX handle errors */ if (r) { - DRM_DEBUG("sw_fini of IP block <%s> failed %d\n", - adev->ip_blocks[i].version->funcs->name, r); + dev_dbg(adev->dev, + "sw_fini of IP block <%s> failed %d\n", + adev->ip_blocks[i].version->funcs->name, + r); } } adev->ip_blocks[i].status.sw = false; @@ -3598,6 +3713,7 @@ static int amdgpu_device_ip_fini(struct amdgpu_device *adev) } amdgpu_ras_fini(adev); + amdgpu_uid_fini(adev); return 0; } @@ -3615,7 +3731,7 @@ static void amdgpu_device_delayed_init_work_handler(struct work_struct *work) r = amdgpu_ib_ring_tests(adev); if (r) - DRM_ERROR("ib ring test failed (%d).\n", r); + dev_err(adev->dev, "ib ring test failed (%d).\n", r); } static void amdgpu_device_delay_enable_gfx_off(struct work_struct *work) @@ -3756,8 +3872,9 @@ static int amdgpu_device_ip_suspend_phase2(struct amdgpu_device *adev) if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) { r = amdgpu_dpm_set_mp1_state(adev, adev->mp1_state); if (r) 
{ - DRM_ERROR("SMC failed to set mp1 state %d, %d\n", - adev->mp1_state, r); + dev_err(adev->dev, + "SMC failed to set mp1 state %d, %d\n", + adev->mp1_state, r); return r; } } @@ -4041,12 +4158,14 @@ static void amdgpu_device_detect_sriov_bios(struct amdgpu_device *adev) /** * amdgpu_device_asic_has_dc_support - determine if DC supports the asic * + * @pdev : pci device context * @asic_type: AMD asic type * * Check if there is DC (new modesetting infrastructre) support for an asic. * returns true if DC has support, false if not. */ -bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type) +bool amdgpu_device_asic_has_dc_support(struct pci_dev *pdev, + enum amd_asic_type asic_type) { switch (asic_type) { #ifdef CONFIG_DRM_AMDGPU_SI @@ -4089,7 +4208,9 @@ bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type) #else default: if (amdgpu_dc > 0) - DRM_INFO_ONCE("Display Core has been requested via kernel parameter but isn't supported by ASIC, ignoring\n"); + dev_info_once( + &pdev->dev, + "Display Core has been requested via kernel parameter but isn't supported by ASIC, ignoring\n"); return false; #endif } @@ -4108,7 +4229,7 @@ bool amdgpu_device_has_dc_support(struct amdgpu_device *adev) (adev->harvest_ip_mask & AMD_HARVEST_IP_DMU_MASK)) return false; - return amdgpu_device_asic_has_dc_support(adev->asic_type); + return amdgpu_device_asic_has_dc_support(adev->pdev, adev->asic_type); } static void amdgpu_device_xgmi_reset_func(struct work_struct *__work) @@ -4130,13 +4251,13 @@ static void amdgpu_device_xgmi_reset_func(struct work_struct *__work) if (amdgpu_asic_reset_method(adev) == AMD_RESET_METHOD_BACO) { task_barrier_enter(&hive->tb); - adev->asic_reset_res = amdgpu_device_baco_enter(adev_to_drm(adev)); + adev->asic_reset_res = amdgpu_device_baco_enter(adev); if (adev->asic_reset_res) goto fail; task_barrier_exit(&hive->tb); - adev->asic_reset_res = amdgpu_device_baco_exit(adev_to_drm(adev)); + adev->asic_reset_res = amdgpu_device_baco_exit(adev); if (adev->asic_reset_res) goto fail; @@ -4150,7 +4271,8 @@ static void amdgpu_device_xgmi_reset_func(struct work_struct *__work) fail: if (adev->asic_reset_res) - DRM_WARN("ASIC reset failed with error, %d for drm dev, %s", + dev_warn(adev->dev, + "ASIC reset failed with error, %d for drm dev, %s", adev->asic_reset_res, adev_to_drm(adev)->unique); amdgpu_put_xgmi_hive(hive); } @@ -4164,18 +4286,10 @@ static int amdgpu_device_get_job_timeout_settings(struct amdgpu_device *adev) int ret = 0; /* - * By default timeout for non compute jobs is 10000 - * and 60000 for compute jobs. - * In SR-IOV or passthrough mode, timeout for compute - * jobs are 60000 by default. + * By default timeout for jobs is 10 sec */ - adev->gfx_timeout = msecs_to_jiffies(10000); + adev->compute_timeout = adev->gfx_timeout = msecs_to_jiffies(10000); adev->sdma_timeout = adev->video_timeout = adev->gfx_timeout; - if (amdgpu_sriov_vf(adev)) - adev->compute_timeout = amdgpu_sriov_is_pp_one_vf(adev) ? 
- msecs_to_jiffies(60000) : msecs_to_jiffies(10000); - else - adev->compute_timeout = msecs_to_jiffies(60000); if (strnlen(input, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH)) { while ((timeout_setting = strsep(&input, ",")) && @@ -4274,7 +4388,7 @@ static void amdgpu_device_set_mcbp(struct amdgpu_device *adev) adev->gfx.mcbp = true; if (adev->gfx.mcbp) - DRM_INFO("MCBP is enabled\n"); + dev_info(adev->dev, "MCBP is enabled\n"); } /** @@ -4290,7 +4404,6 @@ static void amdgpu_device_set_mcbp(struct amdgpu_device *adev) int amdgpu_device_init(struct amdgpu_device *adev, uint32_t flags) { - struct drm_device *ddev = adev_to_drm(adev); struct pci_dev *pdev = adev->pdev; int r, i; bool px = false; @@ -4342,9 +4455,11 @@ int amdgpu_device_init(struct amdgpu_device *adev, adev->audio_endpt_rreg = &amdgpu_block_invalid_rreg; adev->audio_endpt_wreg = &amdgpu_block_invalid_wreg; - DRM_INFO("initializing kernel modesetting (%s 0x%04X:0x%04X 0x%04X:0x%04X 0x%02X).\n", - amdgpu_asic_name[adev->asic_type], pdev->vendor, pdev->device, - pdev->subsystem_vendor, pdev->subsystem_device, pdev->revision); + dev_info( + adev->dev, + "initializing kernel modesetting (%s 0x%04X:0x%04X 0x%04X:0x%04X 0x%02X).\n", + amdgpu_asic_name[adev->asic_type], pdev->vendor, pdev->device, + pdev->subsystem_vendor, pdev->subsystem_device, pdev->revision); /* mutex initialization are all done here so we * can recall function without having locking issues @@ -4461,8 +4576,10 @@ int amdgpu_device_init(struct amdgpu_device *adev, if (!adev->rmmio) return -ENOMEM; - DRM_INFO("register mmio base: 0x%08X\n", (uint32_t)adev->rmmio_base); - DRM_INFO("register mmio size: %u\n", (unsigned int)adev->rmmio_size); + dev_info(adev->dev, "register mmio base: 0x%08X\n", + (uint32_t)adev->rmmio_base); + dev_info(adev->dev, "register mmio size: %u\n", + (unsigned int)adev->rmmio_size); /* * Reset domain needs to be present early, before XGMI hive discovered @@ -4599,7 +4716,7 @@ int amdgpu_device_init(struct amdgpu_device *adev, r = -EINVAL; goto failed; } - DRM_INFO("GPU posting now...\n"); + dev_info(adev->dev, "GPU posting now...\n"); r = amdgpu_device_asic_init(adev); if (r) { dev_err(adev->dev, "gpu post error!\n"); @@ -4709,12 +4826,12 @@ fence_driver_init: r = amdgpu_pm_sysfs_init(adev); if (r) - DRM_ERROR("registering pm sysfs failed (%d).\n", r); + dev_err(adev->dev, "registering pm sysfs failed (%d).\n", r); r = amdgpu_ucode_sysfs_init(adev); if (r) { adev->ucode_sysfs_en = false; - DRM_ERROR("Creating firmware sysfs failed (%d).\n", r); + dev_err(adev->dev, "Creating firmware sysfs failed (%d).\n", r); } else adev->ucode_sysfs_en = true; @@ -4747,7 +4864,7 @@ fence_driver_init: if ((adev->pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA) vga_client_register(adev->pdev, amdgpu_device_vga_set_decode); - px = amdgpu_device_supports_px(ddev); + px = amdgpu_device_supports_px(adev); if (px || (!dev_is_removable(&adev->pdev->dev) && apple_gmux_detect(NULL, NULL))) @@ -4913,7 +5030,7 @@ void amdgpu_device_fini_sw(struct amdgpu_device *adev) kfree(adev->xcp_mgr); adev->xcp_mgr = NULL; - px = amdgpu_device_supports_px(adev_to_drm(adev)); + px = amdgpu_device_supports_px(adev); if (px || (!dev_is_removable(&adev->pdev->dev) && apple_gmux_detect(NULL, NULL))) @@ -4941,7 +5058,8 @@ void amdgpu_device_fini_sw(struct amdgpu_device *adev) adev->reset_domain = NULL; kfree(adev->pci_state); - + kfree(adev->pcie_reset_ctx.swds_pcistate); + kfree(adev->pcie_reset_ctx.swus_pcistate); } /** @@ -4961,9 +5079,21 @@ static int amdgpu_device_evict_resources(struct 
amdgpu_device *adev) if (!adev->in_s4 && (adev->flags & AMD_IS_APU)) return 0; + /* No need to evict when going to S5 through S4 callbacks */ + if (system_state == SYSTEM_POWER_OFF) + return 0; + ret = amdgpu_ttm_evict_resources(adev, TTM_PL_VRAM); - if (ret) - DRM_WARN("evicting device resources failed\n"); + if (ret) { + dev_warn(adev->dev, "evicting device resources failed\n"); + return ret; + } + + if (adev->in_s4) { + ret = ttm_device_prepare_hibernation(&adev->mman.bdev); + if (ret) + dev_err(adev->dev, "prepare hibernation failed, %d\n", ret); + } return ret; } @@ -5035,6 +5165,28 @@ int amdgpu_device_prepare(struct drm_device *dev) } /** + * amdgpu_device_complete - complete power state transition + * + * @dev: drm dev pointer + * + * Undo the changes from amdgpu_device_prepare. This will be + * called on all resume transitions, including those that failed. + */ +void amdgpu_device_complete(struct drm_device *dev) +{ + struct amdgpu_device *adev = drm_to_adev(dev); + int i; + + for (i = 0; i < adev->num_ip_blocks; i++) { + if (!adev->ip_blocks[i].status.valid) + continue; + if (!adev->ip_blocks[i].version->funcs->complete) + continue; + adev->ip_blocks[i].version->funcs->complete(&adev->ip_blocks[i]); + } +} + +/** * amdgpu_device_suspend - initiate device suspend * * @dev: drm dev pointer @@ -5055,14 +5207,16 @@ int amdgpu_device_suspend(struct drm_device *dev, bool notify_clients) adev->in_suspend = true; if (amdgpu_sriov_vf(adev)) { + if (!adev->in_runpm) + amdgpu_amdkfd_suspend_process(adev); amdgpu_virt_fini_data_exchange(adev); r = amdgpu_virt_request_full_gpu(adev, false); if (r) return r; } - if (amdgpu_acpi_smart_shift_update(dev, AMDGPU_SS_DEV_D3)) - DRM_WARN("smart shift update failed\n"); + if (amdgpu_acpi_smart_shift_update(adev, AMDGPU_SS_DEV_D3)) + dev_warn(adev->dev, "smart shift update failed\n"); if (notify_clients) drm_client_dev_suspend(adev_to_drm(adev), false); @@ -5073,10 +5227,8 @@ int amdgpu_device_suspend(struct drm_device *dev, bool notify_clients) amdgpu_device_ip_suspend_phase1(adev); - if (!adev->in_s0ix) { - amdgpu_amdkfd_suspend(adev, adev->in_runpm); - amdgpu_userq_suspend(adev); - } + amdgpu_amdkfd_suspend(adev, !amdgpu_sriov_vf(adev) && !adev->in_runpm); + amdgpu_userq_suspend(adev); r = amdgpu_device_evict_resources(adev); if (r) @@ -5098,6 +5250,32 @@ int amdgpu_device_suspend(struct drm_device *dev, bool notify_clients) return 0; } +static inline int amdgpu_virt_resume(struct amdgpu_device *adev) +{ + int r; + unsigned int prev_physical_node_id = adev->gmc.xgmi.physical_node_id; + + /* During VM resume, QEMU programming of VF MSIX table (register GFXMSIX_VECT0_ADDR_LO) + * may not work. The access could be blocked by nBIF protection as VF isn't in + * exclusive access mode. Exclusive access is enabled now, disable/enable MSIX + * so that QEMU reprograms MSIX table. 
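
/*
 * Hypothetical wiring sketch (the driver-side hookup is not shown in this
 * hunk): amdgpu_device_complete() above has the shape of a dev_pm_ops
 * .complete() handler, which runs on every resume path, including ones
 * where the preceding suspend was aborted, and so undoes
 * amdgpu_device_prepare() unconditionally.
 */
#include <linux/device.h>
#include <drm/drm_device.h>

static void example_pmops_complete(struct device *dev)
{
	struct drm_device *drm_dev = dev_get_drvdata(dev);

	amdgpu_device_complete(drm_dev);
}
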
+ */ + amdgpu_restore_msix(adev); + + r = adev->gfxhub.funcs->get_xgmi_info(adev); + if (r) + return r; + + dev_info(adev->dev, "xgmi node, old id %d, new id %d\n", + prev_physical_node_id, adev->gmc.xgmi.physical_node_id); + + adev->vm_manager.vram_base_offset = adev->gfxhub.funcs->get_mc_fb_offset(adev); + adev->vm_manager.vram_base_offset += + adev->gmc.xgmi.physical_node_id * adev->gmc.xgmi.node_segment_size; + + return 0; +} + /** * amdgpu_device_resume - initiate device resume * @@ -5119,6 +5297,12 @@ int amdgpu_device_resume(struct drm_device *dev, bool notify_clients) return r; } + if (amdgpu_virt_xgmi_migrate_enabled(adev)) { + r = amdgpu_virt_resume(adev); + if (r) + goto exit; + } + if (dev->switch_power_state == DRM_SWITCH_POWER_OFF) return 0; @@ -5139,15 +5323,13 @@ int amdgpu_device_resume(struct drm_device *dev, bool notify_clients) goto exit; } - if (!adev->in_s0ix) { - r = amdgpu_amdkfd_resume(adev, adev->in_runpm); - if (r) - goto exit; + r = amdgpu_amdkfd_resume(adev, !amdgpu_sriov_vf(adev) && !adev->in_runpm); + if (r) + goto exit; - r = amdgpu_userq_resume(adev); - if (r) - goto exit; - } + r = amdgpu_userq_resume(adev); + if (r) + goto exit; r = amdgpu_device_ip_late_init(adev); if (r) @@ -5159,6 +5341,9 @@ exit: if (amdgpu_sriov_vf(adev)) { amdgpu_virt_init_data_exchange(adev); amdgpu_virt_release_full_gpu(adev, true); + + if (!r && !adev->in_runpm) + r = amdgpu_amdkfd_resume_process(adev); } if (r) @@ -5193,10 +5378,12 @@ exit: dev->dev->power.disable_depth--; #endif } + + amdgpu_vram_mgr_clear_reset_blocks(adev); adev->in_suspend = false; - if (amdgpu_acpi_smart_shift_update(dev, AMDGPU_SS_DEV_D0)) - DRM_WARN("smart shift update failed\n"); + if (amdgpu_acpi_smart_shift_update(adev, AMDGPU_SS_DEV_D0)) + dev_warn(adev->dev, "smart shift update failed\n"); return 0; } @@ -5581,7 +5768,7 @@ int amdgpu_device_link_reset(struct amdgpu_device *adev) dev_info(adev->dev, "GPU link reset\n"); - if (!adev->pcie_reset_ctx.occurs_dpc) + if (!amdgpu_reset_in_dpc(adev)) ret = amdgpu_dpm_link_reset(adev); if (ret) @@ -5710,6 +5897,7 @@ int amdgpu_device_reinit_after_reset(struct amdgpu_reset_context *reset_context) amdgpu_set_init_level(tmp_adev, init_level); if (full_reset) { /* post card */ + amdgpu_reset_set_dpc_status(tmp_adev, false); amdgpu_ras_clear_err_state(tmp_adev); r = amdgpu_device_asic_init(tmp_adev); if (r) { @@ -5727,7 +5915,9 @@ int amdgpu_device_reinit_after_reset(struct amdgpu_reset_context *reset_context) amdgpu_coredump(tmp_adev, false, vram_lost, reset_context->job); if (vram_lost) { - DRM_INFO("VRAM is lost due to GPU reset!\n"); + dev_info( + tmp_adev->dev, + "VRAM is lost due to GPU reset!\n"); amdgpu_inc_vram_lost(tmp_adev); } @@ -6006,29 +6196,19 @@ static int amdgpu_device_health_check(struct list_head *device_list_handle) { struct amdgpu_device *tmp_adev; int ret = 0; - u32 status; list_for_each_entry(tmp_adev, device_list_handle, reset_list) { - pci_read_config_dword(tmp_adev->pdev, PCI_COMMAND, &status); - if (PCI_POSSIBLE_ERROR(status)) { - dev_err(tmp_adev->dev, "device lost from bus!"); - ret = -ENODEV; - } + ret |= amdgpu_device_bus_status_check(tmp_adev); } return ret; } -static int amdgpu_device_halt_activities(struct amdgpu_device *adev, - struct amdgpu_job *job, - struct amdgpu_reset_context *reset_context, - struct list_head *device_list, - struct amdgpu_hive_info *hive, - bool need_emergency_restart) +static void amdgpu_device_recovery_prepare(struct amdgpu_device *adev, + struct list_head *device_list, + struct amdgpu_hive_info *hive) { - 
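
/*
 * Worked example of the rebase above, with illustrative numbers only:
 * assuming get_mc_fb_offset() returns 0x8000000000 and node_segment_size is
 * 64 GiB (0x1000000000), a VF that comes back up on physical node 3 ends
 * with
 *
 *	vram_base_offset = 0x8000000000 + 3 * 0x1000000000 = 0xB000000000
 *
 * so GPUVM mappings created before the migration keep resolving to the
 * node's own slice of the hive's VRAM after resume.
 */
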
struct list_head *device_list_handle = NULL; struct amdgpu_device *tmp_adev = NULL; - int i, r = 0; /* * Build list of devices to reset. @@ -6040,31 +6220,52 @@ static int amdgpu_device_halt_activities(struct amdgpu_device *adev, list_add_tail(&tmp_adev->reset_list, device_list); if (adev->shutdown) tmp_adev->shutdown = true; - if (adev->pcie_reset_ctx.occurs_dpc) + if (amdgpu_reset_in_dpc(adev)) tmp_adev->pcie_reset_ctx.in_link_reset = true; } if (!list_is_first(&adev->reset_list, device_list)) list_rotate_to_front(&adev->reset_list, device_list); - device_list_handle = device_list; } else { list_add_tail(&adev->reset_list, device_list); - device_list_handle = device_list; } +} - if (!amdgpu_sriov_vf(adev) && (!adev->pcie_reset_ctx.occurs_dpc)) { - r = amdgpu_device_health_check(device_list_handle); - if (r) - return r; - } +static void amdgpu_device_recovery_get_reset_lock(struct amdgpu_device *adev, + struct list_head *device_list) +{ + struct amdgpu_device *tmp_adev = NULL; - /* We need to lock reset domain only once both for XGMI and single device */ - tmp_adev = list_first_entry(device_list_handle, struct amdgpu_device, - reset_list); + if (list_empty(device_list)) + return; + tmp_adev = + list_first_entry(device_list, struct amdgpu_device, reset_list); amdgpu_device_lock_reset_domain(tmp_adev->reset_domain); +} - /* block all schedulers and reset given job's ring */ - list_for_each_entry(tmp_adev, device_list_handle, reset_list) { +static void amdgpu_device_recovery_put_reset_lock(struct amdgpu_device *adev, + struct list_head *device_list) +{ + struct amdgpu_device *tmp_adev = NULL; + + if (list_empty(device_list)) + return; + tmp_adev = + list_first_entry(device_list, struct amdgpu_device, reset_list); + amdgpu_device_unlock_reset_domain(tmp_adev->reset_domain); +} +static void amdgpu_device_halt_activities(struct amdgpu_device *adev, + struct amdgpu_job *job, + struct amdgpu_reset_context *reset_context, + struct list_head *device_list, + struct amdgpu_hive_info *hive, + bool need_emergency_restart) +{ + struct amdgpu_device *tmp_adev = NULL; + int i; + + /* block all schedulers and reset given job's ring */ + list_for_each_entry(tmp_adev, device_list, reset_list) { amdgpu_device_set_mp1_state(tmp_adev); /* @@ -6095,9 +6296,8 @@ static int amdgpu_device_halt_activities(struct amdgpu_device *adev, drm_client_dev_suspend(adev_to_drm(tmp_adev), false); /* disable ras on ALL IPs */ - if (!need_emergency_restart && - (!adev->pcie_reset_ctx.occurs_dpc) && - amdgpu_device_ip_need_full_reset(tmp_adev)) + if (!need_emergency_restart && !amdgpu_reset_in_dpc(adev) && + amdgpu_device_ip_need_full_reset(tmp_adev)) amdgpu_ras_suspend(tmp_adev); for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { @@ -6113,8 +6313,6 @@ static int amdgpu_device_halt_activities(struct amdgpu_device *adev, } atomic_inc(&tmp_adev->gpu_reset_counter); } - - return r; } static int amdgpu_device_asic_reset(struct amdgpu_device *adev, @@ -6127,11 +6325,7 @@ static int amdgpu_device_asic_reset(struct amdgpu_device *adev, retry: /* Rest of adevs pre asic reset from XGMI hive. 
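
/*
 * Condensed call order established by the split above, taken from the
 * callers later in this patch (amdgpu_device_gpu_recover() and the PCI
 * error handlers). The lock helpers always operate on the first device of
 * the assembled list, so XGMI hives and single GPUs share one path:
 *
 *	amdgpu_device_recovery_prepare(adev, &device_list, hive);
 *	amdgpu_device_recovery_get_reset_lock(adev, &device_list);
 *	amdgpu_device_halt_activities(adev, job, &reset_context,
 *				      &device_list, hive, false);
 *	... asic reset, scheduler resume ...
 *	amdgpu_device_recovery_put_reset_lock(adev, &device_list);
 */
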
*/ list_for_each_entry(tmp_adev, device_list, reset_list) { - if (adev->pcie_reset_ctx.occurs_dpc) - tmp_adev->no_hw_access = true; r = amdgpu_device_pre_asic_reset(tmp_adev, reset_context); - if (adev->pcie_reset_ctx.occurs_dpc) - tmp_adev->no_hw_access = false; /*TODO Should we stop ?*/ if (r) { dev_err(tmp_adev->dev, "GPU pre asic reset failed with err, %d for drm dev, %s ", @@ -6202,25 +6396,32 @@ static int amdgpu_device_sched_resume(struct list_head *device_list, if (!drm_drv_uses_atomic_modeset(adev_to_drm(tmp_adev)) && !job_signaled) drm_helper_resume_force_mode(adev_to_drm(tmp_adev)); - if (tmp_adev->asic_reset_res) - r = tmp_adev->asic_reset_res; - - tmp_adev->asic_reset_res = 0; - - if (r) { + if (tmp_adev->asic_reset_res) { /* bad news, how to tell it to userspace ? * for ras error, we should report GPU bad status instead of * reset failure */ if (reset_context->src != AMDGPU_RESET_SRC_RAS || !amdgpu_ras_eeprom_check_err_threshold(tmp_adev)) - dev_info(tmp_adev->dev, "GPU reset(%d) failed\n", - atomic_read(&tmp_adev->gpu_reset_counter)); - amdgpu_vf_error_put(tmp_adev, AMDGIM_ERROR_VF_GPU_RESET_FAIL, 0, r); + dev_info( + tmp_adev->dev, + "GPU reset(%d) failed with error %d \n", + atomic_read( + &tmp_adev->gpu_reset_counter), + tmp_adev->asic_reset_res); + amdgpu_vf_error_put(tmp_adev, + AMDGIM_ERROR_VF_GPU_RESET_FAIL, 0, + tmp_adev->asic_reset_res); + if (!r) + r = tmp_adev->asic_reset_res; + tmp_adev->asic_reset_res = 0; } else { - dev_info(tmp_adev->dev, "GPU reset(%d) succeeded!\n", atomic_read(&tmp_adev->gpu_reset_counter)); - if (amdgpu_acpi_smart_shift_update(adev_to_drm(tmp_adev), AMDGPU_SS_DEV_D0)) - DRM_WARN("smart shift update failed\n"); + dev_info(tmp_adev->dev, "GPU reset(%d) succeeded!\n", + atomic_read(&tmp_adev->gpu_reset_counter)); + if (amdgpu_acpi_smart_shift_update(tmp_adev, + AMDGPU_SS_DEV_D0)) + dev_warn(tmp_adev->dev, + "smart shift update failed\n"); } } @@ -6252,11 +6453,6 @@ static void amdgpu_device_gpu_resume(struct amdgpu_device *adev, amdgpu_ras_set_error_query_ready(tmp_adev, true); } - - tmp_adev = list_first_entry(device_list, struct amdgpu_device, - reset_list); - amdgpu_device_unlock_reset_domain(tmp_adev->reset_domain); - } @@ -6306,14 +6502,15 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev, */ if (need_emergency_restart && amdgpu_ras_get_context(adev) && amdgpu_ras_get_context(adev)->reboot) { - DRM_WARN("Emergency reboot."); + dev_warn(adev->dev, "Emergency reboot."); ksys_sync_helper(); emergency_restart(); } - dev_info(adev->dev, "GPU %s begin!\n", - need_emergency_restart ? "jobs stop":"reset"); + dev_info(adev->dev, "GPU %s begin!. Source: %d\n", + need_emergency_restart ? 
"jobs stop" : "reset", + reset_context->src); if (!amdgpu_sriov_vf(adev)) hive = amdgpu_get_xgmi_hive(adev); @@ -6324,11 +6521,19 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev, reset_context->hive = hive; INIT_LIST_HEAD(&device_list); - r = amdgpu_device_halt_activities(adev, job, reset_context, &device_list, - hive, need_emergency_restart); - if (r) - goto end_reset; + amdgpu_device_recovery_prepare(adev, &device_list, hive); + + if (!amdgpu_sriov_vf(adev)) { + r = amdgpu_device_health_check(&device_list); + if (r) + goto end_reset; + } + /* We need to lock reset domain only once both for XGMI and single device */ + amdgpu_device_recovery_get_reset_lock(adev, &device_list); + + amdgpu_device_halt_activities(adev, job, reset_context, &device_list, + hive, need_emergency_restart); if (need_emergency_restart) goto skip_sched_resume; /* @@ -6337,7 +6542,7 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev, * * job->base holds a reference to parent fence */ - if (job && dma_fence_is_signaled(&job->hw_fence)) { + if (job && dma_fence_is_signaled(&job->hw_fence.base)) { job_signaled = true; dev_info(adev->dev, "Guilty job already signaled, skipping HW reset"); goto skip_hw_reset; @@ -6345,13 +6550,15 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev, r = amdgpu_device_asic_reset(adev, &device_list, reset_context); if (r) - goto end_reset; + goto reset_unlock; skip_hw_reset: r = amdgpu_device_sched_resume(&device_list, reset_context, job_signaled); if (r) - goto end_reset; + goto reset_unlock; skip_sched_resume: amdgpu_device_gpu_resume(adev, &device_list, need_emergency_restart); +reset_unlock: + amdgpu_device_recovery_put_reset_lock(adev, &device_list); end_reset: if (hive) { mutex_unlock(&hive->hive_lock); @@ -6692,12 +6899,11 @@ bool amdgpu_device_is_peer_accessible(struct amdgpu_device *adev, #endif } -int amdgpu_device_baco_enter(struct drm_device *dev) +int amdgpu_device_baco_enter(struct amdgpu_device *adev) { - struct amdgpu_device *adev = drm_to_adev(dev); struct amdgpu_ras *ras = amdgpu_ras_get_context(adev); - if (!amdgpu_device_supports_baco(dev)) + if (!amdgpu_device_supports_baco(adev)) return -ENOTSUPP; if (ras && adev->ras_enabled && @@ -6707,13 +6913,12 @@ int amdgpu_device_baco_enter(struct drm_device *dev) return amdgpu_dpm_baco_enter(adev); } -int amdgpu_device_baco_exit(struct drm_device *dev) +int amdgpu_device_baco_exit(struct amdgpu_device *adev) { - struct amdgpu_device *adev = drm_to_adev(dev); struct amdgpu_ras *ras = amdgpu_ras_get_context(adev); int ret = 0; - if (!amdgpu_device_supports_baco(dev)) + if (!amdgpu_device_supports_baco(adev)) return -ENOTSUPP; ret = amdgpu_dpm_baco_exit(adev); @@ -6744,18 +6949,13 @@ pci_ers_result_t amdgpu_pci_error_detected(struct pci_dev *pdev, pci_channel_sta { struct drm_device *dev = pci_get_drvdata(pdev); struct amdgpu_device *adev = drm_to_adev(dev); - struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev); + struct amdgpu_hive_info *hive __free(xgmi_put_hive) = + amdgpu_get_xgmi_hive(adev); struct amdgpu_reset_context reset_context; struct list_head device_list; - int r = 0; dev_info(adev->dev, "PCI error: detected callback!!\n"); - if (!amdgpu_dpm_is_link_reset_supported(adev)) { - dev_warn(adev->dev, "No support for XGMI hive yet...\n"); - return PCI_ERS_RESULT_DISCONNECT; - } - adev->pci_channel_state = state; switch (state) { @@ -6765,21 +6965,32 @@ pci_ers_result_t amdgpu_pci_error_detected(struct pci_dev *pdev, pci_channel_sta case pci_channel_io_frozen: /* Fatal error, prepare for 
slot reset */ dev_info(adev->dev, "pci_channel_io_frozen: state(%d)!!\n", state); + if (hive) { + /* Hive devices should be able to support FW based + * link reset on other devices, if not return. + */ + if (!amdgpu_dpm_is_link_reset_supported(adev)) { + dev_warn(adev->dev, + "No support for XGMI hive yet...\n"); + return PCI_ERS_RESULT_DISCONNECT; + } + /* Set dpc status only if device is part of hive + * Non-hive devices should be able to recover after + * link reset. + */ + amdgpu_reset_set_dpc_status(adev, true); - if (hive) mutex_lock(&hive->hive_lock); - adev->pcie_reset_ctx.occurs_dpc = true; + } memset(&reset_context, 0, sizeof(reset_context)); INIT_LIST_HEAD(&device_list); - r = amdgpu_device_halt_activities(adev, NULL, &reset_context, &device_list, - hive, false); - if (hive) { + amdgpu_device_recovery_prepare(adev, &device_list, hive); + amdgpu_device_recovery_get_reset_lock(adev, &device_list); + amdgpu_device_halt_activities(adev, NULL, &reset_context, &device_list, + hive, false); + if (hive) mutex_unlock(&hive->hive_lock); - amdgpu_put_xgmi_hive(hive); - } - if (r) - return PCI_ERS_RESULT_DISCONNECT; return PCI_ERS_RESULT_NEED_RESET; case pci_channel_io_perm_failure: /* Permanent error, prepare for device removal */ @@ -6827,22 +7038,34 @@ pci_ers_result_t amdgpu_pci_slot_reset(struct pci_dev *pdev) struct amdgpu_device *tmp_adev; struct amdgpu_hive_info *hive; struct list_head device_list; - int r = 0, i; + struct pci_dev *link_dev; + int r = 0, i, timeout; u32 memsize; - - /* PCI error slot reset should be skipped During RAS recovery */ - if ((amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) || - amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 4)) && - amdgpu_ras_in_recovery(adev)) - return PCI_ERS_RESULT_RECOVERED; + u16 status; dev_info(adev->dev, "PCI error: slot reset callback!!\n"); memset(&reset_context, 0, sizeof(reset_context)); - /* wait for asic to come out of reset */ - msleep(700); + if (adev->pcie_reset_ctx.swus) + link_dev = adev->pcie_reset_ctx.swus; + else + link_dev = adev->pdev; + /* wait for asic to come out of reset, timeout = 10s */ + timeout = 10000; + do { + usleep_range(10000, 10500); + r = pci_read_config_word(link_dev, PCI_VENDOR_ID, &status); + timeout -= 10; + } while (timeout > 0 && (status != PCI_VENDOR_ID_ATI) && + (status != PCI_VENDOR_ID_AMD)); + + if ((status != PCI_VENDOR_ID_ATI) && (status != PCI_VENDOR_ID_AMD)) { + r = -ETIME; + goto out; + } + amdgpu_device_load_switch_state(adev); /* Restore PCI confspace */ amdgpu_device_load_pci_state(pdev); @@ -6889,8 +7112,8 @@ out: if (hive) { list_for_each_entry(tmp_adev, &device_list, reset_list) amdgpu_device_unset_mp1_state(tmp_adev); - amdgpu_device_unlock_reset_domain(adev->reset_domain); } + amdgpu_device_recovery_put_reset_lock(adev, &device_list); } if (hive) { @@ -6936,7 +7159,7 @@ void amdgpu_pci_resume(struct pci_dev *pdev) amdgpu_device_sched_resume(&device_list, NULL, NULL); amdgpu_device_gpu_resume(adev, &device_list, false); - adev->pcie_reset_ctx.occurs_dpc = false; + amdgpu_device_recovery_put_reset_lock(adev, &device_list); if (hive) { mutex_unlock(&hive->hive_lock); @@ -6944,6 +7167,65 @@ void amdgpu_pci_resume(struct pci_dev *pdev) } } +static void amdgpu_device_cache_switch_state(struct amdgpu_device *adev) +{ + struct pci_dev *swus, *swds; + int r; + + swds = pci_upstream_bridge(adev->pdev); + if (!swds || swds->vendor != PCI_VENDOR_ID_ATI || + pci_pcie_type(swds) != PCI_EXP_TYPE_DOWNSTREAM) + return; + swus = pci_upstream_bridge(swds); + if (!swus || + 
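
/*
 * Note on the poll above: each pass sleeps roughly 10 ms and debits 10 ms
 * from the budget (initially 10000 ms), i.e. about 1000 passes for the
 * documented 10 s, and a readable AMD/ATI vendor ID is taken as proof that
 * config space behind the reset link is reachable again. A generic sketch
 * of the same idea (helper name and error code are illustrative):
 */
#include <linux/delay.h>
#include <linux/errno.h>
#include <linux/minmax.h>
#include <linux/pci.h>
#include <linux/pci_ids.h>

static int example_wait_for_config_space(struct pci_dev *pdev,
					 unsigned int timeout_ms)
{
	u16 vendor = 0xffff;

	while (timeout_ms) {
		usleep_range(10000, 10500);
		pci_read_config_word(pdev, PCI_VENDOR_ID, &vendor);
		if (vendor == PCI_VENDOR_ID_ATI ||
		    vendor == PCI_VENDOR_ID_AMD)
			return 0;	/* device answers again */
		timeout_ms -= min(timeout_ms, 10u);
	}

	return -ETIMEDOUT;
}
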
(swus->vendor != PCI_VENDOR_ID_ATI && + swus->vendor != PCI_VENDOR_ID_AMD) || + pci_pcie_type(swus) != PCI_EXP_TYPE_UPSTREAM) + return; + + /* If already saved, return */ + if (adev->pcie_reset_ctx.swus) + return; + /* Upstream bridge is ATI, assume it's SWUS/DS architecture */ + r = pci_save_state(swds); + if (r) + return; + adev->pcie_reset_ctx.swds_pcistate = pci_store_saved_state(swds); + + r = pci_save_state(swus); + if (r) + return; + adev->pcie_reset_ctx.swus_pcistate = pci_store_saved_state(swus); + + adev->pcie_reset_ctx.swus = swus; +} + +static void amdgpu_device_load_switch_state(struct amdgpu_device *adev) +{ + struct pci_dev *pdev; + int r; + + if (!adev->pcie_reset_ctx.swds_pcistate || + !adev->pcie_reset_ctx.swus_pcistate) + return; + + pdev = adev->pcie_reset_ctx.swus; + r = pci_load_saved_state(pdev, adev->pcie_reset_ctx.swus_pcistate); + if (!r) { + pci_restore_state(pdev); + } else { + dev_warn(adev->dev, "Failed to load SWUS state, err:%d\n", r); + return; + } + + pdev = pci_upstream_bridge(adev->pdev); + r = pci_load_saved_state(pdev, adev->pcie_reset_ctx.swds_pcistate); + if (!r) + pci_restore_state(pdev); + else + dev_warn(adev->dev, "Failed to load SWDS state, err:%d\n", r); +} + bool amdgpu_device_cache_pci_state(struct pci_dev *pdev) { struct drm_device *dev = pci_get_drvdata(pdev); @@ -6960,14 +7242,16 @@ bool amdgpu_device_cache_pci_state(struct pci_dev *pdev) adev->pci_state = pci_store_saved_state(pdev); if (!adev->pci_state) { - DRM_ERROR("Failed to store PCI saved state"); + dev_err(adev->dev, "Failed to store PCI saved state"); return false; } } else { - DRM_WARN("Failed to save PCI state, err:%d\n", r); + dev_warn(adev->dev, "Failed to save PCI state, err:%d\n", r); return false; } + amdgpu_device_cache_switch_state(adev); + return true; } @@ -6985,7 +7269,7 @@ bool amdgpu_device_load_pci_state(struct pci_dev *pdev) if (!r) { pci_restore_state(pdev); } else { - DRM_WARN("Failed to load PCI state, err:%d\n", r); + dev_warn(adev->dev, "Failed to load PCI state, err:%d\n", r); return false; } @@ -7231,7 +7515,7 @@ struct dma_fence *amdgpu_device_enforce_isolation(struct amdgpu_device *adev, dep = amdgpu_sync_peek_fence(&isolation->prev, ring); r = amdgpu_sync_fence(&isolation->active, &f->finished, GFP_NOWAIT); if (r) - DRM_WARN("OOM tracking isolation\n"); + dev_warn(adev->dev, "OOM tracking isolation\n"); out_grab_ref: dma_fence_get(dep); @@ -7299,9 +7583,11 @@ uint32_t amdgpu_device_wait_on_rreg(struct amdgpu_device *adev, tmp_ = RREG32(reg_addr); loop--; if (!loop) { - DRM_WARN("Register(%d) [%s] failed to reach value 0x%08x != 0x%08xn", - inst, reg_name, (uint32_t)expected_value, - (uint32_t)(tmp_ & (mask))); + dev_warn( + adev->dev, + "Register(%d) [%s] failed to reach value 0x%08x != 0x%08xn", + inst, reg_name, (uint32_t)expected_value, + (uint32_t)(tmp_ & (mask))); ret = -ETIMEDOUT; break; } @@ -7352,3 +7638,53 @@ ssize_t amdgpu_show_reset_mask(char *buf, uint32_t supported_reset) size += sysfs_emit_at(buf, size, "\n"); return size; } + +void amdgpu_device_set_uid(struct amdgpu_uid *uid_info, + enum amdgpu_uid_type type, uint8_t inst, + uint64_t uid) +{ + if (!uid_info) + return; + + if (type >= AMDGPU_UID_TYPE_MAX) { + dev_err_once(uid_info->adev->dev, "Invalid UID type %d\n", + type); + return; + } + + if (inst >= AMDGPU_UID_INST_MAX) { + dev_err_once(uid_info->adev->dev, "Invalid UID instance %d\n", + inst); + return; + } + + if (uid_info->uid[type][inst] != 0) { + dev_warn_once( + uid_info->adev->dev, + "Overwriting existing UID %llu for type %d 
instance %d\n", + uid_info->uid[type][inst], type, inst); + } + + uid_info->uid[type][inst] = uid; +} + +u64 amdgpu_device_get_uid(struct amdgpu_uid *uid_info, + enum amdgpu_uid_type type, uint8_t inst) +{ + if (!uid_info) + return 0; + + if (type >= AMDGPU_UID_TYPE_MAX) { + dev_err_once(uid_info->adev->dev, "Invalid UID type %d\n", + type); + return 0; + } + + if (inst >= AMDGPU_UID_INST_MAX) { + dev_err_once(uid_info->adev->dev, "Invalid UID instance %d\n", + inst); + return 0; + } + + return uid_info->uid[type][inst]; +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c index a0e9bf9b2710..dd7b2b796427 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c @@ -276,7 +276,7 @@ static int amdgpu_discovery_read_binary_from_mem(struct amdgpu_device *adev, u32 msg; if (!amdgpu_sriov_vf(adev)) { - /* It can take up to a second for IFWI init to complete on some dGPUs, + /* It can take up to two second for IFWI init to complete on some dGPUs, * but generally it should be in the 60-100ms range. Normally this starts * as soon as the device gets power so by the time the OS loads this has long * completed. However, when a card is hotplugged via e.g., USB4, we need to @@ -284,7 +284,7 @@ static int amdgpu_discovery_read_binary_from_mem(struct amdgpu_device *adev, * continue. */ - for (i = 0; i < 1000; i++) { + for (i = 0; i < 2000; i++) { msg = RREG32(mmMP0_SMN_C2PMSG_33); if (msg & 0x80000000) break; @@ -321,10 +321,12 @@ static int amdgpu_discovery_read_binary_from_file(struct amdgpu_device *adev, const struct firmware *fw; int r; - r = request_firmware(&fw, fw_name, adev->dev); + r = firmware_request_nowarn(&fw, fw_name, adev->dev); if (r) { - dev_err(adev->dev, "can't load firmware \"%s\"\n", - fw_name); + if (amdgpu_discovery == 2) + dev_err(adev->dev, "can't load firmware \"%s\"\n", fw_name); + else + drm_info(&adev->ddev, "Optional firmware \"%s\" was not found\n", fw_name); return r; } @@ -459,16 +461,12 @@ static int amdgpu_discovery_init(struct amdgpu_device *adev) /* Read from file if it is the preferred option */ fw_name = amdgpu_discovery_get_fw_name(adev); if (fw_name != NULL) { - dev_info(adev->dev, "use ip discovery information from file"); + drm_dbg(&adev->ddev, "use ip discovery information from file"); r = amdgpu_discovery_read_binary_from_file(adev, adev->mman.discovery_bin, fw_name); - - if (r) { - dev_err(adev->dev, "failed to read ip discovery binary from file\n"); - r = -EINVAL; + if (r) goto out; - } - } else { + drm_dbg(&adev->ddev, "use ip discovery information from memory"); r = amdgpu_discovery_read_binary_from_mem( adev, adev->mman.discovery_bin); if (r) @@ -1035,7 +1033,9 @@ static uint8_t amdgpu_discovery_get_harvest_info(struct amdgpu_device *adev, /* Until a uniform way is figured, get mask based on hwid */ switch (hw_id) { case VCN_HWID: - harvest = ((1 << inst) & adev->vcn.inst_mask) == 0; + /* VCN vs UVD+VCE */ + if (!amdgpu_ip_version(adev, VCE_HWIP, 0)) + harvest = ((1 << inst) & adev->vcn.inst_mask) == 0; break; case DMU_HWID: if (adev->harvest_ip_mask & AMD_HARVEST_IP_DMU_MASK) @@ -1338,10 +1338,8 @@ static int amdgpu_discovery_reg_base_init(struct amdgpu_device *adev) int r; r = amdgpu_discovery_init(adev); - if (r) { - DRM_ERROR("amdgpu_discovery_init failed\n"); + if (r) return r; - } wafl_ver = 0; adev->gfx.xcc_mask = 0; @@ -2128,7 +2126,6 @@ static int amdgpu_discovery_set_smu_ip_blocks(struct amdgpu_device *adev) case IP_VERSION(11, 0, 5): 
case IP_VERSION(11, 0, 9): case IP_VERSION(11, 0, 7): - case IP_VERSION(11, 0, 8): case IP_VERSION(11, 0, 11): case IP_VERSION(11, 0, 12): case IP_VERSION(11, 0, 13): @@ -2136,6 +2133,10 @@ static int amdgpu_discovery_set_smu_ip_blocks(struct amdgpu_device *adev) case IP_VERSION(11, 5, 2): amdgpu_device_ip_block_add(adev, &smu_v11_0_ip_block); break; + case IP_VERSION(11, 0, 8): + if (adev->apu_flags & AMD_APU_IS_CYAN_SKILLFISH2) + amdgpu_device_ip_block_add(adev, &smu_v11_0_ip_block); + break; case IP_VERSION(12, 0, 0): case IP_VERSION(12, 0, 1): amdgpu_device_ip_block_add(adev, &smu_v12_0_ip_block); @@ -2559,41 +2560,16 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev) switch (adev->asic_type) { case CHIP_VEGA10: - case CHIP_VEGA12: - case CHIP_RAVEN: - case CHIP_VEGA20: - case CHIP_ARCTURUS: - case CHIP_ALDEBARAN: - /* this is not fatal. We have a fallback below - * if the new firmwares are not present. some of - * this will be overridden below to keep things - * consistent with the current behavior. + /* This is not fatal. We only need the discovery + * binary for sysfs. We don't need it for a + * functional system. */ - r = amdgpu_discovery_reg_base_init(adev); - if (!r) { - amdgpu_discovery_harvest_ip(adev); - amdgpu_discovery_get_gfx_info(adev); - amdgpu_discovery_get_mall_info(adev); - amdgpu_discovery_get_vcn_info(adev); - } - break; - default: - r = amdgpu_discovery_reg_base_init(adev); - if (r) - return -EINVAL; - - amdgpu_discovery_harvest_ip(adev); - amdgpu_discovery_get_gfx_info(adev); - amdgpu_discovery_get_mall_info(adev); - amdgpu_discovery_get_vcn_info(adev); - break; - } - - switch (adev->asic_type) { - case CHIP_VEGA10: + amdgpu_discovery_init(adev); vega10_reg_base_init(adev); adev->sdma.num_instances = 2; + adev->sdma.sdma_mask = 3; adev->gmc.num_umc = 4; + adev->gfx.xcc_mask = 1; adev->ip_versions[MMHUB_HWIP][0] = IP_VERSION(9, 0, 0); adev->ip_versions[ATHUB_HWIP][0] = IP_VERSION(9, 0, 0); adev->ip_versions[OSSSYS_HWIP][0] = IP_VERSION(4, 0, 0); @@ -2613,9 +2589,16 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev) adev->ip_versions[DCI_HWIP][0] = IP_VERSION(12, 0, 0); break; case CHIP_VEGA12: + /* This is not fatal. We only need the discovery + * binary for sysfs. We don't need it for a + * functional system. + */ + amdgpu_discovery_init(adev); vega10_reg_base_init(adev); adev->sdma.num_instances = 2; + adev->sdma.sdma_mask = 3; adev->gmc.num_umc = 4; + adev->gfx.xcc_mask = 1; adev->ip_versions[MMHUB_HWIP][0] = IP_VERSION(9, 3, 0); adev->ip_versions[ATHUB_HWIP][0] = IP_VERSION(9, 3, 0); adev->ip_versions[OSSSYS_HWIP][0] = IP_VERSION(4, 0, 1); @@ -2635,10 +2618,17 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev) adev->ip_versions[DCI_HWIP][0] = IP_VERSION(12, 0, 1); break; case CHIP_RAVEN: + /* This is not fatal. We only need the discovery + * binary for sysfs. We don't need it for a + * functional system. + */ + amdgpu_discovery_init(adev); vega10_reg_base_init(adev); adev->sdma.num_instances = 1; + adev->sdma.sdma_mask = 1; adev->vcn.num_vcn_inst = 1; adev->gmc.num_umc = 2; + adev->gfx.xcc_mask = 1; if (adev->apu_flags & AMD_APU_IS_RAVEN2) { adev->ip_versions[MMHUB_HWIP][0] = IP_VERSION(9, 2, 0); adev->ip_versions[ATHUB_HWIP][0] = IP_VERSION(9, 2, 0); @@ -2676,9 +2666,16 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev) } break; case CHIP_VEGA20: + /* This is not fatal. We only need the discovery + * binary for sysfs. We don't need it for a + * functional system. 
+ */ + amdgpu_discovery_init(adev); vega20_reg_base_init(adev); adev->sdma.num_instances = 2; + adev->sdma.sdma_mask = 3; adev->gmc.num_umc = 8; + adev->gfx.xcc_mask = 1; adev->ip_versions[MMHUB_HWIP][0] = IP_VERSION(9, 4, 0); adev->ip_versions[ATHUB_HWIP][0] = IP_VERSION(9, 4, 0); adev->ip_versions[OSSSYS_HWIP][0] = IP_VERSION(4, 2, 0); @@ -2699,10 +2696,17 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev) adev->ip_versions[DCI_HWIP][0] = IP_VERSION(12, 1, 0); break; case CHIP_ARCTURUS: + /* This is not fatal. We only need the discovery + * binary for sysfs. We don't need it for a + * functional system. + */ + amdgpu_discovery_init(adev); arct_reg_base_init(adev); adev->sdma.num_instances = 8; + adev->sdma.sdma_mask = 0xff; adev->vcn.num_vcn_inst = 2; adev->gmc.num_umc = 8; + adev->gfx.xcc_mask = 1; adev->ip_versions[MMHUB_HWIP][0] = IP_VERSION(9, 4, 1); adev->ip_versions[ATHUB_HWIP][0] = IP_VERSION(9, 4, 1); adev->ip_versions[OSSSYS_HWIP][0] = IP_VERSION(4, 2, 1); @@ -2727,10 +2731,17 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev) adev->ip_versions[UVD_HWIP][1] = IP_VERSION(2, 5, 0); break; case CHIP_ALDEBARAN: + /* This is not fatal. We only need the discovery + * binary for sysfs. We don't need it for a + * functional system. + */ + amdgpu_discovery_init(adev); aldebaran_reg_base_init(adev); adev->sdma.num_instances = 5; + adev->sdma.sdma_mask = 0x1f; adev->vcn.num_vcn_inst = 2; adev->gmc.num_umc = 4; + adev->gfx.xcc_mask = 1; adev->ip_versions[MMHUB_HWIP][0] = IP_VERSION(9, 4, 2); adev->ip_versions[ATHUB_HWIP][0] = IP_VERSION(9, 4, 2); adev->ip_versions[OSSSYS_HWIP][0] = IP_VERSION(4, 4, 0); @@ -2752,7 +2763,49 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev) adev->ip_versions[UVD_HWIP][1] = IP_VERSION(2, 6, 0); adev->ip_versions[XGMI_HWIP][0] = IP_VERSION(6, 1, 0); break; + case CHIP_CYAN_SKILLFISH: + if (adev->apu_flags & AMD_APU_IS_CYAN_SKILLFISH2) { + r = amdgpu_discovery_reg_base_init(adev); + if (r) + return -EINVAL; + + amdgpu_discovery_harvest_ip(adev); + amdgpu_discovery_get_gfx_info(adev); + amdgpu_discovery_get_mall_info(adev); + amdgpu_discovery_get_vcn_info(adev); + } else { + cyan_skillfish_reg_base_init(adev); + adev->sdma.num_instances = 2; + adev->sdma.sdma_mask = 3; + adev->gfx.xcc_mask = 1; + adev->ip_versions[MMHUB_HWIP][0] = IP_VERSION(2, 0, 3); + adev->ip_versions[ATHUB_HWIP][0] = IP_VERSION(2, 0, 3); + adev->ip_versions[OSSSYS_HWIP][0] = IP_VERSION(5, 0, 1); + adev->ip_versions[HDP_HWIP][0] = IP_VERSION(5, 0, 1); + adev->ip_versions[SDMA0_HWIP][0] = IP_VERSION(5, 0, 1); + adev->ip_versions[SDMA1_HWIP][1] = IP_VERSION(5, 0, 1); + adev->ip_versions[DF_HWIP][0] = IP_VERSION(3, 5, 0); + adev->ip_versions[NBIO_HWIP][0] = IP_VERSION(2, 1, 1); + adev->ip_versions[UMC_HWIP][0] = IP_VERSION(8, 1, 1); + adev->ip_versions[MP0_HWIP][0] = IP_VERSION(11, 0, 8); + adev->ip_versions[MP1_HWIP][0] = IP_VERSION(11, 0, 8); + adev->ip_versions[THM_HWIP][0] = IP_VERSION(11, 0, 1); + adev->ip_versions[SMUIO_HWIP][0] = IP_VERSION(11, 0, 8); + adev->ip_versions[GC_HWIP][0] = IP_VERSION(10, 1, 3); + adev->ip_versions[UVD_HWIP][0] = IP_VERSION(2, 0, 3); + } + break; default: + r = amdgpu_discovery_reg_base_init(adev); + if (r) { + drm_err(&adev->ddev, "discovery failed: %d\n", r); + return r; + } + + amdgpu_discovery_harvest_ip(adev); + amdgpu_discovery_get_gfx_info(adev); + amdgpu_discovery_get_mall_info(adev); + amdgpu_discovery_get_vcn_info(adev); break; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c index 35c778426a7c..51bab32fd8c6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c @@ -1196,13 +1196,14 @@ static int amdgpu_display_get_fb_info(const struct amdgpu_framebuffer *amdgpu_fb static int amdgpu_display_gem_fb_verify_and_init(struct drm_device *dev, struct amdgpu_framebuffer *rfb, struct drm_file *file_priv, + const struct drm_format_info *info, const struct drm_mode_fb_cmd2 *mode_cmd, struct drm_gem_object *obj) { int ret; rfb->base.obj[0] = obj; - drm_helper_mode_fill_fb_struct(dev, &rfb->base, mode_cmd); + drm_helper_mode_fill_fb_struct(dev, &rfb->base, info, mode_cmd); /* Verify that the modifier is supported. */ if (!drm_any_plane_has_format(dev, mode_cmd->pixel_format, mode_cmd->modifier[0])) { @@ -1297,6 +1298,7 @@ static int amdgpu_display_framebuffer_init(struct drm_device *dev, struct drm_framebuffer * amdgpu_display_user_framebuffer_create(struct drm_device *dev, struct drm_file *file_priv, + const struct drm_format_info *info, const struct drm_mode_fb_cmd2 *mode_cmd) { struct amdgpu_framebuffer *amdgpu_fb; @@ -1317,7 +1319,7 @@ amdgpu_display_user_framebuffer_create(struct drm_device *dev, /* Handle is imported dma-buf, so cannot be migrated to VRAM for scanout */ bo = gem_to_amdgpu_bo(obj); domains = amdgpu_display_supported_domains(drm_to_adev(dev), bo->flags); - if (obj->import_attach && !(domains & AMDGPU_GEM_DOMAIN_GTT)) { + if (drm_gem_is_imported(obj) && !(domains & AMDGPU_GEM_DOMAIN_GTT)) { drm_dbg_kms(dev, "Cannot create framebuffer from imported dma_buf\n"); drm_gem_object_put(obj); return ERR_PTR(-EINVAL); @@ -1330,7 +1332,7 @@ amdgpu_display_user_framebuffer_create(struct drm_device *dev, } ret = amdgpu_display_gem_fb_verify_and_init(dev, amdgpu_fb, file_priv, - mode_cmd, obj); + info, mode_cmd, obj); if (ret) { kfree(amdgpu_fb); drm_gem_object_put(obj); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.h index dfa0d642ac16..930c171473b4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.h @@ -44,6 +44,7 @@ uint32_t amdgpu_display_supported_domains(struct amdgpu_device *adev, struct drm_framebuffer * amdgpu_display_user_framebuffer_create(struct drm_device *dev, struct drm_file *file_priv, + const struct drm_format_info *info, const struct drm_mode_fb_cmd2 *mode_cmd); const struct drm_format_info * amdgpu_lookup_format_info(u32 format, uint64_t modifier); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c index 44e120f9f764..8561ad7f6180 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c @@ -285,6 +285,36 @@ static int amdgpu_dma_buf_begin_cpu_access(struct dma_buf *dma_buf, return ret; } +static int amdgpu_dma_buf_vmap(struct dma_buf *dma_buf, struct iosys_map *map) +{ + struct drm_gem_object *obj = dma_buf->priv; + struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj); + int ret; + + /* + * Pin to keep buffer in place while it's vmap'ed. The actual + * domain is not that important as long as it's mapable. Using + * GTT and VRAM should be compatible with most use cases. 
+ */ + ret = amdgpu_bo_pin(bo, AMDGPU_GEM_DOMAIN_GTT | AMDGPU_GEM_DOMAIN_VRAM); + if (ret) + return ret; + ret = drm_gem_dmabuf_vmap(dma_buf, map); + if (ret) + amdgpu_bo_unpin(bo); + + return ret; +} + +static void amdgpu_dma_buf_vunmap(struct dma_buf *dma_buf, struct iosys_map *map) +{ + struct drm_gem_object *obj = dma_buf->priv; + struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj); + + drm_gem_dmabuf_vunmap(dma_buf, map); + amdgpu_bo_unpin(bo); +} + const struct dma_buf_ops amdgpu_dmabuf_ops = { .attach = amdgpu_dma_buf_attach, .pin = amdgpu_dma_buf_pin, @@ -294,8 +324,8 @@ const struct dma_buf_ops amdgpu_dmabuf_ops = { .release = drm_gem_dmabuf_release, .begin_cpu_access = amdgpu_dma_buf_begin_cpu_access, .mmap = drm_gem_dmabuf_mmap, - .vmap = drm_gem_dmabuf_vmap, - .vunmap = drm_gem_dmabuf_vunmap, + .vmap = amdgpu_dma_buf_vmap, + .vunmap = amdgpu_dma_buf_vunmap, }; /** @@ -313,11 +343,23 @@ struct dma_buf *amdgpu_gem_prime_export(struct drm_gem_object *gobj, { struct amdgpu_bo *bo = gem_to_amdgpu_bo(gobj); struct dma_buf *buf; + struct ttm_operation_ctx ctx = { + .interruptible = true, + .no_wait_gpu = true, + /* We opt to avoid OOM on system pages allocations */ + .gfp_retry_mayfail = true, + .allow_res_evict = false, + }; + int ret; if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm) || bo->flags & AMDGPU_GEM_CREATE_VM_ALWAYS_VALID) return ERR_PTR(-EPERM); + ret = ttm_bo_setup_export(&bo->tbo, &ctx); + if (ret) + return ERR_PTR(ret); + buf = drm_gem_prime_export(gobj, flags); if (!IS_ERR(buf)) buf->ops = &amdgpu_dmabuf_ops; @@ -513,7 +555,7 @@ bool amdgpu_dmabuf_is_xgmi_accessible(struct amdgpu_device *adev, if (!adev) return false; - if (obj->import_attach) { + if (drm_gem_is_imported(obj)) { struct dma_buf *dma_buf = obj->import_attach->dmabuf; if (dma_buf->ops != &amdgpu_dmabuf_ops) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell_mgr.c index 3f3662e8b871..3040437d99c2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell_mgr.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell_mgr.c @@ -41,7 +41,8 @@ u32 amdgpu_mm_rdoorbell(struct amdgpu_device *adev, u32 index) if (index < adev->doorbell.num_kernel_doorbells) return readl(adev->doorbell.cpu_addr + index); - DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index); + dev_err(adev->dev, "reading beyond doorbell aperture: 0x%08x!\n", + index); return 0; } @@ -63,7 +64,8 @@ void amdgpu_mm_wdoorbell(struct amdgpu_device *adev, u32 index, u32 v) if (index < adev->doorbell.num_kernel_doorbells) writel(v, adev->doorbell.cpu_addr + index); else - DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index); + dev_err(adev->dev, + "writing beyond doorbell aperture: 0x%08x!\n", index); } /** @@ -83,7 +85,8 @@ u64 amdgpu_mm_rdoorbell64(struct amdgpu_device *adev, u32 index) if (index < adev->doorbell.num_kernel_doorbells) return atomic64_read((atomic64_t *)(adev->doorbell.cpu_addr + index)); - DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index); + dev_err(adev->dev, "reading beyond doorbell aperture: 0x%08x!\n", + index); return 0; } @@ -105,7 +108,8 @@ void amdgpu_mm_wdoorbell64(struct amdgpu_device *adev, u32 index, u64 v) if (index < adev->doorbell.num_kernel_doorbells) atomic64_set((atomic64_t *)(adev->doorbell.cpu_addr + index), v); else - DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index); + dev_err(adev->dev, + "writing beyond doorbell aperture: 0x%08x!\n", index); } /** @@ -166,7 +170,8 @@ int amdgpu_doorbell_create_kernel_doorbells(struct amdgpu_device 
*adev) NULL, (void **)&adev->doorbell.cpu_addr); if (r) { - DRM_ERROR("Failed to allocate kernel doorbells, err=%d\n", r); + dev_err(adev->dev, + "Failed to allocate kernel doorbells, err=%d\n", r); return r; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 4db92e0a60da..61268aa82df4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -144,6 +144,8 @@ enum AMDGPU_DEBUG_MASK { AMDGPU_DEBUG_DISABLE_GPU_RING_RESET = BIT(6), AMDGPU_DEBUG_SMU_POOL = BIT(7), AMDGPU_DEBUG_VM_USERPTR = BIT(8), + AMDGPU_DEBUG_DISABLE_RAS_CE_LOG = BIT(9), + AMDGPU_DEBUG_ENABLE_CE_CS = BIT(10) }; unsigned int amdgpu_vram_limit = UINT_MAX; @@ -361,12 +363,12 @@ module_param_named(svm_default_granularity, amdgpu_svm_default_granularity, uint * The second one is for Compute. The third and fourth ones are * for SDMA and Video. * - * By default(with no lockup_timeout settings), the timeout for all non-compute(GFX, SDMA and Video) - * jobs is 10000. The timeout for compute is 60000. + * By default(with no lockup_timeout settings), the timeout for all jobs is 10000. */ -MODULE_PARM_DESC(lockup_timeout, "GPU lockup timeout in ms (default: for bare metal 10000 for non-compute jobs and 60000 for compute jobs; " - "for passthrough or sriov, 10000 for all jobs. 0: keep default value. negative: infinity timeout), format: for bare metal [Non-Compute] or [GFX,Compute,SDMA,Video]; " - "for passthrough or sriov [all jobs] or [GFX,Compute,SDMA,Video]."); +MODULE_PARM_DESC(lockup_timeout, + "GPU lockup timeout in ms (default: 10000 for all jobs. " + "0: keep default value. negative: infinity timeout), format: for bare metal [Non-Compute] or [GFX,Compute,SDMA,Video]; " + "for passthrough or sriov [all jobs] or [GFX,Compute,SDMA,Video]."); module_param_string(lockup_timeout, amdgpu_lockup_timeout, sizeof(amdgpu_lockup_timeout), 0444); /** @@ -885,7 +887,7 @@ module_param_named(dcfeaturemask, amdgpu_dc_feature_mask, uint, 0444); /** * DOC: dcdebugmask (uint) - * Override display features enabled. See enum DC_DEBUG_MASK in drivers/gpu/drm/amd/include/amd_shared.h. + * Display debug options. See enum DC_DEBUG_MASK in drivers/gpu/drm/amd/include/amd_shared.h. 
*/ MODULE_PARM_DESC(dcdebugmask, "all debug options disabled (default))"); module_param_named(dcdebugmask, amdgpu_dc_debug_mask, uint, 0444); @@ -959,7 +961,7 @@ module_param_named(tmz, amdgpu_tmz, int, 0444); */ MODULE_PARM_DESC( freesync_video, - "Enable freesync modesetting optimization feature (0 = off (default), 1 = on)"); + "Adds additional modes via VRR for refresh changes without a full modeset (0 = off (default), 1 = on)"); module_param_named(freesync_video, amdgpu_freesync_vid_mode, uint, 0444); /** @@ -2171,6 +2173,11 @@ static const struct pci_device_id pciidlist[] = { {0x1002, 0x7410, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ALDEBARAN}, /* CYAN_SKILLFISH */ + {0x1002, 0x13DB, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_CYAN_SKILLFISH|AMD_IS_APU}, + {0x1002, 0x13F9, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_CYAN_SKILLFISH|AMD_IS_APU}, + {0x1002, 0x13FA, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_CYAN_SKILLFISH|AMD_IS_APU}, + {0x1002, 0x13FB, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_CYAN_SKILLFISH|AMD_IS_APU}, + {0x1002, 0x13FC, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_CYAN_SKILLFISH|AMD_IS_APU}, {0x1002, 0x13FE, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_CYAN_SKILLFISH|AMD_IS_APU}, {0x1002, 0x143F, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_CYAN_SKILLFISH|AMD_IS_APU}, @@ -2278,6 +2285,16 @@ static void amdgpu_init_debug_options(struct amdgpu_device *adev) pr_info("debug: VM mode debug for userptr is enabled\n"); adev->debug_vm_userptr = true; } + + if (amdgpu_debug_mask & AMDGPU_DEBUG_DISABLE_RAS_CE_LOG) { + pr_info("debug: disable kernel logs of correctable errors\n"); + adev->debug_disable_ce_logs = true; + } + + if (amdgpu_debug_mask & AMDGPU_DEBUG_ENABLE_CE_CS) { + pr_info("debug: allowing command submission to CE engine\n"); + adev->debug_enable_ce_cs = true; + } } static unsigned long amdgpu_fix_asic_type(struct pci_dev *pdev, unsigned long flags) @@ -2321,7 +2338,7 @@ static int amdgpu_pci_probe(struct pci_dev *pdev, amdgpu_aspm = 0; if (amdgpu_virtual_display || - amdgpu_device_asic_has_dc_support(flags & AMD_ASIC_MASK)) + amdgpu_device_asic_has_dc_support(pdev, flags & AMD_ASIC_MASK)) supports_atomic = true; if ((flags & AMD_EXP_HW_SUPPORT) && !amdgpu_exp_hw_support) { @@ -2451,10 +2468,10 @@ retry_init: if (adev->pm.rpm_mode != AMDGPU_RUNPM_NONE) { /* only need to skip on ATPX */ - if (amdgpu_device_supports_px(ddev)) + if (amdgpu_device_supports_px(adev)) dev_pm_set_driver_flags(ddev->dev, DPM_FLAG_NO_DIRECT_COMPLETE); /* we want direct complete for BOCO */ - if (amdgpu_device_supports_boco(ddev)) + if (amdgpu_device_supports_boco(adev)) dev_pm_set_driver_flags(ddev->dev, DPM_FLAG_SMART_PREPARE | DPM_FLAG_SMART_SUSPEND | DPM_FLAG_MAY_SKIP_RESUME); @@ -2487,9 +2504,9 @@ retry_init: * into D0 state. Then there will be a PMFW-aware D-state * transition(D0->D3) on runpm suspend. 
*/ - if (amdgpu_device_supports_baco(ddev) && + if (amdgpu_device_supports_baco(adev) && !(adev->flags & AMD_IS_APU) && - (adev->asic_type >= CHIP_NAVI10)) + adev->asic_type >= CHIP_NAVI10) amdgpu_get_secondary_funcs(adev); } @@ -2506,6 +2523,7 @@ amdgpu_pci_remove(struct pci_dev *pdev) struct drm_device *dev = pci_get_drvdata(pdev); struct amdgpu_device *adev = drm_to_adev(dev); + amdgpu_ras_eeprom_check_and_recover(adev); amdgpu_xcp_dev_unplug(adev); amdgpu_gmc_prepare_nps_mode_change(adev); drm_dev_unplug(dev); @@ -2535,6 +2553,10 @@ amdgpu_pci_shutdown(struct pci_dev *pdev) if (amdgpu_ras_intr_triggered()) return; + /* device maybe not resumed here, return immediately in this case */ + if (adev->in_s4 && adev->in_suspend) + return; + /* if we are running in a VM, make sure the device * torn down properly on reboot/shutdown. * unfortunately we can't detect certain @@ -2551,11 +2573,14 @@ static int amdgpu_pmops_prepare(struct device *dev) struct drm_device *drm_dev = dev_get_drvdata(dev); struct amdgpu_device *adev = drm_to_adev(drm_dev); + /* device maybe not resumed here, return immediately in this case */ + if (adev->in_s4 && adev->in_suspend) + return 0; + /* Return a positive number here so * DPM_FLAG_SMART_SUSPEND works properly */ - if (amdgpu_device_supports_boco(drm_dev) && - pm_runtime_suspended(dev)) + if (amdgpu_device_supports_boco(adev) && pm_runtime_suspended(dev)) return 1; /* if we will not support s3 or s2i for the device @@ -2570,7 +2595,7 @@ static int amdgpu_pmops_prepare(struct device *dev) static void amdgpu_pmops_complete(struct device *dev) { - /* nothing to do */ + amdgpu_device_complete(dev_get_drvdata(dev)); } static int amdgpu_pmops_suspend(struct device *dev) @@ -2583,6 +2608,7 @@ static int amdgpu_pmops_suspend(struct device *dev) else if (amdgpu_acpi_is_s3_active(adev)) adev->in_s3 = true; if (!adev->in_s0ix && !adev->in_s3) { +#if IS_ENABLED(CONFIG_SUSPEND) /* don't allow going deep first time followed by s2idle the next time */ if (adev->last_suspend_state != PM_SUSPEND_ON && adev->last_suspend_state != pm_suspend_target_state) { @@ -2590,11 +2616,14 @@ static int amdgpu_pmops_suspend(struct device *dev) pm_suspend_target_state); return -EINVAL; } +#endif return 0; } +#if IS_ENABLED(CONFIG_SUSPEND) /* cache the state last used for suspend */ adev->last_suspend_state = pm_suspend_target_state; +#endif return amdgpu_device_suspend(drm_dev, true); } @@ -2650,12 +2679,21 @@ static int amdgpu_pmops_thaw(struct device *dev) { struct drm_device *drm_dev = dev_get_drvdata(dev); + /* do not resume device if it's normal hibernation */ + if (!pm_hibernate_is_recovering() && !pm_hibernation_mode_is_suspend()) + return 0; + return amdgpu_device_resume(drm_dev, true); } static int amdgpu_pmops_poweroff(struct device *dev) { struct drm_device *drm_dev = dev_get_drvdata(dev); + struct amdgpu_device *adev = drm_to_adev(drm_dev); + + /* device maybe not resumed here, return immediately in this case */ + if (adev->in_s4 && adev->in_suspend) + return 0; return amdgpu_device_suspend(drm_dev, true); } @@ -2828,7 +2866,7 @@ static int amdgpu_pmops_runtime_suspend(struct device *dev) /* nothing to do */ } else if ((adev->pm.rpm_mode == AMDGPU_RUNPM_BACO) || (adev->pm.rpm_mode == AMDGPU_RUNPM_BAMACO)) { - amdgpu_device_baco_enter(drm_dev); + amdgpu_device_baco_enter(adev); } dev_dbg(&pdev->dev, "asic/device is runtime suspended\n"); @@ -2869,7 +2907,7 @@ static int amdgpu_pmops_runtime_resume(struct device *dev) pci_set_master(pdev); } else if ((adev->pm.rpm_mode == 
AMDGPU_RUNPM_BACO) || (adev->pm.rpm_mode == AMDGPU_RUNPM_BAMACO)) { - amdgpu_device_baco_exit(drm_dev); + amdgpu_device_baco_exit(adev); } ret = amdgpu_device_resume(drm_dev, false); if (ret) { @@ -2910,11 +2948,14 @@ static int amdgpu_drm_release(struct inode *inode, struct file *filp) { struct drm_file *file_priv = filp->private_data; struct amdgpu_fpriv *fpriv = file_priv->driver_priv; + struct drm_device *dev = file_priv->minor->dev; + int idx; - if (fpriv) { + if (fpriv && drm_dev_enter(dev, &idx)) { fpriv->evf_mgr.fd_closing = true; amdgpu_eviction_fence_destroy(&fpriv->evf_mgr); amdgpu_userq_mgr_fini(&fpriv->userq_mgr); + drm_dev_exit(idx); } return drm_release(inode, filp); @@ -2941,15 +2982,15 @@ out: } static const struct dev_pm_ops amdgpu_pm_ops = { - .prepare = amdgpu_pmops_prepare, - .complete = amdgpu_pmops_complete, - .suspend = amdgpu_pmops_suspend, - .suspend_noirq = amdgpu_pmops_suspend_noirq, - .resume = amdgpu_pmops_resume, - .freeze = amdgpu_pmops_freeze, - .thaw = amdgpu_pmops_thaw, - .poweroff = amdgpu_pmops_poweroff, - .restore = amdgpu_pmops_restore, + .prepare = pm_sleep_ptr(amdgpu_pmops_prepare), + .complete = pm_sleep_ptr(amdgpu_pmops_complete), + .suspend = pm_sleep_ptr(amdgpu_pmops_suspend), + .suspend_noirq = pm_sleep_ptr(amdgpu_pmops_suspend_noirq), + .resume = pm_sleep_ptr(amdgpu_pmops_resume), + .freeze = pm_sleep_ptr(amdgpu_pmops_freeze), + .thaw = pm_sleep_ptr(amdgpu_pmops_thaw), + .poweroff = pm_sleep_ptr(amdgpu_pmops_poweroff), + .restore = pm_sleep_ptr(amdgpu_pmops_restore), .runtime_suspend = amdgpu_pmops_runtime_suspend, .runtime_resume = amdgpu_pmops_runtime_resume, .runtime_idle = amdgpu_pmops_runtime_idle, @@ -3021,6 +3062,7 @@ const struct drm_ioctl_desc amdgpu_ioctls_kms[] = { DRM_IOCTL_DEF_DRV(AMDGPU_USERQ, amdgpu_userq_ioctl, DRM_AUTH|DRM_RENDER_ALLOW), DRM_IOCTL_DEF_DRV(AMDGPU_USERQ_SIGNAL, amdgpu_userq_signal_ioctl, DRM_AUTH|DRM_RENDER_ALLOW), DRM_IOCTL_DEF_DRV(AMDGPU_USERQ_WAIT, amdgpu_userq_wait_ioctl, DRM_AUTH|DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(AMDGPU_GEM_LIST_HANDLES, amdgpu_gem_list_handles_ioctl, DRM_AUTH|DRM_RENDER_ALLOW), }; static const struct drm_driver amdgpu_kms_driver = { @@ -3094,7 +3136,7 @@ static struct pci_driver amdgpu_kms_pci_driver = { .probe = amdgpu_pci_probe, .remove = amdgpu_pci_remove, .shutdown = amdgpu_pci_shutdown, - .driver.pm = &amdgpu_pm_ops, + .driver.pm = pm_ptr(&amdgpu_pm_ops), .err_handler = &amdgpu_pci_err_handler, .dev_groups = amdgpu_sysfs_groups, }; @@ -3107,10 +3149,6 @@ static int __init amdgpu_init(void) if (r) goto error_sync; - r = amdgpu_fence_slab_init(); - if (r) - goto error_fence; - r = amdgpu_userq_fence_slab_init(); if (r) goto error_fence; @@ -3145,7 +3183,6 @@ static void __exit amdgpu_exit(void) amdgpu_unregister_atpx_handler(); amdgpu_acpi_release(); amdgpu_sync_fini(); - amdgpu_fence_slab_fini(); amdgpu_userq_fence_slab_fini(); mmu_notifier_synchronize(); amdgpu_xcp_drv_release(); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fdinfo.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fdinfo.c index 91d638098889..b349bb3676d5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fdinfo.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fdinfo.c @@ -70,6 +70,7 @@ void amdgpu_show_fdinfo(struct drm_printer *p, struct drm_file *file) [AMDGPU_PL_GWS] = "gws", [AMDGPU_PL_OA] = "oa", [AMDGPU_PL_DOORBELL] = "doorbell", + [AMDGPU_PL_MMIO_REMAP] = "mmioremap", }; unsigned int hw_ip, i; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c index 8cecf25996ed..18a7829122d2 
100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c @@ -42,37 +42,6 @@ #include "amdgpu_reset.h" /* - * Fences mark an event in the GPUs pipeline and are used - * for GPU/CPU synchronization. When the fence is written, - * it is expected that all buffers associated with that fence - * are no longer in use by the associated ring on the GPU and - * that the relevant GPU caches have been flushed. - */ - -struct amdgpu_fence { - struct dma_fence base; - - /* RB, DMA, etc. */ - struct amdgpu_ring *ring; - ktime_t start_timestamp; -}; - -static struct kmem_cache *amdgpu_fence_slab; - -int amdgpu_fence_slab_init(void) -{ - amdgpu_fence_slab = KMEM_CACHE(amdgpu_fence, SLAB_HWCACHE_ALIGN); - if (!amdgpu_fence_slab) - return -ENOMEM; - return 0; -} - -void amdgpu_fence_slab_fini(void) -{ - rcu_barrier(); - kmem_cache_destroy(amdgpu_fence_slab); -} -/* * Cast helper */ static const struct dma_fence_ops amdgpu_fence_ops; @@ -130,14 +99,14 @@ static u32 amdgpu_fence_read(struct amdgpu_ring *ring) * * @ring: ring the fence is associated with * @f: resulting fence object - * @job: job the fence is embedded in + * @af: amdgpu fence input * @flags: flags to pass into the subordinate .emit_fence() call * * Emits a fence command on the requested ring (all asics). * Returns 0 on success, -ENOMEM on failure. */ -int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **f, struct amdgpu_job *job, - unsigned int flags) +int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **f, + struct amdgpu_fence *af, unsigned int flags) { struct amdgpu_device *adev = ring->adev; struct dma_fence *fence; @@ -146,40 +115,34 @@ int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **f, struct amd uint32_t seq; int r; - if (job == NULL) { - /* create a sperate hw fence */ - am_fence = kmem_cache_alloc(amdgpu_fence_slab, GFP_ATOMIC); - if (am_fence == NULL) + if (!af) { + /* create a separate hw fence */ + am_fence = kzalloc(sizeof(*am_fence), GFP_KERNEL); + if (!am_fence) return -ENOMEM; - fence = &am_fence->base; - am_fence->ring = ring; } else { - /* take use of job-embedded fence */ - fence = &job->hw_fence; + am_fence = af; } + fence = &am_fence->base; + am_fence->ring = ring; seq = ++ring->fence_drv.sync_seq; - if (job && job->job_run_counter) { - /* reinit seq for resubmitted jobs */ - fence->seqno = seq; - /* TO be inline with external fence creation and other drivers */ + am_fence->seq = seq; + if (af) { + dma_fence_init(fence, &amdgpu_job_fence_ops, + &ring->fence_drv.lock, + adev->fence_context + ring->idx, seq); + /* Against remove in amdgpu_job_{free, free_cb} */ dma_fence_get(fence); } else { - if (job) { - dma_fence_init(fence, &amdgpu_job_fence_ops, - &ring->fence_drv.lock, - adev->fence_context + ring->idx, seq); - /* Against remove in amdgpu_job_{free, free_cb} */ - dma_fence_get(fence); - } else { - dma_fence_init(fence, &amdgpu_fence_ops, - &ring->fence_drv.lock, - adev->fence_context + ring->idx, seq); - } + dma_fence_init(fence, &amdgpu_fence_ops, + &ring->fence_drv.lock, + adev->fence_context + ring->idx, seq); } amdgpu_ring_emit_fence(ring, ring->fence_drv.gpu_addr, seq, flags | AMDGPU_FENCE_FLAG_INT); + amdgpu_fence_save_wptr(fence); pm_runtime_get_noresume(adev_to_drm(adev)->dev); ptr = &ring->fence_drv.fences[seq & ring->fence_drv.num_fences_mask]; if (unlikely(rcu_dereference_protected(*ptr, 1))) { @@ -292,6 +255,7 @@ bool amdgpu_fence_process(struct amdgpu_ring *ring) do { struct dma_fence *fence, **ptr; + struct 
amdgpu_fence *am_fence; ++last_seq; last_seq &= drv->num_fences_mask; @@ -304,6 +268,12 @@ bool amdgpu_fence_process(struct amdgpu_ring *ring) if (!fence) continue; + /* Save the wptr in the fence driver so we know what the last processed + * wptr was. This is required for re-emitting the ring state for + * queues that are reset but are not guilty and thus have no guilty fence. + */ + am_fence = container_of(fence, struct amdgpu_fence, base); + drv->signalled_wptr = am_fence->wptr; dma_fence_signal(fence); dma_fence_put(fence); pm_runtime_mark_last_busy(adev_to_drm(adev)->dev); @@ -326,7 +296,9 @@ static void amdgpu_fence_fallback(struct timer_list *t) fence_drv.fallback_timer); if (amdgpu_fence_process(ring)) - DRM_WARN("Fence fallback timer expired on ring %s\n", ring->name); + dev_warn(ring->adev->dev, + "Fence fallback timer expired on ring %s\n", + ring->name); } /** @@ -718,7 +690,7 @@ void amdgpu_fence_driver_clear_job_fences(struct amdgpu_ring *ring) * it right here or we won't be able to track them in fence_drv * and they will remain unsignaled during sa_bo free. */ - job = container_of(old, struct amdgpu_job, hw_fence); + job = container_of(old, struct amdgpu_job, hw_fence.base); if (!job->base.s_fence && !dma_fence_is_signaled(old)) dma_fence_signal(old); RCU_INIT_POINTER(*ptr, NULL); @@ -764,6 +736,122 @@ void amdgpu_fence_driver_force_completion(struct amdgpu_ring *ring) amdgpu_fence_process(ring); } + +/* + * Kernel queue reset handling + * + * The driver can reset individual queues for most engines, but those queues + * may contain work from multiple contexts. Resetting the queue will reset + * lose all of that state. In order to minimize the collateral damage, the + * driver will save the ring contents which are not associated with the guilty + * context prior to resetting the queue. After resetting the queue the queue + * contents from the other contexts is re-emitted to the rings so that it can + * be processed by the engine. To handle this, we save the queue's write + * pointer (wptr) in the fences associated with each context. If we get a + * queue timeout, we can then use the wptrs from the fences to determine + * which data needs to be saved out of the queue's ring buffer. 
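+ *
+ * Roughly, the pieces added below fit together as follows (an
+ * illustrative outline, not an exact call chain):
+ *
+ *   1. amdgpu_fence_emit() records the ring wptr in each amdgpu_fence
+ *      via amdgpu_fence_save_wptr() as work is submitted.
+ *   2. On a queue timeout, amdgpu_ring_backup_unprocessed_commands()
+ *      walks the unsignaled fences and copies the ring contents that do
+ *      not belong to the guilty context into ring->ring_backup.
+ *   3. amdgpu_fence_driver_guilty_force_completion() flags the guilty
+ *      fence with -ETIME, the other fences of that context with
+ *      -ECANCELED, and force-signals the ring up to the guilty fence.
+ *   4. After the queue reset, the saved ring_backup contents are
+ *      re-emitted so the innocent work can still be processed.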
+ */ + +/** + * amdgpu_fence_driver_guilty_force_completion - force signal of specified sequence + * + * @fence: fence of the ring to signal + * + */ +void amdgpu_fence_driver_guilty_force_completion(struct amdgpu_fence *af) +{ + struct dma_fence *unprocessed; + struct dma_fence __rcu **ptr; + struct amdgpu_fence *fence; + struct amdgpu_ring *ring = af->ring; + unsigned long flags; + u32 seq, last_seq; + + last_seq = amdgpu_fence_read(ring) & ring->fence_drv.num_fences_mask; + seq = ring->fence_drv.sync_seq & ring->fence_drv.num_fences_mask; + + /* mark all fences from the guilty context with an error */ + spin_lock_irqsave(&ring->fence_drv.lock, flags); + do { + last_seq++; + last_seq &= ring->fence_drv.num_fences_mask; + + ptr = &ring->fence_drv.fences[last_seq]; + rcu_read_lock(); + unprocessed = rcu_dereference(*ptr); + + if (unprocessed && !dma_fence_is_signaled_locked(unprocessed)) { + fence = container_of(unprocessed, struct amdgpu_fence, base); + + if (fence == af) + dma_fence_set_error(&fence->base, -ETIME); + else if (fence->context == af->context) + dma_fence_set_error(&fence->base, -ECANCELED); + } + rcu_read_unlock(); + } while (last_seq != seq); + spin_unlock_irqrestore(&ring->fence_drv.lock, flags); + /* signal the guilty fence */ + amdgpu_fence_write(ring, af->seq); + amdgpu_fence_process(ring); +} + +void amdgpu_fence_save_wptr(struct dma_fence *fence) +{ + struct amdgpu_fence *am_fence = container_of(fence, struct amdgpu_fence, base); + + am_fence->wptr = am_fence->ring->wptr; +} + +static void amdgpu_ring_backup_unprocessed_command(struct amdgpu_ring *ring, + u64 start_wptr, u32 end_wptr) +{ + unsigned int first_idx = start_wptr & ring->buf_mask; + unsigned int last_idx = end_wptr & ring->buf_mask; + unsigned int i; + + /* Backup the contents of the ring buffer. */ + for (i = first_idx; i != last_idx; ++i, i &= ring->buf_mask) + ring->ring_backup[ring->ring_backup_entries_to_copy++] = ring->ring[i]; +} + +void amdgpu_ring_backup_unprocessed_commands(struct amdgpu_ring *ring, + struct amdgpu_fence *guilty_fence) +{ + struct dma_fence *unprocessed; + struct dma_fence __rcu **ptr; + struct amdgpu_fence *fence; + u64 wptr; + u32 seq, last_seq; + + last_seq = amdgpu_fence_read(ring) & ring->fence_drv.num_fences_mask; + seq = ring->fence_drv.sync_seq & ring->fence_drv.num_fences_mask; + wptr = ring->fence_drv.signalled_wptr; + ring->ring_backup_entries_to_copy = 0; + + do { + last_seq++; + last_seq &= ring->fence_drv.num_fences_mask; + + ptr = &ring->fence_drv.fences[last_seq]; + rcu_read_lock(); + unprocessed = rcu_dereference(*ptr); + + if (unprocessed && !dma_fence_is_signaled(unprocessed)) { + fence = container_of(unprocessed, struct amdgpu_fence, base); + + /* save everything if the ring is not guilty, otherwise + * just save the content from other contexts. 
+ */ + if (!guilty_fence || (fence->context != guilty_fence->context)) + amdgpu_ring_backup_unprocessed_command(ring, wptr, + fence->wptr); + wptr = fence->wptr; + } + rcu_read_unlock(); + } while (last_seq != seq); +} + /* * Common fence implementation */ @@ -780,7 +868,7 @@ static const char *amdgpu_fence_get_timeline_name(struct dma_fence *f) static const char *amdgpu_job_fence_get_timeline_name(struct dma_fence *f) { - struct amdgpu_job *job = container_of(f, struct amdgpu_job, hw_fence); + struct amdgpu_job *job = container_of(f, struct amdgpu_job, hw_fence.base); return (const char *)to_amdgpu_ring(job->base.sched)->name; } @@ -810,7 +898,7 @@ static bool amdgpu_fence_enable_signaling(struct dma_fence *f) */ static bool amdgpu_job_fence_enable_signaling(struct dma_fence *f) { - struct amdgpu_job *job = container_of(f, struct amdgpu_job, hw_fence); + struct amdgpu_job *job = container_of(f, struct amdgpu_job, hw_fence.base); if (!timer_pending(&to_amdgpu_ring(job->base.sched)->fence_drv.fallback_timer)) amdgpu_fence_schedule_fallback(to_amdgpu_ring(job->base.sched)); @@ -830,7 +918,7 @@ static void amdgpu_fence_free(struct rcu_head *rcu) struct dma_fence *f = container_of(rcu, struct dma_fence, rcu); /* free fence_slab if it's separated fence*/ - kmem_cache_free(amdgpu_fence_slab, to_amdgpu_fence(f)); + kfree(to_amdgpu_fence(f)); } /** @@ -845,7 +933,7 @@ static void amdgpu_job_fence_free(struct rcu_head *rcu) struct dma_fence *f = container_of(rcu, struct dma_fence, rcu); /* free job if fence has a parent job */ - kfree(container_of(f, struct amdgpu_job, hw_fence)); + kfree(container_of(f, struct amdgpu_job, hw_fence.base)); } /** diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fru_eeprom.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fru_eeprom.c index 1ae88c459da5..b0082aa7f3c6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fru_eeprom.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fru_eeprom.c @@ -144,7 +144,8 @@ int amdgpu_fru_get_product_info(struct amdgpu_device *adev) /* If algo exists, it means that the i2c_adapter's initialized */ if (!adev->pm.fru_eeprom_i2c_bus || !adev->pm.fru_eeprom_i2c_bus->algo) { - DRM_WARN("Cannot access FRU, EEPROM accessor not initialized"); + dev_warn(adev->dev, + "Cannot access FRU, EEPROM accessor not initialized"); return -ENODEV; } @@ -152,19 +153,22 @@ int amdgpu_fru_get_product_info(struct amdgpu_device *adev) len = amdgpu_eeprom_read(adev->pm.fru_eeprom_i2c_bus, fru_addr, buf, sizeof(buf)); if (len != 8) { - DRM_ERROR("Couldn't read the IPMI Common Header: %d", len); + dev_err(adev->dev, "Couldn't read the IPMI Common Header: %d", + len); return len < 0 ? len : -EIO; } if (buf[0] != 1) { - DRM_ERROR("Bad IPMI Common Header version: 0x%02x", buf[0]); + dev_err(adev->dev, "Bad IPMI Common Header version: 0x%02x", + buf[0]); return -EIO; } for (csum = 0; len > 0; len--) csum += buf[len - 1]; if (csum) { - DRM_ERROR("Bad IPMI Common Header checksum: 0x%02x", csum); + dev_err(adev->dev, "Bad IPMI Common Header checksum: 0x%02x", + csum); return -EIO; } @@ -179,12 +183,14 @@ int amdgpu_fru_get_product_info(struct amdgpu_device *adev) /* Read the header of the PIA. */ len = amdgpu_eeprom_read(adev->pm.fru_eeprom_i2c_bus, addr, buf, 3); if (len != 3) { - DRM_ERROR("Couldn't read the Product Info Area header: %d", len); + dev_err(adev->dev, + "Couldn't read the Product Info Area header: %d", len); return len < 0 ? 
len : -EIO; } if (buf[0] != 1) { - DRM_ERROR("Bad IPMI Product Info Area version: 0x%02x", buf[0]); + dev_err(adev->dev, "Bad IPMI Product Info Area version: 0x%02x", + buf[0]); return -EIO; } @@ -197,14 +203,16 @@ int amdgpu_fru_get_product_info(struct amdgpu_device *adev) len = amdgpu_eeprom_read(adev->pm.fru_eeprom_i2c_bus, addr, pia, size); if (len != size) { kfree(pia); - DRM_ERROR("Couldn't read the Product Info Area: %d", len); + dev_err(adev->dev, "Couldn't read the Product Info Area: %d", + len); return len < 0 ? len : -EIO; } for (csum = 0; size > 0; size--) csum += pia[size - 1]; if (csum) { - DRM_ERROR("Bad Product Info Area checksum: 0x%02x", csum); + dev_err(adev->dev, "Bad Product Info Area checksum: 0x%02x", + csum); kfree(pia); return -EIO; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c index e5e33a68d935..b7ebae289bea 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c @@ -317,7 +317,7 @@ static int amdgpu_gem_object_open(struct drm_gem_object *obj, */ if (!vm->is_compute_context || !vm->process_info) return 0; - if (!obj->import_attach || + if (!drm_gem_is_imported(obj) || !dma_buf_is_dynamic(obj->import_attach->dmabuf)) return 0; mutex_lock_nested(&vm->process_info->lock, 1); @@ -443,15 +443,7 @@ int amdgpu_gem_create_ioctl(struct drm_device *dev, void *data, int r; /* reject invalid gem flags */ - if (flags & ~(AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED | - AMDGPU_GEM_CREATE_NO_CPU_ACCESS | - AMDGPU_GEM_CREATE_CPU_GTT_USWC | - AMDGPU_GEM_CREATE_VRAM_CLEARED | - AMDGPU_GEM_CREATE_VM_ALWAYS_VALID | - AMDGPU_GEM_CREATE_EXPLICIT_SYNC | - AMDGPU_GEM_CREATE_ENCRYPTED | - AMDGPU_GEM_CREATE_GFX12_DCC | - AMDGPU_GEM_CREATE_DISCARDABLE)) + if (flags & ~AMDGPU_GEM_CREATE_SETTABLE_MASK) return -EINVAL; /* reject invalid gem domains */ @@ -466,6 +458,9 @@ int amdgpu_gem_create_ioctl(struct drm_device *dev, void *data, /* always clear VRAM */ flags |= AMDGPU_GEM_CREATE_VRAM_CLEARED; + if (args->in.domains & AMDGPU_GEM_DOMAIN_MMIO_REMAP) + return -EINVAL; + /* create a gem object to contain this object in */ if (args->in.domains & (AMDGPU_GEM_DOMAIN_GDS | AMDGPU_GEM_DOMAIN_GWS | AMDGPU_GEM_DOMAIN_OA)) { @@ -577,8 +572,7 @@ int amdgpu_gem_userptr_ioctl(struct drm_device *dev, void *data, goto release_object; if (args->flags & AMDGPU_GEM_USERPTR_VALIDATE) { - r = amdgpu_ttm_tt_get_user_pages(bo, bo->tbo.ttm->pages, - &range); + r = amdgpu_ttm_tt_get_user_pages(bo, &range); if (r) goto release_object; @@ -586,6 +580,8 @@ int amdgpu_gem_userptr_ioctl(struct drm_device *dev, void *data, if (r) goto user_pages_done; + amdgpu_ttm_tt_set_user_pages(bo->tbo.ttm, range); + amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_GTT); r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx); amdgpu_bo_unreserve(bo); @@ -791,36 +787,6 @@ error: return fence; } -/** - * amdgpu_gem_va_map_flags - map GEM UAPI flags into hardware flags - * - * @adev: amdgpu_device pointer - * @flags: GEM UAPI flags - * - * Returns the GEM UAPI flags mapped into hardware for the ASIC. 
- */ -uint64_t amdgpu_gem_va_map_flags(struct amdgpu_device *adev, uint32_t flags) -{ - uint64_t pte_flag = 0; - - if (flags & AMDGPU_VM_PAGE_EXECUTABLE) - pte_flag |= AMDGPU_PTE_EXECUTABLE; - if (flags & AMDGPU_VM_PAGE_READABLE) - pte_flag |= AMDGPU_PTE_READABLE; - if (flags & AMDGPU_VM_PAGE_WRITEABLE) - pte_flag |= AMDGPU_PTE_WRITEABLE; - if (flags & AMDGPU_VM_PAGE_PRT) - pte_flag |= AMDGPU_PTE_PRT_FLAG(adev); - if (flags & AMDGPU_VM_PAGE_NOALLOC) - pte_flag |= AMDGPU_PTE_NOALLOC; - - if (adev->gmc.gmc_funcs->map_mtype) - pte_flag |= amdgpu_gmc_map_mtype(adev, - flags & AMDGPU_VM_MTYPE_MASK); - - return pte_flag; -} - int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) { @@ -841,7 +807,6 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data, struct dma_fence_chain *timeline_chain = NULL; struct dma_fence *fence; struct drm_exec exec; - uint64_t va_flags; uint64_t vm_size; int r = 0; @@ -945,10 +910,9 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data, switch (args->operation) { case AMDGPU_VA_OP_MAP: - va_flags = amdgpu_gem_va_map_flags(adev, args->flags); r = amdgpu_vm_bo_map(adev, bo_va, args->va_address, args->offset_in_bo, args->map_size, - va_flags); + args->flags); break; case AMDGPU_VA_OP_UNMAP: r = amdgpu_vm_bo_unmap(adev, bo_va, args->va_address); @@ -960,10 +924,9 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data, args->map_size); break; case AMDGPU_VA_OP_REPLACE: - va_flags = amdgpu_gem_va_map_flags(adev, args->flags); r = amdgpu_vm_bo_replace_map(adev, bo_va, args->va_address, args->offset_in_bo, args->map_size, - va_flags); + args->flags); break; default: break; @@ -997,17 +960,34 @@ int amdgpu_gem_op_ioctl(struct drm_device *dev, void *data, struct drm_gem_object *gobj; struct amdgpu_vm_bo_base *base; struct amdgpu_bo *robj; + struct drm_exec exec; + struct amdgpu_fpriv *fpriv = filp->driver_priv; int r; + if (args->padding) + return -EINVAL; + gobj = drm_gem_object_lookup(filp, args->handle); if (!gobj) return -ENOENT; robj = gem_to_amdgpu_bo(gobj); - r = amdgpu_bo_reserve(robj, false); - if (unlikely(r)) - goto out; + drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT | + DRM_EXEC_IGNORE_DUPLICATES, 0); + drm_exec_until_all_locked(&exec) { + r = drm_exec_lock_obj(&exec, gobj); + drm_exec_retry_on_contention(&exec); + if (r) + goto out_exec; + + if (args->op == AMDGPU_GEM_OP_GET_MAPPING_INFO) { + r = amdgpu_vm_lock_pd(&fpriv->vm, &exec, 0); + drm_exec_retry_on_contention(&exec); + if (r) + goto out_exec; + } + } switch (args->op) { case AMDGPU_GEM_OP_GET_GEM_CREATE_INFO: { @@ -1018,29 +998,26 @@ int amdgpu_gem_op_ioctl(struct drm_device *dev, void *data, info.alignment = robj->tbo.page_alignment << PAGE_SHIFT; info.domains = robj->preferred_domains; info.domain_flags = robj->flags; - amdgpu_bo_unreserve(robj); + drm_exec_fini(&exec); if (copy_to_user(out, &info, sizeof(info))) r = -EFAULT; break; } case AMDGPU_GEM_OP_SET_PLACEMENT: - if (robj->tbo.base.import_attach && + if (drm_gem_is_imported(&robj->tbo.base) && args->value & AMDGPU_GEM_DOMAIN_VRAM) { r = -EINVAL; - amdgpu_bo_unreserve(robj); - break; + goto out_exec; } if (amdgpu_ttm_tt_get_usermm(robj->tbo.ttm)) { r = -EPERM; - amdgpu_bo_unreserve(robj); - break; + goto out_exec; } for (base = robj->vm_bo; base; base = base->next) if (amdgpu_xgmi_same_hive(amdgpu_ttm_adev(robj->tbo.bdev), amdgpu_ttm_adev(base->vm->root.bo->tbo.bdev))) { r = -EINVAL; - amdgpu_bo_unreserve(robj); - goto out; + goto out_exec; } @@ -1053,17 +1030,146 @@ int 
amdgpu_gem_op_ioctl(struct drm_device *dev, void *data, if (robj->flags & AMDGPU_GEM_CREATE_VM_ALWAYS_VALID) amdgpu_vm_bo_invalidate(robj, true); + drm_exec_fini(&exec); + break; + case AMDGPU_GEM_OP_GET_MAPPING_INFO: { + struct amdgpu_bo_va *bo_va = amdgpu_vm_bo_find(&fpriv->vm, robj); + struct drm_amdgpu_gem_vm_entry *vm_entries; + struct amdgpu_bo_va_mapping *mapping; + int num_mappings = 0; + /* + * num_entries is set as an input to the size of the user-allocated array of + * drm_amdgpu_gem_vm_entry stored at args->value. + * num_entries is sent back as output as the number of mappings the bo has. + * If that number is larger than the size of the array, the ioctl must + * be retried. + */ + vm_entries = kvcalloc(args->num_entries, sizeof(*vm_entries), GFP_KERNEL); + if (!vm_entries) + return -ENOMEM; + + amdgpu_vm_bo_va_for_each_valid_mapping(bo_va, mapping) { + if (num_mappings < args->num_entries) { + vm_entries[num_mappings].addr = mapping->start * AMDGPU_GPU_PAGE_SIZE; + vm_entries[num_mappings].size = (mapping->last - mapping->start + 1) * AMDGPU_GPU_PAGE_SIZE; + vm_entries[num_mappings].offset = mapping->offset; + vm_entries[num_mappings].flags = mapping->flags; + } + num_mappings += 1; + } + + amdgpu_vm_bo_va_for_each_invalid_mapping(bo_va, mapping) { + if (num_mappings < args->num_entries) { + vm_entries[num_mappings].addr = mapping->start * AMDGPU_GPU_PAGE_SIZE; + vm_entries[num_mappings].size = (mapping->last - mapping->start + 1) * AMDGPU_GPU_PAGE_SIZE; + vm_entries[num_mappings].offset = mapping->offset; + vm_entries[num_mappings].flags = mapping->flags; + } + num_mappings += 1; + } + + drm_exec_fini(&exec); + + if (num_mappings > 0 && num_mappings <= args->num_entries) + if (copy_to_user(u64_to_user_ptr(args->value), vm_entries, num_mappings * sizeof(*vm_entries))) + r = -EFAULT; + + args->num_entries = num_mappings; - amdgpu_bo_unreserve(robj); + kvfree(vm_entries); break; + } default: - amdgpu_bo_unreserve(robj); + drm_exec_fini(&exec); r = -EINVAL; } -out: drm_gem_object_put(gobj); return r; +out_exec: + drm_exec_fini(&exec); + drm_gem_object_put(gobj); + return r; +} + +/** + * amdgpu_gem_list_handles_ioctl - get information about a process' buffer objects + * + * @dev: drm device pointer + * @data: drm_amdgpu_gem_list_handles + * @filp: drm file pointer + * + * num_entries is set as an input to the size of the entries array. + * num_entries is sent back as output as the number of bos in the process. + * If that number is larger than the size of the array, the ioctl must + * be retried. + * + * Returns: + * 0 for success, -errno for errors. 
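+ *
+ * A minimal sketch of the expected userspace pattern (illustrative
+ * only; it assumes the generic libdrm drmCommandWriteRead() wrapper
+ * and omits error handling):
+ *
+ *   struct drm_amdgpu_gem_list_handles_entry *entries = NULL;
+ *   struct drm_amdgpu_gem_list_handles args = {};
+ *
+ *   /* with num_entries == 0 the ioctl only reports the BO count */
+ *   drmCommandWriteRead(fd, DRM_AMDGPU_GEM_LIST_HANDLES, &args, sizeof(args));
+ *   entries = calloc(args.num_entries, sizeof(*entries));
+ *   args.entries = (uintptr_t)entries;
+ *   /* second call fills the array; retry if the count grew meanwhile */
+ *   drmCommandWriteRead(fd, DRM_AMDGPU_GEM_LIST_HANDLES, &args, sizeof(args));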
+ */ +int amdgpu_gem_list_handles_ioctl(struct drm_device *dev, void *data, + struct drm_file *filp) +{ + struct drm_amdgpu_gem_list_handles *args = data; + struct drm_amdgpu_gem_list_handles_entry *bo_entries; + struct drm_gem_object *gobj; + int id, ret = 0; + int bo_index = 0; + int num_bos = 0; + + spin_lock(&filp->table_lock); + idr_for_each_entry(&filp->object_idr, gobj, id) + num_bos += 1; + spin_unlock(&filp->table_lock); + + if (args->num_entries < num_bos) { + args->num_entries = num_bos; + return 0; + } + + if (num_bos == 0) { + args->num_entries = 0; + return 0; + } + + bo_entries = kvcalloc(num_bos, sizeof(*bo_entries), GFP_KERNEL); + if (!bo_entries) + return -ENOMEM; + + spin_lock(&filp->table_lock); + idr_for_each_entry(&filp->object_idr, gobj, id) { + struct amdgpu_bo *bo = gem_to_amdgpu_bo(gobj); + struct drm_amdgpu_gem_list_handles_entry *bo_entry; + + if (bo_index >= num_bos) { + ret = -EAGAIN; + break; + } + + bo_entry = &bo_entries[bo_index]; + + bo_entry->size = amdgpu_bo_size(bo); + bo_entry->alloc_flags = bo->flags & AMDGPU_GEM_CREATE_SETTABLE_MASK; + bo_entry->preferred_domains = bo->preferred_domains; + bo_entry->gem_handle = id; + bo_entry->alignment = bo->tbo.page_alignment; + + if (bo->tbo.base.import_attach) + bo_entry->flags |= AMDGPU_GEM_LIST_HANDLES_FLAG_IS_IMPORT; + + bo_index += 1; + } + spin_unlock(&filp->table_lock); + + args->num_entries = bo_index; + + if (!ret) + if (copy_to_user(u64_to_user_ptr(args->entries), bo_entries, num_bos * sizeof(*bo_entries))) + ret = -EFAULT; + + kvfree(bo_entries); + + return ret; } static int amdgpu_gem_align_pitch(struct amdgpu_device *adev, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.h index 3a8f57900a3a..b558336bc4c6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.h @@ -63,13 +63,28 @@ int amdgpu_gem_mmap_ioctl(struct drm_device *dev, void *data, struct drm_file *filp); int amdgpu_gem_wait_idle_ioctl(struct drm_device *dev, void *data, struct drm_file *filp); -uint64_t amdgpu_gem_va_map_flags(struct amdgpu_device *adev, uint32_t flags); int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data, struct drm_file *filp); int amdgpu_gem_op_ioctl(struct drm_device *dev, void *data, struct drm_file *filp); +int amdgpu_gem_list_handles_ioctl(struct drm_device *dev, void *data, + struct drm_file *filp); int amdgpu_gem_metadata_ioctl(struct drm_device *dev, void *data, struct drm_file *filp); +#define AMDGPU_GEM_CREATE_SETTABLE_MASK (AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED | \ + AMDGPU_GEM_CREATE_NO_CPU_ACCESS | \ + AMDGPU_GEM_CREATE_CPU_GTT_USWC | \ + AMDGPU_GEM_CREATE_VRAM_CLEARED | \ + AMDGPU_GEM_CREATE_VM_ALWAYS_VALID | \ + AMDGPU_GEM_CREATE_EXPLICIT_SYNC | \ + AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE | \ + AMDGPU_GEM_CREATE_ENCRYPTED | \ + AMDGPU_GEM_CREATE_GFX12_DCC | \ + AMDGPU_GEM_CREATE_DISCARDABLE | \ + AMDGPU_GEM_CREATE_COHERENT | \ + AMDGPU_GEM_CREATE_UNCACHED | \ + AMDGPU_GEM_CREATE_EXT_COHERENT) + #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c index c5646af055ab..ebe2b4c68b0f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c @@ -149,7 +149,7 @@ static bool amdgpu_gfx_is_graphics_multipipe_capable(struct amdgpu_device *adev) static bool amdgpu_gfx_is_compute_multipipe_capable(struct amdgpu_device *adev) { if (amdgpu_compute_multipipe != -1) { - DRM_INFO("amdgpu: forcing compute pipe policy %d\n", + 
dev_info(adev->dev, "amdgpu: forcing compute pipe policy %d\n", amdgpu_compute_multipipe); return amdgpu_compute_multipipe == 1; } @@ -674,7 +674,7 @@ int amdgpu_gfx_enable_kcq(struct amdgpu_device *adev, int xcc_id) * generation exposes more than 64 queues. If so, the * definition of queue_mask needs updating */ if (WARN_ON(i > (sizeof(queue_mask)*8))) { - DRM_ERROR("Invalid KCQ enabled: %d\n", i); + dev_err(adev->dev, "Invalid KCQ enabled: %d\n", i); break; } @@ -683,15 +683,15 @@ int amdgpu_gfx_enable_kcq(struct amdgpu_device *adev, int xcc_id) amdgpu_device_flush_hdp(adev, NULL); - DRM_INFO("kiq ring mec %d pipe %d q %d\n", kiq_ring->me, kiq_ring->pipe, - kiq_ring->queue); + dev_info(adev->dev, "kiq ring mec %d pipe %d q %d\n", kiq_ring->me, + kiq_ring->pipe, kiq_ring->queue); spin_lock(&kiq->ring_lock); r = amdgpu_ring_alloc(kiq_ring, kiq->pmf->map_queues_size * adev->gfx.num_compute_rings + kiq->pmf->set_resources_size); if (r) { - DRM_ERROR("Failed to lock KIQ (%d).\n", r); + dev_err(adev->dev, "Failed to lock KIQ (%d).\n", r); spin_unlock(&kiq->ring_lock); return r; } @@ -712,7 +712,7 @@ int amdgpu_gfx_enable_kcq(struct amdgpu_device *adev, int xcc_id) r = amdgpu_ring_test_helper(kiq_ring); spin_unlock(&kiq->ring_lock); if (r) - DRM_ERROR("KCQ enable failed\n"); + dev_err(adev->dev, "KCQ enable failed\n"); return r; } @@ -734,7 +734,7 @@ int amdgpu_gfx_enable_kgq(struct amdgpu_device *adev, int xcc_id) r = amdgpu_mes_map_legacy_queue(adev, &adev->gfx.gfx_ring[j]); if (r) { - DRM_ERROR("failed to map gfx queue\n"); + dev_err(adev->dev, "failed to map gfx queue\n"); return r; } } @@ -748,7 +748,7 @@ int amdgpu_gfx_enable_kgq(struct amdgpu_device *adev, int xcc_id) r = amdgpu_ring_alloc(kiq_ring, kiq->pmf->map_queues_size * adev->gfx.num_gfx_rings); if (r) { - DRM_ERROR("Failed to lock KIQ (%d).\n", r); + dev_err(adev->dev, "Failed to lock KIQ (%d).\n", r); spin_unlock(&kiq->ring_lock); return r; } @@ -769,7 +769,7 @@ int amdgpu_gfx_enable_kgq(struct amdgpu_device *adev, int xcc_id) r = amdgpu_ring_test_helper(kiq_ring); spin_unlock(&kiq->ring_lock); if (r) - DRM_ERROR("KGQ enable failed\n"); + dev_err(adev->dev, "KGQ enable failed\n"); return r; } @@ -1030,7 +1030,7 @@ int amdgpu_gfx_cp_ecc_error_irq(struct amdgpu_device *adev, ih_data.head = *ras_if; - DRM_ERROR("CP ECC ERROR IRQ\n"); + dev_err(adev->dev, "CP ECC ERROR IRQ\n"); amdgpu_ras_interrupt_dispatch(adev, &ih_data); return 0; } @@ -1102,6 +1102,9 @@ uint32_t amdgpu_kiq_rreg(struct amdgpu_device *adev, uint32_t reg, uint32_t xcc_ might_sleep(); while (r < 1 && cnt++ < MAX_KIQ_REG_TRY) { + if (amdgpu_in_reset(adev)) + goto failed_kiq_read; + msleep(MAX_KIQ_REG_BAILOUT_INTERVAL); r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT); } @@ -1171,6 +1174,8 @@ void amdgpu_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v, uint3 might_sleep(); while (r < 1 && cnt++ < MAX_KIQ_REG_TRY) { + if (amdgpu_in_reset(adev)) + goto failed_kiq_write; msleep(MAX_KIQ_REG_BAILOUT_INTERVAL); r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT); @@ -1474,7 +1479,8 @@ static int amdgpu_gfx_run_cleaner_shader_job(struct amdgpu_ring *ring) owner = (void *)(unsigned long)atomic_inc_return(&counter); r = amdgpu_job_alloc_with_ib(ring->adev, &entity, owner, - 64, 0, &job); + 64, 0, &job, + AMDGPU_KERNEL_JOB_ID_CLEANER_SHADER); if (r) goto err; @@ -2279,7 +2285,7 @@ void amdgpu_gfx_profile_ring_end_use(struct amdgpu_ring *ring) * Return: * return the latest index. 
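A large share of this file's churn (and of several files further down) is a mechanical logging conversion: the global DRM_ERROR()/DRM_INFO()/DRM_DEBUG() macros are replaced by the device-qualified dev_err()/dev_info()/dev_dbg() helpers so that messages identify which GPU emitted them on multi-adapter systems. A representative before/after:

	/* before: no device identity in the message */
	DRM_ERROR("KCQ enable failed\n");

	/* after: prefixed with the PCI device that failed */
	dev_err(adev->dev, "KCQ enable failed\n");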
*/ -u32 amdgpu_gfx_csb_preamble_start(volatile u32 *buffer) +u32 amdgpu_gfx_csb_preamble_start(u32 *buffer) { u32 count = 0; @@ -2303,7 +2309,7 @@ u32 amdgpu_gfx_csb_preamble_start(volatile u32 *buffer) * Return: * return the latest index. */ -u32 amdgpu_gfx_csb_data_parser(struct amdgpu_device *adev, volatile u32 *buffer, u32 count) +u32 amdgpu_gfx_csb_data_parser(struct amdgpu_device *adev, u32 *buffer, u32 count) { const struct cs_section_def *sect = NULL; const struct cs_extent_def *ext = NULL; @@ -2330,7 +2336,7 @@ u32 amdgpu_gfx_csb_data_parser(struct amdgpu_device *adev, volatile u32 *buffer, * @buffer: This is an output variable that gets the PACKET3 preamble end. * @count: Index to start set the preemble end. */ -void amdgpu_gfx_csb_preamble_end(volatile u32 *buffer, u32 count) +void amdgpu_gfx_csb_preamble_end(u32 *buffer, u32 count) { buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0)); buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h index 08f268dab8f5..fb5f7a0ee029 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h @@ -642,9 +642,9 @@ void amdgpu_gfx_enforce_isolation_ring_end_use(struct amdgpu_ring *ring); void amdgpu_gfx_profile_idle_work_handler(struct work_struct *work); void amdgpu_gfx_profile_ring_begin_use(struct amdgpu_ring *ring); void amdgpu_gfx_profile_ring_end_use(struct amdgpu_ring *ring); -u32 amdgpu_gfx_csb_preamble_start(volatile u32 *buffer); -u32 amdgpu_gfx_csb_data_parser(struct amdgpu_device *adev, volatile u32 *buffer, u32 count); -void amdgpu_gfx_csb_preamble_end(volatile u32 *buffer, u32 count); +u32 amdgpu_gfx_csb_preamble_start(u32 *buffer); +u32 amdgpu_gfx_csb_data_parser(struct amdgpu_device *adev, u32 *buffer, u32 count); +void amdgpu_gfx_csb_preamble_end(u32 *buffer, u32 count); void amdgpu_debugfs_gfx_sched_mask_init(struct amdgpu_device *adev); void amdgpu_debugfs_compute_sched_mask_init(struct amdgpu_device *adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c index 6b0fbbb91e57..9dcf51991b5b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c @@ -38,6 +38,13 @@ #include <drm/drm_drv.h> #include <drm/ttm/ttm_tt.h> +static const u64 four_gb = 0x100000000ULL; + +bool amdgpu_gmc_is_pdb0_enabled(struct amdgpu_device *adev) +{ + return adev->gmc.xgmi.connected_to_cpu || amdgpu_virt_xgmi_migrate_enabled(adev); +} + /** * amdgpu_gmc_pdb0_alloc - allocate vram for pdb0 * @@ -251,10 +258,20 @@ void amdgpu_gmc_sysvm_location(struct amdgpu_device *adev, struct amdgpu_gmc *mc u64 hive_vram_end = mc->xgmi.node_segment_size * mc->xgmi.num_physical_nodes - 1; mc->vram_start = mc->xgmi.node_segment_size * mc->xgmi.physical_node_id; mc->vram_end = mc->vram_start + mc->xgmi.node_segment_size - 1; - mc->gart_start = hive_vram_end + 1; + /* node_segment_size may not 4GB aligned on SRIOV, align up is needed. */ + mc->gart_start = ALIGN(hive_vram_end + 1, four_gb); mc->gart_end = mc->gart_start + mc->gart_size - 1; - mc->fb_start = hive_vram_start; - mc->fb_end = hive_vram_end; + if (amdgpu_virt_xgmi_migrate_enabled(adev)) { + /* set mc->vram_start to 0 to switch the returned GPU address of + * amdgpu_bo_create_reserved() from FB aperture to GART aperture. 
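On SRIOV the per-node segment size is not guaranteed to be a 4 GiB multiple, so the GART base above is now rounded up with ALIGN(hive_vram_end + 1, four_gb). As an illustration (made-up sizes): a 23.5 GiB node segment on a four-node hive gives hive_vram_end + 1 = 0x1780000000 (94 GiB), and the aligned result is:

	mc->gart_start = ALIGN(0x1780000000ULL, four_gb);	/* 0x1800000000, i.e. 96 GiB */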
+ */ + mc->vram_start = 0; + mc->vram_end = mc->vram_start + mc->mc_vram_size - 1; + mc->visible_vram_size = min(mc->visible_vram_size, mc->real_vram_size); + } else { + mc->fb_start = hive_vram_start; + mc->fb_end = hive_vram_end; + } dev_info(adev->dev, "VRAM: %lluM 0x%016llX - 0x%016llX (%lluM used)\n", mc->mc_vram_size >> 20, mc->vram_start, mc->vram_end, mc->real_vram_size >> 20); @@ -276,7 +293,6 @@ void amdgpu_gmc_sysvm_location(struct amdgpu_device *adev, struct amdgpu_gmc *mc void amdgpu_gmc_gart_location(struct amdgpu_device *adev, struct amdgpu_gmc *mc, enum amdgpu_gart_placement gart_placement) { - const uint64_t four_gb = 0x100000000ULL; u64 size_af, size_bf; /*To avoid the hole, limit the max mc address to AMDGPU_GMC_HOLE_START*/ u64 max_mc_address = min(adev->gmc.mc_mask, AMDGPU_GMC_HOLE_START - 1); @@ -674,7 +690,7 @@ void amdgpu_gmc_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, r = amdgpu_job_alloc_with_ib(ring->adev, &adev->mman.high_pr, AMDGPU_FENCE_OWNER_UNDEFINED, 16 * 4, AMDGPU_IB_POOL_IMMEDIATE, - &job); + &job, AMDGPU_KERNEL_JOB_ID_FLUSH_GPU_TLB); if (r) goto error_alloc; @@ -1041,9 +1057,7 @@ void amdgpu_gmc_init_pdb0(struct amdgpu_device *adev) */ u64 vram_size = adev->gmc.xgmi.node_segment_size * adev->gmc.xgmi.num_physical_nodes; u64 pde0_page_size = (1ULL<<adev->gmc.vmid0_page_table_block_size)<<21; - u64 vram_addr = adev->vm_manager.vram_base_offset - - adev->gmc.xgmi.physical_node_id * adev->gmc.xgmi.node_segment_size; - u64 vram_end = vram_addr + vram_size; + u64 vram_addr, vram_end; u64 gart_ptb_gpu_pa = amdgpu_gmc_vram_pa(adev, adev->gart.bo); int idx; @@ -1056,6 +1070,11 @@ void amdgpu_gmc_init_pdb0(struct amdgpu_device *adev) flags |= AMDGPU_PTE_FRAG((adev->gmc.vmid0_page_table_block_size + 9*1)); flags |= AMDGPU_PDE_PTE_FLAG(adev); + vram_addr = adev->vm_manager.vram_base_offset; + if (!amdgpu_virt_xgmi_migrate_enabled(adev)) + vram_addr -= adev->gmc.xgmi.physical_node_id * adev->gmc.xgmi.node_segment_size; + vram_end = vram_addr + vram_size; + /* The first n PDE0 entries are used as PTE, * pointing to vram */ @@ -1429,3 +1448,232 @@ bool amdgpu_gmc_need_reset_on_init(struct amdgpu_device *adev) return false; } + +enum amdgpu_memory_partition +amdgpu_gmc_get_vf_memory_partition(struct amdgpu_device *adev) +{ + switch (adev->gmc.num_mem_partitions) { + case 0: + return UNKNOWN_MEMORY_PARTITION_MODE; + case 1: + return AMDGPU_NPS1_PARTITION_MODE; + case 2: + return AMDGPU_NPS2_PARTITION_MODE; + case 4: + return AMDGPU_NPS4_PARTITION_MODE; + case 8: + return AMDGPU_NPS8_PARTITION_MODE; + default: + return AMDGPU_NPS1_PARTITION_MODE; + } +} + +enum amdgpu_memory_partition +amdgpu_gmc_get_memory_partition(struct amdgpu_device *adev, u32 *supp_modes) +{ + enum amdgpu_memory_partition mode = UNKNOWN_MEMORY_PARTITION_MODE; + + if (adev->nbio.funcs && + adev->nbio.funcs->get_memory_partition_mode) + mode = adev->nbio.funcs->get_memory_partition_mode(adev, + supp_modes); + else + dev_warn(adev->dev, "memory partition mode query is not supported\n"); + + return mode; +} + +enum amdgpu_memory_partition +amdgpu_gmc_query_memory_partition(struct amdgpu_device *adev) +{ + if (amdgpu_sriov_vf(adev)) + return amdgpu_gmc_get_vf_memory_partition(adev); + else + return amdgpu_gmc_get_memory_partition(adev, NULL); +} + +static bool amdgpu_gmc_validate_partition_info(struct amdgpu_device *adev) +{ + enum amdgpu_memory_partition mode; + u32 supp_modes; + bool valid; + + mode = amdgpu_gmc_get_memory_partition(adev, &supp_modes); + + /* Mode detected by hardware not 
present in supported modes */ + if ((mode != UNKNOWN_MEMORY_PARTITION_MODE) && + !(BIT(mode - 1) & supp_modes)) + return false; + + switch (mode) { + case UNKNOWN_MEMORY_PARTITION_MODE: + case AMDGPU_NPS1_PARTITION_MODE: + valid = (adev->gmc.num_mem_partitions == 1); + break; + case AMDGPU_NPS2_PARTITION_MODE: + valid = (adev->gmc.num_mem_partitions == 2); + break; + case AMDGPU_NPS4_PARTITION_MODE: + valid = (adev->gmc.num_mem_partitions == 3 || + adev->gmc.num_mem_partitions == 4); + break; + case AMDGPU_NPS8_PARTITION_MODE: + valid = (adev->gmc.num_mem_partitions == 8); + break; + default: + valid = false; + } + + return valid; +} + +static bool amdgpu_gmc_is_node_present(int *node_ids, int num_ids, int nid) +{ + int i; + + /* Check if node with id 'nid' is present in 'node_ids' array */ + for (i = 0; i < num_ids; ++i) + if (node_ids[i] == nid) + return true; + + return false; +} + +static void +amdgpu_gmc_init_acpi_mem_ranges(struct amdgpu_device *adev, + struct amdgpu_mem_partition_info *mem_ranges) +{ + struct amdgpu_numa_info numa_info; + int node_ids[AMDGPU_MAX_MEM_RANGES]; + int num_ranges = 0, ret; + int num_xcc, xcc_id; + uint32_t xcc_mask; + + num_xcc = NUM_XCC(adev->gfx.xcc_mask); + xcc_mask = (1U << num_xcc) - 1; + + for_each_inst(xcc_id, xcc_mask) { + ret = amdgpu_acpi_get_mem_info(adev, xcc_id, &numa_info); + if (ret) + continue; + + if (numa_info.nid == NUMA_NO_NODE) { + mem_ranges[0].size = numa_info.size; + mem_ranges[0].numa.node = numa_info.nid; + num_ranges = 1; + break; + } + + if (amdgpu_gmc_is_node_present(node_ids, num_ranges, + numa_info.nid)) + continue; + + node_ids[num_ranges] = numa_info.nid; + mem_ranges[num_ranges].numa.node = numa_info.nid; + mem_ranges[num_ranges].size = numa_info.size; + ++num_ranges; + } + + adev->gmc.num_mem_partitions = num_ranges; +} + +void amdgpu_gmc_init_sw_mem_ranges(struct amdgpu_device *adev, + struct amdgpu_mem_partition_info *mem_ranges) +{ + enum amdgpu_memory_partition mode; + u32 start_addr = 0, size; + int i, r, l; + + mode = amdgpu_gmc_query_memory_partition(adev); + + switch (mode) { + case UNKNOWN_MEMORY_PARTITION_MODE: + adev->gmc.num_mem_partitions = 0; + break; + case AMDGPU_NPS1_PARTITION_MODE: + adev->gmc.num_mem_partitions = 1; + break; + case AMDGPU_NPS2_PARTITION_MODE: + adev->gmc.num_mem_partitions = 2; + break; + case AMDGPU_NPS4_PARTITION_MODE: + if (adev->flags & AMD_IS_APU) + adev->gmc.num_mem_partitions = 3; + else + adev->gmc.num_mem_partitions = 4; + break; + case AMDGPU_NPS8_PARTITION_MODE: + adev->gmc.num_mem_partitions = 8; + break; + default: + adev->gmc.num_mem_partitions = 1; + break; + } + + /* Use NPS range info, if populated */ + r = amdgpu_gmc_get_nps_memranges(adev, mem_ranges, + &adev->gmc.num_mem_partitions); + if (!r) { + l = 0; + for (i = 1; i < adev->gmc.num_mem_partitions; ++i) { + if (mem_ranges[i].range.lpfn > + mem_ranges[i - 1].range.lpfn) + l = i; + } + + } else { + if (!adev->gmc.num_mem_partitions) { + dev_warn(adev->dev, + "Not able to detect NPS mode, fall back to NPS1\n"); + adev->gmc.num_mem_partitions = 1; + } + /* Fallback to sw based calculation */ + size = (adev->gmc.real_vram_size + SZ_16M) >> AMDGPU_GPU_PAGE_SHIFT; + size /= adev->gmc.num_mem_partitions; + + for (i = 0; i < adev->gmc.num_mem_partitions; ++i) { + mem_ranges[i].range.fpfn = start_addr; + mem_ranges[i].size = + ((u64)size << AMDGPU_GPU_PAGE_SHIFT); + mem_ranges[i].range.lpfn = start_addr + size - 1; + start_addr += size; + } + + l = adev->gmc.num_mem_partitions - 1; + } + + /* Adjust the last one */ + 
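As a worked illustration of the software fallback above (the numbers are made up): with 24 GiB of VRAM in NPS4 mode on a dGPU, size = (24 GiB + 16 MiB) >> 12 = 6295552 pages, which divided by four partitions gives 1573888 pages (6148 MiB) per range. The first three ranges then cover 6148 MiB each, but a fourth range of the same size would end 16 MiB past the end of VRAM, which is why the last range is clamped just below so that it ends exactly at real_vram_size.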
mem_ranges[l].range.lpfn = + (adev->gmc.real_vram_size >> AMDGPU_GPU_PAGE_SHIFT) - 1; + mem_ranges[l].size = + adev->gmc.real_vram_size - + ((u64)mem_ranges[l].range.fpfn << AMDGPU_GPU_PAGE_SHIFT); +} + +int amdgpu_gmc_init_mem_ranges(struct amdgpu_device *adev) +{ + bool valid; + + adev->gmc.mem_partitions = kcalloc(AMDGPU_MAX_MEM_RANGES, + sizeof(struct amdgpu_mem_partition_info), + GFP_KERNEL); + if (!adev->gmc.mem_partitions) + return -ENOMEM; + + if (adev->gmc.is_app_apu) + amdgpu_gmc_init_acpi_mem_ranges(adev, adev->gmc.mem_partitions); + else + amdgpu_gmc_init_sw_mem_ranges(adev, adev->gmc.mem_partitions); + + if (amdgpu_sriov_vf(adev)) + valid = true; + else + valid = amdgpu_gmc_validate_partition_info(adev); + if (!valid) { + /* TODO: handle invalid case */ + dev_warn(adev->dev, + "Mem ranges not matching with hardware config\n"); + } + + return 0; +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h index 80fa29c26e9e..55097ca10738 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h @@ -84,6 +84,8 @@ enum amdgpu_memory_partition { #define AMDGPU_GMC_INIT_RESET_NPS BIT(0) +#define AMDGPU_MAX_MEM_RANGES 8 + /* * GMC page fault information */ @@ -152,15 +154,15 @@ struct amdgpu_gmc_funcs { unsigned pasid); /* enable/disable PRT support */ void (*set_prt)(struct amdgpu_device *adev, bool enable); - /* map mtype to hardware flags */ - uint64_t (*map_mtype)(struct amdgpu_device *adev, uint32_t flags); /* get the pde for a given mc addr */ void (*get_vm_pde)(struct amdgpu_device *adev, int level, u64 *dst, u64 *flags); - /* get the pte flags to use for a BO VA mapping */ + /* get the pte flags to use for PTEs */ void (*get_vm_pte)(struct amdgpu_device *adev, - struct amdgpu_bo_va_mapping *mapping, - uint64_t *flags); + struct amdgpu_vm *vm, + struct amdgpu_bo *bo, + uint32_t vm_flags, + uint64_t *pte_flags); /* override per-page pte flags */ void (*override_vm_pte_flags)(struct amdgpu_device *dev, struct amdgpu_vm *vm, @@ -354,9 +356,10 @@ struct amdgpu_gmc { #define amdgpu_gmc_emit_flush_gpu_tlb(r, vmid, addr) (r)->adev->gmc.gmc_funcs->emit_flush_gpu_tlb((r), (vmid), (addr)) #define amdgpu_gmc_emit_pasid_mapping(r, vmid, pasid) (r)->adev->gmc.gmc_funcs->emit_pasid_mapping((r), (vmid), (pasid)) -#define amdgpu_gmc_map_mtype(adev, flags) (adev)->gmc.gmc_funcs->map_mtype((adev),(flags)) #define amdgpu_gmc_get_vm_pde(adev, level, dst, flags) (adev)->gmc.gmc_funcs->get_vm_pde((adev), (level), (dst), (flags)) -#define amdgpu_gmc_get_vm_pte(adev, mapping, flags) (adev)->gmc.gmc_funcs->get_vm_pte((adev), (mapping), (flags)) +#define amdgpu_gmc_get_vm_pte(adev, vm, bo, vm_flags, pte_flags) \ + ((adev)->gmc.gmc_funcs->get_vm_pte((adev), (vm), (bo), (vm_flags), \ + (pte_flags))) #define amdgpu_gmc_override_vm_pte_flags(adev, vm, addr, pte_flags) \ (adev)->gmc.gmc_funcs->override_vm_pte_flags \ ((adev), (vm), (addr), (pte_flags)) @@ -394,6 +397,7 @@ static inline uint64_t amdgpu_gmc_sign_extend(uint64_t addr) return addr; } +bool amdgpu_gmc_is_pdb0_enabled(struct amdgpu_device *adev); int amdgpu_gmc_pdb0_alloc(struct amdgpu_device *adev); void amdgpu_gmc_get_pde_for_bo(struct amdgpu_bo *bo, int level, uint64_t *addr, uint64_t *flags); @@ -455,5 +459,13 @@ int amdgpu_gmc_request_memory_partition(struct amdgpu_device *adev, int nps_mode); void amdgpu_gmc_prepare_nps_mode_change(struct amdgpu_device *adev); bool amdgpu_gmc_need_reset_on_init(struct amdgpu_device *adev); - +enum amdgpu_memory_partition 
+amdgpu_gmc_get_vf_memory_partition(struct amdgpu_device *adev); +enum amdgpu_memory_partition +amdgpu_gmc_get_memory_partition(struct amdgpu_device *adev, u32 *supp_modes); +enum amdgpu_memory_partition +amdgpu_gmc_query_memory_partition(struct amdgpu_device *adev); +int amdgpu_gmc_init_mem_ranges(struct amdgpu_device *adev); +void amdgpu_gmc_init_sw_mem_ranges(struct amdgpu_device *adev, + struct amdgpu_mem_partition_info *mem_ranges); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.c index e36fede7f74c..2c6a6b858112 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.c @@ -167,13 +167,12 @@ void amdgpu_hmm_unregister(struct amdgpu_bo *bo) int amdgpu_hmm_range_get_pages(struct mmu_interval_notifier *notifier, uint64_t start, uint64_t npages, bool readonly, - void *owner, struct page **pages, + void *owner, struct hmm_range **phmm_range) { struct hmm_range *hmm_range; unsigned long end; unsigned long timeout; - unsigned long i; unsigned long *pfns; int r = 0; @@ -222,14 +221,6 @@ retry: hmm_range->start = start; hmm_range->hmm_pfns = pfns; - /* - * Due to default_flags, all pages are HMM_PFN_VALID or - * hmm_range_fault() fails. FIXME: The pages cannot be touched outside - * the notifier_lock, and mmu_interval_read_retry() must be done first. - */ - for (i = 0; pages && i < npages; i++) - pages[i] = hmm_pfn_to_page(pfns[i]); - *phmm_range = hmm_range; return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.h index e2edcd010ccc..953e1d06de20 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.h @@ -33,7 +33,7 @@ int amdgpu_hmm_range_get_pages(struct mmu_interval_notifier *notifier, uint64_t start, uint64_t npages, bool readonly, - void *owner, struct page **pages, + void *owner, struct hmm_range **phmm_range); bool amdgpu_hmm_range_get_pages_done(struct hmm_range *hmm_range); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_i2c.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_i2c.c index 8179d0814db9..9cb72f0c5277 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_i2c.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_i2c.c @@ -24,7 +24,6 @@ * Alex Deucher */ -#include <linux/export.h> #include <linux/pci.h> #include <drm/drm_edid.h> @@ -185,7 +184,7 @@ struct amdgpu_i2c_chan *amdgpu_i2c_create(struct drm_device *dev, snprintf(i2c->adapter.name, sizeof(i2c->adapter.name), "AMDGPU i2c hw bus %s", name); i2c->adapter.algo = &amdgpu_atombios_i2c_algo; - ret = i2c_add_adapter(&i2c->adapter); + ret = devm_i2c_add_adapter(dev->dev, &i2c->adapter); if (ret) goto out_free; } else { @@ -216,15 +215,6 @@ out_free: } -void amdgpu_i2c_destroy(struct amdgpu_i2c_chan *i2c) -{ - if (!i2c) - return; - WARN_ON(i2c->has_aux); - i2c_del_adapter(&i2c->adapter); - kfree(i2c); -} - void amdgpu_i2c_init(struct amdgpu_device *adev) { if (!adev->is_atom_fw) { @@ -249,12 +239,9 @@ void amdgpu_i2c_fini(struct amdgpu_device *adev) { int i; - for (i = 0; i < AMDGPU_MAX_I2C_BUS; i++) { - if (adev->i2c_bus[i]) { - amdgpu_i2c_destroy(adev->i2c_bus[i]); + for (i = 0; i < AMDGPU_MAX_I2C_BUS; i++) + if (adev->i2c_bus[i]) adev->i2c_bus[i] = NULL; - } - } } /* looks up bus based on id */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c index 802743efa3b3..7d9bcb72e8dd 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c @@ -128,6 +128,7 @@ int amdgpu_ib_schedule(struct 
amdgpu_ring *ring, unsigned int num_ibs, struct amdgpu_device *adev = ring->adev; struct amdgpu_ib *ib = &ibs[0]; struct dma_fence *tmp = NULL; + struct amdgpu_fence *af; bool need_ctx_switch; struct amdgpu_vm *vm; uint64_t fence_ctx; @@ -138,7 +139,6 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned int num_ibs, int vmid = AMDGPU_JOB_GET_VMID(job); bool need_pipe_sync = false; unsigned int cond_exec; - unsigned int i; int r = 0; @@ -154,6 +154,12 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned int num_ibs, csa_va = job->csa_va; gds_va = job->gds_va; init_shadow = job->init_shadow; + af = &job->hw_fence; + /* Save the context of the job for reset handling. + * The driver needs this so it can skip the ring + * contents for guilty contexts. + */ + af->context = job->base.s_fence ? job->base.s_fence->finished.context : 0; } else { vm = NULL; fence_ctx = 0; @@ -161,6 +167,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned int num_ibs, csa_va = 0; gds_va = 0; init_shadow = false; + af = NULL; } if (!ring->sched.ready) { @@ -282,7 +289,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned int num_ibs, amdgpu_ring_init_cond_exec(ring, ring->cond_exe_gpu_addr); } - r = amdgpu_fence_emit(ring, f, job, fence_flags); + r = amdgpu_fence_emit(ring, f, af, fence_flags); if (r) { dev_err(adev->dev, "failed to emit fence (%d)\n", r); if (job && job->vmid) @@ -304,8 +311,17 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned int num_ibs, ring->hw_prio == AMDGPU_GFX_PIPE_PRIO_HIGH) ring->funcs->emit_wave_limit(ring, false); + /* Save the wptr associated with this fence. + * This must be last for resets to work properly + * as we need to save the wptr associated with this + * fence so we know what rings contents to backup + * after we reset the queue. + */ + amdgpu_fence_save_wptr(*f); + amdgpu_ring_ib_end(ring); amdgpu_ring_commit(ring); + return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c index 5dd78a9cb12d..3ef5bc95642c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c @@ -275,13 +275,12 @@ static int amdgpu_vmid_grab_reserved(struct amdgpu_vm *vm, { struct amdgpu_device *adev = ring->adev; unsigned vmhub = ring->vm_hub; - struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub]; uint64_t fence_context = adev->fence_context + ring->idx; bool needs_flush = vm->use_cpu_for_update; uint64_t updates = amdgpu_vm_tlb_seq(vm); int r; - *id = id_mgr->reserved; + *id = vm->reserved_vmid[vmhub]; if ((*id)->owner != vm->immediate.fence_context || !amdgpu_vmid_compatible(*id, job) || (*id)->flushed_updates < updates || @@ -474,40 +473,61 @@ bool amdgpu_vmid_uses_reserved(struct amdgpu_vm *vm, unsigned int vmhub) return vm->reserved_vmid[vmhub]; } -int amdgpu_vmid_alloc_reserved(struct amdgpu_device *adev, +/* + * amdgpu_vmid_alloc_reserved - reserve a specific VMID for this vm + * @adev: amdgpu device structure + * @vm: the VM to reserve an ID for + * @vmhub: the VMHUB which should be used + * + * Mostly used to have a reserved VMID for debugging and SPM. + * + * Returns: 0 for success, -ENOENT if an ID is already reserved. 
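With the reservation now tracked in vm->reserved_vmid[vmhub] (plus a single reserved_vmid flag in the ID manager), the alloc/free pair takes the VM explicitly. A minimal sketch of the expected pairing from a caller such as the SPM/debugging path, assuming the caller already holds the adev and vm pointers:

	int r;

	/* take the reserved VMID on the first GFX hub for this VM */
	r = amdgpu_vmid_alloc_reserved(adev, vm, AMDGPU_GFXHUB(0));
	if (r)
		return r;	/* -ENOENT: another VM already holds it */

	/* ... use the VM with its reserved VMID ... */

	amdgpu_vmid_free_reserved(adev, vm, AMDGPU_GFXHUB(0));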
+ */ +int amdgpu_vmid_alloc_reserved(struct amdgpu_device *adev, struct amdgpu_vm *vm, unsigned vmhub) { struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub]; + struct amdgpu_vmid *id; + int r = 0; mutex_lock(&id_mgr->lock); - - ++id_mgr->reserved_use_count; - if (!id_mgr->reserved) { - struct amdgpu_vmid *id; - - id = list_first_entry(&id_mgr->ids_lru, struct amdgpu_vmid, - list); - /* Remove from normal round robin handling */ - list_del_init(&id->list); - id_mgr->reserved = id; + if (vm->reserved_vmid[vmhub]) + goto unlock; + if (id_mgr->reserved_vmid) { + r = -ENOENT; + goto unlock; } - + /* Remove from normal round robin handling */ + id = list_first_entry(&id_mgr->ids_lru, struct amdgpu_vmid, list); + list_del_init(&id->list); + vm->reserved_vmid[vmhub] = id; + id_mgr->reserved_vmid = true; mutex_unlock(&id_mgr->lock); + return 0; +unlock: + mutex_unlock(&id_mgr->lock); + return r; } -void amdgpu_vmid_free_reserved(struct amdgpu_device *adev, +/* + * amdgpu_vmid_free_reserved - free up a reserved VMID again + * @adev: amdgpu device structure + * @vm: the VM with the reserved ID + * @vmhub: the VMHUB which should be used + */ +void amdgpu_vmid_free_reserved(struct amdgpu_device *adev, struct amdgpu_vm *vm, unsigned vmhub) { struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub]; mutex_lock(&id_mgr->lock); - if (!--id_mgr->reserved_use_count) { - /* give the reserved ID back to normal round robin */ - list_add(&id_mgr->reserved->list, &id_mgr->ids_lru); - id_mgr->reserved = NULL; + if (vm->reserved_vmid[vmhub]) { + list_add(&vm->reserved_vmid[vmhub]->list, + &id_mgr->ids_lru); + vm->reserved_vmid[vmhub] = NULL; + id_mgr->reserved_vmid = false; } - mutex_unlock(&id_mgr->lock); } @@ -574,7 +594,6 @@ void amdgpu_vmid_mgr_init(struct amdgpu_device *adev) mutex_init(&id_mgr->lock); INIT_LIST_HEAD(&id_mgr->ids_lru); - id_mgr->reserved_use_count = 0; /* for GC <10, SDMA uses MMHUB so use first_kfd_vmid for both GC and MM */ if (amdgpu_ip_version(adev, GC_HWIP, 0) < IP_VERSION(10, 0, 0)) @@ -594,11 +613,6 @@ void amdgpu_vmid_mgr_init(struct amdgpu_device *adev) list_add_tail(&id_mgr->ids[j].list, &id_mgr->ids_lru); } } - /* alloc a default reserved vmid to enforce isolation */ - for (i = 0; i < (adev->xcp_mgr ? 
adev->xcp_mgr->num_xcps : 1); i++) { - if (adev->enforce_isolation[i] != AMDGPU_ENFORCE_ISOLATION_DISABLE) - amdgpu_vmid_alloc_reserved(adev, AMDGPU_GFXHUB(i)); - } } /** diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h index 240fa6751260..b3649cd3af56 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h @@ -67,8 +67,7 @@ struct amdgpu_vmid_mgr { unsigned num_ids; struct list_head ids_lru; struct amdgpu_vmid ids[AMDGPU_NUM_VMID]; - struct amdgpu_vmid *reserved; - unsigned int reserved_use_count; + bool reserved_vmid; }; int amdgpu_pasid_alloc(unsigned int bits); @@ -79,10 +78,10 @@ void amdgpu_pasid_free_delayed(struct dma_resv *resv, bool amdgpu_vmid_had_gpu_reset(struct amdgpu_device *adev, struct amdgpu_vmid *id); bool amdgpu_vmid_uses_reserved(struct amdgpu_vm *vm, unsigned int vmhub); -int amdgpu_vmid_alloc_reserved(struct amdgpu_device *adev, - unsigned vmhub); -void amdgpu_vmid_free_reserved(struct amdgpu_device *adev, - unsigned vmhub); +int amdgpu_vmid_alloc_reserved(struct amdgpu_device *adev, struct amdgpu_vm *vm, + unsigned vmhub); +void amdgpu_vmid_free_reserved(struct amdgpu_device *adev, struct amdgpu_vm *vm, + unsigned vmhub); int amdgpu_vmid_grab(struct amdgpu_vm *vm, struct amdgpu_ring *ring, struct amdgpu_job *job, struct dma_fence **fence); void amdgpu_vmid_reset(struct amdgpu_device *adev, unsigned vmhub, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c index 30f16968b578..a6419246e9c2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c @@ -218,7 +218,7 @@ int amdgpu_ih_process(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih) restart_ih: count = AMDGPU_IH_MAX_NUM_IVS; - DRM_DEBUG("%s: rptr %d, wptr %d\n", __func__, ih->rptr, wptr); + dev_dbg(adev->dev, "%s: rptr %d, wptr %d\n", __func__, ih->rptr, wptr); /* Order reading of wptr vs. reading of IH ring data */ rmb(); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h index 7f7ea046e209..f58b6be7fccc 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h @@ -56,14 +56,14 @@ struct amdgpu_ih_ring { bool use_bus_addr; struct amdgpu_bo *ring_obj; - volatile uint32_t *ring; + uint32_t *ring; uint64_t gpu_addr; uint64_t wptr_addr; - volatile uint32_t *wptr_cpu; + uint32_t *wptr_cpu; uint64_t rptr_addr; - volatile uint32_t *rptr_cpu; + uint32_t *rptr_cpu; bool enabled; unsigned rptr; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ip.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ip.c new file mode 100644 index 000000000000..99e1cf4fc955 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ip.c @@ -0,0 +1,96 @@ +/* + * Copyright 2025 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include "amdgpu.h" +#include "amdgpu_ip.h" + +static int8_t amdgpu_logical_to_dev_inst(struct amdgpu_device *adev, + enum amd_hw_ip_block_type block, + int8_t inst) +{ + int8_t dev_inst; + + switch (block) { + case GC_HWIP: + case SDMA0_HWIP: + /* Both JPEG and VCN as JPEG is only alias of VCN */ + case VCN_HWIP: + dev_inst = adev->ip_map.dev_inst[block][inst]; + break; + default: + /* For rest of the IPs, no look up required. + * Assume 'logical instance == physical instance' for all configs. */ + dev_inst = inst; + break; + } + + return dev_inst; +} + +static uint32_t amdgpu_logical_to_dev_mask(struct amdgpu_device *adev, + enum amd_hw_ip_block_type block, + uint32_t mask) +{ + uint32_t dev_mask = 0; + int8_t log_inst, dev_inst; + + while (mask) { + log_inst = ffs(mask) - 1; + dev_inst = amdgpu_logical_to_dev_inst(adev, block, log_inst); + dev_mask |= (1 << dev_inst); + mask &= ~(1 << log_inst); + } + + return dev_mask; +} + +static void amdgpu_populate_ip_map(struct amdgpu_device *adev, + enum amd_hw_ip_block_type ip_block, + uint32_t inst_mask) +{ + int l = 0, i; + + while (inst_mask) { + i = ffs(inst_mask) - 1; + adev->ip_map.dev_inst[ip_block][l++] = i; + inst_mask &= ~(1 << i); + } + for (; l < HWIP_MAX_INSTANCE; l++) + adev->ip_map.dev_inst[ip_block][l] = -1; +} + +void amdgpu_ip_map_init(struct amdgpu_device *adev) +{ + u32 ip_map[][2] = { + { GC_HWIP, adev->gfx.xcc_mask }, + { SDMA0_HWIP, adev->sdma.sdma_mask }, + { VCN_HWIP, adev->vcn.inst_mask }, + }; + int i; + + for (i = 0; i < ARRAY_SIZE(ip_map); ++i) + amdgpu_populate_ip_map(adev, ip_map[i][0], ip_map[i][1]); + + adev->ip_map.logical_to_dev_inst = amdgpu_logical_to_dev_inst; + adev->ip_map.logical_to_dev_mask = amdgpu_logical_to_dev_mask; +} diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ip.h index 0d878ca3acba..2490fd322aec 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ip.h @@ -1,5 +1,5 @@ /* - * Copyright 2014 Advanced Micro Devices, Inc. + * Copyright 2025 Advanced Micro Devices, Inc. 
* * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -21,12 +21,9 @@ * */ -#ifndef __DCE_V11_0_H__ -#define __DCE_V11_0_H__ +#ifndef __AMDGPU_IP_H__ +#define __AMDGPU_IP_H__ -extern const struct amdgpu_ip_block_version dce_v11_0_ip_block; -extern const struct amdgpu_ip_block_version dce_v11_2_ip_block; +void amdgpu_ip_map_init(struct amdgpu_device *adev); -void dce_v11_0_disable_dce(struct amdgpu_device *adev); - -#endif +#endif /* __AMDGPU_IP_H__ */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c index 13c60cac4261..8112ffc85995 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c @@ -142,8 +142,9 @@ void amdgpu_irq_disable_all(struct amdgpu_device *adev) r = src->funcs->set(adev, src, k, AMDGPU_IRQ_STATE_DISABLE); if (r) - DRM_ERROR("error disabling interrupt (%d)\n", - r); + dev_err(adev->dev, + "error disabling interrupt (%d)\n", + r); } } } @@ -242,7 +243,7 @@ static bool amdgpu_msi_ok(struct amdgpu_device *adev) return true; } -static void amdgpu_restore_msix(struct amdgpu_device *adev) +void amdgpu_restore_msix(struct amdgpu_device *adev) { u16 ctrl; @@ -315,7 +316,7 @@ int amdgpu_irq_init(struct amdgpu_device *adev) adev->irq.irq = irq; adev_to_drm(adev)->max_vblank_count = 0x00ffffff; - DRM_DEBUG("amdgpu: irq initialized.\n"); + dev_dbg(adev->dev, "amdgpu: irq initialized.\n"); return 0; free_vectors: @@ -461,10 +462,10 @@ void amdgpu_irq_dispatch(struct amdgpu_device *adev, src_id = entry.src_id; if (client_id >= AMDGPU_IRQ_CLIENTID_MAX) { - DRM_DEBUG("Invalid client_id in IV: %d\n", client_id); + dev_dbg(adev->dev, "Invalid client_id in IV: %d\n", client_id); } else if (src_id >= AMDGPU_MAX_IRQ_SRC_ID) { - DRM_DEBUG("Invalid src_id in IV: %d\n", src_id); + dev_dbg(adev->dev, "Invalid src_id in IV: %d\n", src_id); } else if (((client_id == AMDGPU_IRQ_CLIENTID_LEGACY) || (client_id == SOC15_IH_CLIENTID_ISP)) && @@ -472,18 +473,21 @@ void amdgpu_irq_dispatch(struct amdgpu_device *adev, generic_handle_domain_irq(adev->irq.domain, src_id); } else if (!adev->irq.client[client_id].sources) { - DRM_DEBUG("Unregistered interrupt client_id: %d src_id: %d\n", - client_id, src_id); + dev_dbg(adev->dev, + "Unregistered interrupt client_id: %d src_id: %d\n", + client_id, src_id); } else if ((src = adev->irq.client[client_id].sources[src_id])) { r = src->funcs->process(adev, src, &entry); if (r < 0) - DRM_ERROR("error processing interrupt (%d)\n", r); + dev_err(adev->dev, "error processing interrupt (%d)\n", + r); else if (r) handled = true; } else { - DRM_DEBUG("Unregistered interrupt src_id: %d of client_id:%d\n", + dev_dbg(adev->dev, + "Unregistered interrupt src_id: %d of client_id:%d\n", src_id, client_id); } @@ -620,7 +624,7 @@ int amdgpu_irq_put(struct amdgpu_device *adev, struct amdgpu_irq_src *src, unsigned int type) { /* When the threshold is reached,the interrupt source may not be enabled.return -EINVAL */ - if (amdgpu_ras_is_rma(adev)) + if (amdgpu_ras_is_rma(adev) && !amdgpu_irq_enabled(adev, src, type)) return -EINVAL; if (!adev->irq.installed) @@ -732,7 +736,7 @@ int amdgpu_irq_add_domain(struct amdgpu_device *adev) adev->irq.domain = irq_domain_create_linear(NULL, AMDGPU_MAX_IRQ_SRC_ID, &amdgpu_hw_irqdomain_ops, adev); if (!adev->irq.domain) { - DRM_ERROR("GPU irq add domain failed\n"); + dev_err(adev->dev, "GPU irq add domain failed\n"); return -ENODEV; } diff --git 
a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.h index 04c0b4fa17a4..9f0417456abd 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.h @@ -146,5 +146,6 @@ void amdgpu_irq_gpu_reset_resume_helper(struct amdgpu_device *adev); int amdgpu_irq_add_domain(struct amdgpu_device *adev); void amdgpu_irq_remove_domain(struct amdgpu_device *adev); unsigned amdgpu_irq_create_mapping(struct amdgpu_device *adev, unsigned src_id); +void amdgpu_restore_msix(struct amdgpu_device *adev); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_isp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_isp.c index 43fc941dfa57..9cddbf50442a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_isp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_isp.c @@ -33,6 +33,8 @@ #include "isp_v4_1_0.h" #include "isp_v4_1_1.h" +#define ISP_MC_ADDR_ALIGN (1024 * 32) + /** * isp_hw_init - start and test isp block * @@ -141,6 +143,179 @@ static int isp_set_powergating_state(struct amdgpu_ip_block *ip_block, return 0; } +static int is_valid_isp_device(struct device *isp_parent, struct device *amdgpu_dev) +{ + if (isp_parent != amdgpu_dev) + return -EINVAL; + + return 0; +} + +/** + * isp_user_buffer_alloc - create user buffer object (BO) for isp + * + * @dev: isp device handle + * @dmabuf: DMABUF handle for isp buffer allocated in system memory + * @buf_obj: GPU buffer object handle to initialize + * @buf_addr: GPU addr of the pinned BO to initialize + * + * Imports isp DMABUF to allocate and pin a user BO for isp internal use. It does + * GART alloc to generate GPU addr for BO to make it accessible through the + * GART aperture for ISP HW. + * + * This function is exported to allow the V4L2 isp device external to drm device + * to create and access the isp user BO. + * + * Returns: + * 0 on success, negative error code otherwise. + */ +int isp_user_buffer_alloc(struct device *dev, void *dmabuf, + void **buf_obj, u64 *buf_addr) +{ + struct platform_device *ispdev = to_platform_device(dev); + const struct isp_platform_data *isp_pdata; + struct amdgpu_device *adev; + struct mfd_cell *mfd_cell; + struct amdgpu_bo *bo; + u64 gpu_addr; + int ret; + + if (WARN_ON(!ispdev)) + return -ENODEV; + + if (WARN_ON(!buf_obj)) + return -EINVAL; + + if (WARN_ON(!buf_addr)) + return -EINVAL; + + mfd_cell = &ispdev->mfd_cell[0]; + if (!mfd_cell) + return -ENODEV; + + isp_pdata = mfd_cell->platform_data; + adev = isp_pdata->adev; + + ret = is_valid_isp_device(ispdev->dev.parent, adev->dev); + if (ret) + return ret; + + ret = amdgpu_bo_create_isp_user(adev, dmabuf, + AMDGPU_GEM_DOMAIN_GTT, &bo, &gpu_addr); + if (ret) { + drm_err(&adev->ddev, "failed to alloc gart user buffer (%d)", ret); + return ret; + } + + *buf_obj = (void *)bo; + *buf_addr = gpu_addr; + + return 0; +} +EXPORT_SYMBOL(isp_user_buffer_alloc); + +/** + * isp_user_buffer_free - free isp user buffer object (BO) + * + * @buf_obj: amdgpu isp user BO to free + * + * unpin and unref BO for isp internal use. + * + * This function is exported to allow the V4L2 isp device + * external to drm device to free the isp user BO. 
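A minimal sketch of how the V4L2 isp consumer is expected to pair the two user-buffer helpers; the device pointer (the isp platform device's struct device) and the dmabuf are assumed to come from that driver:

	void *bo;
	u64 gpu_addr;
	int ret;

	/* import a dmabuf the V4L2 side already holds and pin it for the ISP HW */
	ret = isp_user_buffer_alloc(isp_dev, dmabuf, &bo, &gpu_addr);
	if (ret)
		return ret;

	/* ... program gpu_addr into the ISP ... */

	isp_user_buffer_free(bo);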
+ */ +void isp_user_buffer_free(void *buf_obj) +{ + amdgpu_bo_free_isp_user(buf_obj); +} +EXPORT_SYMBOL(isp_user_buffer_free); + +/** + * isp_kernel_buffer_alloc - create kernel buffer object (BO) for isp + * + * @dev: isp device handle + * @size: size for the new BO + * @buf_obj: GPU BO handle to initialize + * @gpu_addr: GPU addr of the pinned BO + * @cpu_addr: CPU address mapping of BO + * + * Allocates and pins a kernel BO for internal isp firmware use. + * + * This function is exported to allow the V4L2 isp device + * external to drm device to create and access the kernel BO. + * + * Returns: + * 0 on success, negative error code otherwise. + */ +int isp_kernel_buffer_alloc(struct device *dev, u64 size, + void **buf_obj, u64 *gpu_addr, void **cpu_addr) +{ + struct platform_device *ispdev = to_platform_device(dev); + struct amdgpu_bo **bo = (struct amdgpu_bo **)buf_obj; + const struct isp_platform_data *isp_pdata; + struct amdgpu_device *adev; + struct mfd_cell *mfd_cell; + int ret; + + if (WARN_ON(!ispdev)) + return -ENODEV; + + if (WARN_ON(!buf_obj)) + return -EINVAL; + + if (WARN_ON(!gpu_addr)) + return -EINVAL; + + if (WARN_ON(!cpu_addr)) + return -EINVAL; + + mfd_cell = &ispdev->mfd_cell[0]; + if (!mfd_cell) + return -ENODEV; + + isp_pdata = mfd_cell->platform_data; + adev = isp_pdata->adev; + + ret = is_valid_isp_device(ispdev->dev.parent, adev->dev); + if (ret) + return ret; + + ret = amdgpu_bo_create_kernel(adev, + size, + ISP_MC_ADDR_ALIGN, + AMDGPU_GEM_DOMAIN_GTT, + bo, + gpu_addr, + cpu_addr); + if (!cpu_addr || ret) { + drm_err(&adev->ddev, "failed to alloc gart kernel buffer (%d)", ret); + return ret; + } + + return 0; +} +EXPORT_SYMBOL(isp_kernel_buffer_alloc); + +/** + * isp_kernel_buffer_free - free isp kernel buffer object (BO) + * + * @buf_obj: amdgpu isp user BO to free + * @gpu_addr: GPU addr of isp kernel BO + * @cpu_addr: CPU addr of isp kernel BO + * + * unmaps and unpin a isp kernel BO. + * + * This function is exported to allow the V4L2 isp device + * external to drm device to free the kernel BO. 
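And the kernel-BO variant, which additionally returns a CPU mapping (for example for a firmware buffer the CPU has to fill); again only a sketch, with the caller's variable names assumed:

	void *fw_bo, *fw_cpu;
	u64 fw_gpu;
	int ret;

	ret = isp_kernel_buffer_alloc(isp_dev, fw_size, &fw_bo, &fw_gpu, &fw_cpu);
	if (ret)
		return ret;

	/* fill the buffer through fw_cpu, hand fw_gpu to the ISP firmware */

	isp_kernel_buffer_free(&fw_bo, &fw_gpu, &fw_cpu);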
+ */ +void isp_kernel_buffer_free(void **buf_obj, u64 *gpu_addr, void **cpu_addr) +{ + struct amdgpu_bo **bo = (struct amdgpu_bo **)buf_obj; + + amdgpu_bo_free_kernel(bo, gpu_addr, cpu_addr); +} +EXPORT_SYMBOL(isp_kernel_buffer_free); + static const struct amd_ip_funcs isp_ip_funcs = { .name = "isp_ip", .early_init = isp_early_init, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_isp.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_isp.h index 4f3b7b5d9c1f..d6f4ffa4c97c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_isp.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_isp.h @@ -28,16 +28,13 @@ #ifndef __AMDGPU_ISP_H__ #define __AMDGPU_ISP_H__ +#include <drm/amd/isp.h> +#include <linux/pm_domain.h> + #define ISP_REGS_OFFSET_END 0x629A4 struct amdgpu_isp; -struct isp_platform_data { - void *adev; - u32 asic_type; - resource_size_t base_rmmio_size; -}; - struct isp_funcs { int (*hw_init)(struct amdgpu_isp *isp); int (*hw_fini)(struct amdgpu_isp *isp); @@ -54,6 +51,7 @@ struct amdgpu_isp { struct isp_platform_data *isp_pdata; unsigned int harvest_config; const struct firmware *fw; + struct generic_pm_domain ispgpd; }; extern const struct amdgpu_ip_block_version isp_v4_1_0_ip_block; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c index 1e24590ae144..d020a890a0ea 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c @@ -90,10 +90,9 @@ static enum drm_gpu_sched_stat amdgpu_job_timedout(struct drm_sched_job *s_job) struct amdgpu_ring *ring = to_amdgpu_ring(s_job->sched); struct amdgpu_job *job = to_amdgpu_job(s_job); struct drm_wedge_task_info *info = NULL; - struct amdgpu_task_info *ti; + struct amdgpu_task_info *ti = NULL; struct amdgpu_device *adev = ring->adev; - int idx; - int r; + int idx, r; if (!drm_dev_enter(adev_to_drm(adev), &idx)) { dev_info(adev->dev, "%s - device unplugged skipping recovery on scheduler:%s", @@ -113,6 +112,7 @@ static enum drm_gpu_sched_stat amdgpu_job_timedout(struct drm_sched_job *s_job) amdgpu_job_core_dump(adev, job); if (amdgpu_gpu_recovery && + amdgpu_ring_is_reset_type_supported(ring, AMDGPU_RESET_TYPE_SOFT_RESET) && amdgpu_ring_soft_recovery(ring, job->vmid, s_job->s_fence->parent)) { dev_err(adev->dev, "ring %s timeout, but soft recovered\n", s_job->sched->name); @@ -132,47 +132,24 @@ static enum drm_gpu_sched_stat amdgpu_job_timedout(struct drm_sched_job *s_job) /* attempt a per ring reset */ if (unlikely(adev->debug_disable_gpu_ring_reset)) { dev_err(adev->dev, "Ring reset disabled by debug mask\n"); - } else if (amdgpu_gpu_recovery && ring->funcs->reset) { - bool is_guilty; - - dev_err(adev->dev, "Starting %s ring reset\n", s_job->sched->name); - /* stop the scheduler, but don't mess with the - * bad job yet because if ring reset fails - * we'll fall back to full GPU reset. - */ - drm_sched_wqueue_stop(&ring->sched); - - /* for engine resets, we need to reset the engine, - * but individual queues may be unaffected. - * check here to make sure the accounting is correct. 
- */ - if (ring->funcs->is_guilty) - is_guilty = ring->funcs->is_guilty(ring); - else - is_guilty = true; - - if (is_guilty) - dma_fence_set_error(&s_job->s_fence->finished, -ETIME); - - r = amdgpu_ring_reset(ring, job->vmid); + } else if (amdgpu_gpu_recovery && + amdgpu_ring_is_reset_type_supported(ring, AMDGPU_RESET_TYPE_PER_QUEUE) && + ring->funcs->reset) { + dev_err(adev->dev, "Starting %s ring reset\n", + s_job->sched->name); + r = amdgpu_ring_reset(ring, job->vmid, &job->hw_fence); if (!r) { - if (amdgpu_ring_sched_ready(ring)) - drm_sched_stop(&ring->sched, s_job); - if (is_guilty) { - atomic_inc(&ring->adev->gpu_reset_counter); - amdgpu_fence_driver_force_completion(ring); - } - if (amdgpu_ring_sched_ready(ring)) - drm_sched_start(&ring->sched, 0); - dev_err(adev->dev, "Ring %s reset succeeded\n", ring->sched.name); - drm_dev_wedged_event(adev_to_drm(adev), DRM_WEDGE_RECOVERY_NONE, info); + atomic_inc(&ring->adev->gpu_reset_counter); + dev_err(adev->dev, "Ring %s reset succeeded\n", + ring->sched.name); + drm_dev_wedged_event(adev_to_drm(adev), + DRM_WEDGE_RECOVERY_NONE, info); goto exit; } - dev_err(adev->dev, "Ring %s reset failure\n", ring->sched.name); + dev_err(adev->dev, "Ring %s reset failed\n", ring->sched.name); } - dma_fence_set_error(&s_job->s_fence->finished, -ETIME); - amdgpu_vm_put_task_info(ti); + dma_fence_set_error(&s_job->s_fence->finished, -ETIME); if (amdgpu_device_should_recover_gpu(ring->adev)) { struct amdgpu_reset_context reset_context; @@ -199,8 +176,9 @@ static enum drm_gpu_sched_stat amdgpu_job_timedout(struct drm_sched_job *s_job) } exit: + amdgpu_vm_put_task_info(ti); drm_dev_exit(idx); - return DRM_GPU_SCHED_STAT_NOMINAL; + return DRM_GPU_SCHED_STAT_RESET; } int amdgpu_job_alloc(struct amdgpu_device *adev, struct amdgpu_vm *vm, @@ -231,11 +209,12 @@ int amdgpu_job_alloc(struct amdgpu_device *adev, struct amdgpu_vm *vm, int amdgpu_job_alloc_with_ib(struct amdgpu_device *adev, struct drm_sched_entity *entity, void *owner, size_t size, enum amdgpu_ib_pool_type pool_type, - struct amdgpu_job **job) + struct amdgpu_job **job, u64 k_job_id) { int r; - r = amdgpu_job_alloc(adev, NULL, entity, owner, 1, job, 0); + r = amdgpu_job_alloc(adev, NULL, entity, owner, 1, job, + k_job_id); if (r) return r; @@ -275,8 +254,8 @@ void amdgpu_job_free_resources(struct amdgpu_job *job) /* Check if any fences where initialized */ if (job->base.s_fence && job->base.s_fence->finished.ops) f = &job->base.s_fence->finished; - else if (job->hw_fence.ops) - f = &job->hw_fence; + else if (job->hw_fence.base.ops) + f = &job->hw_fence.base; else f = NULL; @@ -293,10 +272,10 @@ static void amdgpu_job_free_cb(struct drm_sched_job *s_job) amdgpu_sync_free(&job->explicit_sync); /* only put the hw fence if has embedded fence */ - if (!job->hw_fence.ops) + if (!job->hw_fence.base.ops) kfree(job); else - dma_fence_put(&job->hw_fence); + dma_fence_put(&job->hw_fence.base); } void amdgpu_job_set_gang_leader(struct amdgpu_job *job, @@ -325,10 +304,10 @@ void amdgpu_job_free(struct amdgpu_job *job) if (job->gang_submit != &job->base.s_fence->scheduled) dma_fence_put(job->gang_submit); - if (!job->hw_fence.ops) + if (!job->hw_fence.base.ops) kfree(job); else - dma_fence_put(&job->hw_fence); + dma_fence_put(&job->hw_fence.base); } struct dma_fence *amdgpu_job_submit(struct amdgpu_job *job) @@ -387,13 +366,6 @@ amdgpu_job_prepare_job(struct drm_sched_job *sched_job, dev_err(ring->adev->dev, "Error getting VM ID (%d)\n", r); goto error; } - /* - * The VM structure might be released after the VMID 
is - * assigned, we had multiple problems with people trying to use - * the VM pointer so better set it to NULL. - */ - if (!fence) - job->vm = NULL; return fence; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h index 529045d60412..4a6487eb6cb5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h @@ -44,11 +44,27 @@ struct amdgpu_fence; enum amdgpu_ib_pool_type; +/* Internal kernel job ids. (decreasing values, starting from U64_MAX). */ +#define AMDGPU_KERNEL_JOB_ID_VM_UPDATE (18446744073709551615ULL) +#define AMDGPU_KERNEL_JOB_ID_VM_UPDATE_PDES (18446744073709551614ULL) +#define AMDGPU_KERNEL_JOB_ID_VM_UPDATE_RANGE (18446744073709551613ULL) +#define AMDGPU_KERNEL_JOB_ID_VM_PT_CLEAR (18446744073709551612ULL) +#define AMDGPU_KERNEL_JOB_ID_TTM_MAP_BUFFER (18446744073709551611ULL) +#define AMDGPU_KERNEL_JOB_ID_TTM_ACCESS_MEMORY_SDMA (18446744073709551610ULL) +#define AMDGPU_KERNEL_JOB_ID_TTM_COPY_BUFFER (18446744073709551609ULL) +#define AMDGPU_KERNEL_JOB_ID_CLEAR_ON_RELEASE (18446744073709551608ULL) +#define AMDGPU_KERNEL_JOB_ID_MOVE_BLIT (18446744073709551607ULL) +#define AMDGPU_KERNEL_JOB_ID_TTM_CLEAR_BUFFER (18446744073709551606ULL) +#define AMDGPU_KERNEL_JOB_ID_CLEANER_SHADER (18446744073709551605ULL) +#define AMDGPU_KERNEL_JOB_ID_FLUSH_GPU_TLB (18446744073709551604ULL) +#define AMDGPU_KERNEL_JOB_ID_KFD_GART_MAP (18446744073709551603ULL) +#define AMDGPU_KERNEL_JOB_ID_VCN_RING_TEST (18446744073709551602ULL) + struct amdgpu_job { struct drm_sched_job base; struct amdgpu_vm *vm; struct amdgpu_sync explicit_sync; - struct dma_fence hw_fence; + struct amdgpu_fence hw_fence; struct dma_fence *gang_submit; uint32_t preamble_status; uint32_t preemption_status; @@ -96,7 +112,8 @@ int amdgpu_job_alloc(struct amdgpu_device *adev, struct amdgpu_vm *vm, int amdgpu_job_alloc_with_ib(struct amdgpu_device *adev, struct drm_sched_entity *entity, void *owner, size_t size, enum amdgpu_ib_pool_type pool_type, - struct amdgpu_job **job); + struct amdgpu_job **job, + u64 k_job_id); void amdgpu_job_set_resources(struct amdgpu_job *job, struct amdgpu_bo *gds, struct amdgpu_bo *gws, struct amdgpu_bo *oa); void amdgpu_job_free_resources(struct amdgpu_job *job); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.c index dda29132dfb2..63ee6ba6a931 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.c @@ -121,10 +121,12 @@ static void amdgpu_jpeg_idle_work_handler(struct work_struct *work) fences += amdgpu_fence_count_emitted(&adev->jpeg.inst[i].ring_dec[j]); } - if (!fences && !atomic_read(&adev->jpeg.total_submission_cnt)) + if (!fences && !atomic_read(&adev->jpeg.total_submission_cnt)) { + mutex_lock(&adev->jpeg.jpeg_pg_lock); amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_JPEG, AMD_PG_STATE_GATE); - else + mutex_unlock(&adev->jpeg.jpeg_pg_lock); + } else schedule_delayed_work(&adev->jpeg.idle_work, JPEG_IDLE_TIMEOUT); } @@ -194,7 +196,8 @@ static int amdgpu_jpeg_dec_set_reg(struct amdgpu_ring *ring, uint32_t handle, int i, r; r = amdgpu_job_alloc_with_ib(ring->adev, NULL, NULL, ib_size_dw * 4, - AMDGPU_IB_POOL_DIRECT, &job); + AMDGPU_IB_POOL_DIRECT, &job, + AMDGPU_KERNEL_JOB_ID_VCN_RING_TEST); if (r) return r; @@ -368,7 +371,7 @@ static int amdgpu_debugfs_jpeg_sched_mask_set(void *data, u64 val) for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) { for (j = 0; j < adev->jpeg.num_jpeg_rings; ++j) { ring = 
&adev->jpeg.inst[i].ring_dec[j]; - if (val & (1 << ((i * adev->jpeg.num_jpeg_rings) + j))) + if (val & (BIT_ULL((i * adev->jpeg.num_jpeg_rings) + j))) ring->sched.ready = true; else ring->sched.ready = false; @@ -463,7 +466,8 @@ int amdgpu_jpeg_reg_dump_init(struct amdgpu_device *adev, adev->jpeg.ip_dump = kcalloc(adev->jpeg.num_jpeg_inst * count, sizeof(uint32_t), GFP_KERNEL); if (!adev->jpeg.ip_dump) { - DRM_ERROR("Failed to allocate memory for JPEG IP Dump\n"); + dev_err(adev->dev, + "Failed to allocate memory for JPEG IP Dump\n"); return -ENOMEM; } adev->jpeg.reg_list = reg; @@ -536,3 +540,68 @@ void amdgpu_jpeg_print_ip_state(struct amdgpu_ip_block *ip_block, struct drm_pri drm_printf(p, "\nInactive Instance:JPEG%d\n", i); } } + +static inline bool amdgpu_jpeg_reg_valid(u32 reg) +{ + if (reg < JPEG_REG_RANGE_START || reg > JPEG_REG_RANGE_END || + (reg >= JPEG_ATOMIC_RANGE_START && reg <= JPEG_ATOMIC_RANGE_END)) + return false; + else + return true; +} + +/** + * amdgpu_jpeg_dec_parse_cs - command submission parser + * + * @parser: Command submission parser context + * @job: the job to parse + * @ib: the IB to parse + * + * Parse the command stream, return -EINVAL for invalid packet, + * 0 otherwise + */ + +int amdgpu_jpeg_dec_parse_cs(struct amdgpu_cs_parser *parser, + struct amdgpu_job *job, + struct amdgpu_ib *ib) +{ + u32 i, reg, res, cond, type; + struct amdgpu_device *adev = parser->adev; + + for (i = 0; i < ib->length_dw ; i += 2) { + reg = CP_PACKETJ_GET_REG(ib->ptr[i]); + res = CP_PACKETJ_GET_RES(ib->ptr[i]); + cond = CP_PACKETJ_GET_COND(ib->ptr[i]); + type = CP_PACKETJ_GET_TYPE(ib->ptr[i]); + + if (res) /* only support 0 at the moment */ + return -EINVAL; + + switch (type) { + case PACKETJ_TYPE0: + if (cond != PACKETJ_CONDITION_CHECK0 || + !amdgpu_jpeg_reg_valid(reg)) { + dev_err(adev->dev, "Invalid packet [0x%08x]!\n", ib->ptr[i]); + return -EINVAL; + } + break; + case PACKETJ_TYPE3: + if (cond != PACKETJ_CONDITION_CHECK3 || + !amdgpu_jpeg_reg_valid(reg)) { + dev_err(adev->dev, "Invalid packet [0x%08x]!\n", ib->ptr[i]); + return -EINVAL; + } + break; + case PACKETJ_TYPE6: + if (ib->ptr[i] == CP_PACKETJ_NOP) + continue; + dev_err(adev->dev, "Invalid packet [0x%08x]!\n", ib->ptr[i]); + return -EINVAL; + default: + dev_err(adev->dev, "Unknown packet type %d !\n", type); + return -EINVAL; + } + } + + return 0; +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.h index 4f0775e39b54..346ae0ab09d3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.h @@ -25,11 +25,18 @@ #define __AMDGPU_JPEG_H__ #include "amdgpu_ras.h" +#include "amdgpu_cs.h" #define AMDGPU_MAX_JPEG_INSTANCES 4 #define AMDGPU_MAX_JPEG_RINGS 10 #define AMDGPU_MAX_JPEG_RINGS_4_0_3 8 +#define JPEG_REG_RANGE_START 0x4000 +#define JPEG_REG_RANGE_END 0x41c2 +#define JPEG_ATOMIC_RANGE_START 0x4120 +#define JPEG_ATOMIC_RANGE_END 0x412A + + #define AMDGPU_JPEG_HARVEST_JPEG0 (1 << 0) #define AMDGPU_JPEG_HARVEST_JPEG1 (1 << 1) @@ -170,5 +177,8 @@ int amdgpu_jpeg_reg_dump_init(struct amdgpu_device *adev, const struct amdgpu_hwip_reg_entry *reg, u32 count); void amdgpu_jpeg_dump_ip_state(struct amdgpu_ip_block *ip_block); void amdgpu_jpeg_print_ip_state(struct amdgpu_ip_block *ip_block, struct drm_printer *p); +int amdgpu_jpeg_dec_parse_cs(struct amdgpu_cs_parser *parser, + struct amdgpu_job *job, + struct amdgpu_ib *ib); #endif /*__AMDGPU_JPEG_H__*/ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c index d2ce7d86dbc8..b3e6b3fcdf2c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c @@ -91,7 +91,7 @@ void amdgpu_driver_unload_kms(struct drm_device *dev) if (adev->rmmio == NULL) return; - if (amdgpu_acpi_smart_shift_update(dev, AMDGPU_SS_DRV_UNLOAD)) + if (amdgpu_acpi_smart_shift_update(adev, AMDGPU_SS_DRV_UNLOAD)) DRM_WARN("smart shift update failed\n"); amdgpu_acpi_fini(adev); @@ -161,7 +161,7 @@ int amdgpu_driver_load_kms(struct amdgpu_device *adev, unsigned long flags) if (acpi_status) dev_dbg(dev->dev, "Error during ACPI methods call\n"); - if (amdgpu_acpi_smart_shift_update(dev, AMDGPU_SS_DRV_LOAD)) + if (amdgpu_acpi_smart_shift_update(adev, AMDGPU_SS_DRV_LOAD)) DRM_WARN("smart shift update failed\n"); out: @@ -399,6 +399,7 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev, uint32_t ib_size_alignment = 0; enum amd_ip_block_type type; unsigned int num_rings = 0; + uint32_t num_slots = 0; unsigned int i, j; if (info->query_hw_ip.ip_instance >= AMDGPU_HW_IP_INSTANCE_MAX_COUNT) @@ -411,6 +412,12 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev, if (adev->gfx.gfx_ring[i].sched.ready && !adev->gfx.gfx_ring[i].no_user_submission) ++num_rings; + + if (!adev->gfx.disable_uq) { + for (i = 0; i < AMDGPU_MES_MAX_GFX_PIPES; i++) + num_slots += hweight32(adev->mes.gfx_hqd_mask[i]); + } + ib_start_alignment = 32; ib_size_alignment = 32; break; @@ -420,6 +427,12 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev, if (adev->gfx.compute_ring[i].sched.ready && !adev->gfx.compute_ring[i].no_user_submission) ++num_rings; + + if (!adev->sdma.disable_uq) { + for (i = 0; i < AMDGPU_MES_MAX_COMPUTE_PIPES; i++) + num_slots += hweight32(adev->mes.compute_hqd_mask[i]); + } + ib_start_alignment = 32; ib_size_alignment = 32; break; @@ -429,6 +442,12 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev, if (adev->sdma.instance[i].ring.sched.ready && !adev->sdma.instance[i].ring.no_user_submission) ++num_rings; + + if (!adev->gfx.disable_uq) { + for (i = 0; i < AMDGPU_MES_MAX_SDMA_PIPES; i++) + num_slots += hweight32(adev->mes.sdma_hqd_mask[i]); + } + ib_start_alignment = 256; ib_size_alignment = 4; break; @@ -570,6 +589,7 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev, } result->capabilities_flags = 0; result->available_rings = (1 << num_rings) - 1; + result->userq_num_slots = num_slots; result->ib_start_alignment = ib_start_alignment; result->ib_size_alignment = ib_size_alignment; return 0; @@ -738,7 +758,8 @@ int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) ui64 = atomic64_read(&adev->num_vram_cpu_page_faults); return copy_to_user(out, &ui64, min(size, 8u)) ? -EFAULT : 0; case AMDGPU_INFO_VRAM_USAGE: - ui64 = ttm_resource_manager_usage(&adev->mman.vram_mgr.manager); + ui64 = ttm_resource_manager_used(&adev->mman.vram_mgr.manager) ? + ttm_resource_manager_usage(&adev->mman.vram_mgr.manager) : 0; return copy_to_user(out, &ui64, min(size, 8u)) ? -EFAULT : 0; case AMDGPU_INFO_VIS_VRAM_USAGE: ui64 = amdgpu_vram_mgr_vis_usage(&adev->mman.vram_mgr); @@ -784,8 +805,8 @@ int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) mem.vram.usable_heap_size = adev->gmc.real_vram_size - atomic64_read(&adev->vram_pin_size) - AMDGPU_VM_RESERVED_VRAM; - mem.vram.heap_usage = - ttm_resource_manager_usage(vram_man); + mem.vram.heap_usage = ttm_resource_manager_used(&adev->mman.vram_mgr.manager) ? 
+ ttm_resource_manager_usage(vram_man) : 0; mem.vram.max_allocation = mem.vram.usable_heap_size * 3 / 4; mem.cpu_accessible_vram.total_heap_size = @@ -919,6 +940,10 @@ out: if (adev->gfx.config.ta_cntl2_truncate_coord_mode) dev_info->ids_flags |= AMDGPU_IDS_FLAGS_CONFORMANT_TRUNC_COORD; + /* Gang submit is not supported under SRIOV currently */ + if (!amdgpu_sriov_vf(adev)) + dev_info->ids_flags |= AMDGPU_IDS_FLAGS_GANG_SUBMIT; + if (amdgpu_passthrough(adev)) dev_info->ids_flags |= (AMDGPU_IDS_FLAGS_MODE_PT << AMDGPU_IDS_FLAGS_MODE_SHIFT) & @@ -1395,13 +1420,11 @@ int amdgpu_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv) if (r) goto error_pasid; - r = amdgpu_vm_init(adev, &fpriv->vm, fpriv->xcp_id); - if (r) - goto error_pasid; + amdgpu_debugfs_vm_init(file_priv); - r = amdgpu_vm_set_pasid(adev, &fpriv->vm, pasid); + r = amdgpu_vm_init(adev, &fpriv->vm, fpriv->xcp_id, pasid); if (r) - goto error_vm; + goto error_pasid; fpriv->prt_va = amdgpu_vm_bo_add(adev, &fpriv->vm, NULL); if (!fpriv->prt_va) { @@ -1442,10 +1465,8 @@ error_vm: amdgpu_vm_fini(adev, &fpriv->vm); error_pasid: - if (pasid) { + if (pasid) amdgpu_pasid_free(pasid); - amdgpu_vm_set_pasid(adev, &fpriv->vm, 0); - } kfree(fpriv); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c index 6fa9fa11c8f3..4883adcfbb4b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c @@ -47,7 +47,7 @@ static int amdgpu_mes_doorbell_init(struct amdgpu_device *adev) /* Bitmap for dynamic allocation of kernel doorbells */ mes->doorbell_bitmap = bitmap_zalloc(PAGE_SIZE / sizeof(u32), GFP_KERNEL); if (!mes->doorbell_bitmap) { - DRM_ERROR("Failed to allocate MES doorbell bitmap\n"); + dev_err(adev->dev, "Failed to allocate MES doorbell bitmap\n"); return -ENOMEM; } @@ -191,6 +191,20 @@ int amdgpu_mes_init(struct amdgpu_device *adev) if (r) goto error_doorbell; + if (adev->mes.hung_queue_db_array_size) { + r = amdgpu_bo_create_kernel(adev, + adev->mes.hung_queue_db_array_size * sizeof(u32), + PAGE_SIZE, + AMDGPU_GEM_DOMAIN_GTT, + &adev->mes.hung_queue_db_array_gpu_obj, + &adev->mes.hung_queue_db_array_gpu_addr, + &adev->mes.hung_queue_db_array_cpu_addr); + if (r) { + dev_warn(adev->dev, "failed to create MES hung db array buffer (%d)", r); + goto error_doorbell; + } + } + return 0; error_doorbell: @@ -216,6 +230,10 @@ void amdgpu_mes_fini(struct amdgpu_device *adev) { int i; + amdgpu_bo_free_kernel(&adev->mes.hung_queue_db_array_gpu_obj, + &adev->mes.hung_queue_db_array_gpu_addr, + &adev->mes.hung_queue_db_array_cpu_addr); + amdgpu_bo_free_kernel(&adev->mes.event_log_gpu_obj, &adev->mes.event_log_gpu_addr, &adev->mes.event_log_cpu_addr); @@ -256,7 +274,7 @@ int amdgpu_mes_suspend(struct amdgpu_device *adev) r = adev->mes.funcs->suspend_gang(&adev->mes, &input); amdgpu_mes_unlock(&adev->mes); if (r) - DRM_ERROR("failed to suspend all gangs"); + dev_err(adev->dev, "failed to suspend all gangs"); return r; } @@ -280,7 +298,7 @@ int amdgpu_mes_resume(struct amdgpu_device *adev) r = adev->mes.funcs->resume_gang(&adev->mes, &input); amdgpu_mes_unlock(&adev->mes); if (r) - DRM_ERROR("failed to resume all gangs"); + dev_err(adev->dev, "failed to resume all gangs"); return r; } @@ -304,7 +322,7 @@ int amdgpu_mes_map_legacy_queue(struct amdgpu_device *adev, r = adev->mes.funcs->map_legacy_queue(&adev->mes, &queue_input); amdgpu_mes_unlock(&adev->mes); if (r) - DRM_ERROR("failed to map legacy queue\n"); + dev_err(adev->dev, "failed to map legacy queue\n"); 
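The MES init hunk above allocates a GTT-backed array of doorbell offsets that the firmware fills in when it detects hung queues; the amdgpu_mes_detect_and_reset_hung_queues() helper added further down primes that array with AMDGPU_MES_INVALID_DB_OFFSET, triggers detection, and then scans for slots that changed. What follows is only a minimal userspace sketch of that sentinel-and-scan pattern; the array size and sample offsets are made up for illustration and are not taken from the patch.

/* Sentinel-and-scan sketch: prime every slot with an "invalid" marker,
 * let a producer (simulated here) report a few valid doorbell offsets,
 * then collect everything that is no longer the sentinel.
 */
#include <stdio.h>
#include <stdint.h>
#include <string.h>

#define INVALID_DB_OFFSET 0xffffffffu
#define DB_ARRAY_SIZE     16            /* illustrative size only */

static unsigned int collect_hung_doorbells(const uint32_t *db_array,
                                           unsigned int array_size,
                                           uint32_t *out)
{
        unsigned int i, hung = 0;

        for (i = 0; i < array_size; i++) {
                if (db_array[i] != INVALID_DB_OFFSET)
                        out[hung++] = db_array[i];
        }
        return hung;
}

int main(void)
{
        uint32_t db_array[DB_ARRAY_SIZE];
        uint32_t hung[DB_ARRAY_SIZE];
        unsigned int i, n;

        /* prime with the sentinel; 0xff per byte yields 0xffffffff per u32 */
        memset(db_array, 0xff, sizeof(db_array));

        /* pretend the firmware reported two hung doorbell offsets */
        db_array[3] = 0x120;
        db_array[9] = 0x1a8;

        n = collect_hung_doorbells(db_array, DB_ARRAY_SIZE, hung);
        for (i = 0; i < n; i++)
                printf("hung doorbell offset 0x%x\n", (unsigned int)hung[i]);
        return 0;
}

The same idea carries the kernel-side TODO in the patch: entries past the reported doorbells can later hold per-queue HQD info without changing the scan.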
return r; } @@ -329,7 +347,7 @@ int amdgpu_mes_unmap_legacy_queue(struct amdgpu_device *adev, r = adev->mes.funcs->unmap_legacy_queue(&adev->mes, &queue_input); amdgpu_mes_unlock(&adev->mes); if (r) - DRM_ERROR("failed to unmap legacy queue\n"); + dev_err(adev->dev, "failed to unmap legacy queue\n"); return r; } @@ -361,7 +379,59 @@ int amdgpu_mes_reset_legacy_queue(struct amdgpu_device *adev, r = adev->mes.funcs->reset_hw_queue(&adev->mes, &queue_input); amdgpu_mes_unlock(&adev->mes); if (r) - DRM_ERROR("failed to reset legacy queue\n"); + dev_err(adev->dev, "failed to reset legacy queue\n"); + + return r; +} + +int amdgpu_mes_get_hung_queue_db_array_size(struct amdgpu_device *adev) +{ + return adev->mes.hung_queue_db_array_size; +} + +int amdgpu_mes_detect_and_reset_hung_queues(struct amdgpu_device *adev, + int queue_type, + bool detect_only, + unsigned int *hung_db_num, + u32 *hung_db_array) + +{ + struct mes_detect_and_reset_queue_input input; + u32 *db_array = adev->mes.hung_queue_db_array_cpu_addr; + int r, i; + + if (!hung_db_num || !hung_db_array) + return -EINVAL; + + if ((queue_type != AMDGPU_RING_TYPE_GFX) && + (queue_type != AMDGPU_RING_TYPE_COMPUTE) && + (queue_type != AMDGPU_RING_TYPE_SDMA)) + return -EINVAL; + + /* Clear the doorbell array before detection */ + memset(adev->mes.hung_queue_db_array_cpu_addr, AMDGPU_MES_INVALID_DB_OFFSET, + adev->mes.hung_queue_db_array_size * sizeof(u32)); + input.queue_type = queue_type; + input.detect_only = detect_only; + + r = adev->mes.funcs->detect_and_reset_hung_queues(&adev->mes, + &input); + if (r) { + dev_err(adev->dev, "failed to detect and reset\n"); + } else { + *hung_db_num = 0; + for (i = 0; i < adev->mes.hung_queue_hqd_info_offset; i++) { + if (db_array[i] != AMDGPU_MES_INVALID_DB_OFFSET) { + hung_db_array[i] = db_array[i]; + *hung_db_num += 1; + } + } + + /* + * TODO: return HQD info for MES scheduled user compute queue reset cases + * stored in hung_db_array hqd info offset to full array size + */ + } return r; } @@ -469,7 +539,8 @@ int amdgpu_mes_set_shader_debugger(struct amdgpu_device *adev, int r; if (!adev->mes.funcs->misc_op) { - DRM_ERROR("mes set shader debugger is not supported!\n"); + dev_err(adev->dev, + "mes set shader debugger is not supported!\n"); return -EINVAL; } @@ -493,7 +564,7 @@ int amdgpu_mes_set_shader_debugger(struct amdgpu_device *adev, r = adev->mes.funcs->misc_op(&adev->mes, &op_input); if (r) - DRM_ERROR("failed to set_shader_debugger\n"); + dev_err(adev->dev, "failed to set_shader_debugger\n"); amdgpu_mes_unlock(&adev->mes); @@ -507,7 +578,8 @@ int amdgpu_mes_flush_shader_debugger(struct amdgpu_device *adev, int r; if (!adev->mes.funcs->misc_op) { - DRM_ERROR("mes flush shader debugger is not supported!\n"); + dev_err(adev->dev, + "mes flush shader debugger is not supported!\n"); return -EINVAL; } @@ -519,7 +591,7 @@ int amdgpu_mes_flush_shader_debugger(struct amdgpu_device *adev, r = adev->mes.funcs->misc_op(&adev->mes, &op_input); if (r) - DRM_ERROR("failed to set_shader_debugger\n"); + dev_err(adev->dev, "failed to set_shader_debugger\n"); amdgpu_mes_unlock(&adev->mes); @@ -619,14 +691,11 @@ out: bool amdgpu_mes_suspend_resume_all_supported(struct amdgpu_device *adev) { uint32_t mes_rev = adev->mes.sched_version & AMDGPU_MES_VERSION_MASK; - bool is_supported = false; - - if (amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(11, 0, 0) && - amdgpu_ip_version(adev, GC_HWIP, 0) < IP_VERSION(12, 0, 0) && - mes_rev >= 0x63) - is_supported = true; - return is_supported; + return 
((amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(11, 0, 0) && + amdgpu_ip_version(adev, GC_HWIP, 0) < IP_VERSION(12, 0, 0) && + mes_rev >= 0x63) || + amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(12, 0, 0)); } /* Fix me -- node_id is used to identify the correct MES instances in the future */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h index c0d2c195fe2e..97c137c90f97 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h @@ -41,6 +41,7 @@ #define AMDGPU_MES_API_VERSION_MASK 0x00fff000 #define AMDGPU_MES_FEAT_VERSION_MASK 0xff000000 #define AMDGPU_MES_MSCRATCH_SIZE 0x40000 +#define AMDGPU_MES_INVALID_DB_OFFSET 0xffffffff enum amdgpu_mes_priority_level { AMDGPU_MES_PRIORITY_LEVEL_LOW = 0, @@ -147,6 +148,11 @@ struct amdgpu_mes { uint64_t resource_1_gpu_addr[AMDGPU_MAX_MES_PIPES]; void *resource_1_addr[AMDGPU_MAX_MES_PIPES]; + int hung_queue_db_array_size; + int hung_queue_hqd_info_offset; + struct amdgpu_bo *hung_queue_db_array_gpu_obj; + uint64_t hung_queue_db_array_gpu_addr; + void *hung_queue_db_array_cpu_addr; }; struct amdgpu_mes_gang { @@ -280,6 +286,18 @@ struct mes_reset_queue_input { bool is_kq; }; +struct mes_detect_and_reset_queue_input { + uint32_t queue_type; + bool detect_only; +}; + +struct mes_inv_tlbs_pasid_input { + uint32_t xcc_id; + uint16_t pasid; + uint8_t hub_id; + uint8_t flush_type; +}; + enum mes_misc_opcode { MES_MISC_OP_WRITE_REG, MES_MISC_OP_READ_REG, @@ -367,6 +385,13 @@ struct amdgpu_mes_funcs { int (*reset_hw_queue)(struct amdgpu_mes *mes, struct mes_reset_queue_input *input); + + int (*detect_and_reset_hung_queues)(struct amdgpu_mes *mes, + struct mes_detect_and_reset_queue_input *input); + + + int (*invalidate_tlbs_pasid)(struct amdgpu_mes *mes, + struct mes_inv_tlbs_pasid_input *input); }; #define amdgpu_mes_kiq_hw_init(adev) (adev)->mes.kiq_hw_init((adev)) @@ -390,6 +415,13 @@ int amdgpu_mes_reset_legacy_queue(struct amdgpu_device *adev, unsigned int vmid, bool use_mmio); +int amdgpu_mes_get_hung_queue_db_array_size(struct amdgpu_device *adev); +int amdgpu_mes_detect_and_reset_hung_queues(struct amdgpu_device *adev, + int queue_type, + bool detect_only, + unsigned int *hung_db_num, + u32 *hung_db_array); + uint32_t amdgpu_mes_rreg(struct amdgpu_device *adev, uint32_t reg); int amdgpu_mes_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t val); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h index 6da4f946cac0..20460cfd09bc 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h @@ -496,8 +496,6 @@ struct amdgpu_crtc { struct drm_connector *connector; /* for dpm */ u32 line_time; - u32 wm_low; - u32 wm_high; u32 lb_vblank_lead_lines; struct drm_display_mode hw_mode; /* for virtual dce */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.c index d085687a47ea..a974265837f0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.c @@ -53,6 +53,16 @@ u64 amdgpu_nbio_get_pcie_replay_count(struct amdgpu_device *adev) return 0; } +bool amdgpu_nbio_is_replay_cnt_supported(struct amdgpu_device *adev) +{ + if (amdgpu_sriov_vf(adev) || !adev->asic_funcs || + !adev->asic_funcs->get_pcie_replay_count || + (!adev->nbio.funcs || !adev->nbio.funcs->get_pcie_replay_count)) + return false; + + return true; +} + int amdgpu_nbio_ras_late_init(struct amdgpu_device *adev, struct ras_common_if 
*ras_block) { int r; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.h index 79c2f807b9fe..b528de6a01f6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.h @@ -119,4 +119,6 @@ int amdgpu_nbio_ras_sw_init(struct amdgpu_device *adev); int amdgpu_nbio_ras_late_init(struct amdgpu_device *adev, struct ras_common_if *ras_block); u64 amdgpu_nbio_get_pcie_replay_count(struct amdgpu_device *adev); +bool amdgpu_nbio_is_replay_cnt_supported(struct amdgpu_device *adev); + #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c index 73403744331a..e08f58de4b17 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c @@ -32,6 +32,7 @@ #include <linux/list.h> #include <linux/slab.h> #include <linux/dma-buf.h> +#include <linux/export.h> #include <drm/drm_drv.h> #include <drm/amdgpu_drm.h> @@ -62,7 +63,7 @@ static void amdgpu_bo_destroy(struct ttm_buffer_object *tbo) amdgpu_bo_kunmap(bo); - if (bo->tbo.base.import_attach) + if (drm_gem_is_imported(&bo->tbo.base)) drm_prime_gem_destroy(&bo->tbo.base, bo->tbo.sg); drm_gem_object_release(&bo->tbo.base); amdgpu_bo_unref(&bo->parent); @@ -152,6 +153,14 @@ void amdgpu_bo_placement_from_domain(struct amdgpu_bo *abo, u32 domain) c++; } + if (domain & AMDGPU_GEM_DOMAIN_MMIO_REMAP) { + places[c].fpfn = 0; + places[c].lpfn = 0; + places[c].mem_type = AMDGPU_PL_MMIO_REMAP; + places[c].flags = 0; + c++; + } + if (domain & AMDGPU_GEM_DOMAIN_GTT) { places[c].fpfn = 0; places[c].lpfn = 0; @@ -351,7 +360,6 @@ int amdgpu_bo_create_kernel(struct amdgpu_device *adev, return 0; } -EXPORT_SYMBOL(amdgpu_bo_create_kernel); /** * amdgpu_bo_create_isp_user - create user BO for isp @@ -420,7 +428,6 @@ error_unreserve: return r; } -EXPORT_SYMBOL(amdgpu_bo_create_isp_user); /** * amdgpu_bo_create_kernel_at - create BO for kernel use at specific location @@ -524,7 +531,6 @@ void amdgpu_bo_free_kernel(struct amdgpu_bo **bo, u64 *gpu_addr, if (cpu_addr) *cpu_addr = NULL; } -EXPORT_SYMBOL(amdgpu_bo_free_kernel); /** * amdgpu_bo_free_isp_user - free BO for isp use @@ -547,7 +553,6 @@ void amdgpu_bo_free_isp_user(struct amdgpu_bo *bo) } amdgpu_bo_unref(&bo); } -EXPORT_SYMBOL(amdgpu_bo_free_isp_user); /* Validate bo size is bit bigger than the request domain */ static bool amdgpu_bo_validate_size(struct amdgpu_device *adev, @@ -939,7 +944,7 @@ int amdgpu_bo_pin(struct amdgpu_bo *bo, u32 domain) domain = bo->preferred_domains & domain; /* A shared bo cannot be migrated to VRAM */ - if (bo->tbo.base.import_attach) { + if (drm_gem_is_imported(&bo->tbo.base)) { if (domain & AMDGPU_GEM_DOMAIN_GTT) domain = AMDGPU_GEM_DOMAIN_GTT; else @@ -967,7 +972,7 @@ int amdgpu_bo_pin(struct amdgpu_bo *bo, u32 domain) */ domain = amdgpu_bo_get_preferred_domain(adev, domain); - if (bo->tbo.base.import_attach) + if (drm_gem_is_imported(&bo->tbo.base)) dma_buf_pin(bo->tbo.base.import_attach); /* force to pin into visible video ram */ @@ -1018,7 +1023,7 @@ void amdgpu_bo_unpin(struct amdgpu_bo *bo) if (bo->tbo.pin_count) return; - if (bo->tbo.base.import_attach) + if (drm_gem_is_imported(&bo->tbo.base)) dma_buf_unpin(bo->tbo.base.import_attach); if (bo->tbo.resource->mem_type == TTM_PL_VRAM) { @@ -1263,7 +1268,7 @@ void amdgpu_bo_move_notify(struct ttm_buffer_object *bo, amdgpu_bo_kunmap(abo); - if (abo->tbo.base.dma_buf && !abo->tbo.base.import_attach && + if (abo->tbo.base.dma_buf && !drm_gem_is_imported(&abo->tbo.base) 
&& old_mem && old_mem->mem_type != TTM_PL_SYSTEM) dma_buf_move_notify(abo->tbo.base.dma_buf); @@ -1316,7 +1321,8 @@ void amdgpu_bo_release_notify(struct ttm_buffer_object *bo) if (r) goto out; - r = amdgpu_fill_buffer(abo, 0, &bo->base._resv, &fence, true); + r = amdgpu_fill_buffer(abo, 0, &bo->base._resv, &fence, true, + AMDGPU_KERNEL_JOB_ID_CLEAR_ON_RELEASE); if (WARN_ON(r)) goto out; @@ -1473,6 +1479,26 @@ u64 amdgpu_bo_gpu_offset(struct amdgpu_bo *bo) } /** + * amdgpu_bo_fb_aper_addr - return FB aperture GPU offset of the VRAM bo + * @bo: amdgpu VRAM buffer object for which we query the offset + * + * Returns: + * current FB aperture GPU offset of the object. + */ +u64 amdgpu_bo_fb_aper_addr(struct amdgpu_bo *bo) +{ + struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); + uint64_t offset, fb_base; + + WARN_ON_ONCE(bo->tbo.resource->mem_type != TTM_PL_VRAM); + + fb_base = adev->gmc.fb_start; + fb_base += adev->gmc.xgmi.physical_node_id * adev->gmc.xgmi.node_segment_size; + offset = (bo->tbo.resource->start << PAGE_SHIFT) + fb_base; + return amdgpu_gmc_sign_extend(offset); +} + +/** * amdgpu_bo_gpu_offset_no_check - return GPU offset of bo * @bo: amdgpu object for which we query the offset * @@ -1528,6 +1554,8 @@ uint32_t amdgpu_bo_mem_stats_placement(struct amdgpu_bo *bo) return AMDGPU_PL_OA; case AMDGPU_GEM_DOMAIN_DOORBELL: return AMDGPU_PL_DOORBELL; + case AMDGPU_GEM_DOMAIN_MMIO_REMAP: + return AMDGPU_PL_MMIO_REMAP; default: return TTM_PL_SYSTEM; } @@ -1611,6 +1639,9 @@ u64 amdgpu_bo_print_info(int id, struct amdgpu_bo *bo, struct seq_file *m) case AMDGPU_PL_DOORBELL: placement = "DOORBELL"; break; + case AMDGPU_PL_MMIO_REMAP: + placement = "MMIO REMAP"; + break; case TTM_PL_SYSTEM: default: placement = "CPU"; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h index 375448627f7b..656b8a931dae 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h @@ -69,7 +69,7 @@ struct amdgpu_bo_va_mapping { uint64_t last; uint64_t __subtree_last; uint64_t offset; - uint64_t flags; + uint32_t flags; }; /* User space allocated BO in a VM */ @@ -167,6 +167,8 @@ static inline unsigned amdgpu_mem_type_to_domain(u32 mem_type) return AMDGPU_GEM_DOMAIN_OA; case AMDGPU_PL_DOORBELL: return AMDGPU_GEM_DOMAIN_DOORBELL; + case AMDGPU_PL_MMIO_REMAP: + return AMDGPU_GEM_DOMAIN_MMIO_REMAP; default: break; } @@ -304,6 +306,7 @@ int amdgpu_bo_sync_wait_resv(struct amdgpu_device *adev, struct dma_resv *resv, bool intr); int amdgpu_bo_sync_wait(struct amdgpu_bo *bo, void *owner, bool intr); u64 amdgpu_bo_gpu_offset(struct amdgpu_bo *bo); +u64 amdgpu_bo_fb_aper_addr(struct amdgpu_bo *bo); u64 amdgpu_bo_gpu_offset_no_check(struct amdgpu_bo *bo); uint32_t amdgpu_bo_mem_stats_placement(struct amdgpu_bo *bo); uint32_t amdgpu_bo_get_preferred_domain(struct amdgpu_device *adev, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c index e6f0b035e20b..8c0e5d03de50 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c @@ -252,6 +252,7 @@ static int psp_early_init(struct amdgpu_ip_block *ip_block) break; case IP_VERSION(14, 0, 2): case IP_VERSION(14, 0, 3): + adev->psp.sup_ifwi_up = !amdgpu_sriov_vf(adev); psp_v14_0_set_psp_funcs(psp); break; case IP_VERSION(14, 0, 5): @@ -447,7 +448,7 @@ static int psp_sw_init(struct amdgpu_ip_block *ip_block) psp->cmd = kzalloc(sizeof(struct psp_gfx_cmd_resp), GFP_KERNEL); if (!psp->cmd) { 
dev_err(adev->dev, "Failed to allocate memory to command buffer!\n"); - ret = -ENOMEM; + return -ENOMEM; } adev->psp.xgmi_context.supports_extended_data = @@ -574,9 +575,11 @@ static int psp_sw_fini(struct amdgpu_ip_block *ip_block) return 0; } -int psp_wait_for(struct psp_context *psp, uint32_t reg_index, - uint32_t reg_val, uint32_t mask, bool check_changed) +int psp_wait_for(struct psp_context *psp, uint32_t reg_index, uint32_t reg_val, + uint32_t mask, uint32_t flags) { + bool check_changed = flags & PSP_WAITREG_CHANGED; + bool verbose = !(flags & PSP_WAITREG_NOVERBOSE); uint32_t val; int i; struct amdgpu_device *adev = psp->adev; @@ -596,6 +599,11 @@ int psp_wait_for(struct psp_context *psp, uint32_t reg_index, udelay(1); } + if (verbose) + dev_err(adev->dev, + "psp reg (0x%x) wait timed out, mask: %x, read: %x exp: %x", + reg_index, mask, val, reg_val); + return -ETIME; } @@ -654,6 +662,14 @@ static const char *psp_gfx_cmd_name(enum psp_gfx_cmd_id cmd_id) return "BOOT_CFG"; case GFX_CMD_ID_CONFIG_SQ_PERFMON: return "CONFIG_SQ_PERFMON"; + case GFX_CMD_ID_FB_FW_RESERV_ADDR: + return "FB_FW_RESERV_ADDR"; + case GFX_CMD_ID_FB_FW_RESERV_EXT_ADDR: + return "FB_FW_RESERV_EXT_ADDR"; + case GFX_CMD_ID_SRIOV_SPATIAL_PART: + return "SPATIAL_PARTITION"; + case GFX_CMD_ID_FB_NPS_MODE: + return "NPS_MODE_CHANGE"; default: return "UNKNOWN CMD"; } @@ -865,12 +881,12 @@ static int psp_tmr_init(struct psp_context *psp) pptr = amdgpu_sriov_vf(psp->adev) ? &tmr_buf : NULL; ret = amdgpu_bo_create_kernel(psp->adev, tmr_size, PSP_TMR_ALIGNMENT, - AMDGPU_HAS_VRAM(psp->adev) ? - AMDGPU_GEM_DOMAIN_VRAM : - AMDGPU_GEM_DOMAIN_GTT, + AMDGPU_GEM_DOMAIN_GTT | AMDGPU_GEM_DOMAIN_VRAM, &psp->tmr_bo, &psp->tmr_mc_addr, pptr); } + if (amdgpu_virt_xgmi_migrate_enabled(psp->adev) && psp->tmr_bo) + psp->tmr_mc_addr = amdgpu_bo_fb_aper_addr(psp->tmr_bo); return ret; } @@ -984,6 +1000,106 @@ int psp_get_fw_attestation_records_addr(struct psp_context *psp, return ret; } +static int psp_get_fw_reservation_info(struct psp_context *psp, + uint32_t cmd_id, + uint64_t *addr, + uint32_t *size) +{ + int ret; + uint32_t status; + struct psp_gfx_cmd_resp *cmd; + + cmd = acquire_psp_cmd_buf(psp); + + cmd->cmd_id = cmd_id; + + ret = psp_cmd_submit_buf(psp, NULL, cmd, + psp->fence_buf_mc_addr); + if (ret) { + release_psp_cmd_buf(psp); + return ret; + } + + status = cmd->resp.status; + if (status == PSP_ERR_UNKNOWN_COMMAND) { + release_psp_cmd_buf(psp); + *addr = 0; + *size = 0; + return 0; + } + + *addr = (uint64_t)cmd->resp.uresp.fw_reserve_info.reserve_base_address_hi << 32 | + cmd->resp.uresp.fw_reserve_info.reserve_base_address_lo; + *size = cmd->resp.uresp.fw_reserve_info.reserve_size; + + release_psp_cmd_buf(psp); + + return 0; +} + +int psp_update_fw_reservation(struct psp_context *psp) +{ + int ret; + uint64_t reserv_addr, reserv_addr_ext; + uint32_t reserv_size, reserv_size_ext, mp0_ip_ver; + struct amdgpu_device *adev = psp->adev; + + mp0_ip_ver = amdgpu_ip_version(adev, MP0_HWIP, 0); + + if (amdgpu_sriov_vf(psp->adev)) + return 0; + + switch (mp0_ip_ver) { + case IP_VERSION(14, 0, 2): + if (adev->psp.sos.fw_version < 0x3b0e0d) + return 0; + break; + + case IP_VERSION(14, 0, 3): + if (adev->psp.sos.fw_version < 0x3a0e14) + return 0; + break; + + default: + return 0; + } + + ret = psp_get_fw_reservation_info(psp, GFX_CMD_ID_FB_FW_RESERV_ADDR, &reserv_addr, &reserv_size); + if (ret) + return ret; + ret = psp_get_fw_reservation_info(psp, GFX_CMD_ID_FB_FW_RESERV_EXT_ADDR, &reserv_addr_ext, &reserv_size_ext); + if (ret) + return 
ret; + + if (reserv_addr != adev->gmc.real_vram_size - reserv_size) { + dev_warn(adev->dev, "reserve fw region is not valid!\n"); + return 0; + } + + amdgpu_bo_free_kernel(&adev->mman.fw_reserved_memory, NULL, NULL); + + reserv_size = roundup(reserv_size, SZ_1M); + + ret = amdgpu_bo_create_kernel_at(adev, reserv_addr, reserv_size, &adev->mman.fw_reserved_memory, NULL); + if (ret) { + dev_err(adev->dev, "reserve fw region failed(%d)!\n", ret); + amdgpu_bo_free_kernel(&adev->mman.fw_reserved_memory, NULL, NULL); + return ret; + } + + reserv_size_ext = roundup(reserv_size_ext, SZ_1M); + + ret = amdgpu_bo_create_kernel_at(adev, reserv_addr_ext, reserv_size_ext, + &adev->mman.fw_reserved_memory_extend, NULL); + if (ret) { + dev_err(adev->dev, "reserve extend fw region failed(%d)!\n", ret); + amdgpu_bo_free_kernel(&adev->mman.fw_reserved_memory_extend, NULL, NULL); + return ret; + } + + return 0; +} + static int psp_boot_config_get(struct amdgpu_device *adev, uint32_t *boot_cfg) { struct psp_context *psp = &adev->psp; @@ -1270,6 +1386,11 @@ int psp_ta_load(struct psp_context *psp, struct ta_context *context) psp_copy_fw(psp, context->bin_desc.start_addr, context->bin_desc.size_bytes); + if (amdgpu_virt_xgmi_migrate_enabled(psp->adev) && + context->mem_context.shared_bo) + context->mem_context.shared_mc_addr = + amdgpu_bo_fb_aper_addr(context->mem_context.shared_bo); + psp_prep_ta_load_cmd_buf(cmd, psp->fw_pri_mc_addr, context); ret = psp_cmd_submit_buf(psp, NULL, cmd, @@ -2231,7 +2352,7 @@ static int psp_securedisplay_initialize(struct psp_context *psp) } ret = psp_ta_load(psp, &psp->securedisplay_context.context); - if (!ret) { + if (!ret && !psp->securedisplay_context.context.resp_status) { psp->securedisplay_context.context.initialized = true; mutex_init(&psp->securedisplay_context.mutex); } else @@ -2337,11 +2458,27 @@ bool amdgpu_psp_tos_reload_needed(struct amdgpu_device *adev) return false; } +static void psp_update_gpu_addresses(struct amdgpu_device *adev) +{ + struct psp_context *psp = &adev->psp; + + if (psp->cmd_buf_bo && psp->cmd_buf_mem) { + psp->fw_pri_mc_addr = amdgpu_bo_fb_aper_addr(psp->fw_pri_bo); + psp->fence_buf_mc_addr = amdgpu_bo_fb_aper_addr(psp->fence_buf_bo); + psp->cmd_buf_mc_addr = amdgpu_bo_fb_aper_addr(psp->cmd_buf_bo); + } + if (adev->firmware.rbuf && psp->km_ring.ring_mem) + psp->km_ring.ring_mem_mc_addr = amdgpu_bo_fb_aper_addr(adev->firmware.rbuf); +} + static int psp_hw_start(struct psp_context *psp) { struct amdgpu_device *adev = psp->adev; int ret; + if (amdgpu_virt_xgmi_migrate_enabled(adev)) + psp_update_gpu_addresses(adev); + if (!amdgpu_sriov_vf(adev)) { if ((is_psp_fw_valid(psp->kdb)) && (psp->funcs->bootloader_load_kdb != NULL)) { @@ -2440,6 +2577,14 @@ static int psp_hw_start(struct psp_context *psp) return ret; } + if (!amdgpu_in_reset(adev) && !adev->in_suspend) { + ret = psp_update_fw_reservation(psp); + if (ret) { + dev_err(adev->dev, "update fw reservation failed!\n"); + return ret; + } + } + if (amdgpu_sriov_vf(adev) && amdgpu_in_reset(adev)) goto skip_pin_bo; @@ -3522,8 +3667,12 @@ int psp_init_sos_microcode(struct psp_context *psp, const char *chip_name) uint8_t *ucode_array_start_addr; int err = 0; - err = amdgpu_ucode_request(adev, &adev->psp.sos_fw, AMDGPU_UCODE_REQUIRED, - "amdgpu/%s_sos.bin", chip_name); + if (amdgpu_is_kicker_fw(adev)) + err = amdgpu_ucode_request(adev, &adev->psp.sos_fw, AMDGPU_UCODE_REQUIRED, + "amdgpu/%s_sos_kicker.bin", chip_name); + else + err = amdgpu_ucode_request(adev, &adev->psp.sos_fw, AMDGPU_UCODE_REQUIRED, + 
"amdgpu/%s_sos.bin", chip_name); if (err) goto out; @@ -3799,8 +3948,12 @@ int psp_init_ta_microcode(struct psp_context *psp, const char *chip_name) struct amdgpu_device *adev = psp->adev; int err; - err = amdgpu_ucode_request(adev, &adev->psp.ta_fw, AMDGPU_UCODE_REQUIRED, - "amdgpu/%s_ta.bin", chip_name); + if (amdgpu_is_kicker_fw(adev)) + err = amdgpu_ucode_request(adev, &adev->psp.ta_fw, AMDGPU_UCODE_REQUIRED, + "amdgpu/%s_ta_kicker.bin", chip_name); + else + err = amdgpu_ucode_request(adev, &adev->psp.ta_fw, AMDGPU_UCODE_REQUIRED, + "amdgpu/%s_ta.bin", chip_name); if (err) return err; @@ -4117,8 +4270,8 @@ rel_buf: static const struct bin_attribute psp_vbflash_bin_attr = { .attr = {.name = "psp_vbflash", .mode = 0660}, .size = 0, - .write_new = amdgpu_psp_vbflash_write, - .read_new = amdgpu_psp_vbflash_read, + .write = amdgpu_psp_vbflash_write, + .read = amdgpu_psp_vbflash_read, }; /** @@ -4181,7 +4334,7 @@ static umode_t amdgpu_bin_flash_attr_is_visible(struct kobject *kobj, const struct attribute_group amdgpu_flash_attr_group = { .attrs = flash_attrs, - .bin_attrs_new = bin_flash_attrs, + .bin_attrs = bin_flash_attrs, .is_bin_visible = amdgpu_bin_flash_attr_is_visible, .is_visible = amdgpu_flash_attr_is_visible, }; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h index 428adc7f741d..237b624aa51c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h @@ -51,6 +51,17 @@ #define C2PMSG_CMD_SPI_GET_ROM_IMAGE_ADDR_HI 0x10 #define C2PMSG_CMD_SPI_GET_FLASH_IMAGE 0x11 +/* Command register bit 31 set to indicate readiness */ +#define MBOX_TOS_READY_FLAG (GFX_FLAG_RESPONSE) +#define MBOX_TOS_READY_MASK (GFX_CMD_RESPONSE_MASK | GFX_CMD_STATUS_MASK) + +/* Values to check for a successful GFX_CMD response wait. 
Check against + * both status bits and response state - helps to detect a command failure + * or other unexpected cases like a device drop reading all 0xFFs + */ +#define MBOX_TOS_RESP_FLAG (GFX_FLAG_RESPONSE) +#define MBOX_TOS_RESP_MASK (GFX_CMD_RESPONSE_MASK | GFX_CMD_STATUS_MASK) + extern const struct attribute_group amdgpu_flash_attr_group; enum psp_shared_mem_size { @@ -123,6 +134,9 @@ enum psp_reg_prog_id { PSP_REG_LAST }; +#define PSP_WAITREG_CHANGED BIT(0) /* check if the value has changed */ +#define PSP_WAITREG_NOVERBOSE BIT(1) /* No error verbose */ + struct psp_funcs { int (*init_microcode)(struct psp_context *psp); int (*wait_for_bootloader)(struct psp_context *psp); @@ -521,8 +535,8 @@ extern const struct amdgpu_ip_block_version psp_v13_0_ip_block; extern const struct amdgpu_ip_block_version psp_v13_0_4_ip_block; extern const struct amdgpu_ip_block_version psp_v14_0_ip_block; -extern int psp_wait_for(struct psp_context *psp, uint32_t reg_index, - uint32_t field_val, uint32_t mask, bool check_changed); +int psp_wait_for(struct psp_context *psp, uint32_t reg_index, + uint32_t field_val, uint32_t mask, uint32_t flags); extern int psp_wait_for_spirom_update(struct psp_context *psp, uint32_t reg_index, uint32_t field_val, uint32_t mask, uint32_t msec_timeout); @@ -588,7 +602,7 @@ int psp_init_cap_microcode(struct psp_context *psp, const char *chip_name); int psp_get_fw_attestation_records_addr(struct psp_context *psp, uint64_t *output_ptr); - +int psp_update_fw_reservation(struct psp_context *psp); int psp_load_fw_list(struct psp_context *psp, struct amdgpu_firmware_info **ucode_list, int ucode_count); void psp_copy_fw(struct psp_context *psp, uint8_t *start_addr, uint32_t bin_size); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp_ta.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp_ta.c index 38face981c3e..6e8aad91bcd3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp_ta.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp_ta.c @@ -171,13 +171,9 @@ static ssize_t ta_if_load_debugfs_write(struct file *fp, const char *buf, size_t copy_pos += sizeof(uint32_t); - ta_bin = kzalloc(ta_bin_len, GFP_KERNEL); - if (!ta_bin) - return -ENOMEM; - if (copy_from_user((void *)ta_bin, &buf[copy_pos], ta_bin_len)) { - ret = -EFAULT; - goto err_free_bin; - } + ta_bin = memdup_user(&buf[copy_pos], ta_bin_len); + if (IS_ERR(ta_bin)) + return PTR_ERR(ta_bin); /* Set TA context and functions */ set_ta_context_funcs(psp, ta_type, &context); @@ -327,13 +323,9 @@ static ssize_t ta_if_invoke_debugfs_write(struct file *fp, const char *buf, size return -EFAULT; copy_pos += sizeof(uint32_t); - shared_buf = kzalloc(shared_buf_len, GFP_KERNEL); - if (!shared_buf) - return -ENOMEM; - if (copy_from_user((void *)shared_buf, &buf[copy_pos], shared_buf_len)) { - ret = -EFAULT; - goto err_free_shared_buf; - } + shared_buf = memdup_user(&buf[copy_pos], shared_buf_len); + if (IS_ERR(shared_buf)) + return PTR_ERR(shared_buf); set_ta_context_funcs(psp, ta_type, &context); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index de0944947eaf..e0ee21150860 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -122,12 +122,15 @@ const char *get_ras_block_str(struct ras_common_if *ras_block) /* typical ECC bad page rate is 1 bad page per 100MB VRAM */ #define RAS_BAD_PAGE_COVER (100 * 1024 * 1024ULL) -#define MAX_UMC_POISON_POLLING_TIME_ASYNC 300 //ms +#define MAX_UMC_POISON_POLLING_TIME_ASYNC 10 #define AMDGPU_RAS_RETIRE_PAGE_INTERVAL 100 //ms 
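The psp_wait_for() change above replaces its bool parameter with a flags word, so callers can independently request change-detection (PSP_WAITREG_CHANGED) and suppress the timeout message (PSP_WAITREG_NOVERBOSE). Below is only a minimal sketch of that flag decoding and polling loop with a simulated register read; the register model, poll count, and flag names in the sketch are illustrative assumptions, not the driver's API.

/* Flag-decoded polling sketch: wait until a "register" matches an expected
 * value under a mask, or merely changes when the CHANGED flag is set;
 * the NOVERBOSE flag suppresses the timeout message.
 */
#include <stdio.h>
#include <stdint.h>
#include <stdbool.h>

#define WAITREG_CHANGED   (1u << 0)
#define WAITREG_NOVERBOSE (1u << 1)

static uint32_t fake_reg;               /* stand-in for an MMIO register */

static uint32_t reg_read(void)
{
        /* simulate the device flipping a status bit after a few polls */
        static int polls;

        if (++polls == 5)
                fake_reg |= 0x80000000u;
        return fake_reg;
}

static int wait_for_reg(uint32_t reg_val, uint32_t mask, uint32_t flags,
                        unsigned int max_polls)
{
        bool check_changed = flags & WAITREG_CHANGED;
        bool verbose = !(flags & WAITREG_NOVERBOSE);
        uint32_t val = 0;
        unsigned int i;

        for (i = 0; i < max_polls; i++) {
                val = reg_read();
                if (check_changed ? val != reg_val : (val & mask) == reg_val)
                        return 0;
        }

        if (verbose)
                fprintf(stderr, "reg wait timed out, mask: %x, read: %x exp: %x\n",
                        (unsigned int)mask, (unsigned int)val, (unsigned int)reg_val);
        return -1;
}

int main(void)
{
        int ret = wait_for_reg(0x80000000u, 0x80000000u, 0, 10);

        printf("wait returned %d\n", ret);
        return 0;
}

Packing both options into one flags argument keeps the call sites readable and lets later callers opt out of the error print without another parameter, which is the point of the new PSP_WAITREG_* bits.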
#define MAX_FLUSH_RETIRE_DWORK_TIMES 100 +#define BYPASS_ALLOCATED_ADDRESS 0x0 +#define BYPASS_INITIALIZATION_ADDRESS 0x1 + enum amdgpu_ras_retire_page_reservation { AMDGPU_RAS_RETIRE_PAGE_RESERVED, AMDGPU_RAS_RETIRE_PAGE_PENDING, @@ -136,10 +139,14 @@ enum amdgpu_ras_retire_page_reservation { atomic_t amdgpu_ras_in_intr = ATOMIC_INIT(0); -static bool amdgpu_ras_check_bad_page_unlock(struct amdgpu_ras *con, +static int amdgpu_ras_check_bad_page_unlock(struct amdgpu_ras *con, uint64_t addr); -static bool amdgpu_ras_check_bad_page(struct amdgpu_device *adev, +static int amdgpu_ras_check_bad_page(struct amdgpu_device *adev, uint64_t addr); + +static void amdgpu_ras_critical_region_init(struct amdgpu_device *adev); +static void amdgpu_ras_critical_region_fini(struct amdgpu_device *adev); + #ifdef CONFIG_X86_MCE_AMD static void amdgpu_register_bad_pages_mca_notifier(struct amdgpu_device *adev); struct mce_notifier_adev_list { @@ -169,18 +176,16 @@ static int amdgpu_reserve_page_direct(struct amdgpu_device *adev, uint64_t addre struct eeprom_table_record err_rec; int ret; - if ((address >= adev->gmc.mc_vram_size) || - (address >= RAS_UMC_INJECT_ADDR_LIMIT)) { + ret = amdgpu_ras_check_bad_page(adev, address); + if (ret == -EINVAL) { dev_warn(adev->dev, - "RAS WARN: input address 0x%llx is invalid.\n", - address); + "RAS WARN: input address 0x%llx is invalid.\n", + address); return -EINVAL; - } - - if (amdgpu_ras_check_bad_page(adev, address)) { + } else if (ret == 1) { dev_warn(adev->dev, - "RAS WARN: 0x%llx has already been marked as bad page!\n", - address); + "RAS WARN: 0x%llx has already been marked as bad page!\n", + address); return 0; } @@ -207,6 +212,56 @@ static int amdgpu_reserve_page_direct(struct amdgpu_device *adev, uint64_t addre return 0; } +static int amdgpu_check_address_validity(struct amdgpu_device *adev, + uint64_t address, uint64_t flags) +{ + struct amdgpu_ras *con = amdgpu_ras_get_context(adev); + struct amdgpu_vram_block_info blk_info; + uint64_t page_pfns[32] = {0}; + int i, ret, count; + bool hit = false; + + if (amdgpu_ip_version(adev, UMC_HWIP, 0) < IP_VERSION(12, 0, 0)) + return 0; + + if (amdgpu_sriov_vf(adev)) { + if (amdgpu_virt_check_vf_critical_region(adev, address, &hit)) + return -EPERM; + return hit ? -EACCES : 0; + } + + if ((address >= adev->gmc.mc_vram_size) || + (address >= RAS_UMC_INJECT_ADDR_LIMIT)) + return -EFAULT; + + count = amdgpu_umc_lookup_bad_pages_in_a_row(adev, + address, page_pfns, ARRAY_SIZE(page_pfns)); + if (count <= 0) + return -EPERM; + + for (i = 0; i < count; i++) { + memset(&blk_info, 0, sizeof(blk_info)); + ret = amdgpu_vram_mgr_query_address_block_info(&adev->mman.vram_mgr, + page_pfns[i] << AMDGPU_GPU_PAGE_SHIFT, &blk_info); + if (!ret) { + /* The input address that needs to be checked is allocated by + * current calling process, so it is necessary to exclude + * the calling process. 
+ */ + if ((flags == BYPASS_ALLOCATED_ADDRESS) && + ((blk_info.task.pid != task_pid_nr(current)) || + strncmp(blk_info.task.comm, current->comm, TASK_COMM_LEN))) + return -EACCES; + else if ((flags == BYPASS_INITIALIZATION_ADDRESS) && + (blk_info.task.pid == con->init_task_pid) && + !strncmp(blk_info.task.comm, con->init_task_comm, TASK_COMM_LEN)) + return -EACCES; + } + } + + return 0; +} + static ssize_t amdgpu_ras_debugfs_read(struct file *f, char __user *buf, size_t size, loff_t *pos) { @@ -297,6 +352,8 @@ static int amdgpu_ras_debugfs_ctrl_parse_data(struct file *f, op = 2; else if (strstr(str, "retire_page") != NULL) op = 3; + else if (strstr(str, "check_address") != NULL) + op = 4; else if (str[0] && str[1] && str[2] && str[3]) /* ascii string, but commands are not matched. */ return -EINVAL; @@ -311,6 +368,15 @@ static int amdgpu_ras_debugfs_ctrl_parse_data(struct file *f, data->inject.address = address; return 0; + } else if (op == 4) { + if (sscanf(str, "%*s 0x%llx 0x%llx", &address, &value) != 2 && + sscanf(str, "%*s %llu %llu", &address, &value) != 2) + return -EINVAL; + + data->op = op; + data->inject.address = address; + data->inject.value = value; + return 0; } if (amdgpu_ras_find_block_id_by_name(block_name, &block_id)) @@ -500,6 +566,9 @@ static ssize_t amdgpu_ras_debugfs_ctrl_write(struct file *f, return size; else return ret; + } else if (data.op == 4) { + ret = amdgpu_check_address_validity(adev, data.inject.address, data.inject.value); + return ret ? ret : size; } if (!amdgpu_ras_is_supported(adev, data.head.block)) @@ -513,22 +582,16 @@ static ssize_t amdgpu_ras_debugfs_ctrl_write(struct file *f, ret = amdgpu_ras_feature_enable(adev, &data.head, 1); break; case 2: - if ((data.inject.address >= adev->gmc.mc_vram_size && - adev->gmc.mc_vram_size) || - (data.inject.address >= RAS_UMC_INJECT_ADDR_LIMIT)) { - dev_warn(adev->dev, "RAS WARN: input address " - "0x%llx is invalid.", + /* umc ce/ue error injection for a bad page is not allowed */ + if (data.head.block == AMDGPU_RAS_BLOCK__UMC) + ret = amdgpu_ras_check_bad_page(adev, data.inject.address); + if (ret == -EINVAL) { + dev_warn(adev->dev, "RAS WARN: input address 0x%llx is invalid.", data.inject.address); - ret = -EINVAL; break; - } - - /* umc ce/ue error injection for a bad page is not allowed */ - if ((data.head.block == AMDGPU_RAS_BLOCK__UMC) && - amdgpu_ras_check_bad_page(adev, data.inject.address)) { - dev_warn(adev->dev, "RAS WARN: inject: 0x%llx has " - "already been marked as bad!\n", - data.inject.address); + } else if (ret == 1) { + dev_warn(adev->dev, "RAS WARN: inject: 0x%llx has already been marked as bad!\n", + data.inject.address); break; } @@ -1107,6 +1170,9 @@ static void amdgpu_ras_error_print_error_data(struct amdgpu_device *adev, err_info->de_count, blk_name); } } else { + if (adev->debug_disable_ce_logs) + return; + for_each_ras_error(err_node, err_data) { err_info = &err_node->err_info; mcm_info = &err_info->mcm_info; @@ -2124,7 +2190,7 @@ static int amdgpu_ras_fs_init(struct amdgpu_device *adev) con->badpages_attr = bin_attr_gpu_vram_bad_pages; sysfs_bin_attr_init(&con->badpages_attr); bin_attrs[0] = &con->badpages_attr; - group.bin_attrs_new = bin_attrs; + group.bin_attrs = bin_attrs; } r = sysfs_create_group(&adev->dev->kobj, &group); @@ -2563,18 +2629,26 @@ static int amdgpu_ras_badpages_read(struct amdgpu_device *adev, goto out; } - *bps = kmalloc(sizeof(struct ras_badpage) * data->count, GFP_KERNEL); + *bps = kmalloc_array(data->count, sizeof(struct ras_badpage), GFP_KERNEL); if (!*bps) { 
ret = -ENOMEM; goto out; } for (; i < data->count; i++) { + if (!data->bps[i].ts) + continue; + (*bps)[i] = (struct ras_badpage){ .bp = data->bps[i].retired_page, .size = AMDGPU_GPU_PAGE_SIZE, .flags = AMDGPU_RAS_RETIRE_PAGE_RESERVED, }; + + if (amdgpu_ras_check_critical_address(adev, + data->bps[i].retired_page << AMDGPU_GPU_PAGE_SHIFT)) + continue; + status = amdgpu_vram_mgr_query_page_status(&adev->mman.vram_mgr, data->bps[i].retired_page << AMDGPU_GPU_PAGE_SHIFT); if (status == -EBUSY) @@ -2583,7 +2657,7 @@ static int amdgpu_ras_badpages_read(struct amdgpu_device *adev, (*bps)[i].flags = AMDGPU_RAS_RETIRE_PAGE_FAULT; } - *count = data->count; + *count = con->bad_page_num; out: mutex_unlock(&con->recovery_lock); return ret; @@ -2635,6 +2709,7 @@ static void amdgpu_ras_do_recovery(struct work_struct *work) struct amdgpu_device *adev = ras->adev; struct list_head device_list, *device_list_handle = NULL; struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev); + unsigned int error_query_mode; enum ras_event_type type; if (hive) { @@ -2663,6 +2738,13 @@ static void amdgpu_ras_do_recovery(struct work_struct *work) device_list_handle = &device_list; } + if (amdgpu_ras_get_error_query_mode(adev, &error_query_mode)) { + if (error_query_mode == AMDGPU_RAS_FIRMWARE_ERROR_QUERY) { + /* wait 500ms to ensure pmfw polling mca bank info done */ + msleep(500); + } + } + type = amdgpu_ras_get_fatal_error_event(adev); list_for_each_entry(remote_adev, device_list_handle, gmc.xgmi.head) { @@ -2719,7 +2801,7 @@ static int amdgpu_ras_realloc_eh_data_space(struct amdgpu_device *adev, unsigned int old_space = data->count + data->space_left; unsigned int new_space = old_space + pages; unsigned int align_space = ALIGN(new_space, 512); - void *bps = kmalloc(align_space * sizeof(*data->bps), GFP_KERNEL); + void *bps = kmalloc_array(align_space, sizeof(*data->bps), GFP_KERNEL); if (!bps) { return -ENOMEM; @@ -2811,8 +2893,11 @@ static int __amdgpu_ras_restore_bad_pages(struct amdgpu_device *adev, for (j = 0; j < count; j++) { if (amdgpu_ras_check_bad_page_unlock(con, - bps[j].retired_page << AMDGPU_GPU_PAGE_SHIFT)) + bps[j].retired_page << AMDGPU_GPU_PAGE_SHIFT)) { + data->count++; + data->space_left--; continue; + } if (!data->space_left && amdgpu_ras_realloc_eh_data_space(adev, data, 256)) { @@ -2825,6 +2910,7 @@ static int __amdgpu_ras_restore_bad_pages(struct amdgpu_device *adev, sizeof(struct eeprom_table_record)); data->count++; data->space_left--; + con->bad_page_num++; } return 0; @@ -2854,6 +2940,13 @@ static int __amdgpu_ras_convert_rec_array_from_rom(struct amdgpu_device *adev, if (amdgpu_umc_pages_in_a_row(adev, err_data, bps[0].retired_page << AMDGPU_GPU_PAGE_SHIFT)) return -EINVAL; + for (i = 0; i < adev->umc.retire_unit; i++) { + err_data->err_addr[i].address = bps[0].address; + err_data->err_addr[i].mem_channel = bps[0].mem_channel; + err_data->err_addr[i].bank = bps[0].bank; + err_data->err_addr[i].err_type = bps[0].err_type; + err_data->err_addr[i].mcumc_id = bps[0].mcumc_id; + } } else { if (amdgpu_ras_mca2pa_by_idx(adev, &bps[0], err_data)) return -EINVAL; @@ -2885,6 +2978,7 @@ static int __amdgpu_ras_convert_rec_from_rom(struct amdgpu_device *adev, struct eeprom_table_record *bps, struct ras_err_data *err_data, enum amdgpu_memory_partition nps) { + int i = 0; enum amdgpu_memory_partition save_nps; save_nps = (bps->retired_page >> UMC_NPS_SHIFT) & UMC_NPS_MASK; @@ -2894,6 +2988,13 @@ static int __amdgpu_ras_convert_rec_from_rom(struct amdgpu_device *adev, if (amdgpu_umc_pages_in_a_row(adev, 
err_data, bps->retired_page << AMDGPU_GPU_PAGE_SHIFT)) return -EINVAL; + for (i = 0; i < adev->umc.retire_unit; i++) { + err_data->err_addr[i].address = bps->address; + err_data->err_addr[i].mem_channel = bps->mem_channel; + err_data->err_addr[i].bank = bps->bank; + err_data->err_addr[i].err_type = bps->err_type; + err_data->err_addr[i].mcumc_id = bps->mcumc_id; + } } else { if (bps->address) { if (amdgpu_ras_mca2pa_by_idx(adev, bps, err_data)) @@ -2956,7 +3057,7 @@ int amdgpu_ras_add_bad_pages(struct amdgpu_device *adev, ret = __amdgpu_ras_convert_rec_array_from_rom(adev, &bps[i], &err_data, nps); if (ret) - control->ras_num_bad_pages -= adev->umc.retire_unit; + con->bad_page_num -= adev->umc.retire_unit; i += (adev->umc.retire_unit - 1); } else { break; @@ -2970,8 +3071,10 @@ int amdgpu_ras_add_bad_pages(struct amdgpu_device *adev, ret = __amdgpu_ras_convert_rec_from_rom(adev, &bps[i], &err_data, nps); if (ret) - control->ras_num_bad_pages -= adev->umc.retire_unit; + con->bad_page_num -= adev->umc.retire_unit; } + + con->eh_data->count_saved = con->eh_data->count; } else { ret = __amdgpu_ras_restore_bad_pages(adev, bps, pages); } @@ -2994,7 +3097,7 @@ int amdgpu_ras_save_bad_pages(struct amdgpu_device *adev, struct amdgpu_ras *con = amdgpu_ras_get_context(adev); struct ras_err_handler_data *data; struct amdgpu_ras_eeprom_control *control; - int save_count, unit_num, bad_page_num, i; + int save_count, unit_num, i; if (!con || !con->eh_data) { if (new_cnt) @@ -3003,30 +3106,38 @@ int amdgpu_ras_save_bad_pages(struct amdgpu_device *adev, return 0; } + if (!con->eeprom_control.is_eeprom_valid) { + dev_warn(adev->dev, + "Failed to save EEPROM table data because of EEPROM data corruption!"); + if (new_cnt) + *new_cnt = 0; + + return 0; + } + mutex_lock(&con->recovery_lock); control = &con->eeprom_control; data = con->eh_data; - bad_page_num = control->ras_num_bad_pages; - save_count = data->count - bad_page_num; + unit_num = data->count / adev->umc.retire_unit - control->ras_num_recs; + save_count = con->bad_page_num - control->ras_num_bad_pages; mutex_unlock(&con->recovery_lock); - unit_num = save_count / adev->umc.retire_unit; if (new_cnt) *new_cnt = unit_num; /* only new entries are saved */ - if (save_count > 0) { + if (unit_num > 0) { /*old asics only save pa to eeprom like before*/ if (IP_VERSION_MAJ(amdgpu_ip_version(adev, UMC_HWIP, 0)) < 12) { if (amdgpu_ras_eeprom_append(control, - &data->bps[bad_page_num], save_count)) { + &data->bps[data->count_saved], unit_num)) { dev_err(adev->dev, "Failed to save EEPROM table data!"); return -EIO; } } else { for (i = 0; i < unit_num; i++) { if (amdgpu_ras_eeprom_append(control, - &data->bps[bad_page_num + + &data->bps[data->count_saved + i * adev->umc.retire_unit], 1)) { dev_err(adev->dev, "Failed to save EEPROM table data!"); return -EIO; @@ -3035,6 +3146,7 @@ int amdgpu_ras_save_bad_pages(struct amdgpu_device *adev, } dev_info(adev->dev, "Saved %d pages to EEPROM table.\n", save_count); + data->count_saved = data->count; } return 0; @@ -3089,17 +3201,17 @@ static int amdgpu_ras_load_bad_pages(struct amdgpu_device *adev) } } + ret = amdgpu_ras_add_bad_pages(adev, bps, control->ras_num_recs, true); + if (ret) + goto out; + ret = amdgpu_ras_eeprom_check(control); if (ret) goto out; /* HW not usable */ - if (amdgpu_ras_is_rma(adev)) { + if (amdgpu_ras_is_rma(adev)) ret = -EHWPOISON; - goto out; - } - - ret = amdgpu_ras_add_bad_pages(adev, bps, control->ras_num_recs, true); } out: @@ -3107,18 +3219,24 @@ out: return ret; } -static bool 
amdgpu_ras_check_bad_page_unlock(struct amdgpu_ras *con, +static int amdgpu_ras_check_bad_page_unlock(struct amdgpu_ras *con, uint64_t addr) { struct ras_err_handler_data *data = con->eh_data; + struct amdgpu_device *adev = con->adev; int i; + if ((addr >= adev->gmc.mc_vram_size && + adev->gmc.mc_vram_size) || + (addr >= RAS_UMC_INJECT_ADDR_LIMIT)) + return -EINVAL; + addr >>= AMDGPU_GPU_PAGE_SHIFT; for (i = 0; i < data->count; i++) if (addr == data->bps[i].retired_page) - return true; + return 1; - return false; + return 0; } /* @@ -3126,11 +3244,11 @@ static bool amdgpu_ras_check_bad_page_unlock(struct amdgpu_ras *con, * * Note: this check is only for umc block */ -static bool amdgpu_ras_check_bad_page(struct amdgpu_device *adev, +static int amdgpu_ras_check_bad_page(struct amdgpu_device *adev, uint64_t addr) { struct amdgpu_ras *con = amdgpu_ras_get_context(adev); - bool ret = false; + int ret = 0; if (!con || !con->eh_data) return ret; @@ -3214,7 +3332,7 @@ static void amdgpu_ras_ecc_log_init(struct ras_ecc_log_info *ecc_log) INIT_RADIX_TREE(&ecc_log->de_page_tree, GFP_KERNEL); ecc_log->de_queried_count = 0; - ecc_log->prev_de_queried_count = 0; + ecc_log->consumption_q_count = 0; } static void amdgpu_ras_ecc_log_fini(struct ras_ecc_log_info *ecc_log) @@ -3234,7 +3352,7 @@ static void amdgpu_ras_ecc_log_fini(struct ras_ecc_log_info *ecc_log) mutex_destroy(&ecc_log->lock); ecc_log->de_queried_count = 0; - ecc_log->prev_de_queried_count = 0; + ecc_log->consumption_q_count = 0; } static bool amdgpu_ras_schedule_retirement_dwork(struct amdgpu_ras *con, @@ -3260,7 +3378,6 @@ static void amdgpu_ras_do_page_retirement(struct work_struct *work) page_retirement_dwork.work); struct amdgpu_device *adev = con->adev; struct ras_err_data err_data; - unsigned long err_cnt; /* If gpu reset is ongoing, delay retiring the bad pages */ if (amdgpu_in_reset(adev) || amdgpu_ras_in_recovery(adev)) { @@ -3272,13 +3389,9 @@ static void amdgpu_ras_do_page_retirement(struct work_struct *work) amdgpu_ras_error_data_init(&err_data); amdgpu_umc_handle_bad_pages(adev, &err_data); - err_cnt = err_data.err_addr_cnt; amdgpu_ras_error_data_fini(&err_data); - if (err_cnt && amdgpu_ras_is_rma(adev)) - amdgpu_ras_reset_gpu(adev); - amdgpu_ras_schedule_retirement_dwork(con, AMDGPU_RAS_RETIRE_PAGE_INTERVAL); } @@ -3289,58 +3402,39 @@ static int amdgpu_ras_poison_creation_handler(struct amdgpu_device *adev, int ret = 0; struct ras_ecc_log_info *ecc_log; struct ras_query_if info; - uint32_t timeout = 0; + u32 timeout = MAX_UMC_POISON_POLLING_TIME_ASYNC; struct amdgpu_ras *ras = amdgpu_ras_get_context(adev); - uint64_t de_queried_count; - uint32_t new_detect_count, total_detect_count; - uint32_t need_query_count = poison_creation_count; - bool query_data_timeout = false; + u64 de_queried_count; + u64 consumption_q_count; enum ras_event_type type = RAS_EVENT_TYPE_POISON_CREATION; memset(&info, 0, sizeof(info)); info.head.block = AMDGPU_RAS_BLOCK__UMC; ecc_log = &ras->umc_ecc_log; - total_detect_count = 0; + ecc_log->de_queried_count = 0; + ecc_log->consumption_q_count = 0; + do { ret = amdgpu_ras_query_error_status_with_event(adev, &info, type); if (ret) return ret; de_queried_count = ecc_log->de_queried_count; - if (de_queried_count > ecc_log->prev_de_queried_count) { - new_detect_count = de_queried_count - ecc_log->prev_de_queried_count; - ecc_log->prev_de_queried_count = de_queried_count; - timeout = 0; - } else { - new_detect_count = 0; - } + consumption_q_count = ecc_log->consumption_q_count; - if (new_detect_count) { - 
total_detect_count += new_detect_count; - } else { - if (!timeout && need_query_count) - timeout = MAX_UMC_POISON_POLLING_TIME_ASYNC; - - if (timeout) { - if (!--timeout) { - query_data_timeout = true; - break; - } - msleep(1); - } - } - } while (total_detect_count < need_query_count); + if (de_queried_count && consumption_q_count) + break; - if (query_data_timeout) { - dev_warn(adev->dev, "Can't find deferred error! count: %u\n", - (need_query_count - total_detect_count)); - return -ENOENT; - } + msleep(100); + } while (--timeout); - if (total_detect_count) + if (de_queried_count) schedule_delayed_work(&ras->page_retirement_dwork, 0); + if (amdgpu_ras_is_rma(adev) && atomic_cmpxchg(&ras->rma_in_recovery, 0, 1) == 0) + amdgpu_ras_reset_gpu(adev); + return 0; } @@ -3376,6 +3470,12 @@ static int amdgpu_ras_poison_consumption_handler(struct amdgpu_device *adev, reset_flags |= msg.reset; } + /* + * Try to ensure poison creation handler is completed first + * to set rma if bad page exceed threshold. + */ + flush_delayed_work(&con->page_retirement_dwork); + /* for RMA, amdgpu_ras_poison_creation_handler will trigger gpu reset */ if (reset_flags && !amdgpu_ras_is_rma(adev)) { if (reset_flags & AMDGPU_RAS_GPU_RESET_MODE1_RESET) @@ -3385,8 +3485,6 @@ static int amdgpu_ras_poison_consumption_handler(struct amdgpu_device *adev, else reset = reset_flags; - flush_delayed_work(&con->page_retirement_dwork); - con->gpu_reset_flags |= reset; amdgpu_ras_reset_gpu(adev); @@ -3416,6 +3514,7 @@ static int amdgpu_ras_page_retirement_thread(void *param) if (kthread_should_stop()) break; + mutex_lock(&con->poison_lock); gpu_reset = 0; do { @@ -3428,7 +3527,8 @@ static int amdgpu_ras_page_retirement_thread(void *param) atomic_sub(poison_creation_count, &con->poison_creation_count); atomic_sub(poison_creation_count, &con->page_retirement_req_cnt); } - } while (atomic_read(&con->poison_creation_count)); + } while (atomic_read(&con->poison_creation_count) && + !atomic_read(&con->poison_consumption_count)); if (ret != -EIO) { msg_count = kfifo_len(&con->poison_fifo); @@ -3445,6 +3545,7 @@ static int amdgpu_ras_page_retirement_thread(void *param) /* gpu mode-1 reset is ongoing or just completed ras mode-1 reset */ /* Clear poison creation request */ atomic_set(&con->poison_creation_count, 0); + atomic_set(&con->poison_consumption_count, 0); /* Clear poison fifo */ amdgpu_ras_clear_poison_fifo(adev); @@ -3469,9 +3570,12 @@ static int amdgpu_ras_page_retirement_thread(void *param) atomic_sub(msg_count, &con->page_retirement_req_cnt); } + atomic_set(&con->poison_consumption_count, 0); + /* Wake up work to save bad pages to eeprom */ schedule_delayed_work(&con->page_retirement_dwork, 0); } + mutex_unlock(&con->poison_lock); } return 0; @@ -3488,8 +3592,7 @@ int amdgpu_ras_init_badpage_info(struct amdgpu_device *adev) control = &con->eeprom_control; ret = amdgpu_ras_eeprom_init(control); - if (ret) - return ret; + control->is_eeprom_valid = !ret; if (!adev->umc.ras || !adev->umc.ras->convert_ras_err_addr) control->ras_num_pa_recs = control->ras_num_recs; @@ -3498,10 +3601,12 @@ int amdgpu_ras_init_badpage_info(struct amdgpu_device *adev) adev->umc.ras->get_retire_flip_bits) adev->umc.ras->get_retire_flip_bits(adev); - if (control->ras_num_recs) { + if (control->ras_num_recs && control->is_eeprom_valid) { ret = amdgpu_ras_load_bad_pages(adev); - if (ret) - return ret; + if (ret) { + control->is_eeprom_valid = false; + return 0; + } amdgpu_dpm_send_hbm_bad_pages_num( adev, control->ras_num_bad_pages); @@ -3520,7 +3625,7 @@ 
int amdgpu_ras_init_badpage_info(struct amdgpu_device *adev) dev_warn(adev->dev, "Failed to format RAS EEPROM data in V3 version!\n"); } - return ret; + return 0; } int amdgpu_ras_recovery_init(struct amdgpu_device *adev, bool init_bp_info) @@ -3551,8 +3656,10 @@ int amdgpu_ras_recovery_init(struct amdgpu_device *adev, bool init_bp_info) } mutex_init(&con->recovery_lock); + mutex_init(&con->poison_lock); INIT_WORK(&con->recovery_work, amdgpu_ras_do_recovery); atomic_set(&con->in_recovery, 0); + atomic_set(&con->rma_in_recovery, 0); con->eeprom_control.bad_channel_bitmap = 0; max_eeprom_records_count = amdgpu_ras_eeprom_max_record_count(&con->eeprom_control); @@ -3570,6 +3677,7 @@ int amdgpu_ras_recovery_init(struct amdgpu_device *adev, bool init_bp_info) init_waitqueue_head(&con->page_retirement_wq); atomic_set(&con->page_retirement_req_cnt, 0); atomic_set(&con->poison_creation_count, 0); + atomic_set(&con->poison_consumption_count, 0); con->page_retirement_thread = kthread_run(amdgpu_ras_page_retirement_thread, adev, "umc_page_retirement"); if (IS_ERR(con->page_retirement_thread)) { @@ -3642,6 +3750,8 @@ static int amdgpu_ras_recovery_fini(struct amdgpu_device *adev) kfree(data); mutex_unlock(&con->recovery_lock); + amdgpu_ras_critical_region_init(adev); + return 0; } /* recovery end */ @@ -4068,6 +4178,12 @@ int amdgpu_ras_init(struct amdgpu_device *adev) goto release_con; } + con->init_task_pid = task_pid_nr(current); + get_task_comm(con->init_task_comm, current); + + mutex_init(&con->critical_region_lock); + INIT_LIST_HEAD(&con->critical_region_head); + dev_info(adev->dev, "RAS INFO: ras initialized successfully, " "hardware ability[%x] ras_mask[%x]\n", adev->ras_hw_enabled, adev->ras_enabled); @@ -4347,6 +4463,9 @@ int amdgpu_ras_fini(struct amdgpu_device *adev) if (!adev->ras_enabled || !con) return 0; + amdgpu_ras_critical_region_fini(adev); + mutex_destroy(&con->critical_region_lock); + list_for_each_entry_safe(ras_node, tmp, &adev->ras_list, node) { if (ras_node->ras_obj) { obj = ras_node->ras_obj; @@ -4414,8 +4533,10 @@ void amdgpu_ras_clear_err_state(struct amdgpu_device *adev) struct amdgpu_ras *ras; ras = amdgpu_ras_get_context(adev); - if (ras) + if (ras) { ras->ras_err_state = 0; + ras->gpu_reset_flags = 0; + } } void amdgpu_ras_set_err_poison(struct amdgpu_device *adev, @@ -5253,6 +5374,9 @@ int amdgpu_ras_reserve_page(struct amdgpu_device *adev, uint64_t pfn) uint64_t start = pfn << AMDGPU_GPU_PAGE_SHIFT; int ret = 0; + if (amdgpu_ras_check_critical_address(adev, start)) + return 0; + mutex_lock(&con->page_rsv_lock); ret = amdgpu_vram_mgr_query_page_status(mgr, start); if (ret == -ENOENT) @@ -5289,3 +5413,80 @@ bool amdgpu_ras_is_rma(struct amdgpu_device *adev) return con->is_rma; } + +int amdgpu_ras_add_critical_region(struct amdgpu_device *adev, + struct amdgpu_bo *bo) +{ + struct amdgpu_ras *con = amdgpu_ras_get_context(adev); + struct amdgpu_vram_mgr_resource *vres; + struct ras_critical_region *region; + struct drm_buddy_block *block; + int ret = 0; + + if (!bo || !bo->tbo.resource) + return -EINVAL; + + vres = to_amdgpu_vram_mgr_resource(bo->tbo.resource); + + mutex_lock(&con->critical_region_lock); + + /* Check if the bo had been recorded */ + list_for_each_entry(region, &con->critical_region_head, node) + if (region->bo == bo) + goto out; + + /* Record new critical amdgpu bo */ + list_for_each_entry(block, &vres->blocks, link) { + region = kzalloc(sizeof(*region), GFP_KERNEL); + if (!region) { + ret = -ENOMEM; + goto out; + } + region->bo = bo; + region->start = 
amdgpu_vram_mgr_block_start(block); + region->size = amdgpu_vram_mgr_block_size(block); + list_add_tail(®ion->node, &con->critical_region_head); + } + +out: + mutex_unlock(&con->critical_region_lock); + + return ret; +} + +static void amdgpu_ras_critical_region_init(struct amdgpu_device *adev) +{ + amdgpu_ras_add_critical_region(adev, adev->mman.fw_reserved_memory); +} + +static void amdgpu_ras_critical_region_fini(struct amdgpu_device *adev) +{ + struct amdgpu_ras *con = amdgpu_ras_get_context(adev); + struct ras_critical_region *region, *tmp; + + mutex_lock(&con->critical_region_lock); + list_for_each_entry_safe(region, tmp, &con->critical_region_head, node) { + list_del(®ion->node); + kfree(region); + } + mutex_unlock(&con->critical_region_lock); +} + +bool amdgpu_ras_check_critical_address(struct amdgpu_device *adev, uint64_t addr) +{ + struct amdgpu_ras *con = amdgpu_ras_get_context(adev); + struct ras_critical_region *region; + bool ret = false; + + mutex_lock(&con->critical_region_lock); + list_for_each_entry(region, &con->critical_region_head, node) { + if ((region->start <= addr) && + (addr < (region->start + region->size))) { + ret = true; + break; + } + } + mutex_unlock(&con->critical_region_lock); + + return ret; +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h index 927d6bff734a..6cf0dfd38be8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h @@ -492,8 +492,15 @@ struct ras_ecc_err { struct ras_ecc_log_info { struct mutex lock; struct radix_tree_root de_page_tree; - uint64_t de_queried_count; - uint64_t prev_de_queried_count; + uint64_t de_queried_count; + uint64_t consumption_q_count; +}; + +struct ras_critical_region { + struct list_head node; + struct amdgpu_bo *bo; + uint64_t start; + uint64_t size; }; struct amdgpu_ras { @@ -515,6 +522,7 @@ struct amdgpu_ras { /* gpu recovery */ struct work_struct recovery_work; atomic_t in_recovery; + atomic_t rma_in_recovery; struct amdgpu_device *adev; /* error handler data */ struct ras_err_handler_data *eh_data; @@ -557,6 +565,7 @@ struct amdgpu_ras { struct mutex page_retirement_lock; atomic_t page_retirement_req_cnt; atomic_t poison_creation_count; + atomic_t poison_consumption_count; struct mutex page_rsv_lock; DECLARE_KFIFO(poison_fifo, struct ras_poison_msg, 128); struct ras_ecc_log_info umc_ecc_log; @@ -570,6 +579,17 @@ struct amdgpu_ras { struct ras_event_manager *event_mgr; uint64_t reserved_pages_in_bytes; + + pid_t init_task_pid; + char init_task_comm[TASK_COMM_LEN]; + + int bad_page_num; + + struct list_head critical_region_head; + struct mutex critical_region_lock; + + /* Protect poison injection */ + struct mutex poison_lock; }; struct ras_fs_data { @@ -608,6 +628,7 @@ struct ras_err_handler_data { struct eeprom_table_record *bps; /* the count of entries */ int count; + int count_saved; /* the space can place new entries */ int space_left; }; @@ -973,6 +994,9 @@ int amdgpu_ras_mark_ras_event_caller(struct amdgpu_device *adev, enum ras_event_ int amdgpu_ras_reserve_page(struct amdgpu_device *adev, uint64_t pfn); +int amdgpu_ras_add_critical_region(struct amdgpu_device *adev, struct amdgpu_bo *bo); +bool amdgpu_ras_check_critical_address(struct amdgpu_device *adev, uint64_t addr); + int amdgpu_ras_put_poison_req(struct amdgpu_device *adev, enum amdgpu_ras_block block, uint16_t pasid, pasid_notify pasid_fn, void *data, uint32_t reset); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c index 2c58e09e56f9..3eb3fb55ccb0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c @@ -277,10 +277,11 @@ static int __write_table_header(struct amdgpu_ras_eeprom_control *control) up_read(&adev->reset_domain->sem); if (res < 0) { - DRM_ERROR("Failed to write EEPROM table header:%d", res); + dev_err(adev->dev, "Failed to write EEPROM table header:%d", + res); } else if (res < RAS_TABLE_HEADER_SIZE) { - DRM_ERROR("Short write:%d out of %d\n", - res, RAS_TABLE_HEADER_SIZE); + dev_err(adev->dev, "Short write:%d out of %d\n", res, + RAS_TABLE_HEADER_SIZE); res = -EIO; } else { res = 0; @@ -323,7 +324,8 @@ static int __write_table_ras_info(struct amdgpu_ras_eeprom_control *control) buf = kzalloc(RAS_TABLE_V2_1_INFO_SIZE, GFP_KERNEL); if (!buf) { - DRM_ERROR("Failed to alloc buf to write table ras info\n"); + dev_err(adev->dev, + "Failed to alloc buf to write table ras info\n"); return -ENOMEM; } @@ -338,10 +340,11 @@ static int __write_table_ras_info(struct amdgpu_ras_eeprom_control *control) up_read(&adev->reset_domain->sem); if (res < 0) { - DRM_ERROR("Failed to write EEPROM table ras info:%d", res); + dev_err(adev->dev, "Failed to write EEPROM table ras info:%d", + res); } else if (res < RAS_TABLE_V2_1_INFO_SIZE) { - DRM_ERROR("Short write:%d out of %d\n", - res, RAS_TABLE_V2_1_INFO_SIZE); + dev_err(adev->dev, "Short write:%d out of %d\n", res, + RAS_TABLE_V2_1_INFO_SIZE); res = -EIO; } else { res = 0; @@ -476,6 +479,8 @@ int amdgpu_ras_eeprom_reset_table(struct amdgpu_ras_eeprom_control *control) control->ras_num_recs = 0; control->ras_num_bad_pages = 0; + control->ras_num_mca_recs = 0; + control->ras_num_pa_recs = 0; control->ras_fri = 0; amdgpu_dpm_send_hbm_bad_pages_num(adev, control->ras_num_bad_pages); @@ -607,13 +612,13 @@ static int __amdgpu_ras_eeprom_write(struct amdgpu_ras_eeprom_control *control, buf, buf_size); up_read(&adev->reset_domain->sem); if (res < 0) { - DRM_ERROR("Writing %d EEPROM table records error:%d", - num, res); + dev_err(adev->dev, "Writing %d EEPROM table records error:%d", + num, res); } else if (res < buf_size) { /* Short write, return error. 
*/ - DRM_ERROR("Wrote %d records out of %d", - res / RAS_TABLE_RECORD_SIZE, num); + dev_err(adev->dev, "Wrote %d records out of %d", + res / RAS_TABLE_RECORD_SIZE, num); res = -EIO; } else { res = 0; @@ -738,8 +743,7 @@ amdgpu_ras_eeprom_append_table(struct amdgpu_ras_eeprom_control *control, else control->ras_num_mca_recs += num; - control->ras_num_bad_pages = control->ras_num_pa_recs + - control->ras_num_mca_recs * adev->umc.retire_unit; + control->ras_num_bad_pages = con->bad_page_num; Out: kfree(buf); return res; @@ -761,15 +765,19 @@ amdgpu_ras_eeprom_update_header(struct amdgpu_ras_eeprom_control *control) dev_warn(adev->dev, "Saved bad pages %d reaches threshold value %d\n", control->ras_num_bad_pages, ras->bad_page_cnt_threshold); - control->tbl_hdr.header = RAS_TABLE_HDR_BAD; - if (control->tbl_hdr.version >= RAS_TABLE_VER_V2_1) { - control->tbl_rai.rma_status = GPU_RETIRED__ECC_REACH_THRESHOLD; - control->tbl_rai.health_percent = 0; - } + + if (adev->cper.enabled && amdgpu_cper_generate_bp_threshold_record(adev)) + dev_warn(adev->dev, "fail to generate bad page threshold cper records\n"); if ((amdgpu_bad_page_threshold != -1) && - (amdgpu_bad_page_threshold != -2)) + (amdgpu_bad_page_threshold != -2)) { + control->tbl_hdr.header = RAS_TABLE_HDR_BAD; + if (control->tbl_hdr.version >= RAS_TABLE_VER_V2_1) { + control->tbl_rai.rma_status = GPU_RETIRED__ECC_REACH_THRESHOLD; + control->tbl_rai.health_percent = 0; + } ras->is_rma = true; + } /* ignore the -ENOTSUPP return value */ amdgpu_dpm_send_rma_reason(adev); @@ -787,8 +795,9 @@ amdgpu_ras_eeprom_update_header(struct amdgpu_ras_eeprom_control *control) buf_size = control->ras_num_recs * RAS_TABLE_RECORD_SIZE; buf = kcalloc(control->ras_num_recs, RAS_TABLE_RECORD_SIZE, GFP_KERNEL); if (!buf) { - DRM_ERROR("allocating memory for table of size %d bytes failed\n", - control->tbl_hdr.tbl_size); + dev_err(adev->dev, + "allocating memory for table of size %d bytes failed\n", + control->tbl_hdr.tbl_size); res = -ENOMEM; goto Out; } @@ -800,12 +809,11 @@ amdgpu_ras_eeprom_update_header(struct amdgpu_ras_eeprom_control *control) buf, buf_size); up_read(&adev->reset_domain->sem); if (res < 0) { - DRM_ERROR("EEPROM failed reading records:%d\n", - res); + dev_err(adev->dev, "EEPROM failed reading records:%d\n", res); goto Out; } else if (res < buf_size) { - DRM_ERROR("EEPROM read %d out of %d bytes\n", - res, buf_size); + dev_err(adev->dev, "EEPROM read %d out of %d bytes\n", res, + buf_size); res = -EIO; goto Out; } @@ -866,11 +874,12 @@ int amdgpu_ras_eeprom_append(struct amdgpu_ras_eeprom_control *control, return 0; if (num == 0) { - DRM_ERROR("will not append 0 records\n"); + dev_err(adev->dev, "will not append 0 records\n"); return -EINVAL; } else if (num > control->ras_max_record_count) { - DRM_ERROR("cannot append %d records than the size of table %d\n", - num, control->ras_max_record_count); + dev_err(adev->dev, + "cannot append %d records than the size of table %d\n", + num, control->ras_max_record_count); return -EINVAL; } @@ -924,13 +933,13 @@ static int __amdgpu_ras_eeprom_read(struct amdgpu_ras_eeprom_control *control, buf, buf_size); up_read(&adev->reset_domain->sem); if (res < 0) { - DRM_ERROR("Reading %d EEPROM table records error:%d", - num, res); + dev_err(adev->dev, "Reading %d EEPROM table records error:%d", + num, res); } else if (res < buf_size) { /* Short read, return error. 
*/ - DRM_ERROR("Read %d records out of %d", - res / RAS_TABLE_RECORD_SIZE, num); + dev_err(adev->dev, "Read %d records out of %d", + res / RAS_TABLE_RECORD_SIZE, num); res = -EIO; } else { res = 0; @@ -964,11 +973,11 @@ int amdgpu_ras_eeprom_read(struct amdgpu_ras_eeprom_control *control, return 0; if (num == 0) { - DRM_ERROR("will not read 0 records\n"); + dev_err(adev->dev, "will not read 0 records\n"); return -EINVAL; } else if (num > control->ras_num_recs) { - DRM_ERROR("too many records to read:%d available:%d\n", - num, control->ras_num_recs); + dev_err(adev->dev, "too many records to read:%d available:%d\n", + num, control->ras_num_recs); return -EINVAL; } @@ -1300,7 +1309,8 @@ static int __verify_ras_table_checksum(struct amdgpu_ras_eeprom_control *control buf = kzalloc(buf_size, GFP_KERNEL); if (!buf) { - DRM_ERROR("Out of memory checking RAS table checksum.\n"); + dev_err(adev->dev, + "Out of memory checking RAS table checksum.\n"); return -ENOMEM; } @@ -1309,7 +1319,7 @@ static int __verify_ras_table_checksum(struct amdgpu_ras_eeprom_control *control control->ras_header_offset, buf, buf_size); if (res < buf_size) { - DRM_ERROR("Partial read for checksum, res:%d\n", res); + dev_err(adev->dev, "Partial read for checksum, res:%d\n", res); /* On partial reads, return -EIO. */ if (res >= 0) @@ -1334,7 +1344,8 @@ static int __read_table_ras_info(struct amdgpu_ras_eeprom_control *control) buf = kzalloc(RAS_TABLE_V2_1_INFO_SIZE, GFP_KERNEL); if (!buf) { - DRM_ERROR("Failed to alloc buf to read EEPROM table ras info\n"); + dev_err(adev->dev, + "Failed to alloc buf to read EEPROM table ras info\n"); return -ENOMEM; } @@ -1346,7 +1357,8 @@ static int __read_table_ras_info(struct amdgpu_ras_eeprom_control *control) control->i2c_address + control->ras_info_offset, buf, RAS_TABLE_V2_1_INFO_SIZE); if (res < RAS_TABLE_V2_1_INFO_SIZE) { - DRM_ERROR("Failed to read EEPROM table ras info, res:%d", res); + dev_err(adev->dev, + "Failed to read EEPROM table ras info, res:%d", res); res = res >= 0 ? -EIO : res; goto Out; } @@ -1387,7 +1399,8 @@ int amdgpu_ras_eeprom_init(struct amdgpu_ras_eeprom_control *control) control->i2c_address + control->ras_header_offset, buf, RAS_TABLE_HEADER_SIZE); if (res < RAS_TABLE_HEADER_SIZE) { - DRM_ERROR("Failed to read EEPROM table header, res:%d", res); + dev_err(adev->dev, "Failed to read EEPROM table header, res:%d", + res); return res >= 0 ? -EIO : res; } @@ -1448,12 +1461,12 @@ int amdgpu_ras_eeprom_check(struct amdgpu_ras_eeprom_control *control) if (!__get_eeprom_i2c_addr(adev, control)) return -EINVAL; - control->ras_num_bad_pages = control->ras_num_pa_recs + - control->ras_num_mca_recs * adev->umc.retire_unit; + control->ras_num_bad_pages = ras->bad_page_num; if (hdr->header == RAS_TABLE_HDR_VAL) { - DRM_DEBUG_DRIVER("Found existing EEPROM table with %d records", - control->ras_num_bad_pages); + dev_dbg(adev->dev, + "Found existing EEPROM table with %d records", + control->ras_num_bad_pages); if (hdr->version >= RAS_TABLE_VER_V2_1) { res = __read_table_ras_info(control); @@ -1521,3 +1534,31 @@ int amdgpu_ras_eeprom_check(struct amdgpu_ras_eeprom_control *control) return res < 0 ? 
res : 0; } + +void amdgpu_ras_eeprom_check_and_recover(struct amdgpu_device *adev) +{ + struct amdgpu_ras *ras = amdgpu_ras_get_context(adev); + struct amdgpu_ras_eeprom_control *control; + int res; + + if (!__is_ras_eeprom_supported(adev) || !ras) + return; + control = &ras->eeprom_control; + if (!control->is_eeprom_valid) + return; + res = __verify_ras_table_checksum(control); + if (res) { + dev_warn(adev->dev, + "RAS table incorrect checksum or error:%d, try to recover\n", + res); + if (!amdgpu_ras_eeprom_reset_table(control)) + if (!amdgpu_ras_save_bad_pages(adev, NULL)) + if (!__verify_ras_table_checksum(control)) { + dev_info(adev->dev, "RAS table recovery succeed\n"); + return; + } + dev_err(adev->dev, "RAS table recovery failed\n"); + control->is_eeprom_valid = false; + } + return; +}
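The new amdgpu_ras_eeprom_check_and_recover() above boils down to a verify / reset / re-save / re-verify sequence, falling back to marking the in-memory EEPROM state invalid when the rewrite does not restore a good checksum. The following stand-alone C sketch shows only that control flow; the EEPROM accessors are hypothetical stubs that simulate one corrupted table, and none of the stub names are the kernel's.

#include <stdbool.h>
#include <stdio.h>

static bool table_corrupted = true;
static bool eeprom_valid = true;

/* Hypothetical stand-ins for the EEPROM helpers; 0 means success, as in the
 * kernel code. save_bad_pages() simulates a rewrite that repairs the table. */
static int verify_table_checksum(void) { return table_corrupted ? -1 : 0; }
static int reset_table(void)           { return 0; }
static int save_bad_pages(void)        { table_corrupted = false; return 0; }

/* Same verify -> reset -> re-save -> re-verify sequence as the patch above. */
static void check_and_recover(void)
{
	if (!eeprom_valid || !verify_table_checksum())
		return;	/* table already written off, or checksum is consistent */

	if (!reset_table() && !save_bad_pages() && !verify_table_checksum()) {
		printf("RAS table recovery succeeded\n");
		return;
	}

	printf("RAS table recovery failed\n");
	eeprom_valid = false;	/* stop trusting (and rewriting) the table */
}

int main(void)
{
	check_and_recover();	/* prints "RAS table recovery succeeded" */
	return 0;
}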
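Further up, amdgpu_ras.c gains a small critical-region facility: the VRAM blocks backing buffers that must survive bad-page handling (such as adev->mman.fw_reserved_memory) are recorded as {start, size} entries on a mutex-protected list, and amdgpu_ras_reserve_page() skips any address that falls inside one of them. Below is that pattern in isolation as a stand-alone sketch; struct region, add_critical_region() and check_critical_address() are illustrative names, not the kernel's, and a plain singly linked list stands in for the kernel list API.

#include <pthread.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

struct region {
	uint64_t start;
	uint64_t size;
	struct region *next;
};

static struct region *region_head;
static pthread_mutex_t region_lock = PTHREAD_MUTEX_INITIALIZER;

/* Record a [start, start + size) range that must never be retired. */
static int add_critical_region(uint64_t start, uint64_t size)
{
	struct region *r = calloc(1, sizeof(*r));

	if (!r)
		return -1;
	r->start = start;
	r->size = size;
	pthread_mutex_lock(&region_lock);
	r->next = region_head;
	region_head = r;
	pthread_mutex_unlock(&region_lock);
	return 0;
}

/* Membership test: true when addr falls inside any recorded region. */
static bool check_critical_address(uint64_t addr)
{
	struct region *r;
	bool hit = false;

	pthread_mutex_lock(&region_lock);
	for (r = region_head; r; r = r->next) {
		if (addr >= r->start && addr < r->start + r->size) {
			hit = true;
			break;
		}
	}
	pthread_mutex_unlock(&region_lock);
	return hit;
}

int main(void)
{
	add_critical_region(0x100000, 0x2000);	/* e.g. the FW-reserved block */
	printf("%d %d\n", check_critical_address(0x100800),	/* 1 */
	       check_critical_address(0x200000));		/* 0 */
	return 0;
}

In the driver the same membership test is what lets RAS page retirement return early instead of reserving a page that backs firmware-critical memory.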
\ No newline at end of file diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.h index ec6d7ea37ad0..ebfca4cb5688 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.h @@ -114,6 +114,8 @@ struct amdgpu_ras_eeprom_control { /* Record channel info which occurred bad pages */ u32 bad_channel_bitmap; + + bool is_eeprom_valid; }; /* @@ -159,6 +161,8 @@ void amdgpu_ras_debugfs_set_ret_size(struct amdgpu_ras_eeprom_control *control); int amdgpu_ras_eeprom_check(struct amdgpu_ras_eeprom_control *control); +void amdgpu_ras_eeprom_check_and_recover(struct amdgpu_device *adev); + extern const struct file_operations amdgpu_ras_debugfs_eeprom_size_ops; extern const struct file_operations amdgpu_ras_debugfs_eeprom_table_ops; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h index 50fcd86e1033..be2e56ce1355 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h @@ -91,6 +91,7 @@ static inline void amdgpu_res_first(struct ttm_resource *res, break; case TTM_PL_TT: case AMDGPU_PL_DOORBELL: + case AMDGPU_PL_MMIO_REMAP: node = to_ttm_range_mgr_node(res)->mm_nodes; while (start >= node->size << PAGE_SHIFT) start -= node++->size << PAGE_SHIFT; @@ -153,6 +154,7 @@ static inline void amdgpu_res_next(struct amdgpu_res_cursor *cur, uint64_t size) break; case TTM_PL_TT: case AMDGPU_PL_DOORBELL: + case AMDGPU_PL_MMIO_REMAP: node = cur->node; cur->node = ++node; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c index dabfbdf6f1ce..28c4ad62f50e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c @@ -340,6 +340,9 @@ void amdgpu_reset_get_desc(struct amdgpu_reset_context *rst_ctxt, char *buf, case AMDGPU_RESET_SRC_USER: strscpy(buf, "user trigger", len); break; + case AMDGPU_RESET_SRC_USERQ: + strscpy(buf, "user queue trigger", len); + break; default: strscpy(buf, "unknown", len); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h index 4d9b9701139b..07b4d37f1db6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h @@ -43,6 +43,7 @@ enum AMDGPU_RESET_SRCS { AMDGPU_RESET_SRC_MES, AMDGPU_RESET_SRC_HWS, AMDGPU_RESET_SRC_USER, + AMDGPU_RESET_SRC_USERQ, }; struct amdgpu_reset_context { @@ -160,4 +161,16 @@ int amdgpu_reset_do_xgmi_reset_on_init( bool amdgpu_reset_in_recovery(struct amdgpu_device *adev); +static inline void amdgpu_reset_set_dpc_status(struct amdgpu_device *adev, + bool status) +{ + adev->pcie_reset_ctx.occurs_dpc = status; + adev->no_hw_access = status; +} + +static inline bool amdgpu_reset_in_dpc(struct amdgpu_device *adev) +{ + return adev->pcie_reset_ctx.occurs_dpc; +} + #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c index 426834806fbf..5ec5c3ff22bb 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c @@ -99,6 +99,29 @@ int amdgpu_ring_alloc(struct amdgpu_ring *ring, unsigned int ndw) return 0; } +/** + * amdgpu_ring_alloc_reemit - allocate space on the ring buffer for reemit + * + * @ring: amdgpu_ring structure holding ring information + * @ndw: number of dwords to allocate in the ring buffer + * + * Allocate @ndw dwords in the ring buffer (all asics). 
+ * doesn't check the max_dw limit as we may be reemitting + * several submissions. + */ +static void amdgpu_ring_alloc_reemit(struct amdgpu_ring *ring, unsigned int ndw) +{ + /* Align requested size with padding so unlock_commit can + * pad safely */ + ndw = (ndw + ring->funcs->align_mask) & ~ring->funcs->align_mask; + + ring->count_dw = ndw; + ring->wptr_old = ring->wptr; + + if (ring->funcs->begin_use) + ring->funcs->begin_use(ring); +} + /** amdgpu_ring_insert_nop - insert NOP packets * * @ring: amdgpu_ring structure holding ring information @@ -333,15 +356,23 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring, /* Initialize cached_rptr to 0 */ ring->cached_rptr = 0; + if (!ring->ring_backup) { + ring->ring_backup = kvzalloc(ring->ring_size, GFP_KERNEL); + if (!ring->ring_backup) + return -ENOMEM; + } + /* Allocate ring buffer */ if (ring->ring_obj == NULL) { - r = amdgpu_bo_create_kernel(adev, ring->ring_size + ring->funcs->extra_dw, PAGE_SIZE, + r = amdgpu_bo_create_kernel(adev, ring->ring_size + ring->funcs->extra_bytes, + PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT, &ring->ring_obj, &ring->gpu_addr, (void **)&ring->ring); if (r) { dev_err(adev->dev, "(%d) ring create failed\n", r); + kvfree(ring->ring_backup); return r; } amdgpu_ring_clear_ring(ring); @@ -385,12 +416,12 @@ void amdgpu_ring_fini(struct amdgpu_ring *ring) amdgpu_bo_free_kernel(&ring->ring_obj, &ring->gpu_addr, (void **)&ring->ring); + kvfree(ring->ring_backup); + ring->ring_backup = NULL; dma_fence_put(ring->vmid_wait); ring->vmid_wait = NULL; ring->me = 0; - - ring->adev->rings[ring->idx] = NULL; } /** @@ -427,6 +458,7 @@ bool amdgpu_ring_soft_recovery(struct amdgpu_ring *ring, unsigned int vmid, { unsigned long flags; ktime_t deadline; + bool ret; if (unlikely(ring->adev->debug_disable_soft_recovery)) return false; @@ -441,12 +473,16 @@ bool amdgpu_ring_soft_recovery(struct amdgpu_ring *ring, unsigned int vmid, dma_fence_set_error(fence, -ENODATA); spin_unlock_irqrestore(fence->lock, flags); - atomic_inc(&ring->adev->gpu_reset_counter); while (!dma_fence_is_signaled(fence) && ktime_to_ns(ktime_sub(deadline, ktime_get())) > 0) ring->funcs->soft_recovery(ring, vmid); - return dma_fence_is_signaled(fence); + ret = dma_fence_is_signaled(fence); + /* increment the counter only if soft reset worked */ + if (ret) + atomic_inc(&ring->adev->gpu_reset_counter); + + return ret; } /* @@ -682,6 +718,7 @@ static void amdgpu_ring_to_mqd_prop(struct amdgpu_ring *ring, prop->eop_gpu_addr = ring->eop_gpu_addr; prop->use_doorbell = ring->use_doorbell; prop->doorbell_index = ring->doorbell_index; + prop->kernel_queue = true; /* map_queues packet doesn't need activate the queue, * so only kiq need set this field. @@ -753,3 +790,69 @@ bool amdgpu_ring_sched_ready(struct amdgpu_ring *ring) return true; } + +void amdgpu_ring_reset_helper_begin(struct amdgpu_ring *ring, + struct amdgpu_fence *guilty_fence) +{ + /* Stop the scheduler to prevent anybody else from touching the ring buffer. 
*/ + drm_sched_wqueue_stop(&ring->sched); + /* back up the non-guilty commands */ + amdgpu_ring_backup_unprocessed_commands(ring, guilty_fence); +} + +int amdgpu_ring_reset_helper_end(struct amdgpu_ring *ring, + struct amdgpu_fence *guilty_fence) +{ + unsigned int i; + int r; + + /* verify that the ring is functional */ + r = amdgpu_ring_test_ring(ring); + if (r) + return r; + + /* signal the guilty fence and set an error on all fences from the context */ + if (guilty_fence) + amdgpu_fence_driver_guilty_force_completion(guilty_fence); + /* Re-emit the non-guilty commands */ + if (ring->ring_backup_entries_to_copy) { + amdgpu_ring_alloc_reemit(ring, ring->ring_backup_entries_to_copy); + for (i = 0; i < ring->ring_backup_entries_to_copy; i++) + amdgpu_ring_write(ring, ring->ring_backup[i]); + amdgpu_ring_commit(ring); + } + /* Start the scheduler again */ + drm_sched_wqueue_start(&ring->sched); + return 0; +} + +bool amdgpu_ring_is_reset_type_supported(struct amdgpu_ring *ring, + u32 reset_type) +{ + switch (ring->funcs->type) { + case AMDGPU_RING_TYPE_GFX: + if (ring->adev->gfx.gfx_supported_reset & reset_type) + return true; + break; + case AMDGPU_RING_TYPE_COMPUTE: + if (ring->adev->gfx.compute_supported_reset & reset_type) + return true; + break; + case AMDGPU_RING_TYPE_SDMA: + if (ring->adev->sdma.supported_reset & reset_type) + return true; + break; + case AMDGPU_RING_TYPE_VCN_DEC: + case AMDGPU_RING_TYPE_VCN_ENC: + if (ring->adev->vcn.supported_reset & reset_type) + return true; + break; + case AMDGPU_RING_TYPE_VCN_JPEG: + if (ring->adev->jpeg.supported_reset & reset_type) + return true; + break; + default: + break; + } + return false; +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h index b95b47110769..4b46e3c26ff3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h @@ -114,10 +114,11 @@ struct amdgpu_sched { */ struct amdgpu_fence_driver { uint64_t gpu_addr; - volatile uint32_t *cpu_addr; + uint32_t *cpu_addr; /* sync_seq is protected by ring emission lock */ uint32_t sync_seq; atomic_t last_seq; + u64 signalled_wptr; bool initialized; struct amdgpu_irq_src *irq_src; unsigned irq_type; @@ -127,11 +128,35 @@ struct amdgpu_fence_driver { struct dma_fence **fences; }; +/* + * Fences mark an event in the GPUs pipeline and are used + * for GPU/CPU synchronization. When the fence is written, + * it is expected that all buffers associated with that fence + * are no longer in use by the associated ring on the GPU and + * that the relevant GPU caches have been flushed. + */ + +struct amdgpu_fence { + struct dma_fence base; + + /* RB, DMA, etc. 
*/ + struct amdgpu_ring *ring; + ktime_t start_timestamp; + + /* wptr for the fence for resets */ + u64 wptr; + /* fence context for resets */ + u64 context; + uint32_t seq; +}; + extern const struct drm_sched_backend_ops amdgpu_sched_ops; void amdgpu_fence_driver_clear_job_fences(struct amdgpu_ring *ring); void amdgpu_fence_driver_set_error(struct amdgpu_ring *ring, int error); void amdgpu_fence_driver_force_completion(struct amdgpu_ring *ring); +void amdgpu_fence_driver_guilty_force_completion(struct amdgpu_fence *af); +void amdgpu_fence_save_wptr(struct dma_fence *fence); int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring); int amdgpu_fence_driver_start_ring(struct amdgpu_ring *ring, @@ -141,8 +166,8 @@ void amdgpu_fence_driver_hw_init(struct amdgpu_device *adev); void amdgpu_fence_driver_hw_fini(struct amdgpu_device *adev); int amdgpu_fence_driver_sw_init(struct amdgpu_device *adev); void amdgpu_fence_driver_sw_fini(struct amdgpu_device *adev); -int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **fence, struct amdgpu_job *job, - unsigned flags); +int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **f, + struct amdgpu_fence *af, unsigned int flags); int amdgpu_fence_emit_polling(struct amdgpu_ring *ring, uint32_t *s, uint32_t timeout); bool amdgpu_fence_process(struct amdgpu_ring *ring); @@ -186,7 +211,18 @@ struct amdgpu_ring_funcs { bool support_64bit_ptrs; bool no_user_fence; bool secure_submission_supported; - unsigned extra_dw; + + /** + * @extra_bytes: + * + * Optional extra space in bytes that is added to the ring size + * when allocating the BO that holds the contents of the ring. + * This space isn't used for command submission to the ring, + * but is just there to satisfy some hardware requirements or + * implement workarounds. It's up to the implementation of each + * specific ring to initialize this space. + */ + unsigned extra_bytes; /* ring read/write ptr handling */ u64 (*get_rptr)(struct amdgpu_ring *ring); @@ -252,9 +288,9 @@ struct amdgpu_ring_funcs { void (*patch_cntl)(struct amdgpu_ring *ring, unsigned offset); void (*patch_ce)(struct amdgpu_ring *ring, unsigned offset); void (*patch_de)(struct amdgpu_ring *ring, unsigned offset); - int (*reset)(struct amdgpu_ring *ring, unsigned int vmid); + int (*reset)(struct amdgpu_ring *ring, unsigned int vmid, + struct amdgpu_fence *timedout_fence); void (*emit_cleaner_shader)(struct amdgpu_ring *ring); - bool (*is_guilty)(struct amdgpu_ring *ring); }; /** @@ -268,9 +304,12 @@ struct amdgpu_ring { struct amdgpu_bo *ring_obj; uint32_t *ring; + /* backups for resets */ + uint32_t *ring_backup; + unsigned int ring_backup_entries_to_copy; unsigned rptr_offs; u64 rptr_gpu_addr; - volatile u32 *rptr_cpu_addr; + u32 *rptr_cpu_addr; /** * @wptr: @@ -350,19 +389,19 @@ struct amdgpu_ring { * This is the CPU address pointer in the writeback slot. This is used * to commit changes to the GPU. 
*/ - volatile u32 *wptr_cpu_addr; + u32 *wptr_cpu_addr; unsigned fence_offs; u64 fence_gpu_addr; - volatile u32 *fence_cpu_addr; + u32 *fence_cpu_addr; uint64_t current_ctx; char name[16]; u32 trail_seq; unsigned trail_fence_offs; u64 trail_fence_gpu_addr; - volatile u32 *trail_fence_cpu_addr; + u32 *trail_fence_cpu_addr; unsigned cond_exe_offs; u64 cond_exe_gpu_addr; - volatile u32 *cond_exe_cpu_addr; + u32 *cond_exe_cpu_addr; unsigned int set_q_mode_offs; u32 *set_q_mode_ptr; u64 set_q_mode_token; @@ -409,7 +448,7 @@ struct amdgpu_ring { #define amdgpu_ring_patch_cntl(r, o) ((r)->funcs->patch_cntl((r), (o))) #define amdgpu_ring_patch_ce(r, o) ((r)->funcs->patch_ce((r), (o))) #define amdgpu_ring_patch_de(r, o) ((r)->funcs->patch_de((r), (o))) -#define amdgpu_ring_reset(r, v) (r)->funcs->reset((r), (v)) +#define amdgpu_ring_reset(r, v, f) (r)->funcs->reset((r), (v), (f)) unsigned int amdgpu_ring_max_ibs(enum amdgpu_ring_type type); int amdgpu_ring_alloc(struct amdgpu_ring *ring, unsigned ndw); @@ -442,10 +481,7 @@ static inline void amdgpu_ring_set_preempt_cond_exec(struct amdgpu_ring *ring, static inline void amdgpu_ring_clear_ring(struct amdgpu_ring *ring) { - int i = 0; - while (i <= ring->buf_mask) - ring->ring[i++] = ring->funcs->nop; - + memset32(ring->ring, ring->funcs->nop, ring->buf_mask + 1); } static inline void amdgpu_ring_write(struct amdgpu_ring *ring, uint32_t v) @@ -534,4 +570,12 @@ int amdgpu_ib_pool_init(struct amdgpu_device *adev); void amdgpu_ib_pool_fini(struct amdgpu_device *adev); int amdgpu_ib_ring_tests(struct amdgpu_device *adev); bool amdgpu_ring_sched_ready(struct amdgpu_ring *ring); +void amdgpu_ring_backup_unprocessed_commands(struct amdgpu_ring *ring, + struct amdgpu_fence *guilty_fence); +void amdgpu_ring_reset_helper_begin(struct amdgpu_ring *ring, + struct amdgpu_fence *guilty_fence); +int amdgpu_ring_reset_helper_end(struct amdgpu_ring *ring, + struct amdgpu_fence *guilty_fence); +bool amdgpu_ring_is_reset_type_supported(struct amdgpu_ring *ring, + u32 reset_type); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.c index db5791e1a7ce..5aa830a02d80 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.c @@ -89,7 +89,7 @@ void amdgpu_gfx_rlc_exit_safe_mode(struct amdgpu_device *adev, int xcc_id) int amdgpu_gfx_rlc_init_sr(struct amdgpu_device *adev, u32 dws) { const u32 *src_ptr; - volatile u32 *dst_ptr; + u32 *dst_ptr; u32 i; int r; @@ -189,7 +189,7 @@ int amdgpu_gfx_rlc_init_cpt(struct amdgpu_device *adev) void amdgpu_gfx_rlc_setup_cp_table(struct amdgpu_device *adev) { const __le32 *fw_data; - volatile u32 *dst_ptr; + u32 *dst_ptr; int me, i, max_me; u32 bo_offset = 0; u32 table_offset, table_size; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.h index c210625be220..2ce310b31942 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.h @@ -251,7 +251,7 @@ struct amdgpu_rlc_funcs { * and it also provides a pointer to it which is used by the firmware * to load the clear state in some cases. 
*/ - void (*get_csb_buffer)(struct amdgpu_device *adev, volatile u32 *buffer); + void (*get_csb_buffer)(struct amdgpu_device *adev, u32 *buffer); int (*get_cp_table_num)(struct amdgpu_device *adev); int (*resume)(struct amdgpu_device *adev); void (*stop)(struct amdgpu_device *adev); @@ -275,19 +275,19 @@ struct amdgpu_rlc { /* for power gating */ struct amdgpu_bo *save_restore_obj; uint64_t save_restore_gpu_addr; - volatile uint32_t *sr_ptr; + uint32_t *sr_ptr; const u32 *reg_list; u32 reg_list_size; /* for clear state */ struct amdgpu_bo *clear_state_obj; uint64_t clear_state_gpu_addr; - volatile uint32_t *cs_ptr; + uint32_t *cs_ptr; const struct cs_section_def *cs_data; u32 clear_state_size; /* for cp tables */ struct amdgpu_bo *cp_table_obj; uint64_t cp_table_gpu_addr; - volatile uint32_t *cp_table_ptr; + uint32_t *cp_table_ptr; u32 cp_table_size; /* safe mode for updating CG/PG state */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c index 6716ac281c49..8b8a04138711 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c @@ -534,91 +534,75 @@ bool amdgpu_sdma_is_shared_inv_eng(struct amdgpu_device *adev, struct amdgpu_rin static int amdgpu_sdma_soft_reset(struct amdgpu_device *adev, u32 instance_id) { struct amdgpu_sdma_instance *sdma_instance = &adev->sdma.instance[instance_id]; - int r = -EOPNOTSUPP; - - switch (amdgpu_ip_version(adev, SDMA0_HWIP, 0)) { - case IP_VERSION(4, 4, 2): - case IP_VERSION(4, 4, 4): - case IP_VERSION(4, 4, 5): - /* For SDMA 4.x, use the existing DPM interface for backward compatibility */ - r = amdgpu_dpm_reset_sdma(adev, 1 << instance_id); - break; - case IP_VERSION(5, 0, 0): - case IP_VERSION(5, 0, 1): - case IP_VERSION(5, 0, 2): - case IP_VERSION(5, 0, 5): - case IP_VERSION(5, 2, 0): - case IP_VERSION(5, 2, 2): - case IP_VERSION(5, 2, 4): - case IP_VERSION(5, 2, 5): - case IP_VERSION(5, 2, 6): - case IP_VERSION(5, 2, 3): - case IP_VERSION(5, 2, 1): - case IP_VERSION(5, 2, 7): - if (sdma_instance->funcs->soft_reset_kernel_queue) - r = sdma_instance->funcs->soft_reset_kernel_queue(adev, instance_id); - break; - default: - break; - } - return r; + if (sdma_instance->funcs->soft_reset_kernel_queue) + return sdma_instance->funcs->soft_reset_kernel_queue(adev, instance_id); + + return -EOPNOTSUPP; } /** * amdgpu_sdma_reset_engine - Reset a specific SDMA engine * @adev: Pointer to the AMDGPU device - * @instance_id: ID of the SDMA engine instance to reset + * @instance_id: Logical ID of the SDMA engine instance to reset + * @caller_handles_kernel_queues: Skip kernel queue processing. Caller + * will handle it. * * Returns: 0 on success, or a negative error code on failure. */ -int amdgpu_sdma_reset_engine(struct amdgpu_device *adev, uint32_t instance_id) +int amdgpu_sdma_reset_engine(struct amdgpu_device *adev, uint32_t instance_id, + bool caller_handles_kernel_queues) { int ret = 0; struct amdgpu_sdma_instance *sdma_instance = &adev->sdma.instance[instance_id]; struct amdgpu_ring *gfx_ring = &sdma_instance->ring; struct amdgpu_ring *page_ring = &sdma_instance->page; - bool gfx_sched_stopped = false, page_sched_stopped = false; mutex_lock(&sdma_instance->engine_reset_mutex); - /* Stop the scheduler's work queue for the GFX and page rings if they are running. - * This ensures that no new tasks are submitted to the queues while - * the reset is in progress. 
- */ - if (!amdgpu_ring_sched_ready(gfx_ring)) { + + if (!caller_handles_kernel_queues) { + /* Stop the scheduler's work queue for the GFX and page rings if they are running. + * This ensures that no new tasks are submitted to the queues while + * the reset is in progress. + */ drm_sched_wqueue_stop(&gfx_ring->sched); - gfx_sched_stopped = true; - } - if (adev->sdma.has_page_queue && !amdgpu_ring_sched_ready(page_ring)) { - drm_sched_wqueue_stop(&page_ring->sched); - page_sched_stopped = true; + if (adev->sdma.has_page_queue) + drm_sched_wqueue_stop(&page_ring->sched); } - if (sdma_instance->funcs->stop_kernel_queue) + if (sdma_instance->funcs->stop_kernel_queue) { sdma_instance->funcs->stop_kernel_queue(gfx_ring); + if (adev->sdma.has_page_queue) + sdma_instance->funcs->stop_kernel_queue(page_ring); + } /* Perform the SDMA reset for the specified instance */ ret = amdgpu_sdma_soft_reset(adev, instance_id); if (ret) { - dev_err(adev->dev, "Failed to reset SDMA instance %u\n", instance_id); + dev_err(adev->dev, "Failed to reset SDMA logical instance %u\n", instance_id); goto exit; } - if (sdma_instance->funcs->start_kernel_queue) + if (sdma_instance->funcs->start_kernel_queue) { sdma_instance->funcs->start_kernel_queue(gfx_ring); + if (adev->sdma.has_page_queue) + sdma_instance->funcs->start_kernel_queue(page_ring); + } exit: - /* Restart the scheduler's work queue for the GFX and page rings - * if they were stopped by this function. This allows new tasks - * to be submitted to the queues after the reset is complete. - */ - if (!ret) { - if (gfx_sched_stopped && amdgpu_ring_sched_ready(gfx_ring)) { + if (!caller_handles_kernel_queues) { + /* Restart the scheduler's work queue for the GFX and page rings + * if they were stopped by this function. This allows new tasks + * to be submitted to the queues after the reset is complete. 
+ */ + if (!ret) { + amdgpu_fence_driver_force_completion(gfx_ring); drm_sched_wqueue_start(&gfx_ring->sched); - } - if (page_sched_stopped && amdgpu_ring_sched_ready(page_ring)) { - drm_sched_wqueue_start(&page_ring->sched); + if (adev->sdma.has_page_queue) { + amdgpu_fence_driver_force_completion(page_ring); + drm_sched_wqueue_start(&page_ring->sched); + } } } mutex_unlock(&sdma_instance->engine_reset_mutex); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h index e5f8951bbb6f..34311f32be4c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h @@ -172,7 +172,8 @@ struct amdgpu_buffer_funcs { uint32_t byte_count); }; -int amdgpu_sdma_reset_engine(struct amdgpu_device *adev, uint32_t instance_id); +int amdgpu_sdma_reset_engine(struct amdgpu_device *adev, uint32_t instance_id, + bool caller_handles_kernel_queues); #define amdgpu_emit_copy_buffer(adev, ib, s, d, b, t) (adev)->mman.buffer_funcs->emit_copy_buffer((ib), (s), (d), (b), (t)) #define amdgpu_emit_fill_buffer(adev, ib, s, d, b) (adev)->mman.buffer_funcs->emit_fill_buffer((ib), (s), (d), (b)) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_seq64.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_seq64.c index d45ebfb642ca..a0b479d5fff1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_seq64.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_seq64.c @@ -67,9 +67,9 @@ static inline u64 amdgpu_seq64_get_va_base(struct amdgpu_device *adev) int amdgpu_seq64_map(struct amdgpu_device *adev, struct amdgpu_vm *vm, struct amdgpu_bo_va **bo_va) { - u64 seq64_addr, va_flags; struct amdgpu_bo *bo; struct drm_exec exec; + u64 seq64_addr; int r; bo = adev->seq64.sbo; @@ -94,9 +94,9 @@ int amdgpu_seq64_map(struct amdgpu_device *adev, struct amdgpu_vm *vm, seq64_addr = amdgpu_seq64_get_va_base(adev) & AMDGPU_GMC_HOLE_MASK; - va_flags = amdgpu_gem_va_map_flags(adev, AMDGPU_VM_PAGE_READABLE | AMDGPU_VM_MTYPE_UC); - r = amdgpu_vm_bo_map(adev, *bo_va, seq64_addr, 0, AMDGPU_VA_RESERVED_SEQ64_SIZE, - va_flags); + r = amdgpu_vm_bo_map(adev, *bo_va, seq64_addr, 0, + AMDGPU_VA_RESERVED_SEQ64_SIZE, + AMDGPU_VM_PAGE_READABLE | AMDGPU_VM_MTYPE_UC); if (r) { DRM_ERROR("failed to do bo_map on userq sem, err=%d\n", r); amdgpu_vm_bo_del(adev, *bo_va); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index 9c5df35f05b7..aa9ee5dffa45 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -123,6 +123,7 @@ static void amdgpu_evict_flags(struct ttm_buffer_object *bo, case AMDGPU_PL_GWS: case AMDGPU_PL_OA: case AMDGPU_PL_DOORBELL: + case AMDGPU_PL_MMIO_REMAP: placement->num_placement = 0; return; @@ -226,7 +227,8 @@ static int amdgpu_ttm_map_buffer(struct ttm_buffer_object *bo, r = amdgpu_job_alloc_with_ib(adev, &adev->mman.high_pr, AMDGPU_FENCE_OWNER_UNDEFINED, num_dw * 4 + num_bytes, - AMDGPU_IB_POOL_DELAYED, &job); + AMDGPU_IB_POOL_DELAYED, &job, + AMDGPU_KERNEL_JOB_ID_TTM_MAP_BUFFER); if (r) return r; @@ -299,7 +301,8 @@ int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev, struct amdgpu_bo *abo_src, *abo_dst; if (!adev->mman.buffer_funcs_enabled) { - DRM_ERROR("Trying to move memory with ring turned off.\n"); + dev_err(adev->dev, + "Trying to move memory with ring turned off.\n"); return -EINVAL; } @@ -405,7 +408,7 @@ static int amdgpu_move_blit(struct ttm_buffer_object *bo, struct dma_fence *wipe_fence = NULL; r = amdgpu_fill_buffer(abo, 0, NULL, &wipe_fence, - false); + false, 
AMDGPU_KERNEL_JOB_ID_MOVE_BLIT); if (r) { goto error; } else if (wipe_fence) { @@ -446,7 +449,8 @@ bool amdgpu_res_cpu_visible(struct amdgpu_device *adev, return false; if (res->mem_type == TTM_PL_SYSTEM || res->mem_type == TTM_PL_TT || - res->mem_type == AMDGPU_PL_PREEMPT || res->mem_type == AMDGPU_PL_DOORBELL) + res->mem_type == AMDGPU_PL_PREEMPT || res->mem_type == AMDGPU_PL_DOORBELL || + res->mem_type == AMDGPU_PL_MMIO_REMAP) return true; if (res->mem_type != TTM_PL_VRAM) @@ -537,10 +541,12 @@ static int amdgpu_bo_move(struct ttm_buffer_object *bo, bool evict, old_mem->mem_type == AMDGPU_PL_GWS || old_mem->mem_type == AMDGPU_PL_OA || old_mem->mem_type == AMDGPU_PL_DOORBELL || + old_mem->mem_type == AMDGPU_PL_MMIO_REMAP || new_mem->mem_type == AMDGPU_PL_GDS || new_mem->mem_type == AMDGPU_PL_GWS || new_mem->mem_type == AMDGPU_PL_OA || - new_mem->mem_type == AMDGPU_PL_DOORBELL) { + new_mem->mem_type == AMDGPU_PL_DOORBELL || + new_mem->mem_type == AMDGPU_PL_MMIO_REMAP) { /* Nothing to save here */ amdgpu_bo_move_notify(bo, evict, new_mem); ttm_bo_move_null(bo, new_mem); @@ -628,6 +634,12 @@ static int amdgpu_ttm_io_mem_reserve(struct ttm_device *bdev, mem->bus.is_iomem = true; mem->bus.caching = ttm_uncached; break; + case AMDGPU_PL_MMIO_REMAP: + mem->bus.offset = mem->start << PAGE_SHIFT; + mem->bus.offset += adev->rmmio_remap.bus_addr; + mem->bus.is_iomem = true; + mem->bus.caching = ttm_uncached; + break; default: return -EINVAL; } @@ -645,6 +657,8 @@ static unsigned long amdgpu_ttm_io_mem_pfn(struct ttm_buffer_object *bo, if (bo->resource->mem_type == AMDGPU_PL_DOORBELL) return ((uint64_t)(adev->doorbell.base + cursor.start)) >> PAGE_SHIFT; + else if (bo->resource->mem_type == AMDGPU_PL_MMIO_REMAP) + return ((uint64_t)(adev->rmmio_remap.bus_addr + cursor.start)) >> PAGE_SHIFT; return (adev->gmc.aper_base + cursor.start) >> PAGE_SHIFT; } @@ -694,7 +708,7 @@ struct amdgpu_ttm_tt { * Calling function must call amdgpu_ttm_tt_userptr_range_done() once and only * once afterwards to stop HMM tracking */ -int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo, struct page **pages, +int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo, struct hmm_range **range) { struct ttm_tt *ttm = bo->tbo.ttm; @@ -731,7 +745,7 @@ int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo, struct page **pages, readonly = amdgpu_ttm_tt_is_readonly(ttm); r = amdgpu_hmm_range_get_pages(&bo->notifier, start, ttm->num_pages, - readonly, NULL, pages, range); + readonly, NULL, range); out_unlock: mmap_read_unlock(mm); if (r) @@ -783,12 +797,12 @@ bool amdgpu_ttm_tt_get_user_pages_done(struct ttm_tt *ttm, * that backs user memory and will ultimately be mapped into the device * address space. */ -void amdgpu_ttm_tt_set_user_pages(struct ttm_tt *ttm, struct page **pages) +void amdgpu_ttm_tt_set_user_pages(struct ttm_tt *ttm, struct hmm_range *range) { unsigned long i; for (i = 0; i < ttm->num_pages; ++i) - ttm->pages[i] = pages ? pages[i] : NULL; + ttm->pages[i] = range ? 
hmm_pfn_to_page(range->hmm_pfns[i]) : NULL; } /* @@ -934,7 +948,7 @@ static int amdgpu_ttm_backend_bind(struct ttm_device *bdev, if (gtt->userptr) { r = amdgpu_ttm_tt_pin_userptr(bdev, ttm); if (r) { - DRM_ERROR("failed to pin userptr\n"); + dev_err(adev->dev, "failed to pin userptr\n"); return r; } } else if (ttm->page_flags & TTM_TT_FLAG_EXTERNAL) { @@ -1060,7 +1074,7 @@ static void amdgpu_ttm_backend_unbind(struct ttm_device *bdev, /* if the pages have userptr pinning then clear that first */ if (gtt->userptr) { amdgpu_ttm_tt_unpin_userptr(bdev, ttm); - } else if (ttm->sg && gtt->gobj->import_attach) { + } else if (ttm->sg && drm_gem_is_imported(gtt->gobj)) { struct dma_buf_attachment *attach; attach = gtt->gobj->import_attach; @@ -1354,7 +1368,8 @@ uint64_t amdgpu_ttm_tt_pde_flags(struct ttm_tt *ttm, struct ttm_resource *mem) if (mem && (mem->mem_type == TTM_PL_TT || mem->mem_type == AMDGPU_PL_DOORBELL || - mem->mem_type == AMDGPU_PL_PREEMPT)) { + mem->mem_type == AMDGPU_PL_PREEMPT || + mem->mem_type == AMDGPU_PL_MMIO_REMAP)) { flags |= AMDGPU_PTE_SYSTEM; if (ttm->caching == ttm_cached) @@ -1509,7 +1524,8 @@ static int amdgpu_ttm_access_memory_sdma(struct ttm_buffer_object *bo, r = amdgpu_job_alloc_with_ib(adev, &adev->mman.high_pr, AMDGPU_FENCE_OWNER_UNDEFINED, num_dw * 4, AMDGPU_IB_POOL_DELAYED, - &job); + &job, + AMDGPU_KERNEL_JOB_ID_TTM_ACCESS_MEMORY_SDMA); if (r) goto out; @@ -1781,7 +1797,7 @@ static int amdgpu_ttm_reserve_tmr(struct amdgpu_device *adev) &ctx->c2p_bo, NULL); if (ret) { - DRM_ERROR("alloc c2p_bo failed(%d)!\n", ret); + dev_err(adev->dev, "alloc c2p_bo failed(%d)!\n", ret); amdgpu_ttm_training_reserve_vram_fini(adev); return ret; } @@ -1793,7 +1809,7 @@ static int amdgpu_ttm_reserve_tmr(struct amdgpu_device *adev) adev, adev->gmc.real_vram_size - reserve_size, reserve_size, &adev->mman.fw_reserved_memory, NULL); if (ret) { - DRM_ERROR("alloc tmr failed(%d)!\n", ret); + dev_err(adev->dev, "alloc tmr failed(%d)!\n", ret); amdgpu_bo_free_kernel(&adev->mman.fw_reserved_memory, NULL, NULL); return ret; @@ -1840,6 +1856,59 @@ static void amdgpu_ttm_pools_fini(struct amdgpu_device *adev) adev->mman.ttm_pools = NULL; } +/** + * amdgpu_ttm_mmio_remap_bo_init - Allocate the singleton 4K MMIO_REMAP BO + * @adev: amdgpu device + * + * Allocates a one-page (4K) GEM BO in AMDGPU_GEM_DOMAIN_MMIO_REMAP when the + * hardware exposes a remap base (adev->rmmio_remap.bus_addr) and the host + * PAGE_SIZE is <= AMDGPU_GPU_PAGE_SIZE (4K). The BO is created as a regular + * GEM object (amdgpu_bo_create). + * + * Return: + * * 0 on success or intentional skip (feature not present/unsupported) + * * negative errno on allocation failure + */ +static int amdgpu_ttm_mmio_remap_bo_init(struct amdgpu_device *adev) +{ + struct amdgpu_bo_param bp; + int r; + + /* Skip if HW doesn't expose remap, or if PAGE_SIZE > AMDGPU_GPU_PAGE_SIZE (4K). */ + if (!adev->rmmio_remap.bus_addr || PAGE_SIZE > AMDGPU_GPU_PAGE_SIZE) + return 0; + + memset(&bp, 0, sizeof(bp)); + + /* Create exactly one GEM BO in the MMIO_REMAP domain. 
*/ + bp.type = ttm_bo_type_device; /* userspace-mappable GEM */ + bp.size = AMDGPU_GPU_PAGE_SIZE; /* 4K */ + bp.byte_align = AMDGPU_GPU_PAGE_SIZE; + bp.domain = AMDGPU_GEM_DOMAIN_MMIO_REMAP; + bp.flags = 0; + bp.resv = NULL; + bp.bo_ptr_size = sizeof(struct amdgpu_bo); + + r = amdgpu_bo_create(adev, &bp, &adev->rmmio_remap.bo); + if (r) + return r; + + return 0; +} + +/** + * amdgpu_ttm_mmio_remap_bo_fini - Free the singleton MMIO_REMAP BO + * @adev: amdgpu device + * + * Frees the kernel-owned MMIO_REMAP BO if it was allocated by + * amdgpu_ttm_mmio_remap_bo_init(). + */ +static void amdgpu_ttm_mmio_remap_bo_fini(struct amdgpu_device *adev) +{ + amdgpu_bo_unref(&adev->rmmio_remap.bo); + adev->rmmio_remap.bo = NULL; +} + /* * amdgpu_ttm_init - Init the memory management (ttm) as well as various * gtt/vram related fields. @@ -1864,22 +1933,25 @@ int amdgpu_ttm_init(struct amdgpu_device *adev) adev->need_swiotlb, dma_addressing_limited(adev->dev)); if (r) { - DRM_ERROR("failed initializing buffer object driver(%d).\n", r); + dev_err(adev->dev, + "failed initializing buffer object driver(%d).\n", r); return r; } r = amdgpu_ttm_pools_init(adev); if (r) { - DRM_ERROR("failed to init ttm pools(%d).\n", r); + dev_err(adev->dev, "failed to init ttm pools(%d).\n", r); return r; } adev->mman.initialized = true; - /* Initialize VRAM pool with all of VRAM divided into pages */ - r = amdgpu_vram_mgr_init(adev); - if (r) { - DRM_ERROR("Failed initializing VRAM heap.\n"); - return r; + if (!adev->gmc.is_app_apu) { + /* Initialize VRAM pool with all of VRAM divided into pages */ + r = amdgpu_vram_mgr_init(adev); + if (r) { + dev_err(adev->dev, "Failed initializing VRAM heap.\n"); + return r; + } } /* Change the size here instead of the init above so only lpfn is affected */ @@ -1958,7 +2030,7 @@ int amdgpu_ttm_init(struct amdgpu_device *adev) DRM_DEBUG_DRIVER("Skipped stolen memory reservation\n"); } - DRM_INFO("amdgpu: %uM of VRAM memory ready\n", + dev_info(adev->dev, "amdgpu: %uM of VRAM memory ready\n", (unsigned int)(adev->gmc.real_vram_size / (1024 * 1024))); /* Compute GTT size, either based on TTM limit @@ -1981,10 +2053,10 @@ int amdgpu_ttm_init(struct amdgpu_device *adev) /* Initialize GTT memory pool */ r = amdgpu_gtt_mgr_init(adev, gtt_size); if (r) { - DRM_ERROR("Failed initializing GTT heap.\n"); + dev_err(adev->dev, "Failed initializing GTT heap.\n"); return r; } - DRM_INFO("amdgpu: %uM of GTT memory ready.\n", + dev_info(adev->dev, "amdgpu: %uM of GTT memory ready.\n", (unsigned int)(gtt_size / (1024 * 1024))); if (adev->flags & AMD_IS_APU) { @@ -1995,40 +2067,52 @@ int amdgpu_ttm_init(struct amdgpu_device *adev) /* Initialize doorbell pool on PCI BAR */ r = amdgpu_ttm_init_on_chip(adev, AMDGPU_PL_DOORBELL, adev->doorbell.size / PAGE_SIZE); if (r) { - DRM_ERROR("Failed initializing doorbell heap.\n"); + dev_err(adev->dev, "Failed initializing doorbell heap.\n"); return r; } /* Create a boorbell page for kernel usages */ r = amdgpu_doorbell_create_kernel_doorbells(adev); if (r) { - DRM_ERROR("Failed to initialize kernel doorbells.\n"); + dev_err(adev->dev, "Failed to initialize kernel doorbells.\n"); + return r; + } + + /* Initialize MMIO-remap pool (single page 4K) */ + r = amdgpu_ttm_init_on_chip(adev, AMDGPU_PL_MMIO_REMAP, 1); + if (r) { + dev_err(adev->dev, "Failed initializing MMIO-remap heap.\n"); return r; } + /* Allocate the singleton MMIO_REMAP BO (4K) if supported */ + r = amdgpu_ttm_mmio_remap_bo_init(adev); + if (r) + return r; + /* Initialize preemptible memory pool */ r = 
amdgpu_preempt_mgr_init(adev); if (r) { - DRM_ERROR("Failed initializing PREEMPT heap.\n"); + dev_err(adev->dev, "Failed initializing PREEMPT heap.\n"); return r; } /* Initialize various on-chip memory pools */ r = amdgpu_ttm_init_on_chip(adev, AMDGPU_PL_GDS, adev->gds.gds_size); if (r) { - DRM_ERROR("Failed initializing GDS heap.\n"); + dev_err(adev->dev, "Failed initializing GDS heap.\n"); return r; } r = amdgpu_ttm_init_on_chip(adev, AMDGPU_PL_GWS, adev->gds.gws_size); if (r) { - DRM_ERROR("Failed initializing gws heap.\n"); + dev_err(adev->dev, "Failed initializing gws heap.\n"); return r; } r = amdgpu_ttm_init_on_chip(adev, AMDGPU_PL_OA, adev->gds.oa_size); if (r) { - DRM_ERROR("Failed initializing oa heap.\n"); + dev_err(adev->dev, "Failed initializing oa heap.\n"); return r; } if (amdgpu_bo_create_kernel(adev, PAGE_SIZE, PAGE_SIZE, @@ -2060,12 +2144,16 @@ void amdgpu_ttm_fini(struct amdgpu_device *adev) /* return the FW reserved memory back to VRAM */ amdgpu_bo_free_kernel(&adev->mman.fw_reserved_memory, NULL, NULL); + amdgpu_bo_free_kernel(&adev->mman.fw_reserved_memory_extend, NULL, + NULL); if (adev->mman.stolen_reserved_size) amdgpu_bo_free_kernel(&adev->mman.stolen_reserved_memory, NULL, NULL); } amdgpu_bo_free_kernel(&adev->mman.sdma_access_bo, NULL, &adev->mman.sdma_access_ptr); + + amdgpu_ttm_mmio_remap_bo_fini(adev); amdgpu_ttm_fw_reserve_vram_fini(adev); amdgpu_ttm_drv_reserve_vram_fini(adev); @@ -2078,7 +2166,8 @@ void amdgpu_ttm_fini(struct amdgpu_device *adev) drm_dev_exit(idx); } - amdgpu_vram_mgr_fini(adev); + if (!adev->gmc.is_app_apu) + amdgpu_vram_mgr_fini(adev); amdgpu_gtt_mgr_fini(adev); amdgpu_preempt_mgr_fini(adev); amdgpu_doorbell_fini(adev); @@ -2087,9 +2176,10 @@ void amdgpu_ttm_fini(struct amdgpu_device *adev) ttm_range_man_fini(&adev->mman.bdev, AMDGPU_PL_GWS); ttm_range_man_fini(&adev->mman.bdev, AMDGPU_PL_OA); ttm_range_man_fini(&adev->mman.bdev, AMDGPU_PL_DOORBELL); + ttm_range_man_fini(&adev->mman.bdev, AMDGPU_PL_MMIO_REMAP); ttm_device_fini(&adev->mman.bdev); adev->mman.initialized = false; - DRM_INFO("amdgpu: ttm finalized\n"); + dev_info(adev->dev, "amdgpu: ttm finalized\n"); } /** @@ -2121,8 +2211,9 @@ void amdgpu_ttm_set_buffer_funcs_status(struct amdgpu_device *adev, bool enable) DRM_SCHED_PRIORITY_KERNEL, &sched, 1, NULL); if (r) { - DRM_ERROR("Failed setting up TTM BO move entity (%d)\n", - r); + dev_err(adev->dev, + "Failed setting up TTM BO move entity (%d)\n", + r); return; } @@ -2130,8 +2221,9 @@ void amdgpu_ttm_set_buffer_funcs_status(struct amdgpu_device *adev, bool enable) DRM_SCHED_PRIORITY_NORMAL, &sched, 1, NULL); if (r) { - DRM_ERROR("Failed setting up TTM BO move entity (%d)\n", - r); + dev_err(adev->dev, + "Failed setting up TTM BO move entity (%d)\n", + r); goto error_free_entity; } } else { @@ -2161,7 +2253,7 @@ static int amdgpu_ttm_prepare_job(struct amdgpu_device *adev, struct dma_resv *resv, bool vm_needs_flush, struct amdgpu_job **job, - bool delayed) + bool delayed, u64 k_job_id) { enum amdgpu_ib_pool_type pool = direct_submit ? 
AMDGPU_IB_POOL_DIRECT : @@ -2171,7 +2263,7 @@ static int amdgpu_ttm_prepare_job(struct amdgpu_device *adev, &adev->mman.high_pr; r = amdgpu_job_alloc_with_ib(adev, entity, AMDGPU_FENCE_OWNER_UNDEFINED, - num_dw * 4, pool, job); + num_dw * 4, pool, job, k_job_id); if (r) return r; @@ -2202,7 +2294,8 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset, int r; if (!direct_submit && !ring->sched.ready) { - DRM_ERROR("Trying to move memory with ring turned off.\n"); + dev_err(adev->dev, + "Trying to move memory with ring turned off.\n"); return -EINVAL; } @@ -2210,7 +2303,8 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset, num_loops = DIV_ROUND_UP(byte_count, max_bytes); num_dw = ALIGN(num_loops * adev->mman.buffer_funcs->copy_num_dw, 8); r = amdgpu_ttm_prepare_job(adev, direct_submit, num_dw, - resv, vm_needs_flush, &job, false); + resv, vm_needs_flush, &job, false, + AMDGPU_KERNEL_JOB_ID_TTM_COPY_BUFFER); if (r) return r; @@ -2237,7 +2331,7 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset, error_free: amdgpu_job_free(job); - DRM_ERROR("Error scheduling IBs (%d)\n", r); + dev_err(adev->dev, "Error scheduling IBs (%d)\n", r); return r; } @@ -2245,7 +2339,8 @@ static int amdgpu_ttm_fill_mem(struct amdgpu_ring *ring, uint32_t src_data, uint64_t dst_addr, uint32_t byte_count, struct dma_resv *resv, struct dma_fence **fence, - bool vm_needs_flush, bool delayed) + bool vm_needs_flush, bool delayed, + u64 k_job_id) { struct amdgpu_device *adev = ring->adev; unsigned int num_loops, num_dw; @@ -2258,7 +2353,7 @@ static int amdgpu_ttm_fill_mem(struct amdgpu_ring *ring, uint32_t src_data, num_loops = DIV_ROUND_UP_ULL(byte_count, max_bytes); num_dw = ALIGN(num_loops * adev->mman.buffer_funcs->fill_num_dw, 8); r = amdgpu_ttm_prepare_job(adev, false, num_dw, resv, vm_needs_flush, - &job, delayed); + &job, delayed, k_job_id); if (r) return r; @@ -2328,7 +2423,8 @@ int amdgpu_ttm_clear_buffer(struct amdgpu_bo *bo, goto err; r = amdgpu_ttm_fill_mem(ring, 0, addr, size, resv, - &next, true, true); + &next, true, true, + AMDGPU_KERNEL_JOB_ID_TTM_CLEAR_BUFFER); if (r) goto err; @@ -2347,7 +2443,8 @@ int amdgpu_fill_buffer(struct amdgpu_bo *bo, uint32_t src_data, struct dma_resv *resv, struct dma_fence **f, - bool delayed) + bool delayed, + u64 k_job_id) { struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring; @@ -2356,7 +2453,8 @@ int amdgpu_fill_buffer(struct amdgpu_bo *bo, int r; if (!adev->mman.buffer_funcs_enabled) { - DRM_ERROR("Trying to clear memory with ring turned off.\n"); + dev_err(adev->dev, + "Trying to clear memory with ring turned off.\n"); return -EINVAL; } @@ -2376,7 +2474,7 @@ int amdgpu_fill_buffer(struct amdgpu_bo *bo, goto error; r = amdgpu_ttm_fill_mem(ring, src_data, to, cur_size, resv, - &next, true, delayed); + &next, true, delayed, k_job_id); if (r) goto error; @@ -2416,7 +2514,7 @@ int amdgpu_ttm_evict_resources(struct amdgpu_device *adev, int mem_type) man = ttm_manager_type(&adev->mman.bdev, mem_type); break; default: - DRM_ERROR("Trying to evict invalid memory type\n"); + dev_err(adev->dev, "Trying to evict invalid memory type\n"); return -EINVAL; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h index 208b7d1d8a27..0be2728aa872 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h @@ -34,7 +34,8 @@ #define AMDGPU_PL_OA (TTM_PL_PRIV + 2) #define AMDGPU_PL_PREEMPT 
(TTM_PL_PRIV + 3) #define AMDGPU_PL_DOORBELL (TTM_PL_PRIV + 4) -#define __AMDGPU_PL_NUM (TTM_PL_PRIV + 5) +#define AMDGPU_PL_MMIO_REMAP (TTM_PL_PRIV + 5) +#define __AMDGPU_PL_NUM (TTM_PL_PRIV + 6) #define AMDGPU_GTT_MAX_TRANSFER_SIZE 512 #define AMDGPU_GTT_NUM_TRANSFER_WINDOWS 2 @@ -86,6 +87,7 @@ struct amdgpu_mman { uint32_t discovery_tmr_size; /* fw reserved memory */ struct amdgpu_bo *fw_reserved_memory; + struct amdgpu_bo *fw_reserved_memory_extend; /* firmware VRAM reservation */ u64 fw_vram_usage_start_offset; @@ -154,6 +156,7 @@ int amdgpu_vram_mgr_reserve_range(struct amdgpu_vram_mgr *mgr, uint64_t start, uint64_t size); int amdgpu_vram_mgr_query_page_status(struct amdgpu_vram_mgr *mgr, uint64_t start); +void amdgpu_vram_mgr_clear_reset_blocks(struct amdgpu_device *adev); bool amdgpu_res_cpu_visible(struct amdgpu_device *adev, struct ttm_resource *res); @@ -180,14 +183,15 @@ int amdgpu_fill_buffer(struct amdgpu_bo *bo, uint32_t src_data, struct dma_resv *resv, struct dma_fence **fence, - bool delayed); + bool delayed, + u64 k_job_id); int amdgpu_ttm_alloc_gart(struct ttm_buffer_object *bo); void amdgpu_ttm_recover_gart(struct ttm_buffer_object *tbo); uint64_t amdgpu_ttm_domain_start(struct amdgpu_device *adev, uint32_t type); #if IS_ENABLED(CONFIG_DRM_AMDGPU_USERPTR) -int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo, struct page **pages, +int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo, struct hmm_range **range); void amdgpu_ttm_tt_discard_user_pages(struct ttm_tt *ttm, struct hmm_range *range); @@ -195,7 +199,6 @@ bool amdgpu_ttm_tt_get_user_pages_done(struct ttm_tt *ttm, struct hmm_range *range); #else static inline int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo, - struct page **pages, struct hmm_range **range) { return -EPERM; @@ -211,7 +214,7 @@ static inline bool amdgpu_ttm_tt_get_user_pages_done(struct ttm_tt *ttm, } #endif -void amdgpu_ttm_tt_set_user_pages(struct ttm_tt *ttm, struct page **pages); +void amdgpu_ttm_tt_set_user_pages(struct ttm_tt *ttm, struct hmm_range *range); int amdgpu_ttm_tt_get_userptr(const struct ttm_buffer_object *tbo, uint64_t *user_addr); int amdgpu_ttm_tt_set_userptr(struct ttm_buffer_object *bo, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c index 2505c46a9c3d..e96f24e9ad57 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c @@ -30,6 +30,11 @@ #define AMDGPU_UCODE_NAME_MAX (128) +static const struct kicker_device kicker_device_list[] = { + {0x744B, 0x00}, + {0x7551, 0xC8} +}; + static void amdgpu_ucode_print_common_hdr(const struct common_firmware_header *hdr) { DRM_DEBUG("size_bytes: %u\n", le32_to_cpu(hdr->size_bytes)); @@ -1155,6 +1160,9 @@ int amdgpu_ucode_init_bo(struct amdgpu_device *adev) adev->firmware.max_ucodes = AMDGPU_UCODE_ID_MAXIMUM; } + if (amdgpu_virt_xgmi_migrate_enabled(adev) && adev->firmware.fw_buf) + adev->firmware.fw_buf_mc = amdgpu_bo_fb_aper_addr(adev->firmware.fw_buf); + for (i = 0; i < adev->firmware.max_ucodes; i++) { ucode = &adev->firmware.ucode[i]; if (ucode->fw) { @@ -1387,6 +1395,19 @@ static const char *amdgpu_ucode_legacy_naming(struct amdgpu_device *adev, int bl return NULL; } +bool amdgpu_is_kicker_fw(struct amdgpu_device *adev) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(kicker_device_list); i++) { + if (adev->pdev->device == kicker_device_list[i].device && + adev->pdev->revision == kicker_device_list[i].revision) + return true; + } + + return false; +} + void 
amdgpu_ucode_ip_version_decode(struct amdgpu_device *adev, int block_type, char *ucode_prefix, int len) { int maj, min, rev; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h index 9e89c3487be5..6349aad6da35 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h @@ -605,6 +605,11 @@ struct amdgpu_firmware { uint32_t pldm_version; }; +struct kicker_device{ + unsigned short device; + u8 revision; +}; + void amdgpu_ucode_print_mc_hdr(const struct common_firmware_header *hdr); void amdgpu_ucode_print_smc_hdr(const struct common_firmware_header *hdr); void amdgpu_ucode_print_imu_hdr(const struct common_firmware_header *hdr); @@ -632,5 +637,6 @@ amdgpu_ucode_get_load_type(struct amdgpu_device *adev, int load_type); const char *amdgpu_ucode_name(enum AMDGPU_UCODE_ID ucode_id); void amdgpu_ucode_ip_version_decode(struct amdgpu_device *adev, int block_type, char *ucode_prefix, int len); +bool amdgpu_is_kicker_fw(struct amdgpu_device *adev); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c index c92b8794aa73..2e039fb778ea 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c @@ -252,6 +252,7 @@ int amdgpu_umc_pasid_poison_handler(struct amdgpu_device *adev, block, pasid, pasid_fn, data, reset); if (!ret) { atomic_inc(&con->page_retirement_req_cnt); + atomic_inc(&con->poison_consumption_count); wake_up(&con->page_retirement_wq); } } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c index 295e7186e156..1add21160d21 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c @@ -44,8 +44,41 @@ u32 amdgpu_userq_get_supported_ip_mask(struct amdgpu_device *adev) return userq_ip_mask; } +int amdgpu_userq_input_va_validate(struct amdgpu_vm *vm, u64 addr, + u64 expected_size) +{ + struct amdgpu_bo_va_mapping *va_map; + u64 user_addr; + u64 size; + int r = 0; + + user_addr = (addr & AMDGPU_GMC_HOLE_MASK) >> AMDGPU_GPU_PAGE_SHIFT; + size = expected_size >> AMDGPU_GPU_PAGE_SHIFT; + + r = amdgpu_bo_reserve(vm->root.bo, false); + if (r) + return r; + + va_map = amdgpu_vm_bo_lookup_mapping(vm, user_addr); + if (!va_map) { + r = -EINVAL; + goto out_err; + } + /* Only validate the userq whether resident in the VM mapping range */ + if (user_addr >= va_map->start && + va_map->last - user_addr + 1 >= size) { + amdgpu_bo_unreserve(vm->root.bo); + return 0; + } + + r = -EINVAL; +out_err: + amdgpu_bo_unreserve(vm->root.bo); + return r; +} + static int -amdgpu_userq_unmap_helper(struct amdgpu_userq_mgr *uq_mgr, +amdgpu_userq_preempt_helper(struct amdgpu_userq_mgr *uq_mgr, struct amdgpu_usermode_queue *queue) { struct amdgpu_device *adev = uq_mgr->adev; @@ -54,6 +87,49 @@ amdgpu_userq_unmap_helper(struct amdgpu_userq_mgr *uq_mgr, int r = 0; if (queue->state == AMDGPU_USERQ_STATE_MAPPED) { + r = userq_funcs->preempt(uq_mgr, queue); + if (r) { + queue->state = AMDGPU_USERQ_STATE_HUNG; + } else { + queue->state = AMDGPU_USERQ_STATE_PREEMPTED; + } + } + + return r; +} + +static int +amdgpu_userq_restore_helper(struct amdgpu_userq_mgr *uq_mgr, + struct amdgpu_usermode_queue *queue) +{ + struct amdgpu_device *adev = uq_mgr->adev; + const struct amdgpu_userq_funcs *userq_funcs = + adev->userq_funcs[queue->queue_type]; + int r = 0; + + if (queue->state == AMDGPU_USERQ_STATE_PREEMPTED) { + r = userq_funcs->restore(uq_mgr, queue); + if (r) { + 
queue->state = AMDGPU_USERQ_STATE_HUNG; + } else { + queue->state = AMDGPU_USERQ_STATE_MAPPED; + } + } + + return r; +} + +static int +amdgpu_userq_unmap_helper(struct amdgpu_userq_mgr *uq_mgr, + struct amdgpu_usermode_queue *queue) +{ + struct amdgpu_device *adev = uq_mgr->adev; + const struct amdgpu_userq_funcs *userq_funcs = + adev->userq_funcs[queue->queue_type]; + int r = 0; + + if ((queue->state == AMDGPU_USERQ_STATE_MAPPED) || + (queue->state == AMDGPU_USERQ_STATE_PREEMPTED)) { r = userq_funcs->unmap(uq_mgr, queue); if (r) queue->state = AMDGPU_USERQ_STATE_HUNG; @@ -112,22 +188,6 @@ amdgpu_userq_cleanup(struct amdgpu_userq_mgr *uq_mgr, kfree(queue); } -int -amdgpu_userq_active(struct amdgpu_userq_mgr *uq_mgr) -{ - struct amdgpu_usermode_queue *queue; - int queue_id; - int ret = 0; - - mutex_lock(&uq_mgr->userq_mutex); - /* Resume all the queues for this process */ - idr_for_each_entry(&uq_mgr->userq_idr, queue, queue_id) - ret += queue->state == AMDGPU_USERQ_STATE_MAPPED; - - mutex_unlock(&uq_mgr->userq_mutex); - return ret; -} - static struct amdgpu_usermode_queue * amdgpu_userq_find(struct amdgpu_userq_mgr *uq_mgr, int qid) { @@ -318,7 +378,16 @@ amdgpu_userq_destroy(struct drm_file *filp, int queue_id) amdgpu_bo_unreserve(queue->db_obj.obj); } amdgpu_bo_unref(&queue->db_obj.obj); + +#if defined(CONFIG_DEBUG_FS) + debugfs_remove_recursive(queue->debugfs_queue); +#endif r = amdgpu_userq_unmap_helper(uq_mgr, queue); + /*TODO: It requires a reset for userq hw unmap error*/ + if (unlikely(r != AMDGPU_USERQ_STATE_UNMAPPED)) { + drm_warn(adev_to_drm(uq_mgr->adev), "trying to destroy a HW mapping userq\n"); + queue->state = AMDGPU_USERQ_STATE_HUNG; + } amdgpu_userq_cleanup(uq_mgr, queue, queue_id); mutex_unlock(&uq_mgr->userq_mutex); @@ -343,6 +412,46 @@ static int amdgpu_userq_priority_permit(struct drm_file *filp, return -EACCES; } +#if defined(CONFIG_DEBUG_FS) +static int amdgpu_mqd_info_read(struct seq_file *m, void *unused) +{ + struct amdgpu_usermode_queue *queue = m->private; + struct amdgpu_bo *bo; + int r; + + if (!queue || !queue->mqd.obj) + return -EINVAL; + + bo = amdgpu_bo_ref(queue->mqd.obj); + r = amdgpu_bo_reserve(bo, true); + if (r) { + amdgpu_bo_unref(&bo); + return -EINVAL; + } + + seq_printf(m, "queue_type: %d\n", queue->queue_type); + seq_printf(m, "mqd_gpu_address: 0x%llx\n", amdgpu_bo_gpu_offset(queue->mqd.obj)); + + amdgpu_bo_unreserve(bo); + amdgpu_bo_unref(&bo); + + return 0; +} + +static int amdgpu_mqd_info_open(struct inode *inode, struct file *file) +{ + return single_open(file, amdgpu_mqd_info_read, inode->i_private); +} + +static const struct file_operations amdgpu_mqd_info_fops = { + .owner = THIS_MODULE, + .open = amdgpu_mqd_info_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; +#endif + static int amdgpu_userq_create(struct drm_file *filp, union drm_amdgpu_userq *args) { @@ -352,6 +461,7 @@ amdgpu_userq_create(struct drm_file *filp, union drm_amdgpu_userq *args) const struct amdgpu_userq_funcs *uq_funcs; struct amdgpu_usermode_queue *queue; struct amdgpu_db_info db_info; + char *queue_name; bool skip_map_queue; uint64_t index; int qid, r = 0; @@ -359,27 +469,10 @@ amdgpu_userq_create(struct drm_file *filp, union drm_amdgpu_userq *args) (args->in.flags & AMDGPU_USERQ_CREATE_FLAGS_QUEUE_PRIORITY_MASK) >> AMDGPU_USERQ_CREATE_FLAGS_QUEUE_PRIORITY_SHIFT; - /* Usermode queues are only supported for GFX IP as of now */ - if (args->in.ip_type != AMDGPU_HW_IP_GFX && - args->in.ip_type != AMDGPU_HW_IP_DMA && - args->in.ip_type != 
AMDGPU_HW_IP_COMPUTE) { - drm_file_err(uq_mgr->file, "Usermode queue doesn't support IP type %u\n", - args->in.ip_type); - return -EINVAL; - } - r = amdgpu_userq_priority_permit(filp, priority); if (r) return r; - if ((args->in.flags & AMDGPU_USERQ_CREATE_FLAGS_QUEUE_SECURE) && - (args->in.ip_type != AMDGPU_HW_IP_GFX) && - (args->in.ip_type != AMDGPU_HW_IP_COMPUTE) && - !amdgpu_is_tmz(adev)) { - drm_file_err(uq_mgr->file, "Secure only supported on GFX/Compute queues\n"); - return -EINVAL; - } - r = pm_runtime_get_sync(adev_to_drm(adev)->dev); if (r < 0) { drm_file_err(uq_mgr->file, "pm_runtime_get_sync() failed for userqueue create\n"); @@ -411,6 +504,15 @@ amdgpu_userq_create(struct drm_file *filp, union drm_amdgpu_userq *args) r = -ENOMEM; goto unlock; } + + /* Validate the userq virtual address. */ + if (amdgpu_userq_input_va_validate(&fpriv->vm, args->in.queue_va, args->in.queue_size) || + amdgpu_userq_input_va_validate(&fpriv->vm, args->in.rptr_va, AMDGPU_GPU_PAGE_SIZE) || + amdgpu_userq_input_va_validate(&fpriv->vm, args->in.wptr_va, AMDGPU_GPU_PAGE_SIZE)) { + r = -EINVAL; + kfree(queue); + goto unlock; + } queue->doorbell_handle = args->in.doorbell_handle; queue->queue_type = args->in.ip_type; queue->vm = &fpriv->vm; @@ -426,6 +528,7 @@ amdgpu_userq_create(struct drm_file *filp, union drm_amdgpu_userq *args) if (index == (uint64_t)-EINVAL) { drm_file_err(uq_mgr->file, "Failed to get doorbell for queue\n"); kfree(queue); + r = -EINVAL; goto unlock; } @@ -475,6 +578,18 @@ amdgpu_userq_create(struct drm_file *filp, union drm_amdgpu_userq *args) } } + queue_name = kasprintf(GFP_KERNEL, "queue-%d", qid); + if (!queue_name) { + r = -ENOMEM; + goto unlock; + } + +#if defined(CONFIG_DEBUG_FS) + /* Queue dentry per client to hold MQD information */ + queue->debugfs_queue = debugfs_create_dir(queue_name, filp->debugfs_client); + debugfs_create_file("mqd_info", 0444, queue->debugfs_queue, queue, &amdgpu_mqd_info_fops); +#endif + kfree(queue_name); args->out.queue_id = qid; @@ -485,22 +600,45 @@ unlock: return r; } -int amdgpu_userq_ioctl(struct drm_device *dev, void *data, - struct drm_file *filp) +static int amdgpu_userq_input_args_validate(struct drm_device *dev, + union drm_amdgpu_userq *args, + struct drm_file *filp) { - union drm_amdgpu_userq *args = data; - int r; + struct amdgpu_device *adev = drm_to_adev(dev); switch (args->in.op) { case AMDGPU_USERQ_OP_CREATE: if (args->in.flags & ~(AMDGPU_USERQ_CREATE_FLAGS_QUEUE_PRIORITY_MASK | AMDGPU_USERQ_CREATE_FLAGS_QUEUE_SECURE)) return -EINVAL; - r = amdgpu_userq_create(filp, args); - if (r) - drm_file_err(filp, "Failed to create usermode queue\n"); - break; + /* Usermode queues are only supported for GFX IP as of now */ + if (args->in.ip_type != AMDGPU_HW_IP_GFX && + args->in.ip_type != AMDGPU_HW_IP_DMA && + args->in.ip_type != AMDGPU_HW_IP_COMPUTE) { + drm_file_err(filp, "Usermode queue doesn't support IP type %u\n", + args->in.ip_type); + return -EINVAL; + } + + if ((args->in.flags & AMDGPU_USERQ_CREATE_FLAGS_QUEUE_SECURE) && + (args->in.ip_type != AMDGPU_HW_IP_GFX) && + (args->in.ip_type != AMDGPU_HW_IP_COMPUTE) && + !amdgpu_is_tmz(adev)) { + drm_file_err(filp, "Secure only supported on GFX/Compute queues\n"); + return -EINVAL; + } + if (args->in.queue_va == AMDGPU_BO_INVALID_OFFSET || + args->in.queue_va == 0 || + args->in.queue_size == 0) { + drm_file_err(filp, "invalid userq queue va or size\n"); + return -EINVAL; + } + if (!args->in.wptr_va || !args->in.rptr_va) { + drm_file_err(filp, "invalid userq queue rptr or wptr\n"); + 
return -EINVAL; + } + break; case AMDGPU_USERQ_OP_FREE: if (args->in.ip_type || args->in.doorbell_handle || @@ -510,10 +648,34 @@ int amdgpu_userq_ioctl(struct drm_device *dev, void *data, args->in.queue_size || args->in.rptr_va || args->in.wptr_va || - args->in.wptr_va || args->in.mqd || args->in.mqd_size) return -EINVAL; + break; + default: + return -EINVAL; + } + + return 0; +} + +int amdgpu_userq_ioctl(struct drm_device *dev, void *data, + struct drm_file *filp) +{ + union drm_amdgpu_userq *args = data; + int r; + + if (amdgpu_userq_input_args_validate(dev, args, filp) < 0) + return -EINVAL; + + switch (args->in.op) { + case AMDGPU_USERQ_OP_CREATE: + r = amdgpu_userq_create(filp, args); + if (r) + drm_file_err(filp, "Failed to create usermode queue\n"); + break; + + case AMDGPU_USERQ_OP_FREE: r = amdgpu_userq_destroy(filp, args->in.queue_id); if (r) drm_file_err(filp, "Failed to destroy usermode queue\n"); @@ -536,7 +698,7 @@ amdgpu_userq_restore_all(struct amdgpu_userq_mgr *uq_mgr) /* Resume all the queues for this process */ idr_for_each_entry(&uq_mgr->userq_idr, queue, queue_id) { - r = amdgpu_userq_map_helper(uq_mgr, queue); + r = amdgpu_userq_restore_helper(uq_mgr, queue); if (r) ret = r; } @@ -546,108 +708,106 @@ amdgpu_userq_restore_all(struct amdgpu_userq_mgr *uq_mgr) return ret; } +static int amdgpu_userq_validate_vm(void *param, struct amdgpu_bo *bo) +{ + struct ttm_operation_ctx ctx = { false, false }; + + amdgpu_bo_placement_from_domain(bo, bo->allowed_domains); + return ttm_bo_validate(&bo->tbo, &bo->placement, &ctx); +} + +/* Handle all BOs on the invalidated list, validate them and update the PTs */ static int -amdgpu_userq_validate_vm_bo(void *_unused, struct amdgpu_bo *bo) +amdgpu_userq_bo_validate(struct amdgpu_device *adev, struct drm_exec *exec, + struct amdgpu_vm *vm) { struct ttm_operation_ctx ctx = { false, false }; + struct amdgpu_bo_va *bo_va; + struct amdgpu_bo *bo; int ret; - amdgpu_bo_placement_from_domain(bo, bo->allowed_domains); + spin_lock(&vm->status_lock); + while (!list_empty(&vm->invalidated)) { + bo_va = list_first_entry(&vm->invalidated, + struct amdgpu_bo_va, + base.vm_status); + spin_unlock(&vm->status_lock); - ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx); - if (ret) - DRM_ERROR("Fail to validate\n"); + bo = bo_va->base.bo; + ret = drm_exec_prepare_obj(exec, &bo->tbo.base, 2); + if (unlikely(ret)) + return ret; - return ret; + amdgpu_bo_placement_from_domain(bo, bo->allowed_domains); + ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx); + if (ret) + return ret; + + /* This moves the bo_va to the done list */ + ret = amdgpu_vm_bo_update(adev, bo_va, false); + if (ret) + return ret; + + spin_lock(&vm->status_lock); + } + spin_unlock(&vm->status_lock); + + return 0; } +/* Make sure the whole VM is ready to be used */ static int -amdgpu_userq_validate_bos(struct amdgpu_userq_mgr *uq_mgr) +amdgpu_userq_vm_validate(struct amdgpu_userq_mgr *uq_mgr) { struct amdgpu_fpriv *fpriv = uq_mgr_to_fpriv(uq_mgr); - struct amdgpu_vm *vm = &fpriv->vm; struct amdgpu_device *adev = uq_mgr->adev; + struct amdgpu_vm *vm = &fpriv->vm; struct amdgpu_bo_va *bo_va; - struct ww_acquire_ctx *ticket; struct drm_exec exec; - struct amdgpu_bo *bo; - struct dma_resv *resv; - bool clear, unlock; - int ret = 0; + int ret; drm_exec_init(&exec, DRM_EXEC_IGNORE_DUPLICATES, 0); drm_exec_until_all_locked(&exec) { - ret = amdgpu_vm_lock_pd(vm, &exec, 2); + ret = amdgpu_vm_lock_pd(vm, &exec, 1); drm_exec_retry_on_contention(&exec); - if (unlikely(ret)) { - 
drm_file_err(uq_mgr->file, "Failed to lock PD\n"); + if (unlikely(ret)) goto unlock_all; - } - /* Lock the done list */ - list_for_each_entry(bo_va, &vm->done, base.vm_status) { - bo = bo_va->base.bo; - if (!bo) - continue; - - ret = drm_exec_lock_obj(&exec, &bo->tbo.base); - drm_exec_retry_on_contention(&exec); - if (unlikely(ret)) - goto unlock_all; - } - } - - spin_lock(&vm->status_lock); - while (!list_empty(&vm->moved)) { - bo_va = list_first_entry(&vm->moved, struct amdgpu_bo_va, - base.vm_status); - spin_unlock(&vm->status_lock); - - /* Per VM BOs never need to bo cleared in the page tables */ - ret = amdgpu_vm_bo_update(adev, bo_va, false); - if (ret) + ret = amdgpu_vm_lock_done_list(vm, &exec, 1); + drm_exec_retry_on_contention(&exec); + if (unlikely(ret)) goto unlock_all; - spin_lock(&vm->status_lock); - } - ticket = &exec.ticket; - while (!list_empty(&vm->invalidated)) { - bo_va = list_first_entry(&vm->invalidated, struct amdgpu_bo_va, - base.vm_status); - resv = bo_va->base.bo->tbo.base.resv; - spin_unlock(&vm->status_lock); - - bo = bo_va->base.bo; - ret = amdgpu_userq_validate_vm_bo(NULL, bo); - if (ret) { - drm_file_err(uq_mgr->file, "Failed to validate BO\n"); + /* This validates PDs, PTs and per VM BOs */ + ret = amdgpu_vm_validate(adev, vm, NULL, + amdgpu_userq_validate_vm, + NULL); + if (unlikely(ret)) goto unlock_all; - } - /* Try to reserve the BO to avoid clearing its ptes */ - if (!adev->debug_vm && dma_resv_trylock(resv)) { - clear = false; - unlock = true; - /* The caller is already holding the reservation lock */ - } else if (dma_resv_locking_ctx(resv) == ticket) { - clear = false; - unlock = false; - /* Somebody else is using the BO right now */ - } else { - clear = true; - unlock = false; - } + /* This locks and validates the remaining evicted BOs */ + ret = amdgpu_userq_bo_validate(adev, &exec, vm); + drm_exec_retry_on_contention(&exec); + if (unlikely(ret)) + goto unlock_all; + } - ret = amdgpu_vm_bo_update(adev, bo_va, clear); + ret = amdgpu_vm_handle_moved(adev, vm, NULL); + if (ret) + goto unlock_all; - if (unlock) - dma_resv_unlock(resv); - if (ret) - goto unlock_all; + ret = amdgpu_vm_update_pdes(adev, vm, false); + if (ret) + goto unlock_all; - spin_lock(&vm->status_lock); - } - spin_unlock(&vm->status_lock); + /* + * We need to wait for all VM updates to finish before restarting the + * queues. Using the done list like that is now ok since everything is + * locked in place. 
+ */ + list_for_each_entry(bo_va, &vm->done, base.vm_status) + dma_fence_wait(bo_va->last_pt_update, false); + dma_fence_wait(vm->last_update, false); ret = amdgpu_eviction_fence_replace_fence(&fpriv->evf_mgr, &exec); if (ret) @@ -664,11 +824,11 @@ static void amdgpu_userq_restore_worker(struct work_struct *work) struct amdgpu_fpriv *fpriv = uq_mgr_to_fpriv(uq_mgr); int ret; - flush_work(&fpriv->evf_mgr.suspend_work.work); + flush_delayed_work(&fpriv->evf_mgr.suspend_work); mutex_lock(&uq_mgr->userq_mutex); - ret = amdgpu_userq_validate_bos(uq_mgr); + ret = amdgpu_userq_vm_validate(uq_mgr); if (ret) { drm_file_err(uq_mgr->file, "Failed to validate BOs to restore\n"); goto unlock; @@ -693,7 +853,7 @@ amdgpu_userq_evict_all(struct amdgpu_userq_mgr *uq_mgr) /* Try to unmap all the queues in this process ctx */ idr_for_each_entry(&uq_mgr->userq_idr, queue, queue_id) { - r = amdgpu_userq_unmap_helper(uq_mgr, queue); + r = amdgpu_userq_preempt_helper(uq_mgr, queue); if (r) ret = r; } @@ -819,7 +979,10 @@ int amdgpu_userq_suspend(struct amdgpu_device *adev) cancel_delayed_work_sync(&uqm->resume_work); mutex_lock(&uqm->userq_mutex); idr_for_each_entry(&uqm->userq_idr, queue, queue_id) { - r = amdgpu_userq_unmap_helper(uqm, queue); + if (adev->in_s0ix) + r = amdgpu_userq_preempt_helper(uqm, queue); + else + r = amdgpu_userq_unmap_helper(uqm, queue); if (r) ret = r; } @@ -844,7 +1007,10 @@ int amdgpu_userq_resume(struct amdgpu_device *adev) list_for_each_entry_safe(uqm, tmp, &adev->userq_mgr_list, list) { mutex_lock(&uqm->userq_mutex); idr_for_each_entry(&uqm->userq_idr, queue, queue_id) { - r = amdgpu_userq_map_helper(uqm, queue); + if (adev->in_s0ix) + r = amdgpu_userq_restore_helper(uqm, queue); + else + r = amdgpu_userq_map_helper(uqm, queue); if (r) ret = r; } @@ -878,7 +1044,7 @@ int amdgpu_userq_stop_sched_for_enforce_isolation(struct amdgpu_device *adev, if (((queue->queue_type == AMDGPU_HW_IP_GFX) || (queue->queue_type == AMDGPU_HW_IP_COMPUTE)) && (queue->xcp_id == idx)) { - r = amdgpu_userq_unmap_helper(uqm, queue); + r = amdgpu_userq_preempt_helper(uqm, queue); if (r) ret = r; } @@ -912,7 +1078,7 @@ int amdgpu_userq_start_sched_for_enforce_isolation(struct amdgpu_device *adev, if (((queue->queue_type == AMDGPU_HW_IP_GFX) || (queue->queue_type == AMDGPU_HW_IP_COMPUTE)) && (queue->xcp_id == idx)) { - r = amdgpu_userq_map_helper(uqm, queue); + r = amdgpu_userq_restore_helper(uqm, queue); if (r) ret = r; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h index ec040c2fd6c9..c027dd916672 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h @@ -65,6 +65,7 @@ struct amdgpu_usermode_queue { struct dma_fence *last_fence; u32 xcp_id; int priority; + struct dentry *debugfs_queue; }; struct amdgpu_userq_funcs { @@ -77,6 +78,12 @@ struct amdgpu_userq_funcs { struct amdgpu_usermode_queue *queue); int (*map)(struct amdgpu_userq_mgr *uq_mgr, struct amdgpu_usermode_queue *queue); + int (*preempt)(struct amdgpu_userq_mgr *uq_mgr, + struct amdgpu_usermode_queue *queue); + int (*restore)(struct amdgpu_userq_mgr *uq_mgr, + struct amdgpu_usermode_queue *queue); + int (*detect_and_reset)(struct amdgpu_device *adev, + int queue_type); }; /* Usermode queues for gfx */ @@ -113,8 +120,6 @@ void amdgpu_userq_destroy_object(struct amdgpu_userq_mgr *uq_mgr, void amdgpu_userq_evict(struct amdgpu_userq_mgr *uq_mgr, struct amdgpu_eviction_fence *ev_fence); -int amdgpu_userq_active(struct amdgpu_userq_mgr *uq_mgr); - void 
amdgpu_userq_ensure_ev_fence(struct amdgpu_userq_mgr *userq_mgr, struct amdgpu_eviction_fence_mgr *evf_mgr); @@ -132,4 +137,6 @@ int amdgpu_userq_stop_sched_for_enforce_isolation(struct amdgpu_device *adev, int amdgpu_userq_start_sched_for_enforce_isolation(struct amdgpu_device *adev, u32 idx); +int amdgpu_userq_input_va_validate(struct amdgpu_vm *vm, u64 addr, + u64 expected_size); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c index c2a983ff23c9..761bad98da3e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c @@ -67,6 +67,14 @@ static u64 amdgpu_userq_fence_read(struct amdgpu_userq_fence_driver *fence_drv) return le64_to_cpu(*fence_drv->cpu_addr); } +static void +amdgpu_userq_fence_write(struct amdgpu_userq_fence_driver *fence_drv, + u64 seq) +{ + if (fence_drv->cpu_addr) + *fence_drv->cpu_addr = cpu_to_le64(seq); +} + int amdgpu_userq_fence_driver_alloc(struct amdgpu_device *adev, struct amdgpu_usermode_queue *userq) { @@ -276,7 +284,7 @@ static int amdgpu_userq_fence_create(struct amdgpu_usermode_queue *userq, /* Check if hardware has already processed the job */ spin_lock_irqsave(&fence_drv->fence_list_lock, flags); - if (!dma_fence_is_signaled_locked(fence)) + if (!dma_fence_is_signaled(fence)) list_add_tail(&userq_fence->link, &fence_drv->fences); else dma_fence_put(fence); @@ -408,6 +416,40 @@ static void amdgpu_userq_fence_cleanup(struct dma_fence *fence) dma_fence_put(fence); } +static void +amdgpu_userq_fence_driver_set_error(struct amdgpu_userq_fence *fence, + int error) +{ + struct amdgpu_userq_fence_driver *fence_drv = fence->fence_drv; + unsigned long flags; + struct dma_fence *f; + + spin_lock_irqsave(&fence_drv->fence_list_lock, flags); + + f = rcu_dereference_protected(&fence->base, + lockdep_is_held(&fence_drv->fence_list_lock)); + if (f && !dma_fence_is_signaled_locked(f)) + dma_fence_set_error(f, error); + spin_unlock_irqrestore(&fence_drv->fence_list_lock, flags); +} + +void +amdgpu_userq_fence_driver_force_completion(struct amdgpu_usermode_queue *userq) +{ + struct dma_fence *f = userq->last_fence; + + if (f) { + struct amdgpu_userq_fence *fence = to_amdgpu_userq_fence(f); + struct amdgpu_userq_fence_driver *fence_drv = fence->fence_drv; + u64 wptr = fence->base.seqno; + + amdgpu_userq_fence_driver_set_error(fence, -ECANCELED); + amdgpu_userq_fence_write(fence_drv, wptr); + amdgpu_userq_fence_driver_process(fence_drv); + + } +} + int amdgpu_userq_signal_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.h index 97a125ab8a78..d76add2afc77 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.h @@ -67,6 +67,7 @@ int amdgpu_userq_fence_driver_alloc(struct amdgpu_device *adev, struct amdgpu_usermode_queue *userq); void amdgpu_userq_fence_driver_free(struct amdgpu_usermode_queue *userq); void amdgpu_userq_fence_driver_process(struct amdgpu_userq_fence_driver *fence_drv); +void amdgpu_userq_fence_driver_force_completion(struct amdgpu_usermode_queue *userq); void amdgpu_userq_fence_driver_destroy(struct kref *ref); int amdgpu_userq_signal_ioctl(struct drm_device *dev, void *data, struct drm_file *filp); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_utils.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_utils.h new file mode 100644 index 000000000000..1e40ca3b1584 --- 
/dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_utils.h @@ -0,0 +1,91 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright 2025 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef AMDGPU_UTILS_H_ +#define AMDGPU_UTILS_H_ + +/* ---------- Generic 2‑bit capability attribute encoding ---------- + * 00 INVALID, 01 RO, 10 WO, 11 RW + */ +enum amdgpu_cap_attr { + AMDGPU_CAP_ATTR_INVALID = 0, + AMDGPU_CAP_ATTR_RO = 1 << 0, + AMDGPU_CAP_ATTR_WO = 1 << 1, + AMDGPU_CAP_ATTR_RW = (AMDGPU_CAP_ATTR_RO | AMDGPU_CAP_ATTR_WO), +}; + +#define AMDGPU_CAP_ATTR_BITS 2 +#define AMDGPU_CAP_ATTR_MAX ((1U << AMDGPU_CAP_ATTR_BITS) - 1) + +/* Internal helper to build helpers for a given enum NAME */ +#define DECLARE_ATTR_CAP_CLASS_HELPERS(NAME) \ +enum { NAME##_BITMAP_BITS = NAME##_COUNT * AMDGPU_CAP_ATTR_BITS }; \ +struct NAME##_caps { \ + DECLARE_BITMAP(bmap, NAME##_BITMAP_BITS); \ +}; \ +static inline unsigned int NAME##_ATTR_START(enum NAME##_cap_id cap) \ +{ return (unsigned int)cap * AMDGPU_CAP_ATTR_BITS; } \ +static inline void NAME##_attr_init(struct NAME##_caps *c) \ +{ if (c) bitmap_zero(c->bmap, NAME##_BITMAP_BITS); } \ +static inline int NAME##_attr_set(struct NAME##_caps *c, \ + enum NAME##_cap_id cap, enum amdgpu_cap_attr attr) \ +{ \ + if (!c) \ + return -EINVAL; \ + if (cap >= NAME##_COUNT) \ + return -EINVAL; \ + if ((unsigned int)attr > AMDGPU_CAP_ATTR_MAX) \ + return -EINVAL; \ + bitmap_write(c->bmap, (unsigned long)attr, \ + NAME##_ATTR_START(cap), AMDGPU_CAP_ATTR_BITS); \ + return 0; \ +} \ +static inline int NAME##_attr_get(const struct NAME##_caps *c, \ + enum NAME##_cap_id cap, enum amdgpu_cap_attr *out) \ +{ \ + unsigned long v; \ + if (!c || !out) \ + return -EINVAL; \ + if (cap >= NAME##_COUNT) \ + return -EINVAL; \ + v = bitmap_read(c->bmap, NAME##_ATTR_START(cap), AMDGPU_CAP_ATTR_BITS); \ + *out = (enum amdgpu_cap_attr)v; \ + return 0; \ +} \ +static inline bool NAME##_cap_is_ro(const struct NAME##_caps *c, enum NAME##_cap_id id) \ +{ enum amdgpu_cap_attr a; return !NAME##_attr_get(c, id, &a) && a == AMDGPU_CAP_ATTR_RO; } \ +static inline bool NAME##_cap_is_wo(const struct NAME##_caps *c, enum NAME##_cap_id id) \ +{ enum amdgpu_cap_attr a; return !NAME##_attr_get(c, id, &a) && a == AMDGPU_CAP_ATTR_WO; } \ +static inline bool NAME##_cap_is_rw(const struct NAME##_caps *c, enum NAME##_cap_id id) \ +{ enum amdgpu_cap_attr a; return !NAME##_attr_get(c, id, &a) && a == AMDGPU_CAP_ATTR_RW; } + +/* Element expander 
for enum creation */ +#define _CAP_ENUM_ELEM(x) x, + +/* Public macro: declare enum + helpers from an X‑macro list */ +#define DECLARE_ATTR_CAP_CLASS(NAME, LIST_MACRO) \ + enum NAME##_cap_id { LIST_MACRO(_CAP_ENUM_ELEM) NAME##_COUNT }; \ + DECLARE_ATTR_CAP_CLASS_HELPERS(NAME) + +#endif /* AMDGPU_UTILS_H_ */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c index 74758b5ffc6c..5c38f0d30c87 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c @@ -1136,7 +1136,8 @@ static int amdgpu_uvd_send_msg(struct amdgpu_ring *ring, struct amdgpu_bo *bo, r = amdgpu_job_alloc_with_ib(ring->adev, &adev->uvd.entity, AMDGPU_FENCE_OWNER_UNDEFINED, 64, direct ? AMDGPU_IB_POOL_DIRECT : - AMDGPU_IB_POOL_DELAYED, &job); + AMDGPU_IB_POOL_DELAYED, &job, + AMDGPU_KERNEL_JOB_ID_VCN_RING_TEST); if (r) return r; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c index b9060bcd4806..ce318f5de047 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c @@ -449,7 +449,7 @@ static int amdgpu_vce_get_create_msg(struct amdgpu_ring *ring, uint32_t handle, r = amdgpu_job_alloc_with_ib(ring->adev, &ring->adev->vce.entity, AMDGPU_FENCE_OWNER_UNDEFINED, ib_size_dw * 4, AMDGPU_IB_POOL_DIRECT, - &job); + &job, AMDGPU_KERNEL_JOB_ID_VCN_RING_TEST); if (r) return r; @@ -540,7 +540,8 @@ static int amdgpu_vce_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle, AMDGPU_FENCE_OWNER_UNDEFINED, ib_size_dw * 4, direct ? AMDGPU_IB_POOL_DIRECT : - AMDGPU_IB_POOL_DELAYED, &job); + AMDGPU_IB_POOL_DELAYED, &job, + AMDGPU_KERNEL_JOB_ID_VCN_RING_TEST); if (r) return r; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c index c8885c3d54b3..5e0786ea911b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c @@ -92,6 +92,7 @@ MODULE_FIRMWARE(FIRMWARE_VCN5_0_0); MODULE_FIRMWARE(FIRMWARE_VCN5_0_1); static void amdgpu_vcn_idle_work_handler(struct work_struct *work); +static void amdgpu_vcn_reg_dump_fini(struct amdgpu_device *adev); int amdgpu_vcn_early_init(struct amdgpu_device *adev, int i) { @@ -134,6 +135,7 @@ int amdgpu_vcn_sw_init(struct amdgpu_device *adev, int i) mutex_init(&adev->vcn.inst[i].vcn1_jpeg1_workaround); mutex_init(&adev->vcn.inst[i].vcn_pg_lock); + mutex_init(&adev->vcn.inst[i].engine_reset_mutex); atomic_set(&adev->vcn.inst[i].total_submission_cnt, 0); INIT_DELAYED_WORK(&adev->vcn.inst[i].idle_work, amdgpu_vcn_idle_work_handler); atomic_set(&adev->vcn.inst[i].dpg_enc_submission_cnt, 0); @@ -183,16 +185,16 @@ int amdgpu_vcn_sw_init(struct amdgpu_device *adev, int i) dec_ver = (le32_to_cpu(hdr->ucode_version) >> 24) & 0xf; vep = (le32_to_cpu(hdr->ucode_version) >> 28) & 0xf; dev_info(adev->dev, - "Found VCN firmware Version ENC: %u.%u DEC: %u VEP: %u Revision: %u\n", - enc_major, enc_minor, dec_ver, vep, fw_rev); + "[VCN instance %d] Found VCN firmware Version ENC: %u.%u DEC: %u VEP: %u Revision: %u\n", + i, enc_major, enc_minor, dec_ver, vep, fw_rev); } else { unsigned int version_major, version_minor, family_id; family_id = le32_to_cpu(hdr->ucode_version) & 0xff; version_major = (le32_to_cpu(hdr->ucode_version) >> 24) & 0xff; version_minor = (le32_to_cpu(hdr->ucode_version) >> 8) & 0xff; - dev_info(adev->dev, "Found VCN firmware Version: %u.%u Family ID: %u\n", - version_major, version_minor, family_id); + dev_info(adev->dev, "[VCN instance %d] Found VCN firmware 
Version: %u.%u Family ID: %u\n", + i, version_major, version_minor, family_id); } bo_size = AMDGPU_VCN_STACK_SIZE + AMDGPU_VCN_CONTEXT_SIZE; @@ -255,12 +257,12 @@ int amdgpu_vcn_sw_init(struct amdgpu_device *adev, int i) return 0; } -int amdgpu_vcn_sw_fini(struct amdgpu_device *adev, int i) +void amdgpu_vcn_sw_fini(struct amdgpu_device *adev, int i) { int j; if (adev->vcn.harvest_config & (1 << i)) - return 0; + return; amdgpu_bo_free_kernel( &adev->vcn.inst[i].dpg_sram_bo, @@ -284,10 +286,12 @@ int amdgpu_vcn_sw_fini(struct amdgpu_device *adev, int i) amdgpu_ucode_release(&adev->vcn.inst[0].fw); adev->vcn.inst[i].fw = NULL; } + + if (adev->vcn.reg_list) + amdgpu_vcn_reg_dump_fini(adev); + mutex_destroy(&adev->vcn.inst[i].vcn_pg_lock); mutex_destroy(&adev->vcn.inst[i].vcn1_jpeg1_workaround); - - return 0; } bool amdgpu_vcn_is_disabled_vcn(struct amdgpu_device *adev, enum vcn_ring_type type, uint32_t vcn_instance) @@ -351,8 +355,6 @@ int amdgpu_vcn_suspend(struct amdgpu_device *adev, int i) if (adev->vcn.harvest_config & (1 << i)) return 0; - cancel_delayed_work_sync(&adev->vcn.inst[i].idle_work); - /* err_event_athub and dpc recovery will corrupt VCPU buffer, so we need to * restore fw data and clear buffer in amdgpu_vcn_resume() */ if (in_ras_intr || adev->pcie_reset_ctx.in_link_reset) @@ -404,6 +406,54 @@ int amdgpu_vcn_resume(struct amdgpu_device *adev, int i) return 0; } +void amdgpu_vcn_get_profile(struct amdgpu_device *adev) +{ + int r; + + mutex_lock(&adev->vcn.workload_profile_mutex); + + if (adev->vcn.workload_profile_active) { + mutex_unlock(&adev->vcn.workload_profile_mutex); + return; + } + r = amdgpu_dpm_switch_power_profile(adev, PP_SMC_POWER_PROFILE_VIDEO, + true); + if (r) + dev_warn(adev->dev, + "(%d) failed to enable video power profile mode\n", r); + else + adev->vcn.workload_profile_active = true; + mutex_unlock(&adev->vcn.workload_profile_mutex); +} + +void amdgpu_vcn_put_profile(struct amdgpu_device *adev) +{ + bool pg = true; + int r, i; + + mutex_lock(&adev->vcn.workload_profile_mutex); + for (i = 0; i < adev->vcn.num_vcn_inst; i++) { + if (adev->vcn.inst[i].cur_state != AMD_PG_STATE_GATE) { + pg = false; + break; + } + } + + if (pg) { + r = amdgpu_dpm_switch_power_profile( + adev, PP_SMC_POWER_PROFILE_VIDEO, false); + if (r) + dev_warn( + adev->dev, + "(%d) failed to disable video power profile mode\n", + r); + else + adev->vcn.workload_profile_active = false; + } + + mutex_unlock(&adev->vcn.workload_profile_mutex); +} + static void amdgpu_vcn_idle_work_handler(struct work_struct *work) { struct amdgpu_vcn_inst *vcn_inst = @@ -411,7 +461,6 @@ static void amdgpu_vcn_idle_work_handler(struct work_struct *work) struct amdgpu_device *adev = vcn_inst->adev; unsigned int fences = 0, fence[AMDGPU_MAX_VCN_INSTANCES] = {0}; unsigned int i = vcn_inst->inst, j; - int r = 0; if (adev->vcn.harvest_config & (1 << i)) return; @@ -437,16 +486,11 @@ static void amdgpu_vcn_idle_work_handler(struct work_struct *work) fences += fence[i]; if (!fences && !atomic_read(&vcn_inst->total_submission_cnt)) { + mutex_lock(&vcn_inst->vcn_pg_lock); vcn_inst->set_pg_state(vcn_inst, AMD_PG_STATE_GATE); - mutex_lock(&adev->vcn.workload_profile_mutex); - if (adev->vcn.workload_profile_active) { - r = amdgpu_dpm_switch_power_profile(adev, PP_SMC_POWER_PROFILE_VIDEO, - false); - if (r) - dev_warn(adev->dev, "(%d) failed to disable video power profile mode\n", r); - adev->vcn.workload_profile_active = false; - } - mutex_unlock(&adev->vcn.workload_profile_mutex); + mutex_unlock(&vcn_inst->vcn_pg_lock); 
+ amdgpu_vcn_put_profile(adev); + } else { schedule_delayed_work(&vcn_inst->idle_work, VCN_IDLE_TIMEOUT); } @@ -456,30 +500,11 @@ void amdgpu_vcn_ring_begin_use(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; struct amdgpu_vcn_inst *vcn_inst = &adev->vcn.inst[ring->me]; - int r = 0; atomic_inc(&vcn_inst->total_submission_cnt); cancel_delayed_work_sync(&vcn_inst->idle_work); - /* We can safely return early here because we've cancelled the - * the delayed work so there is no one else to set it to false - * and we don't care if someone else sets it to true. - */ - if (adev->vcn.workload_profile_active) - goto pg_lock; - - mutex_lock(&adev->vcn.workload_profile_mutex); - if (!adev->vcn.workload_profile_active) { - r = amdgpu_dpm_switch_power_profile(adev, PP_SMC_POWER_PROFILE_VIDEO, - true); - if (r) - dev_warn(adev->dev, "(%d) failed to switch to video power profile mode\n", r); - adev->vcn.workload_profile_active = true; - } - mutex_unlock(&adev->vcn.workload_profile_mutex); - -pg_lock: mutex_lock(&vcn_inst->vcn_pg_lock); vcn_inst->set_pg_state(vcn_inst, AMD_PG_STATE_UNGATE); @@ -507,6 +532,7 @@ pg_lock: vcn_inst->pause_dpg_mode(vcn_inst, &new_state); } mutex_unlock(&vcn_inst->vcn_pg_lock); + amdgpu_vcn_get_profile(adev); } void amdgpu_vcn_ring_end_use(struct amdgpu_ring *ring) @@ -600,7 +626,7 @@ static int amdgpu_vcn_dec_send_msg(struct amdgpu_ring *ring, r = amdgpu_job_alloc_with_ib(ring->adev, NULL, NULL, 64, AMDGPU_IB_POOL_DIRECT, - &job); + &job, AMDGPU_KERNEL_JOB_ID_VCN_RING_TEST); if (r) goto err; @@ -780,7 +806,7 @@ static int amdgpu_vcn_dec_sw_send_msg(struct amdgpu_ring *ring, r = amdgpu_job_alloc_with_ib(ring->adev, NULL, NULL, ib_size_dw * 4, AMDGPU_IB_POOL_DIRECT, - &job); + &job, AMDGPU_KERNEL_JOB_ID_VCN_RING_TEST); if (r) goto err; @@ -910,7 +936,7 @@ static int amdgpu_vcn_enc_get_create_msg(struct amdgpu_ring *ring, uint32_t hand r = amdgpu_job_alloc_with_ib(ring->adev, NULL, NULL, ib_size_dw * 4, AMDGPU_IB_POOL_DIRECT, - &job); + &job, AMDGPU_KERNEL_JOB_ID_VCN_RING_TEST); if (r) return r; @@ -977,7 +1003,7 @@ static int amdgpu_vcn_enc_get_destroy_msg(struct amdgpu_ring *ring, uint32_t han r = amdgpu_job_alloc_with_ib(ring->adev, NULL, NULL, ib_size_dw * 4, AMDGPU_IB_POOL_DIRECT, - &job); + &job, AMDGPU_KERNEL_JOB_ID_VCN_RING_TEST); if (r) return r; @@ -1131,7 +1157,7 @@ static ssize_t amdgpu_debugfs_vcn_fwlog_read(struct file *f, char __user *buf, { struct amdgpu_vcn_inst *vcn; void *log_buf; - volatile struct amdgpu_vcn_fwlog *plog; + struct amdgpu_vcn_fwlog *plog; unsigned int read_pos, write_pos, available, i, read_bytes = 0; unsigned int read_num[2] = {0}; @@ -1144,7 +1170,7 @@ static ssize_t amdgpu_debugfs_vcn_fwlog_read(struct file *f, char __user *buf, log_buf = vcn->fw_shared.cpu_addr + vcn->fw_shared.mem_size; - plog = (volatile struct amdgpu_vcn_fwlog *)log_buf; + plog = (struct amdgpu_vcn_fwlog *)log_buf; read_pos = plog->rptr; write_pos = plog->wptr; @@ -1211,11 +1237,11 @@ void amdgpu_debugfs_vcn_fwlog_init(struct amdgpu_device *adev, uint8_t i, void amdgpu_vcn_fwlog_init(struct amdgpu_vcn_inst *vcn) { #if defined(CONFIG_DEBUG_FS) - volatile uint32_t *flag = vcn->fw_shared.cpu_addr; + uint32_t *flag = vcn->fw_shared.cpu_addr; void *fw_log_cpu_addr = vcn->fw_shared.cpu_addr + vcn->fw_shared.mem_size; uint64_t fw_log_gpu_addr = vcn->fw_shared.gpu_addr + vcn->fw_shared.mem_size; - volatile struct amdgpu_vcn_fwlog *log_buf = fw_log_cpu_addr; - volatile struct amdgpu_fw_shared_fw_logging *fw_log = vcn->fw_shared.cpu_addr + struct 
amdgpu_vcn_fwlog *log_buf = fw_log_cpu_addr; + struct amdgpu_fw_shared_fw_logging *fw_log = vcn->fw_shared.cpu_addr + vcn->fw_shared.log_offset; *flag |= cpu_to_le32(AMDGPU_VCN_FW_LOGGING_FLAG); fw_log->is_enabled = 1; @@ -1451,3 +1477,161 @@ int vcn_set_powergating_state(struct amdgpu_ip_block *ip_block, return ret; } + +/** + * amdgpu_vcn_reset_engine - Reset a specific VCN engine + * @adev: Pointer to the AMDGPU device + * @instance_id: VCN engine instance to reset + * + * Returns: 0 on success, or a negative error code on failure. + */ +static int amdgpu_vcn_reset_engine(struct amdgpu_device *adev, + uint32_t instance_id) +{ + struct amdgpu_vcn_inst *vinst = &adev->vcn.inst[instance_id]; + int r, i; + + mutex_lock(&vinst->engine_reset_mutex); + /* Stop the scheduler's work queue for the dec and enc rings if they are running. + * This ensures that no new tasks are submitted to the queues while + * the reset is in progress. + */ + drm_sched_wqueue_stop(&vinst->ring_dec.sched); + for (i = 0; i < vinst->num_enc_rings; i++) + drm_sched_wqueue_stop(&vinst->ring_enc[i].sched); + + /* Perform the VCN reset for the specified instance */ + r = vinst->reset(vinst); + if (r) + goto unlock; + r = amdgpu_ring_test_ring(&vinst->ring_dec); + if (r) + goto unlock; + for (i = 0; i < vinst->num_enc_rings; i++) { + r = amdgpu_ring_test_ring(&vinst->ring_enc[i]); + if (r) + goto unlock; + } + amdgpu_fence_driver_force_completion(&vinst->ring_dec); + for (i = 0; i < vinst->num_enc_rings; i++) + amdgpu_fence_driver_force_completion(&vinst->ring_enc[i]); + + /* Restart the scheduler's work queue for the dec and enc rings + * if they were stopped by this function. This allows new tasks + * to be submitted to the queues after the reset is complete. + */ + drm_sched_wqueue_start(&vinst->ring_dec.sched); + for (i = 0; i < vinst->num_enc_rings; i++) + drm_sched_wqueue_start(&vinst->ring_enc[i].sched); + +unlock: + mutex_unlock(&vinst->engine_reset_mutex); + + return r; +} + +/** + * amdgpu_vcn_ring_reset - Reset a VCN ring + * @ring: ring to reset + * @vmid: vmid of guilty job + * @timedout_fence: fence of timed out job + * + * This helper is for VCN blocks without unified queues because + * resetting the engine resets all queues in that case. With + * unified queues we have one queue per engine. + * Returns: 0 on success, or a negative error code on failure. 
+ */ +int amdgpu_vcn_ring_reset(struct amdgpu_ring *ring, + unsigned int vmid, + struct amdgpu_fence *timedout_fence) +{ + struct amdgpu_device *adev = ring->adev; + + if (adev->vcn.inst[ring->me].using_unified_queue) + return -EINVAL; + + return amdgpu_vcn_reset_engine(adev, ring->me); +} + +int amdgpu_vcn_reg_dump_init(struct amdgpu_device *adev, + const struct amdgpu_hwip_reg_entry *reg, u32 count) +{ + adev->vcn.ip_dump = kcalloc(adev->vcn.num_vcn_inst * count, + sizeof(uint32_t), GFP_KERNEL); + if (!adev->vcn.ip_dump) + return -ENOMEM; + adev->vcn.reg_list = reg; + adev->vcn.reg_count = count; + + return 0; +} + +static void amdgpu_vcn_reg_dump_fini(struct amdgpu_device *adev) +{ + kfree(adev->vcn.ip_dump); + adev->vcn.ip_dump = NULL; + adev->vcn.reg_list = NULL; + adev->vcn.reg_count = 0; +} + +void amdgpu_vcn_dump_ip_state(struct amdgpu_ip_block *ip_block) +{ + struct amdgpu_device *adev = ip_block->adev; + int i, j; + bool is_powered; + u32 inst_off; + + if (!adev->vcn.ip_dump) + return; + + for (i = 0; i < adev->vcn.num_vcn_inst; i++) { + if (adev->vcn.harvest_config & (1 << i)) + continue; + + inst_off = i * adev->vcn.reg_count; + /* mmUVD_POWER_STATUS is always readable and is the first in reg_list */ + adev->vcn.ip_dump[inst_off] = + RREG32(SOC15_REG_ENTRY_OFFSET_INST(adev->vcn.reg_list[0], i)); + is_powered = (adev->vcn.ip_dump[inst_off] & + UVD_POWER_STATUS__UVD_POWER_STATUS_TILES_OFF) != + UVD_POWER_STATUS__UVD_POWER_STATUS_TILES_OFF; + + if (is_powered) + for (j = 1; j < adev->vcn.reg_count; j++) + adev->vcn.ip_dump[inst_off + j] = + RREG32(SOC15_REG_ENTRY_OFFSET_INST(adev->vcn.reg_list[j], i)); + } +} + +void amdgpu_vcn_print_ip_state(struct amdgpu_ip_block *ip_block, struct drm_printer *p) +{ + struct amdgpu_device *adev = ip_block->adev; + int i, j; + bool is_powered; + u32 inst_off; + + if (!adev->vcn.ip_dump) + return; + + drm_printf(p, "num_instances:%d\n", adev->vcn.num_vcn_inst); + for (i = 0; i < adev->vcn.num_vcn_inst; i++) { + if (adev->vcn.harvest_config & (1 << i)) { + drm_printf(p, "\nHarvested Instance:VCN%d Skipping dump\n", i); + continue; + } + + inst_off = i * adev->vcn.reg_count; + is_powered = (adev->vcn.ip_dump[inst_off] & + UVD_POWER_STATUS__UVD_POWER_STATUS_TILES_OFF) != + UVD_POWER_STATUS__UVD_POWER_STATUS_TILES_OFF; + + if (is_powered) { + drm_printf(p, "\nActive Instance:VCN%d\n", i); + for (j = 0; j < adev->vcn.reg_count; j++) + drm_printf(p, "%-50s \t 0x%08x\n", adev->vcn.reg_list[j].reg_name, + adev->vcn.ip_dump[inst_off + j]); + } else { + drm_printf(p, "\nInactive Instance:VCN%d\n", i); + } + } +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h index 83adf81defc7..dc8a17bcc3c8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h @@ -237,6 +237,8 @@ #define AMDGPU_DRM_KEY_INJECT_WORKAROUND_VCNFW_ASD_HANDSHAKING 2 +struct amdgpu_hwip_reg_entry; + enum amdgpu_vcn_caps { AMDGPU_VCN_RRMT_ENABLED, }; @@ -330,7 +332,9 @@ struct amdgpu_vcn_inst { struct dpg_pause_state *new_state); int (*set_pg_state)(struct amdgpu_vcn_inst *vinst, enum amd_powergating_state state); + int (*reset)(struct amdgpu_vcn_inst *vinst); bool using_unified_queue; + struct mutex engine_reset_mutex; }; struct amdgpu_vcn_ras { @@ -360,6 +364,8 @@ struct amdgpu_vcn { bool workload_profile_active; struct mutex workload_profile_mutex; + u32 reg_count; + const struct amdgpu_hwip_reg_entry *reg_list; }; struct amdgpu_fw_shared_rb_ptrs_struct { @@ -495,7 +501,7 @@ struct amdgpu_vcn5_fw_shared 
{ struct amdgpu_fw_shared_rb_setup rb_setup; struct amdgpu_fw_shared_smu_interface_info smu_dpm_interface; struct amdgpu_fw_shared_drm_key_wa drm_key_wa; - uint8_t pad3[9]; + uint8_t pad3[404]; }; #define VCN_BLOCK_ENCODE_DISABLE_MASK 0x80 @@ -510,7 +516,7 @@ enum vcn_ring_type { int amdgpu_vcn_early_init(struct amdgpu_device *adev, int i); int amdgpu_vcn_sw_init(struct amdgpu_device *adev, int i); -int amdgpu_vcn_sw_fini(struct amdgpu_device *adev, int i); +void amdgpu_vcn_sw_fini(struct amdgpu_device *adev, int i); int amdgpu_vcn_suspend(struct amdgpu_device *adev, int i); int amdgpu_vcn_resume(struct amdgpu_device *adev, int i); void amdgpu_vcn_ring_begin_use(struct amdgpu_ring *ring); @@ -552,5 +558,14 @@ void amdgpu_debugfs_vcn_sched_mask_init(struct amdgpu_device *adev); int vcn_set_powergating_state(struct amdgpu_ip_block *ip_block, enum amd_powergating_state state); +int amdgpu_vcn_ring_reset(struct amdgpu_ring *ring, + unsigned int vmid, + struct amdgpu_fence *guilty_fence); +int amdgpu_vcn_reg_dump_init(struct amdgpu_device *adev, + const struct amdgpu_hwip_reg_entry *reg, u32 count); +void amdgpu_vcn_dump_ip_state(struct amdgpu_ip_block *ip_block); +void amdgpu_vcn_print_ip_state(struct amdgpu_ip_block *ip_block, struct drm_printer *p); +void amdgpu_vcn_get_profile(struct amdgpu_device *adev); +void amdgpu_vcn_put_profile(struct amdgpu_device *adev); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c index 13f0cdeb59c4..f96beb96c75c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c @@ -598,8 +598,8 @@ static int amdgpu_virt_write_vf2pf_data(struct amdgpu_device *adev) vf2pf_info->driver_cert = 0; vf2pf_info->os_info.all = 0; - vf2pf_info->fb_usage = - ttm_resource_manager_usage(&adev->mman.vram_mgr.manager) >> 20; + vf2pf_info->fb_usage = ttm_resource_manager_used(&adev->mman.vram_mgr.manager) ? + ttm_resource_manager_usage(&adev->mman.vram_mgr.manager) >> 20 : 0; vf2pf_info->fb_vis_usage = amdgpu_vram_mgr_vis_usage(&adev->mman.vram_mgr) >> 20; vf2pf_info->fb_size = adev->gmc.real_vram_size >> 20; @@ -828,11 +828,14 @@ static void amdgpu_virt_init_ras(struct amdgpu_device *adev) { ratelimit_state_init(&adev->virt.ras.ras_error_cnt_rs, 5 * HZ, 1); ratelimit_state_init(&adev->virt.ras.ras_cper_dump_rs, 5 * HZ, 1); + ratelimit_state_init(&adev->virt.ras.ras_chk_criti_rs, 5 * HZ, 1); ratelimit_set_flags(&adev->virt.ras.ras_error_cnt_rs, RATELIMIT_MSG_ON_RELEASE); ratelimit_set_flags(&adev->virt.ras.ras_cper_dump_rs, RATELIMIT_MSG_ON_RELEASE); + ratelimit_set_flags(&adev->virt.ras.ras_chk_criti_rs, + RATELIMIT_MSG_ON_RELEASE); mutex_init(&adev->virt.ras.ras_telemetry_mutex); @@ -1501,3 +1504,55 @@ void amdgpu_virt_request_bad_pages(struct amdgpu_device *adev) if (virt->ops && virt->ops->req_bad_pages) virt->ops->req_bad_pages(adev); } + +static int amdgpu_virt_cache_chk_criti_hit(struct amdgpu_device *adev, + struct amdsriov_ras_telemetry *host_telemetry, + bool *hit) +{ + struct amd_sriov_ras_chk_criti *tmp = NULL; + uint32_t checksum, used_size; + + checksum = host_telemetry->header.checksum; + used_size = host_telemetry->header.used_size; + + if (used_size > (AMD_SRIOV_RAS_TELEMETRY_SIZE_KB << 10)) + return 0; + + tmp = kmemdup(&host_telemetry->body.chk_criti, used_size, GFP_KERNEL); + if (!tmp) + return -ENOMEM; + + if (checksum != amd_sriov_msg_checksum(tmp, used_size, 0, 0)) + goto out; + + if (hit) + *hit = tmp->hit ? 
true : false; + +out: + kfree(tmp); + + return 0; +} + +int amdgpu_virt_check_vf_critical_region(struct amdgpu_device *adev, u64 addr, bool *hit) +{ + struct amdgpu_virt *virt = &adev->virt; + int r = -EPERM; + + if (!virt->ops || !virt->ops->req_ras_chk_criti) + return -EOPNOTSUPP; + + /* Host allows 15 ras telemetry requests per 60 seconds, after which the Host + * will ignore incoming guest messages. Ratelimit the guest messages to + * prevent guest self DOS. + */ + if (__ratelimit(&virt->ras.ras_chk_criti_rs)) { + mutex_lock(&virt->ras.ras_telemetry_mutex); + if (!virt->ops->req_ras_chk_criti(adev, addr)) + r = amdgpu_virt_cache_chk_criti_hit( + adev, virt->fw_reserve.ras_telemetry, hit); + mutex_unlock(&virt->ras.ras_telemetry_mutex); + } + + return r; +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h index 577c6194db78..d1172c8e58c4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h @@ -98,6 +98,7 @@ struct amdgpu_virt_ops { int (*req_ras_err_count)(struct amdgpu_device *adev); int (*req_ras_cper_dump)(struct amdgpu_device *adev, u64 vf_rptr); int (*req_bad_pages)(struct amdgpu_device *adev); + int (*req_ras_chk_criti)(struct amdgpu_device *adev, u64 addr); }; /* @@ -152,8 +153,10 @@ enum AMDGIM_REG_ACCESS_FLAG { AMDGIM_FEATURE_MMHUB_REG_RLC_EN = (1 << 1), /* Use RLC to program GC regs */ AMDGIM_FEATURE_GC_REG_RLC_EN = (1 << 2), - /* Use PSP to program L1_TLB_CNTL*/ + /* Use PSP to program L1_TLB_CNTL */ AMDGIM_FEATURE_L1_TLB_CNTL_PSP_EN = (1 << 3), + /* Use RLCG to program SQ_CONFIG1 */ + AMDGIM_FEATURE_REG_ACCESS_SQ_CONFIG = (1 << 4), }; struct amdgim_pf2vf_info_v1 { @@ -250,10 +253,15 @@ struct amdgpu_virt_ras_err_handler_data { struct amdgpu_virt_ras { struct ratelimit_state ras_error_cnt_rs; struct ratelimit_state ras_cper_dump_rs; + struct ratelimit_state ras_chk_criti_rs; struct mutex ras_telemetry_mutex; uint64_t cper_rptr; }; +#define AMDGPU_VIRT_CAPS_LIST(X) X(AMDGPU_VIRT_CAP_POWER_LIMIT) + +DECLARE_ATTR_CAP_CLASS(amdgpu_virt, AMDGPU_VIRT_CAPS_LIST); + /* GPU virtualization */ struct amdgpu_virt { uint32_t caps; @@ -265,12 +273,14 @@ struct amdgpu_virt { struct amdgpu_irq_src rcv_irq; struct work_struct flr_work; - struct work_struct bad_pages_work; + struct work_struct req_bad_pages_work; + struct work_struct handle_bad_pages_work; struct amdgpu_mm_table mm_table; const struct amdgpu_virt_ops *ops; struct amdgpu_vf_error_buffer vf_errors; struct amdgpu_virt_fw_reserve fw_reserve; + struct amdgpu_virt_caps virt_caps; uint32_t gim_feature; uint32_t reg_access_mode; int req_init_data_ver; @@ -301,6 +311,9 @@ struct amdgpu_virt { union amd_sriov_ras_caps ras_telemetry_en_caps; struct amdgpu_virt_ras ras; struct amd_sriov_ras_telemetry_error_count count_cache; + + /* hibernate and resume with different VF feature for xgmi enabled system */ + bool is_xgmi_node_migrate_enabled; }; struct amdgpu_video_codec_info; @@ -343,6 +356,10 @@ struct amdgpu_video_codec_info; #define amdgpu_sriov_rlcg_error_report_enabled(adev) \ (amdgpu_sriov_reg_indirect_mmhub(adev) || amdgpu_sriov_reg_indirect_gc(adev)) +#define amdgpu_sriov_reg_access_sq_config(adev) \ +(amdgpu_sriov_vf((adev)) && \ + ((adev)->virt.reg_access & (AMDGIM_FEATURE_REG_ACCESS_SQ_CONFIG))) + #define amdgpu_passthrough(adev) \ ((adev)->virt.caps & AMDGPU_PASSTHROUGH_MODE) @@ -386,6 +403,10 @@ static inline bool is_virtual_machine(void) ((adev)->virt.gim_feature & AMDGIM_FEATURE_VCN_RB_DECOUPLE) #define 
amdgpu_sriov_is_mes_info_enable(adev) \ ((adev)->virt.gim_feature & AMDGIM_FEATURE_MES_INFO_ENABLE) + +#define amdgpu_virt_xgmi_migrate_enabled(adev) \ + ((adev)->virt.is_xgmi_node_migrate_enabled && (adev)->gmc.xgmi.node_segment_size != 0) + bool amdgpu_virt_mmio_blocked(struct amdgpu_device *adev); void amdgpu_virt_init_setting(struct amdgpu_device *adev); int amdgpu_virt_request_full_gpu(struct amdgpu_device *adev, bool init); @@ -434,4 +455,5 @@ int amdgpu_virt_ras_telemetry_post_reset(struct amdgpu_device *adev); bool amdgpu_virt_ras_telemetry_block_en(struct amdgpu_device *adev, enum amdgpu_ras_block block); void amdgpu_virt_request_bad_pages(struct amdgpu_device *adev); +int amdgpu_virt_check_vf_critical_region(struct amdgpu_device *adev, u64 addr, bool *hit); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c index 155bb9891a17..79bad9cbe2ab 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c @@ -14,7 +14,6 @@ #include "dce_v8_0.h" #endif #include "dce_v10_0.h" -#include "dce_v11_0.h" #include "ivsrcid/ivsrcid_vislands30.h" #include "amdgpu_vkms.h" #include "amdgpu_display.h" @@ -581,13 +580,6 @@ static int amdgpu_vkms_hw_init(struct amdgpu_ip_block *ip_block) case CHIP_TONGA: dce_v10_0_disable_dce(adev); break; - case CHIP_CARRIZO: - case CHIP_STONEY: - case CHIP_POLARIS10: - case CHIP_POLARIS11: - case CHIP_VEGAM: - dce_v11_0_disable_dce(adev); - break; case CHIP_TOPAZ: #ifdef CONFIG_DRM_AMDGPU_SI case CHIP_HAINAN: diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 0ff95a56c2ce..c1a801203949 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -128,43 +128,14 @@ struct amdgpu_vm_tlb_seq_struct { }; /** - * amdgpu_vm_set_pasid - manage pasid and vm ptr mapping - * - * @adev: amdgpu_device pointer - * @vm: amdgpu_vm pointer - * @pasid: the pasid the VM is using on this GPU - * - * Set the pasid this VM is using on this GPU, can also be used to remove the - * pasid by passing in zero. + * amdgpu_vm_assert_locked - check if VM is correctly locked + * @vm: the VM which should be tested + * + * Asserts that the VM root PD is locked. 
*/ -int amdgpu_vm_set_pasid(struct amdgpu_device *adev, struct amdgpu_vm *vm, - u32 pasid) +static void amdgpu_vm_assert_locked(struct amdgpu_vm *vm) { - int r; - - if (vm->pasid == pasid) - return 0; - - if (vm->pasid) { - r = xa_err(xa_erase_irq(&adev->vm_manager.pasids, vm->pasid)); - if (r < 0) - return r; - - vm->pasid = 0; - } - - if (pasid) { - r = xa_err(xa_store_irq(&adev->vm_manager.pasids, pasid, vm, - GFP_KERNEL)); - if (r < 0) - return r; - - vm->pasid = pasid; - } - - - return 0; + dma_resv_assert_held(vm->root.bo->tbo.base.resv); } /** @@ -181,6 +152,7 @@ static void amdgpu_vm_bo_evicted(struct amdgpu_vm_bo_base *vm_bo) struct amdgpu_bo *bo = vm_bo->bo; vm_bo->moved = true; + amdgpu_vm_assert_locked(vm); spin_lock(&vm_bo->vm->status_lock); if (bo->tbo.type == ttm_bo_type_kernel) list_move(&vm_bo->vm_status, &vm->evicted); @@ -198,6 +170,7 @@ static void amdgpu_vm_bo_evicted(struct amdgpu_vm_bo_base *vm_bo) */ static void amdgpu_vm_bo_moved(struct amdgpu_vm_bo_base *vm_bo) { + amdgpu_vm_assert_locked(vm_bo->vm); spin_lock(&vm_bo->vm->status_lock); list_move(&vm_bo->vm_status, &vm_bo->vm->moved); spin_unlock(&vm_bo->vm->status_lock); @@ -213,6 +186,7 @@ static void amdgpu_vm_bo_moved(struct amdgpu_vm_bo_base *vm_bo) */ static void amdgpu_vm_bo_idle(struct amdgpu_vm_bo_base *vm_bo) { + amdgpu_vm_assert_locked(vm_bo->vm); spin_lock(&vm_bo->vm->status_lock); list_move(&vm_bo->vm_status, &vm_bo->vm->idle); spin_unlock(&vm_bo->vm->status_lock); @@ -260,6 +234,7 @@ static void amdgpu_vm_bo_evicted_user(struct amdgpu_vm_bo_base *vm_bo) */ static void amdgpu_vm_bo_relocated(struct amdgpu_vm_bo_base *vm_bo) { + amdgpu_vm_assert_locked(vm_bo->vm); if (vm_bo->bo->parent) { spin_lock(&vm_bo->vm->status_lock); list_move(&vm_bo->vm_status, &vm_bo->vm->relocated); @@ -279,6 +254,7 @@ static void amdgpu_vm_bo_relocated(struct amdgpu_vm_bo_base *vm_bo) */ static void amdgpu_vm_bo_done(struct amdgpu_vm_bo_base *vm_bo) { + amdgpu_vm_assert_locked(vm_bo->vm); spin_lock(&vm_bo->vm->status_lock); list_move(&vm_bo->vm_status, &vm_bo->vm->done); spin_unlock(&vm_bo->vm->status_lock); @@ -295,10 +271,13 @@ static void amdgpu_vm_bo_reset_state_machine(struct amdgpu_vm *vm) { struct amdgpu_vm_bo_base *vm_bo, *tmp; + amdgpu_vm_assert_locked(vm); + spin_lock(&vm->status_lock); list_splice_init(&vm->done, &vm->invalidated); list_for_each_entry(vm_bo, &vm->invalidated, vm_status) vm_bo->moved = true; + list_for_each_entry_safe(vm_bo, tmp, &vm->idle, vm_status) { struct amdgpu_bo *bo = vm_bo->bo; @@ -327,6 +306,7 @@ static void amdgpu_vm_update_shared(struct amdgpu_vm_bo_base *base) uint32_t bo_memtype = amdgpu_bo_mem_stats_placement(bo); bool shared; + dma_resv_assert_held(bo->tbo.base.resv); spin_lock(&vm->status_lock); shared = drm_gem_object_is_shared_for_memory_stats(&bo->tbo.base); if (base->shared != shared) { @@ -485,6 +465,42 @@ int amdgpu_vm_lock_pd(struct amdgpu_vm *vm, struct drm_exec *exec, } /** + * amdgpu_vm_lock_done_list - lock all BOs on the done list + * @vm: vm providing the BOs + * @exec: drm execution context + * @num_fences: number of extra fences to reserve + * + * Lock the BOs on the done list in the DRM execution context. 
+ */ +int amdgpu_vm_lock_done_list(struct amdgpu_vm *vm, struct drm_exec *exec, + unsigned int num_fences) +{ + struct list_head *prev = &vm->done; + struct amdgpu_bo_va *bo_va; + struct amdgpu_bo *bo; + int ret; + + /* We can only trust prev->next while holding the lock */ + spin_lock(&vm->status_lock); + while (!list_is_head(prev->next, &vm->done)) { + bo_va = list_entry(prev->next, typeof(*bo_va), base.vm_status); + spin_unlock(&vm->status_lock); + + bo = bo_va->base.bo; + if (bo) { + ret = drm_exec_prepare_obj(exec, &bo->tbo.base, 1); + if (unlikely(ret)) + return ret; + } + spin_lock(&vm->status_lock); + prev = prev->next; + } + spin_unlock(&vm->status_lock); + + return 0; +} + +/** * amdgpu_vm_move_to_lru_tail - move all BOs to the end of LRU * * @adev: amdgpu device pointer @@ -616,18 +632,7 @@ int amdgpu_vm_validate(struct amdgpu_device *adev, struct amdgpu_vm *vm, spin_unlock(&vm->status_lock); bo = bo_base->bo; - - if (dma_resv_locking_ctx(bo->tbo.base.resv) != ticket) { - struct amdgpu_task_info *ti = amdgpu_vm_get_task_info_vm(vm); - - pr_warn_ratelimited("Evicted user BO is not reserved\n"); - if (ti) { - pr_warn_ratelimited("pid %d\n", ti->task.pid); - amdgpu_vm_put_task_info(ti); - } - - return -EINVAL; - } + dma_resv_assert_held(bo->tbo.base.resv); r = validate(param, bo); if (r) @@ -654,22 +659,31 @@ int amdgpu_vm_validate(struct amdgpu_device *adev, struct amdgpu_vm *vm, * Check if all VM PDs/PTs are ready for updates * * Returns: - * True if VM is not evicting. + * True if VM is not evicting and all VM entities are not stopped */ bool amdgpu_vm_ready(struct amdgpu_vm *vm) { - bool empty; bool ret; + amdgpu_vm_assert_locked(vm); + amdgpu_vm_eviction_lock(vm); ret = !vm->evicting; amdgpu_vm_eviction_unlock(vm); spin_lock(&vm->status_lock); - empty = list_empty(&vm->evicted); + ret &= list_empty(&vm->evicted); spin_unlock(&vm->status_lock); - return ret && empty; + spin_lock(&vm->immediate.lock); + ret &= !vm->immediate.stopped; + spin_unlock(&vm->immediate.lock); + + spin_lock(&vm->delayed.lock); + ret &= !vm->delayed.stopped; + spin_unlock(&vm->delayed.lock); + + return ret; } /** @@ -765,6 +779,7 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, bool cleaner_shader_needed = false; bool pasid_mapping_needed = false; struct dma_fence *fence = NULL; + struct amdgpu_fence *af; unsigned int patch; int r; @@ -830,6 +845,9 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, r = amdgpu_fence_emit(ring, &fence, NULL, 0); if (r) return r; + /* this is part of the job's context */ + af = container_of(fence, struct amdgpu_fence, base); + af->context = job->base.s_fence ? 
job->base.s_fence->finished.context : 0; } if (vm_flush_needed) { @@ -951,6 +969,8 @@ int amdgpu_vm_update_pdes(struct amdgpu_device *adev, LIST_HEAD(relocated); int r, idx; + amdgpu_vm_assert_locked(vm); + spin_lock(&vm->status_lock); list_splice_init(&vm->relocated, &relocated); spin_unlock(&vm->status_lock); @@ -966,7 +986,8 @@ int amdgpu_vm_update_pdes(struct amdgpu_device *adev, params.vm = vm; params.immediate = immediate; - r = vm->update_funcs->prepare(¶ms, NULL); + r = vm->update_funcs->prepare(¶ms, NULL, + AMDGPU_KERNEL_JOB_ID_VM_UPDATE_PDES); if (r) goto error; @@ -1135,7 +1156,8 @@ int amdgpu_vm_update_range(struct amdgpu_device *adev, struct amdgpu_vm *vm, dma_fence_put(tmp); } - r = vm->update_funcs->prepare(¶ms, sync); + r = vm->update_funcs->prepare(¶ms, sync, + AMDGPU_KERNEL_JOB_ID_VM_UPDATE_RANGE); if (r) goto error_free; @@ -1271,7 +1293,7 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va, } else { struct drm_gem_object *obj = &bo->tbo.base; - if (obj->import_attach && bo_va->is_xgmi) { + if (drm_gem_is_imported(obj) && bo_va->is_xgmi) { struct dma_buf *dma_buf = obj->import_attach->dmabuf; struct drm_gem_object *gobj = dma_buf->priv; struct amdgpu_bo *abo = gem_to_amdgpu_bo(gobj); @@ -1328,13 +1350,14 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va, /* normally,bo_va->flags only contians READABLE and WIRTEABLE bit go here * but in case of something, we filter the flags in first place */ - if (!(mapping->flags & AMDGPU_PTE_READABLE)) + if (!(mapping->flags & AMDGPU_VM_PAGE_READABLE)) update_flags &= ~AMDGPU_PTE_READABLE; - if (!(mapping->flags & AMDGPU_PTE_WRITEABLE)) + if (!(mapping->flags & AMDGPU_VM_PAGE_WRITEABLE)) update_flags &= ~AMDGPU_PTE_WRITEABLE; /* Apply ASIC specific mapping flags */ - amdgpu_gmc_get_vm_pte(adev, mapping, &update_flags); + amdgpu_gmc_get_vm_pte(adev, vm, bo, mapping->flags, + &update_flags); trace_amdgpu_vm_bo_update(mapping); @@ -1475,7 +1498,7 @@ static void amdgpu_vm_free_mapping(struct amdgpu_device *adev, struct amdgpu_bo_va_mapping *mapping, struct dma_fence *fence) { - if (mapping->flags & AMDGPU_PTE_PRT_FLAG(adev)) + if (mapping->flags & AMDGPU_VM_PAGE_PRT) amdgpu_vm_add_prt_cb(adev, fence); kfree(mapping); } @@ -1631,7 +1654,7 @@ int amdgpu_vm_handle_moved(struct amdgpu_device *adev, * validation */ if (vm->is_compute_context && - bo_va->base.bo->tbo.base.import_attach && + drm_gem_is_imported(&bo_va->base.bo->tbo.base) && (!bo_va->base.bo->tbo.resource || bo_va->base.bo->tbo.resource->mem_type == TTM_PL_SYSTEM)) amdgpu_vm_bo_evicted_user(&bo_va->base); @@ -1754,7 +1777,7 @@ static void amdgpu_vm_bo_insert_map(struct amdgpu_device *adev, list_add(&mapping->list, &bo_va->invalids); amdgpu_vm_it_insert(mapping, &vm->va); - if (mapping->flags & AMDGPU_PTE_PRT_FLAG(adev)) + if (mapping->flags & AMDGPU_VM_PAGE_PRT) amdgpu_vm_prt_get(adev); if (amdgpu_vm_is_bo_always_valid(vm, bo) && !bo_va->base.moved) @@ -1814,7 +1837,7 @@ static int amdgpu_vm_verify_parameters(struct amdgpu_device *adev, int amdgpu_vm_bo_map(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va, uint64_t saddr, uint64_t offset, - uint64_t size, uint64_t flags) + uint64_t size, uint32_t flags) { struct amdgpu_bo_va_mapping *mapping, *tmp; struct amdgpu_bo *bo = bo_va->base.bo; @@ -1873,7 +1896,7 @@ int amdgpu_vm_bo_map(struct amdgpu_device *adev, int amdgpu_vm_bo_replace_map(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va, uint64_t saddr, uint64_t offset, - uint64_t size, uint64_t flags) + 
uint64_t size, uint32_t flags) { struct amdgpu_bo_va_mapping *mapping; struct amdgpu_bo *bo = bo_va->base.bo; @@ -2395,10 +2418,11 @@ void amdgpu_vm_adjust_size(struct amdgpu_device *adev, uint32_t min_vm_size, else adev->vm_manager.fragment_size = amdgpu_vm_fragment_size; - DRM_INFO("vm size is %u GB, %u levels, block size is %u-bit, fragment size is %u-bit\n", - vm_size, adev->vm_manager.num_level + 1, - adev->vm_manager.block_size, - adev->vm_manager.fragment_size); + dev_info( + adev->dev, + "vm size is %u GB, %u levels, block size is %u-bit, fragment size is %u-bit\n", + vm_size, adev->vm_manager.num_level + 1, + adev->vm_manager.block_size, adev->vm_manager.fragment_size); } /** @@ -2409,13 +2433,11 @@ void amdgpu_vm_adjust_size(struct amdgpu_device *adev, uint32_t min_vm_size, */ long amdgpu_vm_wait_idle(struct amdgpu_vm *vm, long timeout) { - timeout = dma_resv_wait_timeout(vm->root.bo->tbo.base.resv, - DMA_RESV_USAGE_BOOKKEEP, - true, timeout); + timeout = drm_sched_entity_flush(&vm->immediate, timeout); if (timeout <= 0) return timeout; - return dma_fence_wait_timeout(vm->last_unlocked, true, timeout); + return drm_sched_entity_flush(&vm->delayed, timeout); } static void amdgpu_vm_destroy_task_info(struct kref *kref) @@ -2527,6 +2549,7 @@ void amdgpu_vm_set_task_info(struct amdgpu_vm *vm) * @adev: amdgpu_device pointer * @vm: requested vm * @xcp_id: GPU partition selection id + * @pasid: the pasid the VM is using on this GPU * * Init @vm fields. * @@ -2534,7 +2557,7 @@ void amdgpu_vm_set_task_info(struct amdgpu_vm *vm) * 0 for success, error for failure. */ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, - int32_t xcp_id) + int32_t xcp_id, uint32_t pasid) { struct amdgpu_bo *root_bo; struct amdgpu_bo_vm *root; @@ -2565,8 +2588,8 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, vm->use_cpu_for_update = !!(adev->vm_manager.vm_update_mode & AMDGPU_VM_USE_CPU_FOR_GFX); - DRM_DEBUG_DRIVER("VM update mode is %s\n", - vm->use_cpu_for_update ? "CPU" : "SDMA"); + dev_dbg(adev->dev, "VM update mode is %s\n", + vm->use_cpu_for_update ? "CPU" : "SDMA"); WARN_ONCE((vm->use_cpu_for_update && !amdgpu_gmc_vram_full_visible(&adev->gmc)), "CPU update of VM recommended only for large BAR system\n"); @@ -2608,7 +2631,16 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, r = amdgpu_vm_create_task_info(vm); if (r) - DRM_DEBUG("Failed to create task info for VM\n"); + dev_dbg(adev->dev, "Failed to create task info for VM\n"); + + /* Store new PASID in XArray (if non-zero) */ + if (pasid != 0) { + r = xa_err(xa_store_irq(&adev->vm_manager.pasids, pasid, vm, GFP_KERNEL)); + if (r < 0) + goto error_free_root; + + vm->pasid = pasid; + } amdgpu_bo_unreserve(vm->root.bo); amdgpu_bo_unref(&root_bo); @@ -2616,6 +2648,11 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, return 0; error_free_root: + /* If PASID was partially set, erase it from XArray before failing */ + if (vm->pasid != 0) { + xa_erase_irq(&adev->vm_manager.pasids, vm->pasid); + vm->pasid = 0; + } amdgpu_vm_pt_free_root(adev, vm); amdgpu_bo_unreserve(vm->root.bo); amdgpu_bo_unref(&root_bo); @@ -2659,8 +2696,8 @@ int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm) /* Update VM state */ vm->use_cpu_for_update = !!(adev->vm_manager.vm_update_mode & AMDGPU_VM_USE_CPU_FOR_COMPUTE); - DRM_DEBUG_DRIVER("VM update mode is %s\n", - vm->use_cpu_for_update ? 
"CPU" : "SDMA"); + dev_dbg(adev->dev, "VM update mode is %s\n", + vm->use_cpu_for_update ? "CPU" : "SDMA"); WARN_ONCE((vm->use_cpu_for_update && !amdgpu_gmc_vram_full_visible(&adev->gmc)), "CPU update of VM recommended only for large BAR system\n"); @@ -2721,7 +2758,11 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm) root = amdgpu_bo_ref(vm->root.bo); amdgpu_bo_reserve(root, true); - amdgpu_vm_set_pasid(adev, vm, 0); + /* Remove PASID mapping before destroying VM */ + if (vm->pasid != 0) { + xa_erase_irq(&adev->vm_manager.pasids, vm->pasid); + vm->pasid = 0; + } dma_fence_wait(vm->last_unlocked, false); dma_fence_put(vm->last_unlocked); dma_fence_wait(vm->last_tlb_flush, false); @@ -2731,7 +2772,7 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm) dma_fence_put(vm->last_tlb_flush); list_for_each_entry_safe(mapping, tmp, &vm->freed, list) { - if (mapping->flags & AMDGPU_PTE_PRT_FLAG(adev) && prt_fini_needed) { + if (mapping->flags & AMDGPU_VM_PAGE_PRT && prt_fini_needed) { amdgpu_vm_prt_fini(adev, vm); prt_fini_needed = false; } @@ -2762,10 +2803,7 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm) dma_fence_put(vm->last_update); for (i = 0; i < AMDGPU_MAX_VMHUBS; i++) { - if (vm->reserved_vmid[i]) { - amdgpu_vmid_free_reserved(adev, i); - vm->reserved_vmid[i] = false; - } + amdgpu_vmid_free_reserved(adev, vm, i); } ttm_lru_bulk_move_fini(&adev->mman.bdev, &vm->lru_bulk_move); @@ -2861,6 +2899,7 @@ int amdgpu_vm_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) union drm_amdgpu_vm *args = data; struct amdgpu_device *adev = drm_to_adev(dev); struct amdgpu_fpriv *fpriv = filp->driver_priv; + struct amdgpu_vm *vm = &fpriv->vm; /* No valid flags defined yet */ if (args->in.flags) @@ -2869,17 +2908,10 @@ int amdgpu_vm_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) switch (args->in.op) { case AMDGPU_VM_OP_RESERVE_VMID: /* We only have requirement to reserve vmid from gfxhub */ - if (!fpriv->vm.reserved_vmid[AMDGPU_GFXHUB(0)]) { - amdgpu_vmid_alloc_reserved(adev, AMDGPU_GFXHUB(0)); - fpriv->vm.reserved_vmid[AMDGPU_GFXHUB(0)] = true; - } - + amdgpu_vmid_alloc_reserved(adev, vm, AMDGPU_GFXHUB(0)); break; case AMDGPU_VM_OP_UNRESERVE_VMID: - if (fpriv->vm.reserved_vmid[AMDGPU_GFXHUB(0)]) { - amdgpu_vmid_free_reserved(adev, AMDGPU_GFXHUB(0)); - fpriv->vm.reserved_vmid[AMDGPU_GFXHUB(0)] = false; - } + amdgpu_vmid_free_reserved(adev, vm, AMDGPU_GFXHUB(0)); break; default: return -EINVAL; @@ -2983,7 +3015,7 @@ bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, u32 pasid, error_unlock: amdgpu_bo_unreserve(root); if (r < 0) - DRM_ERROR("Can't handle page fault (%d)\n", r); + dev_err(adev->dev, "Can't handle page fault (%d)\n", r); error_unref: amdgpu_bo_unref(&root); @@ -3017,6 +3049,8 @@ void amdgpu_debugfs_vm_bo_info(struct amdgpu_vm *vm, struct seq_file *m) unsigned int total_done_objs = 0; unsigned int id = 0; + amdgpu_vm_assert_locked(vm); + spin_lock(&vm->status_lock); seq_puts(m, "\tIdle BOs:\n"); list_for_each_entry_safe(bo_va, tmp, &vm->idle, base.vm_status) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h index fd086efd8457..cf0ec94e8a07 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h @@ -308,7 +308,7 @@ struct amdgpu_vm_update_params { struct amdgpu_vm_update_funcs { int (*map_table)(struct amdgpu_bo_vm *bo); int (*prepare)(struct amdgpu_vm_update_params *p, - struct amdgpu_sync *sync); + struct 
amdgpu_sync *sync, u64 k_job_id); int (*update)(struct amdgpu_vm_update_params *p, struct amdgpu_bo_vm *bo, uint64_t pe, uint64_t addr, unsigned count, uint32_t incr, uint64_t flags); @@ -349,12 +349,16 @@ struct amdgpu_vm { /* Memory statistics for this vm, protected by status_lock */ struct amdgpu_mem_stats stats[__AMDGPU_PL_NUM]; + /* + * The following lists contain amdgpu_vm_bo_base objects for either + * PDs, PTs or per VM BOs. The state transits are: + * + * evicted -> relocated (PDs, PTs) or moved (per VM BOs) -> idle + */ + /* Per-VM and PT BOs who needs a validation */ struct list_head evicted; - /* BOs for user mode queues that need a validation */ - struct list_head evicted_user; - /* PT BOs which relocated and their parent need an update */ struct list_head relocated; @@ -364,15 +368,29 @@ struct amdgpu_vm { /* All BOs of this VM not currently in the state machine */ struct list_head idle; + /* + * The following lists contain amdgpu_vm_bo_base objects for BOs which + * have their own dma_resv object and not depend on the root PD. Their + * state transits are: + * + * evicted_user or invalidated -> done + */ + + /* BOs for user mode queues that need a validation */ + struct list_head evicted_user; + /* regular invalidated BOs, but not yet updated in the PT */ struct list_head invalidated; - /* BO mappings freed, but not yet updated in the PT */ - struct list_head freed; - /* BOs which are invalidated, has been updated in the PTs */ struct list_head done; + /* + * This list contains amdgpu_bo_va_mapping objects which have been freed + * but not updated in the PTs + */ + struct list_head freed; + /* contains the page directory */ struct amdgpu_vm_bo_base root; struct dma_fence *last_update; @@ -394,7 +412,7 @@ struct amdgpu_vm { struct dma_fence *last_unlocked; unsigned int pasid; - bool reserved_vmid[AMDGPU_MAX_VMHUBS]; + struct amdgpu_vmid *reserved_vmid[AMDGPU_MAX_VMHUBS]; /* Flag to indicate if VM tables are updated by CPU or GPU (SDMA) */ bool use_cpu_for_update; @@ -482,15 +500,14 @@ extern const struct amdgpu_vm_update_funcs amdgpu_vm_sdma_funcs; void amdgpu_vm_manager_init(struct amdgpu_device *adev); void amdgpu_vm_manager_fini(struct amdgpu_device *adev); -int amdgpu_vm_set_pasid(struct amdgpu_device *adev, struct amdgpu_vm *vm, - u32 pasid); - long amdgpu_vm_wait_idle(struct amdgpu_vm *vm, long timeout); -int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, int32_t xcp_id); +int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, int32_t xcp_id, uint32_t pasid); int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm); void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm); int amdgpu_vm_lock_pd(struct amdgpu_vm *vm, struct drm_exec *exec, unsigned int num_fences); +int amdgpu_vm_lock_done_list(struct amdgpu_vm *vm, struct drm_exec *exec, + unsigned int num_fences); bool amdgpu_vm_ready(struct amdgpu_vm *vm); uint64_t amdgpu_vm_generation(struct amdgpu_device *adev, struct amdgpu_vm *vm); int amdgpu_vm_validate(struct amdgpu_device *adev, struct amdgpu_vm *vm, @@ -538,11 +555,11 @@ struct amdgpu_bo_va *amdgpu_vm_bo_add(struct amdgpu_device *adev, int amdgpu_vm_bo_map(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va, uint64_t addr, uint64_t offset, - uint64_t size, uint64_t flags); + uint64_t size, uint32_t flags); int amdgpu_vm_bo_replace_map(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va, uint64_t addr, uint64_t offset, - uint64_t size, uint64_t flags); + uint64_t size, uint32_t flags); int 
amdgpu_vm_bo_unmap(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va, uint64_t addr); @@ -670,4 +687,9 @@ void amdgpu_vm_tlb_fence_create(struct amdgpu_device *adev, void amdgpu_vm_print_task_info(struct amdgpu_device *adev, struct amdgpu_task_info *task_info); +#define amdgpu_vm_bo_va_for_each_valid_mapping(bo_va, mapping) \ + list_for_each_entry(mapping, &(bo_va)->valids, list) +#define amdgpu_vm_bo_va_for_each_invalid_mapping(bo_va, mapping) \ + list_for_each_entry(mapping, &(bo_va)->invalids, list) + #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_cpu.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_cpu.c index 0c1ef5850a5e..22e2e5b47341 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_cpu.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_cpu.c @@ -40,12 +40,14 @@ static int amdgpu_vm_cpu_map_table(struct amdgpu_bo_vm *table) * * @p: see amdgpu_vm_update_params definition * @sync: sync obj with fences to wait on + * @k_job_id: the id for tracing/debug purposes * * Returns: * Negativ errno, 0 for success. */ static int amdgpu_vm_cpu_prepare(struct amdgpu_vm_update_params *p, - struct amdgpu_sync *sync) + struct amdgpu_sync *sync, + u64 k_job_id) { if (!sync) return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c index 30022123b0bf..f794fb1cc06e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c @@ -26,6 +26,7 @@ #include "amdgpu.h" #include "amdgpu_trace.h" #include "amdgpu_vm.h" +#include "amdgpu_job.h" /* * amdgpu_vm_pt_cursor - state for for_each_amdgpu_vm_pt @@ -395,7 +396,8 @@ int amdgpu_vm_pt_clear(struct amdgpu_device *adev, struct amdgpu_vm *vm, params.vm = vm; params.immediate = immediate; - r = vm->update_funcs->prepare(¶ms, NULL); + r = vm->update_funcs->prepare(¶ms, NULL, + AMDGPU_KERNEL_JOB_ID_VM_PT_CLEAR); if (r) goto exit; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c index 46d9fb433ab2..36805dcfa159 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c @@ -40,7 +40,7 @@ static int amdgpu_vm_sdma_map_table(struct amdgpu_bo_vm *table) /* Allocate a new job for @count PTE updates */ static int amdgpu_vm_sdma_alloc_job(struct amdgpu_vm_update_params *p, - unsigned int count) + unsigned int count, u64 k_job_id) { enum amdgpu_ib_pool_type pool = p->immediate ? AMDGPU_IB_POOL_IMMEDIATE : AMDGPU_IB_POOL_DELAYED; @@ -56,7 +56,7 @@ static int amdgpu_vm_sdma_alloc_job(struct amdgpu_vm_update_params *p, ndw = min(ndw, AMDGPU_VM_SDMA_MAX_NUM_DW); r = amdgpu_job_alloc_with_ib(p->adev, entity, AMDGPU_FENCE_OWNER_VM, - ndw * 4, pool, &p->job); + ndw * 4, pool, &p->job, k_job_id); if (r) return r; @@ -69,16 +69,17 @@ static int amdgpu_vm_sdma_alloc_job(struct amdgpu_vm_update_params *p, * * @p: see amdgpu_vm_update_params definition * @sync: amdgpu_sync object with fences to wait for + * @k_job_id: identifier of the job, for tracing purpose * * Returns: * Negativ errno, 0 for success. 
*/ static int amdgpu_vm_sdma_prepare(struct amdgpu_vm_update_params *p, - struct amdgpu_sync *sync) + struct amdgpu_sync *sync, u64 k_job_id) { int r; - r = amdgpu_vm_sdma_alloc_job(p, 0); + r = amdgpu_vm_sdma_alloc_job(p, 0, k_job_id); if (r) return r; @@ -249,7 +250,8 @@ static int amdgpu_vm_sdma_update(struct amdgpu_vm_update_params *p, if (r) return r; - r = amdgpu_vm_sdma_alloc_job(p, count); + r = amdgpu_vm_sdma_alloc_job(p, count, + AMDGPU_KERNEL_JOB_ID_VM_UPDATE); if (r) return r; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.c index 121ee17b522b..aa78c2ee9e21 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.c @@ -322,6 +322,26 @@ static int vpe_early_init(struct amdgpu_ip_block *ip_block) return 0; } +static bool vpe_need_dpm0_at_power_down(struct amdgpu_device *adev) +{ + switch (amdgpu_ip_version(adev, VPE_HWIP, 0)) { + case IP_VERSION(6, 1, 1): + return adev->pm.fw_version < 0x0a640500; + default: + return false; + } +} + +static int vpe_get_dpm_level(struct amdgpu_device *adev) +{ + struct amdgpu_vpe *vpe = &adev->vpe; + + if (!adev->pm.dpm_enabled) + return 0; + + return RREG32(vpe_get_reg_offset(vpe, 0, vpe->regs.dpm_request_lv)); +} + static void vpe_idle_work_handler(struct work_struct *work) { struct amdgpu_device *adev = @@ -329,11 +349,17 @@ static void vpe_idle_work_handler(struct work_struct *work) unsigned int fences = 0; fences += amdgpu_fence_count_emitted(&adev->vpe.ring); + if (fences) + goto reschedule; + + if (vpe_need_dpm0_at_power_down(adev) && vpe_get_dpm_level(adev) != 0) + goto reschedule; + + amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VPE, AMD_PG_STATE_GATE); + return; - if (fences == 0) - amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VPE, AMD_PG_STATE_GATE); - else - schedule_delayed_work(&adev->vpe.idle_work, VPE_IDLE_TIMEOUT); +reschedule: + schedule_delayed_work(&adev->vpe.idle_work, VPE_IDLE_TIMEOUT); } static int vpe_common_init(struct amdgpu_vpe *vpe) @@ -379,9 +405,10 @@ static int vpe_sw_init(struct amdgpu_ip_block *ip_block) if (ret) goto out; - /* TODO: Add queue reset mask when FW fully supports it */ adev->vpe.supported_reset = amdgpu_get_soft_full_reset_mask(&adev->vpe.ring); + if (!amdgpu_sriov_vf(adev)) + adev->vpe.supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE; ret = amdgpu_vpe_sysfs_reset_mask_init(adev); if (ret) goto out; @@ -435,6 +462,8 @@ static int vpe_hw_fini(struct amdgpu_ip_block *ip_block) struct amdgpu_device *adev = ip_block->adev; struct amdgpu_vpe *vpe = &adev->vpe; + cancel_delayed_work_sync(&adev->vpe.idle_work); + vpe_ring_stop(vpe); /* Power off VPE */ @@ -445,10 +474,6 @@ static int vpe_hw_fini(struct amdgpu_ip_block *ip_block) static int vpe_suspend(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = ip_block->adev; - - cancel_delayed_work_sync(&adev->vpe.idle_work); - return vpe_hw_fini(ip_block); } @@ -874,6 +899,27 @@ static void vpe_ring_end_use(struct amdgpu_ring *ring) schedule_delayed_work(&adev->vpe.idle_work, VPE_IDLE_TIMEOUT); } +static int vpe_ring_reset(struct amdgpu_ring *ring, + unsigned int vmid, + struct amdgpu_fence *timedout_fence) +{ + struct amdgpu_device *adev = ring->adev; + int r; + + amdgpu_ring_reset_helper_begin(ring, timedout_fence); + + r = amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VPE, + AMD_PG_STATE_GATE); + if (r) + return r; + r = amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VPE, + 
AMD_PG_STATE_UNGATE); + if (r) + return r; + + return amdgpu_ring_reset_helper_end(ring, timedout_fence); +} + static ssize_t amdgpu_get_vpe_reset_mask(struct device *dev, struct device_attribute *attr, char *buf) @@ -942,6 +988,7 @@ static const struct amdgpu_ring_funcs vpe_ring_funcs = { .preempt_ib = vpe_ring_preempt_ib, .begin_use = vpe_ring_begin_use, .end_use = vpe_ring_end_use, + .reset = vpe_ring_reset, }; static void vpe_set_ring_funcs(struct amdgpu_device *adev) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c index abdc52b0895a..9d934c07fa6b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c @@ -234,6 +234,9 @@ static umode_t amdgpu_vram_attrs_is_visible(struct kobject *kobj, !adev->gmc.vram_vendor) return 0; + if (!ttm_resource_manager_used(&adev->mman.vram_mgr.manager)) + return 0; + return attr->mode; } @@ -396,43 +399,33 @@ out: return ret; } -static void amdgpu_dummy_vram_mgr_debug(struct ttm_resource_manager *man, - struct drm_printer *printer) +int amdgpu_vram_mgr_query_address_block_info(struct amdgpu_vram_mgr *mgr, + uint64_t address, struct amdgpu_vram_block_info *info) { - DRM_DEBUG_DRIVER("Dummy vram mgr debug\n"); -} - -static bool amdgpu_dummy_vram_mgr_compatible(struct ttm_resource_manager *man, - struct ttm_resource *res, - const struct ttm_place *place, - size_t size) -{ - DRM_DEBUG_DRIVER("Dummy vram mgr compatible\n"); - return false; -} + struct amdgpu_vram_mgr_resource *vres; + struct drm_buddy_block *block; + u64 start, size; + int ret = -ENOENT; -static bool amdgpu_dummy_vram_mgr_intersects(struct ttm_resource_manager *man, - struct ttm_resource *res, - const struct ttm_place *place, - size_t size) -{ - DRM_DEBUG_DRIVER("Dummy vram mgr intersects\n"); - return true; -} + mutex_lock(&mgr->lock); + list_for_each_entry(vres, &mgr->allocated_vres_list, vres_node) { + list_for_each_entry(block, &vres->blocks, link) { + start = amdgpu_vram_mgr_block_start(block); + size = amdgpu_vram_mgr_block_size(block); + if ((start <= address) && (address < (start + size))) { + info->start = start; + info->size = size; + memcpy(&info->task, &vres->task, sizeof(vres->task)); + ret = 0; + goto out; + } + } + } -static void amdgpu_dummy_vram_mgr_del(struct ttm_resource_manager *man, - struct ttm_resource *res) -{ - DRM_DEBUG_DRIVER("Dummy vram mgr deleted\n"); -} +out: + mutex_unlock(&mgr->lock); -static int amdgpu_dummy_vram_mgr_new(struct ttm_resource_manager *man, - struct ttm_buffer_object *tbo, - const struct ttm_place *place, - struct ttm_resource **res) -{ - DRM_DEBUG_DRIVER("Dummy vram mgr new\n"); - return -ENOSPC; + return ret; } /** @@ -568,6 +561,10 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man, remaining_size -= size; } + vres->task.pid = task_pid_nr(current); + get_task_comm(vres->task.comm, current); + list_add_tail(&vres->vres_node, &mgr->allocated_vres_list); + if (bo->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS && adjust_dcc_size) { struct drm_buddy_block *dcc_block; unsigned long dcc_start; @@ -645,12 +642,15 @@ static void amdgpu_vram_mgr_del(struct ttm_resource_manager *man, uint64_t vis_usage = 0; mutex_lock(&mgr->lock); + + list_del(&vres->vres_node); + memset(&vres->task, 0, sizeof(vres->task)); + list_for_each_entry(block, &vres->blocks, link) vis_usage += amdgpu_vram_mgr_vis_size(adev, block); - amdgpu_vram_mgr_do_reserve(man); - drm_buddy_free_list(mm, &vres->blocks, vres->flags); + amdgpu_vram_mgr_do_reserve(man); 
mutex_unlock(&mgr->lock); atomic64_sub(vis_usage, &mgr->vis_usage); @@ -783,6 +783,23 @@ uint64_t amdgpu_vram_mgr_vis_usage(struct amdgpu_vram_mgr *mgr) } /** + * amdgpu_vram_mgr_clear_reset_blocks - reset clear blocks + * + * @adev: amdgpu device pointer + * + * Reset the cleared drm buddy blocks. + */ +void amdgpu_vram_mgr_clear_reset_blocks(struct amdgpu_device *adev) +{ + struct amdgpu_vram_mgr *mgr = &adev->mman.vram_mgr; + struct drm_buddy *mm = &mgr->mm; + + mutex_lock(&mgr->lock); + drm_buddy_reset_clear(mm, false); + mutex_unlock(&mgr->lock); +} + +/** * amdgpu_vram_mgr_intersects - test each drm buddy block for intersection * * @man: TTM memory type manager @@ -879,14 +896,6 @@ static void amdgpu_vram_mgr_debug(struct ttm_resource_manager *man, mutex_unlock(&mgr->lock); } -static const struct ttm_resource_manager_func amdgpu_dummy_vram_mgr_func = { - .alloc = amdgpu_dummy_vram_mgr_new, - .free = amdgpu_dummy_vram_mgr_del, - .intersects = amdgpu_dummy_vram_mgr_intersects, - .compatible = amdgpu_dummy_vram_mgr_compatible, - .debug = amdgpu_dummy_vram_mgr_debug -}; - static const struct ttm_resource_manager_func amdgpu_vram_mgr_func = { .alloc = amdgpu_vram_mgr_new, .free = amdgpu_vram_mgr_del, @@ -917,18 +926,13 @@ int amdgpu_vram_mgr_init(struct amdgpu_device *adev) mutex_init(&mgr->lock); INIT_LIST_HEAD(&mgr->reservations_pending); INIT_LIST_HEAD(&mgr->reserved_pages); + INIT_LIST_HEAD(&mgr->allocated_vres_list); mgr->default_page_size = PAGE_SIZE; - if (!adev->gmc.is_app_apu) { - man->func = &amdgpu_vram_mgr_func; - - err = drm_buddy_init(&mgr->mm, man->size, PAGE_SIZE); - if (err) - return err; - } else { - man->func = &amdgpu_dummy_vram_mgr_func; - DRM_INFO("Setup dummy vram mgr\n"); - } + man->func = &amdgpu_vram_mgr_func; + err = drm_buddy_init(&mgr->mm, man->size, PAGE_SIZE); + if (err) + return err; ttm_set_driver_manager(&adev->mman.bdev, TTM_PL_VRAM, &mgr->manager); ttm_resource_manager_set_used(man, true); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.h index b256cbc2bc27..5f5fd9a911c2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.h @@ -35,12 +35,26 @@ struct amdgpu_vram_mgr { struct list_head reserved_pages; atomic64_t vis_usage; u64 default_page_size; + struct list_head allocated_vres_list; +}; + +struct amdgpu_vres_task { + pid_t pid; + char comm[TASK_COMM_LEN]; +}; + +struct amdgpu_vram_block_info { + u64 start; + u64 size; + struct amdgpu_vres_task task; }; struct amdgpu_vram_mgr_resource { struct ttm_resource base; struct list_head blocks; unsigned long flags; + struct list_head vres_node; + struct amdgpu_vres_task task; }; static inline u64 amdgpu_vram_mgr_block_start(struct drm_buddy_block *block) @@ -66,7 +80,13 @@ to_amdgpu_vram_mgr_resource(struct ttm_resource *res) static inline void amdgpu_vram_mgr_set_cleared(struct ttm_resource *res) { - to_amdgpu_vram_mgr_resource(res)->flags |= DRM_BUDDY_CLEARED; + struct amdgpu_vram_mgr_resource *ares = to_amdgpu_vram_mgr_resource(res); + + WARN_ON(ares->flags & DRM_BUDDY_CLEARED); + ares->flags |= DRM_BUDDY_CLEARED; } +int amdgpu_vram_mgr_query_address_block_info(struct amdgpu_vram_mgr *mgr, + uint64_t address, struct amdgpu_vram_block_info *info); + #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.c index 322816805bfb..1083db8cea2e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.c @@ -120,6 +120,25 @@ 
static void __amdgpu_xcp_add_block(struct amdgpu_xcp_mgr *xcp_mgr, int xcp_id, xcp->valid = true; } +static void __amdgpu_xcp_set_unique_id(struct amdgpu_xcp_mgr *xcp_mgr, + int xcp_id) +{ + struct amdgpu_xcp *xcp = &xcp_mgr->xcp[xcp_id]; + struct amdgpu_device *adev = xcp_mgr->adev; + uint32_t inst_mask; + uint64_t uid; + int i; + + if (!amdgpu_xcp_get_inst_details(xcp, AMDGPU_XCP_GFX, &inst_mask) && + inst_mask) { + i = GET_INST(GC, (ffs(inst_mask) - 1)); + uid = amdgpu_device_get_uid(xcp_mgr->adev->uid_info, + AMDGPU_UID_TYPE_XCD, i); + if (uid) + xcp->unique_id = uid; + } +} + int amdgpu_xcp_init(struct amdgpu_xcp_mgr *xcp_mgr, int num_xcps, int mode) { struct amdgpu_device *adev = xcp_mgr->adev; @@ -158,6 +177,7 @@ int amdgpu_xcp_init(struct amdgpu_xcp_mgr *xcp_mgr, int num_xcps, int mode) else xcp_mgr->xcp[i].mem_id = mem_id; } + __amdgpu_xcp_set_unique_id(xcp_mgr, i); } xcp_mgr->num_xcps = num_xcps; @@ -218,15 +238,27 @@ int amdgpu_xcp_restore_partition_mode(struct amdgpu_xcp_mgr *xcp_mgr) return __amdgpu_xcp_switch_partition_mode(xcp_mgr, xcp_mgr->mode); } -int amdgpu_xcp_query_partition_mode(struct amdgpu_xcp_mgr *xcp_mgr, u32 flags) +static bool __amdgpu_xcp_is_cached_mode_valid(struct amdgpu_xcp_mgr *xcp_mgr) { - int mode; + if (!xcp_mgr->funcs || !xcp_mgr->funcs->query_partition_mode) + return true; if (!amdgpu_sriov_vf(xcp_mgr->adev) && xcp_mgr->mode == AMDGPU_XCP_MODE_NONE) - return xcp_mgr->mode; + return true; - if (!xcp_mgr->funcs || !xcp_mgr->funcs->query_partition_mode) + if (xcp_mgr->mode != AMDGPU_XCP_MODE_NONE && + xcp_mgr->mode != AMDGPU_XCP_MODE_TRANS) + return true; + + return false; +} + +int amdgpu_xcp_query_partition_mode(struct amdgpu_xcp_mgr *xcp_mgr, u32 flags) +{ + int mode; + + if (__amdgpu_xcp_is_cached_mode_valid(xcp_mgr)) return xcp_mgr->mode; if (!(flags & AMDGPU_XCP_FL_LOCKED)) @@ -394,6 +426,7 @@ void amdgpu_xcp_dev_unplug(struct amdgpu_device *adev) p_ddev->primary->dev = adev->xcp_mgr->xcp[i].pdev; p_ddev->driver = adev->xcp_mgr->xcp[i].driver; p_ddev->vma_offset_manager = adev->xcp_mgr->xcp[i].vma_offset_manager; + amdgpu_xcp_drm_dev_free(p_ddev); } } @@ -445,6 +478,222 @@ void amdgpu_xcp_release_sched(struct amdgpu_device *adev, } } +int amdgpu_xcp_select_scheds(struct amdgpu_device *adev, + u32 hw_ip, u32 hw_prio, + struct amdgpu_fpriv *fpriv, + unsigned int *num_scheds, + struct drm_gpu_scheduler ***scheds) +{ + u32 sel_xcp_id; + int i; + struct amdgpu_xcp_mgr *xcp_mgr = adev->xcp_mgr; + + if (fpriv->xcp_id == AMDGPU_XCP_NO_PARTITION) { + u32 least_ref_cnt = ~0; + + fpriv->xcp_id = 0; + for (i = 0; i < xcp_mgr->num_xcps; i++) { + u32 total_ref_cnt; + + total_ref_cnt = atomic_read(&xcp_mgr->xcp[i].ref_cnt); + if (total_ref_cnt < least_ref_cnt) { + fpriv->xcp_id = i; + least_ref_cnt = total_ref_cnt; + } + } + } + sel_xcp_id = fpriv->xcp_id; + + if (xcp_mgr->xcp[sel_xcp_id].gpu_sched[hw_ip][hw_prio].num_scheds) { + *num_scheds = + xcp_mgr->xcp[fpriv->xcp_id].gpu_sched[hw_ip][hw_prio].num_scheds; + *scheds = + xcp_mgr->xcp[fpriv->xcp_id].gpu_sched[hw_ip][hw_prio].sched; + atomic_inc(&adev->xcp_mgr->xcp[sel_xcp_id].ref_cnt); + dev_dbg(adev->dev, "Selected partition #%d", sel_xcp_id); + } else { + dev_err(adev->dev, "Failed to schedule partition #%d.", sel_xcp_id); + return -ENOENT; + } + + return 0; +} + +static void amdgpu_set_xcp_id(struct amdgpu_device *adev, + uint32_t inst_idx, + struct amdgpu_ring *ring) +{ + int xcp_id; + enum AMDGPU_XCP_IP_BLOCK ip_blk; + uint32_t inst_mask; + + ring->xcp_id = AMDGPU_XCP_NO_PARTITION; + if (ring->funcs->type 
== AMDGPU_RING_TYPE_COMPUTE) + adev->gfx.enforce_isolation[0].xcp_id = ring->xcp_id; + if ((adev->xcp_mgr->mode == AMDGPU_XCP_MODE_NONE) || + (ring->funcs->type == AMDGPU_RING_TYPE_CPER)) + return; + + inst_mask = 1 << inst_idx; + + switch (ring->funcs->type) { + case AMDGPU_HW_IP_GFX: + case AMDGPU_RING_TYPE_COMPUTE: + case AMDGPU_RING_TYPE_KIQ: + ip_blk = AMDGPU_XCP_GFX; + break; + case AMDGPU_RING_TYPE_SDMA: + ip_blk = AMDGPU_XCP_SDMA; + break; + case AMDGPU_RING_TYPE_VCN_ENC: + case AMDGPU_RING_TYPE_VCN_JPEG: + ip_blk = AMDGPU_XCP_VCN; + break; + default: + dev_err(adev->dev, "Not support ring type %d!", ring->funcs->type); + return; + } + + for (xcp_id = 0; xcp_id < adev->xcp_mgr->num_xcps; xcp_id++) { + if (adev->xcp_mgr->xcp[xcp_id].ip[ip_blk].inst_mask & inst_mask) { + ring->xcp_id = xcp_id; + dev_dbg(adev->dev, "ring:%s xcp_id :%u", ring->name, + ring->xcp_id); + if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) + adev->gfx.enforce_isolation[xcp_id].xcp_id = xcp_id; + break; + } + } +} + +static void amdgpu_xcp_gpu_sched_update(struct amdgpu_device *adev, + struct amdgpu_ring *ring, + unsigned int sel_xcp_id) +{ + unsigned int *num_gpu_sched; + + num_gpu_sched = &adev->xcp_mgr->xcp[sel_xcp_id] + .gpu_sched[ring->funcs->type][ring->hw_prio].num_scheds; + adev->xcp_mgr->xcp[sel_xcp_id].gpu_sched[ring->funcs->type][ring->hw_prio] + .sched[(*num_gpu_sched)++] = &ring->sched; + dev_dbg(adev->dev, "%s :[%d] gpu_sched[%d][%d] = %d", + ring->name, sel_xcp_id, ring->funcs->type, + ring->hw_prio, *num_gpu_sched); +} + +static int amdgpu_xcp_sched_list_update(struct amdgpu_device *adev) +{ + struct amdgpu_ring *ring; + int i; + + for (i = 0; i < MAX_XCP; i++) { + atomic_set(&adev->xcp_mgr->xcp[i].ref_cnt, 0); + memset(adev->xcp_mgr->xcp[i].gpu_sched, 0, sizeof(adev->xcp_mgr->xcp->gpu_sched)); + } + + if (adev->xcp_mgr->mode == AMDGPU_XCP_MODE_NONE) + return 0; + + for (i = 0; i < AMDGPU_MAX_RINGS; i++) { + ring = adev->rings[i]; + if (!ring || !ring->sched.ready || ring->no_scheduler) + continue; + + amdgpu_xcp_gpu_sched_update(adev, ring, ring->xcp_id); + + /* VCN may be shared by two partitions under CPX MODE in certain + * configs. 
+ */ + if ((ring->funcs->type == AMDGPU_RING_TYPE_VCN_ENC || + ring->funcs->type == AMDGPU_RING_TYPE_VCN_JPEG) && + (adev->xcp_mgr->num_xcps > adev->vcn.num_vcn_inst)) + amdgpu_xcp_gpu_sched_update(adev, ring, ring->xcp_id + 1); + } + + return 0; +} + +int amdgpu_xcp_update_partition_sched_list(struct amdgpu_device *adev) +{ + int i; + + for (i = 0; i < adev->num_rings; i++) { + struct amdgpu_ring *ring = adev->rings[i]; + + if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE || + ring->funcs->type == AMDGPU_RING_TYPE_KIQ) + amdgpu_set_xcp_id(adev, ring->xcc_id, ring); + else + amdgpu_set_xcp_id(adev, ring->me, ring); + } + + return amdgpu_xcp_sched_list_update(adev); +} + +void amdgpu_xcp_update_supported_modes(struct amdgpu_xcp_mgr *xcp_mgr) +{ + struct amdgpu_device *adev = xcp_mgr->adev; + + xcp_mgr->supp_xcp_modes = 0; + + switch (NUM_XCC(adev->gfx.xcc_mask)) { + case 8: + xcp_mgr->supp_xcp_modes = BIT(AMDGPU_SPX_PARTITION_MODE) | + BIT(AMDGPU_DPX_PARTITION_MODE) | + BIT(AMDGPU_QPX_PARTITION_MODE) | + BIT(AMDGPU_CPX_PARTITION_MODE); + break; + case 6: + xcp_mgr->supp_xcp_modes = BIT(AMDGPU_SPX_PARTITION_MODE) | + BIT(AMDGPU_TPX_PARTITION_MODE) | + BIT(AMDGPU_CPX_PARTITION_MODE); + break; + case 4: + xcp_mgr->supp_xcp_modes = BIT(AMDGPU_SPX_PARTITION_MODE) | + BIT(AMDGPU_DPX_PARTITION_MODE) | + BIT(AMDGPU_CPX_PARTITION_MODE); + break; + case 2: + xcp_mgr->supp_xcp_modes = BIT(AMDGPU_SPX_PARTITION_MODE) | + BIT(AMDGPU_CPX_PARTITION_MODE); + break; + case 1: + xcp_mgr->supp_xcp_modes = BIT(AMDGPU_SPX_PARTITION_MODE) | + BIT(AMDGPU_CPX_PARTITION_MODE); + break; + + default: + break; + } +} + +int amdgpu_xcp_pre_partition_switch(struct amdgpu_xcp_mgr *xcp_mgr, u32 flags) +{ + /* TODO: + * Stop user queues and threads, and make sure GPU is empty of work. 
+ */ + + if (flags & AMDGPU_XCP_OPS_KFD) + amdgpu_amdkfd_device_fini_sw(xcp_mgr->adev); + + return 0; +} + +int amdgpu_xcp_post_partition_switch(struct amdgpu_xcp_mgr *xcp_mgr, u32 flags) +{ + int ret = 0; + + if (flags & AMDGPU_XCP_OPS_KFD) { + amdgpu_amdkfd_device_probe(xcp_mgr->adev); + amdgpu_amdkfd_device_init(xcp_mgr->adev); + /* If KFD init failed, return failure */ + if (!xcp_mgr->adev->kfd.init_complete) + ret = -EIO; + } + + return ret; +} + /*====================== xcp sysfs - configuration ======================*/ #define XCP_CFG_SYSFS_RES_ATTR_SHOW(_name) \ static ssize_t amdgpu_xcp_res_sysfs_##_name##_show( \ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.h index 454b33f889fb..1928d9e224fc 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.h @@ -39,6 +39,8 @@ #define AMDGPU_XCP_NO_PARTITION (~0) +#define AMDGPU_XCP_OPS_KFD (1 << 0) + struct amdgpu_fpriv; enum AMDGPU_XCP_IP_BLOCK { @@ -110,6 +112,7 @@ struct amdgpu_xcp { struct amdgpu_sched gpu_sched[AMDGPU_HW_IP_NUM][AMDGPU_RING_PRIO_MAX]; struct amdgpu_xcp_mgr *xcp_mgr; struct kobject kobj; + uint64_t unique_id; }; struct amdgpu_xcp_mgr { @@ -144,10 +147,6 @@ struct amdgpu_xcp_mgr_funcs { int (*suspend)(struct amdgpu_xcp_mgr *xcp_mgr, int xcp_id); int (*prepare_resume)(struct amdgpu_xcp_mgr *xcp_mgr, int xcp_id); int (*resume)(struct amdgpu_xcp_mgr *xcp_mgr, int xcp_id); - int (*select_scheds)(struct amdgpu_device *adev, - u32 hw_ip, u32 hw_prio, struct amdgpu_fpriv *fpriv, - unsigned int *num_scheds, struct drm_gpu_scheduler ***scheds); - int (*update_partition_sched_list)(struct amdgpu_device *adev); }; int amdgpu_xcp_prepare_suspend(struct amdgpu_xcp_mgr *xcp_mgr, int xcp_id); @@ -176,19 +175,18 @@ int amdgpu_xcp_open_device(struct amdgpu_device *adev, struct drm_file *file_priv); void amdgpu_xcp_release_sched(struct amdgpu_device *adev, struct amdgpu_ctx_entity *entity); - +int amdgpu_xcp_select_scheds(struct amdgpu_device *adev, + u32 hw_ip, u32 hw_prio, + struct amdgpu_fpriv *fpriv, + unsigned int *num_scheds, + struct drm_gpu_scheduler ***scheds); +void amdgpu_xcp_update_supported_modes(struct amdgpu_xcp_mgr *xcp_mgr); +int amdgpu_xcp_update_partition_sched_list(struct amdgpu_device *adev); +int amdgpu_xcp_pre_partition_switch(struct amdgpu_xcp_mgr *xcp_mgr, u32 flags); +int amdgpu_xcp_post_partition_switch(struct amdgpu_xcp_mgr *xcp_mgr, u32 flags); void amdgpu_xcp_sysfs_init(struct amdgpu_device *adev); void amdgpu_xcp_sysfs_fini(struct amdgpu_device *adev); -#define amdgpu_xcp_select_scheds(adev, e, c, d, x, y) \ - ((adev)->xcp_mgr && (adev)->xcp_mgr->funcs && \ - (adev)->xcp_mgr->funcs->select_scheds ? \ - (adev)->xcp_mgr->funcs->select_scheds((adev), (e), (c), (d), (x), (y)) : -ENOENT) -#define amdgpu_xcp_update_partition_sched_list(adev) \ - ((adev)->xcp_mgr && (adev)->xcp_mgr->funcs && \ - (adev)->xcp_mgr->funcs->update_partition_sched_list ? 
\ - (adev)->xcp_mgr->funcs->update_partition_sched_list(adev) : 0) - static inline int amdgpu_xcp_get_num_xcp(struct amdgpu_xcp_mgr *xcp_mgr) { if (!xcp_mgr) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c index d9ad37711c3e..1ede308a7c67 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c @@ -1771,16 +1771,25 @@ void amdgpu_xgmi_early_init(struct amdgpu_device *adev) case IP_VERSION(9, 4, 0): case IP_VERSION(9, 4, 1): case IP_VERSION(9, 4, 2): - adev->gmc.xgmi.max_speed = XGMI_SPEED_25GT; + /* 25 GT/s */ + adev->gmc.xgmi.max_speed = 25; adev->gmc.xgmi.max_width = 16; break; case IP_VERSION(9, 4, 3): case IP_VERSION(9, 4, 4): case IP_VERSION(9, 5, 0): - adev->gmc.xgmi.max_speed = XGMI_SPEED_32GT; + /* 32 GT/s */ + adev->gmc.xgmi.max_speed = 32; adev->gmc.xgmi.max_width = 16; break; default: break; } } + +void amgpu_xgmi_set_max_speed_width(struct amdgpu_device *adev, + uint16_t max_speed, uint8_t max_width) +{ + adev->gmc.xgmi.max_speed = max_speed; + adev->gmc.xgmi.max_width = max_width; +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h index f994be985f42..5f36aff17e79 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h @@ -25,12 +25,6 @@ #include <drm/task_barrier.h> #include "amdgpu_ras.h" -enum amdgpu_xgmi_link_speed { - XGMI_SPEED_16GT = 16, - XGMI_SPEED_25GT = 25, - XGMI_SPEED_32GT = 32 -}; - struct amdgpu_hive_info { struct kobject kobj; uint64_t hive_id; @@ -97,7 +91,7 @@ struct amdgpu_xgmi { struct ras_common_if *ras_if; bool connected_to_cpu; struct amdgpu_xgmi_ras *ras; - enum amdgpu_xgmi_link_speed max_speed; + uint16_t max_speed; uint8_t max_width; }; @@ -130,4 +124,10 @@ int amdgpu_xgmi_get_ext_link(struct amdgpu_device *adev, int link_num); void amdgpu_xgmi_early_init(struct amdgpu_device *adev); uint32_t amdgpu_xgmi_get_max_bandwidth(struct amdgpu_device *adev); +void amgpu_xgmi_set_max_speed_width(struct amdgpu_device *adev, + uint16_t max_speed, uint8_t max_width); + +/* Cleanup macro for use with __free(xgmi_put_hive) */ +DEFINE_FREE(xgmi_put_hive, struct amdgpu_hive_info *, if (_T) amdgpu_put_xgmi_hive(_T)) + #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h b/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h index 92ca13097aaa..3a79ed7d8031 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h @@ -113,7 +113,8 @@ union amd_sriov_reg_access_flags { uint32_t vf_reg_access_mmhub : 1; uint32_t vf_reg_access_gc : 1; uint32_t vf_reg_access_l1_tlb_cntl : 1; - uint32_t reserved : 28; + uint32_t vf_reg_access_sq_config : 1; + uint32_t reserved : 27; } flags; uint32_t all; }; @@ -404,12 +405,17 @@ struct amd_sriov_ras_cper_dump { uint32_t buf[]; }; +struct amd_sriov_ras_chk_criti { + uint32_t hit; +}; + struct amdsriov_ras_telemetry { struct amd_sriov_ras_telemetry_header header; union { struct amd_sriov_ras_telemetry_error_count error_count; struct amd_sriov_ras_cper_dump cper_dump; + struct amd_sriov_ras_chk_criti chk_criti; } body; }; diff --git a/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram.c b/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram.c index 1c083304ae77..811124ff88a8 100644 --- a/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram.c +++ b/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram.c @@ -29,12 +29,11 @@ #include "gfx_v9_4_3.h" #include "gfxhub_v1_2.h" #include "sdma_v4_4_2.h" +#include "amdgpu_ip.h" #define XCP_INST_MASK(num_inst, 
xcp_id) \ (num_inst ? GENMASK(num_inst - 1, 0) << (xcp_id * num_inst) : 0) -#define AMDGPU_XCP_OPS_KFD (1 << 0) - void aqua_vanjaram_doorbell_index_init(struct amdgpu_device *adev) { int i; @@ -62,234 +61,6 @@ void aqua_vanjaram_doorbell_index_init(struct amdgpu_device *adev) adev->doorbell_index.max_assignment = AMDGPU_DOORBELL_LAYOUT1_MAX_ASSIGNMENT << 1; } -static bool aqua_vanjaram_xcp_vcn_shared(struct amdgpu_device *adev) -{ - return (adev->xcp_mgr->num_xcps > adev->vcn.num_vcn_inst); -} - -static void aqua_vanjaram_set_xcp_id(struct amdgpu_device *adev, - uint32_t inst_idx, struct amdgpu_ring *ring) -{ - int xcp_id; - enum AMDGPU_XCP_IP_BLOCK ip_blk; - uint32_t inst_mask; - - ring->xcp_id = AMDGPU_XCP_NO_PARTITION; - if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) - adev->gfx.enforce_isolation[0].xcp_id = ring->xcp_id; - if ((adev->xcp_mgr->mode == AMDGPU_XCP_MODE_NONE) || - (ring->funcs->type == AMDGPU_RING_TYPE_CPER)) - return; - - inst_mask = 1 << inst_idx; - - switch (ring->funcs->type) { - case AMDGPU_HW_IP_GFX: - case AMDGPU_RING_TYPE_COMPUTE: - case AMDGPU_RING_TYPE_KIQ: - ip_blk = AMDGPU_XCP_GFX; - break; - case AMDGPU_RING_TYPE_SDMA: - ip_blk = AMDGPU_XCP_SDMA; - break; - case AMDGPU_RING_TYPE_VCN_ENC: - case AMDGPU_RING_TYPE_VCN_JPEG: - ip_blk = AMDGPU_XCP_VCN; - break; - default: - DRM_ERROR("Not support ring type %d!", ring->funcs->type); - return; - } - - for (xcp_id = 0; xcp_id < adev->xcp_mgr->num_xcps; xcp_id++) { - if (adev->xcp_mgr->xcp[xcp_id].ip[ip_blk].inst_mask & inst_mask) { - ring->xcp_id = xcp_id; - dev_dbg(adev->dev, "ring:%s xcp_id :%u", ring->name, - ring->xcp_id); - if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) - adev->gfx.enforce_isolation[xcp_id].xcp_id = xcp_id; - break; - } - } -} - -static void aqua_vanjaram_xcp_gpu_sched_update( - struct amdgpu_device *adev, - struct amdgpu_ring *ring, - unsigned int sel_xcp_id) -{ - unsigned int *num_gpu_sched; - - num_gpu_sched = &adev->xcp_mgr->xcp[sel_xcp_id] - .gpu_sched[ring->funcs->type][ring->hw_prio].num_scheds; - adev->xcp_mgr->xcp[sel_xcp_id].gpu_sched[ring->funcs->type][ring->hw_prio] - .sched[(*num_gpu_sched)++] = &ring->sched; - DRM_DEBUG("%s :[%d] gpu_sched[%d][%d] = %d", ring->name, - sel_xcp_id, ring->funcs->type, - ring->hw_prio, *num_gpu_sched); -} - -static int aqua_vanjaram_xcp_sched_list_update( - struct amdgpu_device *adev) -{ - struct amdgpu_ring *ring; - int i; - - for (i = 0; i < MAX_XCP; i++) { - atomic_set(&adev->xcp_mgr->xcp[i].ref_cnt, 0); - memset(adev->xcp_mgr->xcp[i].gpu_sched, 0, sizeof(adev->xcp_mgr->xcp->gpu_sched)); - } - - if (adev->xcp_mgr->mode == AMDGPU_XCP_MODE_NONE) - return 0; - - for (i = 0; i < AMDGPU_MAX_RINGS; i++) { - ring = adev->rings[i]; - if (!ring || !ring->sched.ready || ring->no_scheduler) - continue; - - aqua_vanjaram_xcp_gpu_sched_update(adev, ring, ring->xcp_id); - - /* VCN may be shared by two partitions under CPX MODE in certain - * configs. 
- */ - if ((ring->funcs->type == AMDGPU_RING_TYPE_VCN_ENC || - ring->funcs->type == AMDGPU_RING_TYPE_VCN_JPEG) && - aqua_vanjaram_xcp_vcn_shared(adev)) - aqua_vanjaram_xcp_gpu_sched_update(adev, ring, ring->xcp_id + 1); - } - - return 0; -} - -static int aqua_vanjaram_update_partition_sched_list(struct amdgpu_device *adev) -{ - int i; - - for (i = 0; i < adev->num_rings; i++) { - struct amdgpu_ring *ring = adev->rings[i]; - - if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE || - ring->funcs->type == AMDGPU_RING_TYPE_KIQ) - aqua_vanjaram_set_xcp_id(adev, ring->xcc_id, ring); - else - aqua_vanjaram_set_xcp_id(adev, ring->me, ring); - } - - return aqua_vanjaram_xcp_sched_list_update(adev); -} - -static int aqua_vanjaram_select_scheds( - struct amdgpu_device *adev, - u32 hw_ip, - u32 hw_prio, - struct amdgpu_fpriv *fpriv, - unsigned int *num_scheds, - struct drm_gpu_scheduler ***scheds) -{ - u32 sel_xcp_id; - int i; - - if (fpriv->xcp_id == AMDGPU_XCP_NO_PARTITION) { - u32 least_ref_cnt = ~0; - - fpriv->xcp_id = 0; - for (i = 0; i < adev->xcp_mgr->num_xcps; i++) { - u32 total_ref_cnt; - - total_ref_cnt = atomic_read(&adev->xcp_mgr->xcp[i].ref_cnt); - if (total_ref_cnt < least_ref_cnt) { - fpriv->xcp_id = i; - least_ref_cnt = total_ref_cnt; - } - } - } - sel_xcp_id = fpriv->xcp_id; - - if (adev->xcp_mgr->xcp[sel_xcp_id].gpu_sched[hw_ip][hw_prio].num_scheds) { - *num_scheds = adev->xcp_mgr->xcp[fpriv->xcp_id].gpu_sched[hw_ip][hw_prio].num_scheds; - *scheds = adev->xcp_mgr->xcp[fpriv->xcp_id].gpu_sched[hw_ip][hw_prio].sched; - atomic_inc(&adev->xcp_mgr->xcp[sel_xcp_id].ref_cnt); - DRM_DEBUG("Selected partition #%d", sel_xcp_id); - } else { - DRM_ERROR("Failed to schedule partition #%d.", sel_xcp_id); - return -ENOENT; - } - - return 0; -} - -static int8_t aqua_vanjaram_logical_to_dev_inst(struct amdgpu_device *adev, - enum amd_hw_ip_block_type block, - int8_t inst) -{ - int8_t dev_inst; - - switch (block) { - case GC_HWIP: - case SDMA0_HWIP: - /* Both JPEG and VCN as JPEG is only alias of VCN */ - case VCN_HWIP: - dev_inst = adev->ip_map.dev_inst[block][inst]; - break; - default: - /* For rest of the IPs, no look up required. - * Assume 'logical instance == physical instance' for all configs. 
*/ - dev_inst = inst; - break; - } - - return dev_inst; -} - -static uint32_t aqua_vanjaram_logical_to_dev_mask(struct amdgpu_device *adev, - enum amd_hw_ip_block_type block, - uint32_t mask) -{ - uint32_t dev_mask = 0; - int8_t log_inst, dev_inst; - - while (mask) { - log_inst = ffs(mask) - 1; - dev_inst = aqua_vanjaram_logical_to_dev_inst(adev, block, log_inst); - dev_mask |= (1 << dev_inst); - mask &= ~(1 << log_inst); - } - - return dev_mask; -} - -static void aqua_vanjaram_populate_ip_map(struct amdgpu_device *adev, - enum amd_hw_ip_block_type ip_block, - uint32_t inst_mask) -{ - int l = 0, i; - - while (inst_mask) { - i = ffs(inst_mask) - 1; - adev->ip_map.dev_inst[ip_block][l++] = i; - inst_mask &= ~(1 << i); - } - for (; l < HWIP_MAX_INSTANCE; l++) - adev->ip_map.dev_inst[ip_block][l] = -1; -} - -void aqua_vanjaram_ip_map_init(struct amdgpu_device *adev) -{ - u32 ip_map[][2] = { - { GC_HWIP, adev->gfx.xcc_mask }, - { SDMA0_HWIP, adev->sdma.sdma_mask }, - { VCN_HWIP, adev->vcn.inst_mask }, - }; - int i; - - for (i = 0; i < ARRAY_SIZE(ip_map); ++i) - aqua_vanjaram_populate_ip_map(adev, ip_map[i][0], ip_map[i][1]); - - adev->ip_map.logical_to_dev_inst = aqua_vanjaram_logical_to_dev_inst; - adev->ip_map.logical_to_dev_mask = aqua_vanjaram_logical_to_dev_mask; -} - /* Fixed pattern for smn addressing on different AIDs: * bit[34]: indicate cross AID access * bit[33:32]: indicate target AID id @@ -353,11 +124,14 @@ static int aqua_vanjaram_query_partition_mode(struct amdgpu_xcp_mgr *xcp_mgr) if (adev->nbio.funcs->get_compute_partition_mode) { mode = adev->nbio.funcs->get_compute_partition_mode(adev); - if (mode != derv_mode) + if (mode != derv_mode) { dev_warn( adev->dev, "Mismatch in compute partition mode - reported : %d derived : %d", mode, derv_mode); + if (derv_mode == AMDGPU_UNKNOWN_COMPUTE_PARTITION_MODE) + amdgpu_device_bus_status_check(adev); + } } return mode; @@ -453,6 +227,7 @@ static int __aqua_vanjaram_get_px_mode_info(struct amdgpu_xcp_mgr *xcp_mgr, uint16_t *nps_modes) { struct amdgpu_device *adev = xcp_mgr->adev; + uint32_t gc_ver = amdgpu_ip_version(adev, GC_HWIP, 0); if (!num_xcp || !nps_modes || !(xcp_mgr->supp_xcp_modes & BIT(px_mode))) return -EINVAL; @@ -476,12 +251,14 @@ static int __aqua_vanjaram_get_px_mode_info(struct amdgpu_xcp_mgr *xcp_mgr, *num_xcp = 4; *nps_modes = BIT(AMDGPU_NPS1_PARTITION_MODE) | BIT(AMDGPU_NPS4_PARTITION_MODE); + if (gc_ver == IP_VERSION(9, 5, 0)) + *nps_modes |= BIT(AMDGPU_NPS2_PARTITION_MODE); break; case AMDGPU_CPX_PARTITION_MODE: *num_xcp = NUM_XCC(adev->gfx.xcc_mask); *nps_modes = BIT(AMDGPU_NPS1_PARTITION_MODE) | BIT(AMDGPU_NPS4_PARTITION_MODE); - if (amdgpu_sriov_vf(adev)) + if (gc_ver == IP_VERSION(9, 5, 0)) *nps_modes |= BIT(AMDGPU_NPS2_PARTITION_MODE); break; default: @@ -593,72 +370,6 @@ static bool __aqua_vanjaram_is_valid_mode(struct amdgpu_xcp_mgr *xcp_mgr, return false; } -static int __aqua_vanjaram_pre_partition_switch(struct amdgpu_xcp_mgr *xcp_mgr, u32 flags) -{ - /* TODO: - * Stop user queues and threads, and make sure GPU is empty of work. 
- */ - - if (flags & AMDGPU_XCP_OPS_KFD) - amdgpu_amdkfd_device_fini_sw(xcp_mgr->adev); - - return 0; -} - -static int __aqua_vanjaram_post_partition_switch(struct amdgpu_xcp_mgr *xcp_mgr, u32 flags) -{ - int ret = 0; - - if (flags & AMDGPU_XCP_OPS_KFD) { - amdgpu_amdkfd_device_probe(xcp_mgr->adev); - amdgpu_amdkfd_device_init(xcp_mgr->adev); - /* If KFD init failed, return failure */ - if (!xcp_mgr->adev->kfd.init_complete) - ret = -EIO; - } - - return ret; -} - -static void -__aqua_vanjaram_update_supported_modes(struct amdgpu_xcp_mgr *xcp_mgr) -{ - struct amdgpu_device *adev = xcp_mgr->adev; - - xcp_mgr->supp_xcp_modes = 0; - - switch (NUM_XCC(adev->gfx.xcc_mask)) { - case 8: - xcp_mgr->supp_xcp_modes = BIT(AMDGPU_SPX_PARTITION_MODE) | - BIT(AMDGPU_DPX_PARTITION_MODE) | - BIT(AMDGPU_QPX_PARTITION_MODE) | - BIT(AMDGPU_CPX_PARTITION_MODE); - break; - case 6: - xcp_mgr->supp_xcp_modes = BIT(AMDGPU_SPX_PARTITION_MODE) | - BIT(AMDGPU_TPX_PARTITION_MODE) | - BIT(AMDGPU_CPX_PARTITION_MODE); - break; - case 4: - xcp_mgr->supp_xcp_modes = BIT(AMDGPU_SPX_PARTITION_MODE) | - BIT(AMDGPU_DPX_PARTITION_MODE) | - BIT(AMDGPU_CPX_PARTITION_MODE); - break; - /* this seems only existing in emulation phase */ - case 2: - xcp_mgr->supp_xcp_modes = BIT(AMDGPU_SPX_PARTITION_MODE) | - BIT(AMDGPU_CPX_PARTITION_MODE); - break; - case 1: - xcp_mgr->supp_xcp_modes = BIT(AMDGPU_SPX_PARTITION_MODE) | - BIT(AMDGPU_CPX_PARTITION_MODE); - break; - - default: - break; - } -} - static void __aqua_vanjaram_update_available_partition_mode(struct amdgpu_xcp_mgr *xcp_mgr) { int mode; @@ -705,7 +416,7 @@ static int aqua_vanjaram_switch_partition_mode(struct amdgpu_xcp_mgr *xcp_mgr, goto out; } - ret = __aqua_vanjaram_pre_partition_switch(xcp_mgr, flags); + ret = amdgpu_xcp_pre_partition_switch(xcp_mgr, flags); if (ret) goto unlock; @@ -718,7 +429,7 @@ static int aqua_vanjaram_switch_partition_mode(struct amdgpu_xcp_mgr *xcp_mgr, *num_xcps = num_xcc / num_xcc_per_xcp; amdgpu_xcp_init(xcp_mgr, *num_xcps, mode); - ret = __aqua_vanjaram_post_partition_switch(xcp_mgr, flags); + ret = amdgpu_xcp_post_partition_switch(xcp_mgr, flags); if (!ret) __aqua_vanjaram_update_available_partition_mode(xcp_mgr); unlock: @@ -801,9 +512,6 @@ struct amdgpu_xcp_mgr_funcs aqua_vanjaram_xcp_funcs = { .get_ip_details = &aqua_vanjaram_get_xcp_ip_details, .get_xcp_res_info = &aqua_vanjaram_get_xcp_res_info, .get_xcp_mem_id = &aqua_vanjaram_get_xcp_mem_id, - .select_scheds = &aqua_vanjaram_select_scheds, - .update_partition_sched_list = - &aqua_vanjaram_update_partition_sched_list }; static int aqua_vanjaram_xcp_mgr_init(struct amdgpu_device *adev) @@ -818,7 +526,7 @@ static int aqua_vanjaram_xcp_mgr_init(struct amdgpu_device *adev) if (ret) return ret; - __aqua_vanjaram_update_supported_modes(adev->xcp_mgr); + amdgpu_xcp_update_supported_modes(adev->xcp_mgr); /* TODO: Default memory node affinity init */ return ret; @@ -858,7 +566,7 @@ int aqua_vanjaram_init_soc_config(struct amdgpu_device *adev) if (ret) return ret; - aqua_vanjaram_ip_map_init(adev); + amdgpu_ip_map_init(adev); return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/atom.c b/drivers/gpu/drm/amd/amdgpu/atom.c index 427b073de2fc..7a063e44d429 100644 --- a/drivers/gpu/drm/amd/amdgpu/atom.c +++ b/drivers/gpu/drm/amd/amdgpu/atom.c @@ -1246,6 +1246,10 @@ static int amdgpu_atom_execute_table_locked(struct atom_context *ctx, int index, ectx.last_jump_jiffies = 0; if (ws) { ectx.ws = kcalloc(4, ws, GFP_KERNEL); + if (!ectx.ws) { + ret = -ENOMEM; + goto free; + } ectx.ws_size = ws; } else { ectx.ws 
= NULL; @@ -1494,6 +1498,28 @@ static void atom_get_vbios_version(struct atom_context *ctx) } } +static void atom_get_vbios_build(struct atom_context *ctx) +{ + unsigned char *atom_rom_hdr; + unsigned char *str; + uint16_t base, len; + + base = CU16(ATOM_ROM_TABLE_PTR); + atom_rom_hdr = CSTR(base); + + str = CSTR(CU16(base + ATOM_ROM_CFG_PTR)); + /* Skip config string */ + while (str < atom_rom_hdr && *str++) + ; + /* Skip change list string */ + while (str < atom_rom_hdr && *str++) + ; + + len = min(atom_rom_hdr - str, STRLEN_NORMAL); + if (len) + strscpy(ctx->build_num, str, len); +} + struct atom_context *amdgpu_atom_parse(struct card_info *card, void *bios) { int base; @@ -1554,6 +1580,7 @@ struct atom_context *amdgpu_atom_parse(struct card_info *card, void *bios) atom_get_vbios_pn(ctx); atom_get_vbios_date(ctx); atom_get_vbios_version(ctx); + atom_get_vbios_build(ctx); return ctx; } diff --git a/drivers/gpu/drm/amd/amdgpu/atom.h b/drivers/gpu/drm/amd/amdgpu/atom.h index b807f6639a4c..825ff28731f5 100644 --- a/drivers/gpu/drm/amd/amdgpu/atom.h +++ b/drivers/gpu/drm/amd/amdgpu/atom.h @@ -37,6 +37,7 @@ struct drm_device; #define ATOM_ROM_MAGIC "ATOM" #define ATOM_ROM_MAGIC_PTR 4 +#define ATOM_ROM_CFG_PTR 0xC #define ATOM_ROM_MSG_PTR 0x10 #define ATOM_ROM_CMD_PTR 0x1E #define ATOM_ROM_DATA_PTR 0x20 @@ -151,6 +152,7 @@ struct atom_context { uint32_t version; uint8_t vbios_ver_str[STRLEN_NORMAL]; uint8_t date[STRLEN_NORMAL]; + uint8_t build_num[STRLEN_NORMAL]; }; extern int amdgpu_atom_debug; diff --git a/drivers/gpu/drm/amd/amdgpu/cyan_skillfish_reg_init.c b/drivers/gpu/drm/amd/amdgpu/cyan_skillfish_reg_init.c new file mode 100644 index 000000000000..ed1e25661706 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/cyan_skillfish_reg_init.c @@ -0,0 +1,56 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright 2018 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * + */ +#include "amdgpu.h" +#include "nv.h" + +#include "soc15_common.h" +#include "soc15_hw_ip.h" +#include "cyan_skillfish_ip_offset.h" + +int cyan_skillfish_reg_base_init(struct amdgpu_device *adev) +{ + /* HW has more IP blocks, only initialized the blocke needed by driver */ + uint32_t i; + + adev->gfx.xcc_mask = 1; + for (i = 0 ; i < MAX_INSTANCE ; ++i) { + adev->reg_offset[GC_HWIP][i] = (uint32_t *)(&(GC_BASE.instance[i])); + adev->reg_offset[HDP_HWIP][i] = (uint32_t *)(&(HDP_BASE.instance[i])); + adev->reg_offset[MMHUB_HWIP][i] = (uint32_t *)(&(MMHUB_BASE.instance[i])); + adev->reg_offset[ATHUB_HWIP][i] = (uint32_t *)(&(ATHUB_BASE.instance[i])); + adev->reg_offset[NBIO_HWIP][i] = (uint32_t *)(&(NBIO_BASE.instance[i])); + adev->reg_offset[MP0_HWIP][i] = (uint32_t *)(&(MP0_BASE.instance[i])); + adev->reg_offset[MP1_HWIP][i] = (uint32_t *)(&(MP1_BASE.instance[i])); + adev->reg_offset[VCN_HWIP][i] = (uint32_t *)(&(UVD0_BASE.instance[i])); + adev->reg_offset[DF_HWIP][i] = (uint32_t *)(&(DF_BASE.instance[i])); + adev->reg_offset[DCE_HWIP][i] = (uint32_t *)(&(DMU_BASE.instance[i])); + adev->reg_offset[OSSSYS_HWIP][i] = (uint32_t *)(&(OSSSYS_BASE.instance[i])); + adev->reg_offset[SDMA0_HWIP][i] = (uint32_t *)(&(GC_BASE.instance[i])); + adev->reg_offset[SDMA1_HWIP][i] = (uint32_t *)(&(GC_BASE.instance[i])); + adev->reg_offset[SMUIO_HWIP][i] = (uint32_t *)(&(SMUIO_BASE.instance[i])); + adev->reg_offset[THM_HWIP][i] = (uint32_t *)(&(THM_BASE.instance[i])); + adev->reg_offset[CLK_HWIP][i] = (uint32_t *)(&(CLK_BASE.instance[i])); + } + return 0; +} diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c index bf7c22f81cda..72ca6538b2e4 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c @@ -1141,8 +1141,7 @@ static void dce_v10_0_program_watermarks(struct amdgpu_device *adev, /* save values for DPM */ amdgpu_crtc->line_time = line_time; - amdgpu_crtc->wm_high = latency_watermark_a; - amdgpu_crtc->wm_low = latency_watermark_b; + /* Save number of lines the linebuffer leads before the scanout */ amdgpu_crtc->lb_vblank_lead_lines = lb_vblank_lead_lines; } @@ -1462,17 +1461,12 @@ static int dce_v10_0_audio_init(struct amdgpu_device *adev) static void dce_v10_0_audio_fini(struct amdgpu_device *adev) { - int i; - if (!amdgpu_audio) return; if (!adev->mode_info.audio.enabled) return; - for (i = 0; i < adev->mode_info.audio.num_pins; i++) - dce_v10_0_audio_enable(adev, &adev->mode_info.audio.pin[i], false); - adev->mode_info.audio.enabled = false; } diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c deleted file mode 100644 index 47e05783c4a0..000000000000 --- a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c +++ /dev/null @@ -1,3823 +0,0 @@ -/* - * Copyright 2014 Advanced Micro Devices, Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - * - */ - -#include <drm/drm_edid.h> -#include <drm/drm_fourcc.h> -#include <drm/drm_modeset_helper.h> -#include <drm/drm_modeset_helper_vtables.h> -#include <drm/drm_vblank.h> - -#include "amdgpu.h" -#include "amdgpu_pm.h" -#include "amdgpu_i2c.h" -#include "vid.h" -#include "atom.h" -#include "amdgpu_atombios.h" -#include "atombios_crtc.h" -#include "atombios_encoders.h" -#include "amdgpu_pll.h" -#include "amdgpu_connectors.h" -#include "amdgpu_display.h" -#include "dce_v11_0.h" - -#include "dce/dce_11_0_d.h" -#include "dce/dce_11_0_sh_mask.h" -#include "dce/dce_11_0_enum.h" -#include "oss/oss_3_0_d.h" -#include "oss/oss_3_0_sh_mask.h" -#include "gmc/gmc_8_1_d.h" -#include "gmc/gmc_8_1_sh_mask.h" - -#include "ivsrcid/ivsrcid_vislands30.h" - -static void dce_v11_0_set_display_funcs(struct amdgpu_device *adev); -static void dce_v11_0_set_irq_funcs(struct amdgpu_device *adev); -static void dce_v11_0_hpd_int_ack(struct amdgpu_device *adev, int hpd); - -static const u32 crtc_offsets[] = -{ - CRTC0_REGISTER_OFFSET, - CRTC1_REGISTER_OFFSET, - CRTC2_REGISTER_OFFSET, - CRTC3_REGISTER_OFFSET, - CRTC4_REGISTER_OFFSET, - CRTC5_REGISTER_OFFSET, - CRTC6_REGISTER_OFFSET -}; - -static const u32 hpd_offsets[] = -{ - HPD0_REGISTER_OFFSET, - HPD1_REGISTER_OFFSET, - HPD2_REGISTER_OFFSET, - HPD3_REGISTER_OFFSET, - HPD4_REGISTER_OFFSET, - HPD5_REGISTER_OFFSET -}; - -static const uint32_t dig_offsets[] = { - DIG0_REGISTER_OFFSET, - DIG1_REGISTER_OFFSET, - DIG2_REGISTER_OFFSET, - DIG3_REGISTER_OFFSET, - DIG4_REGISTER_OFFSET, - DIG5_REGISTER_OFFSET, - DIG6_REGISTER_OFFSET, - DIG7_REGISTER_OFFSET, - DIG8_REGISTER_OFFSET -}; - -static const struct { - uint32_t reg; - uint32_t vblank; - uint32_t vline; - uint32_t hpd; - -} interrupt_status_offsets[] = { { - .reg = mmDISP_INTERRUPT_STATUS, - .vblank = DISP_INTERRUPT_STATUS__LB_D1_VBLANK_INTERRUPT_MASK, - .vline = DISP_INTERRUPT_STATUS__LB_D1_VLINE_INTERRUPT_MASK, - .hpd = DISP_INTERRUPT_STATUS__DC_HPD1_INTERRUPT_MASK -}, { - .reg = mmDISP_INTERRUPT_STATUS_CONTINUE, - .vblank = DISP_INTERRUPT_STATUS_CONTINUE__LB_D2_VBLANK_INTERRUPT_MASK, - .vline = DISP_INTERRUPT_STATUS_CONTINUE__LB_D2_VLINE_INTERRUPT_MASK, - .hpd = DISP_INTERRUPT_STATUS_CONTINUE__DC_HPD2_INTERRUPT_MASK -}, { - .reg = mmDISP_INTERRUPT_STATUS_CONTINUE2, - .vblank = DISP_INTERRUPT_STATUS_CONTINUE2__LB_D3_VBLANK_INTERRUPT_MASK, - .vline = DISP_INTERRUPT_STATUS_CONTINUE2__LB_D3_VLINE_INTERRUPT_MASK, - .hpd = DISP_INTERRUPT_STATUS_CONTINUE2__DC_HPD3_INTERRUPT_MASK -}, { - .reg = mmDISP_INTERRUPT_STATUS_CONTINUE3, - .vblank = DISP_INTERRUPT_STATUS_CONTINUE3__LB_D4_VBLANK_INTERRUPT_MASK, - .vline = DISP_INTERRUPT_STATUS_CONTINUE3__LB_D4_VLINE_INTERRUPT_MASK, - .hpd = DISP_INTERRUPT_STATUS_CONTINUE3__DC_HPD4_INTERRUPT_MASK -}, { - .reg = mmDISP_INTERRUPT_STATUS_CONTINUE4, - .vblank = DISP_INTERRUPT_STATUS_CONTINUE4__LB_D5_VBLANK_INTERRUPT_MASK, - .vline = DISP_INTERRUPT_STATUS_CONTINUE4__LB_D5_VLINE_INTERRUPT_MASK, - .hpd = DISP_INTERRUPT_STATUS_CONTINUE4__DC_HPD5_INTERRUPT_MASK -}, { - .reg = 
mmDISP_INTERRUPT_STATUS_CONTINUE5, - .vblank = DISP_INTERRUPT_STATUS_CONTINUE5__LB_D6_VBLANK_INTERRUPT_MASK, - .vline = DISP_INTERRUPT_STATUS_CONTINUE5__LB_D6_VLINE_INTERRUPT_MASK, - .hpd = DISP_INTERRUPT_STATUS_CONTINUE5__DC_HPD6_INTERRUPT_MASK -} }; - -static const u32 cz_golden_settings_a11[] = -{ - mmCRTC_DOUBLE_BUFFER_CONTROL, 0x00010101, 0x00010000, - mmFBC_MISC, 0x1f311fff, 0x14300000, -}; - -static const u32 cz_mgcg_cgcg_init[] = -{ - mmXDMA_CLOCK_GATING_CNTL, 0xffffffff, 0x00000100, - mmXDMA_MEM_POWER_CNTL, 0x00000101, 0x00000000, -}; - -static const u32 stoney_golden_settings_a11[] = -{ - mmCRTC_DOUBLE_BUFFER_CONTROL, 0x00010101, 0x00010000, - mmFBC_MISC, 0x1f311fff, 0x14302000, -}; - -static const u32 polaris11_golden_settings_a11[] = -{ - mmDCI_CLK_CNTL, 0x00000080, 0x00000000, - mmFBC_DEBUG_COMP, 0x000000f0, 0x00000070, - mmFBC_DEBUG1, 0xffffffff, 0x00000008, - mmFBC_MISC, 0x9f313fff, 0x14302008, - mmHDMI_CONTROL, 0x313f031f, 0x00000011, -}; - -static const u32 polaris10_golden_settings_a11[] = -{ - mmDCI_CLK_CNTL, 0x00000080, 0x00000000, - mmFBC_DEBUG_COMP, 0x000000f0, 0x00000070, - mmFBC_MISC, 0x9f313fff, 0x14302008, - mmHDMI_CONTROL, 0x313f031f, 0x00000011, -}; - -static void dce_v11_0_init_golden_registers(struct amdgpu_device *adev) -{ - switch (adev->asic_type) { - case CHIP_CARRIZO: - amdgpu_device_program_register_sequence(adev, - cz_mgcg_cgcg_init, - ARRAY_SIZE(cz_mgcg_cgcg_init)); - amdgpu_device_program_register_sequence(adev, - cz_golden_settings_a11, - ARRAY_SIZE(cz_golden_settings_a11)); - break; - case CHIP_STONEY: - amdgpu_device_program_register_sequence(adev, - stoney_golden_settings_a11, - ARRAY_SIZE(stoney_golden_settings_a11)); - break; - case CHIP_POLARIS11: - case CHIP_POLARIS12: - amdgpu_device_program_register_sequence(adev, - polaris11_golden_settings_a11, - ARRAY_SIZE(polaris11_golden_settings_a11)); - break; - case CHIP_POLARIS10: - case CHIP_VEGAM: - amdgpu_device_program_register_sequence(adev, - polaris10_golden_settings_a11, - ARRAY_SIZE(polaris10_golden_settings_a11)); - break; - default: - break; - } -} - -static u32 dce_v11_0_audio_endpt_rreg(struct amdgpu_device *adev, - u32 block_offset, u32 reg) -{ - unsigned long flags; - u32 r; - - spin_lock_irqsave(&adev->audio_endpt_idx_lock, flags); - WREG32(mmAZALIA_F0_CODEC_ENDPOINT_INDEX + block_offset, reg); - r = RREG32(mmAZALIA_F0_CODEC_ENDPOINT_DATA + block_offset); - spin_unlock_irqrestore(&adev->audio_endpt_idx_lock, flags); - - return r; -} - -static void dce_v11_0_audio_endpt_wreg(struct amdgpu_device *adev, - u32 block_offset, u32 reg, u32 v) -{ - unsigned long flags; - - spin_lock_irqsave(&adev->audio_endpt_idx_lock, flags); - WREG32(mmAZALIA_F0_CODEC_ENDPOINT_INDEX + block_offset, reg); - WREG32(mmAZALIA_F0_CODEC_ENDPOINT_DATA + block_offset, v); - spin_unlock_irqrestore(&adev->audio_endpt_idx_lock, flags); -} - -static u32 dce_v11_0_vblank_get_counter(struct amdgpu_device *adev, int crtc) -{ - if (crtc < 0 || crtc >= adev->mode_info.num_crtc) - return 0; - else - return RREG32(mmCRTC_STATUS_FRAME_COUNT + crtc_offsets[crtc]); -} - -static void dce_v11_0_pageflip_interrupt_init(struct amdgpu_device *adev) -{ - unsigned i; - - /* Enable pflip interrupts */ - for (i = 0; i < adev->mode_info.num_crtc; i++) - amdgpu_irq_get(adev, &adev->pageflip_irq, i); -} - -static void dce_v11_0_pageflip_interrupt_fini(struct amdgpu_device *adev) -{ - unsigned i; - - /* Disable pflip interrupts */ - for (i = 0; i < adev->mode_info.num_crtc; i++) - amdgpu_irq_put(adev, &adev->pageflip_irq, i); -} - -/** - * 
dce_v11_0_page_flip - pageflip callback. - * - * @adev: amdgpu_device pointer - * @crtc_id: crtc to cleanup pageflip on - * @crtc_base: new address of the crtc (GPU MC address) - * @async: asynchronous flip - * - * Triggers the actual pageflip by updating the primary - * surface base address. - */ -static void dce_v11_0_page_flip(struct amdgpu_device *adev, - int crtc_id, u64 crtc_base, bool async) -{ - struct amdgpu_crtc *amdgpu_crtc = adev->mode_info.crtcs[crtc_id]; - struct drm_framebuffer *fb = amdgpu_crtc->base.primary->fb; - u32 tmp; - - /* flip immediate for async, default is vsync */ - tmp = RREG32(mmGRPH_FLIP_CONTROL + amdgpu_crtc->crtc_offset); - tmp = REG_SET_FIELD(tmp, GRPH_FLIP_CONTROL, - GRPH_SURFACE_UPDATE_IMMEDIATE_EN, async ? 1 : 0); - WREG32(mmGRPH_FLIP_CONTROL + amdgpu_crtc->crtc_offset, tmp); - /* update pitch */ - WREG32(mmGRPH_PITCH + amdgpu_crtc->crtc_offset, - fb->pitches[0] / fb->format->cpp[0]); - /* update the scanout addresses */ - WREG32(mmGRPH_PRIMARY_SURFACE_ADDRESS_HIGH + amdgpu_crtc->crtc_offset, - upper_32_bits(crtc_base)); - /* writing to the low address triggers the update */ - WREG32(mmGRPH_PRIMARY_SURFACE_ADDRESS + amdgpu_crtc->crtc_offset, - lower_32_bits(crtc_base)); - /* post the write */ - RREG32(mmGRPH_PRIMARY_SURFACE_ADDRESS + amdgpu_crtc->crtc_offset); -} - -static int dce_v11_0_crtc_get_scanoutpos(struct amdgpu_device *adev, int crtc, - u32 *vbl, u32 *position) -{ - if ((crtc < 0) || (crtc >= adev->mode_info.num_crtc)) - return -EINVAL; - - *vbl = RREG32(mmCRTC_V_BLANK_START_END + crtc_offsets[crtc]); - *position = RREG32(mmCRTC_STATUS_POSITION + crtc_offsets[crtc]); - - return 0; -} - -/** - * dce_v11_0_hpd_sense - hpd sense callback. - * - * @adev: amdgpu_device pointer - * @hpd: hpd (hotplug detect) pin - * - * Checks if a digital monitor is connected (evergreen+). - * Returns true if connected, false if not connected. - */ -static bool dce_v11_0_hpd_sense(struct amdgpu_device *adev, - enum amdgpu_hpd_id hpd) -{ - bool connected = false; - - if (hpd >= adev->mode_info.num_hpd) - return connected; - - if (RREG32(mmDC_HPD_INT_STATUS + hpd_offsets[hpd]) & - DC_HPD_INT_STATUS__DC_HPD_SENSE_MASK) - connected = true; - - return connected; -} - -/** - * dce_v11_0_hpd_set_polarity - hpd set polarity callback. - * - * @adev: amdgpu_device pointer - * @hpd: hpd (hotplug detect) pin - * - * Set the polarity of the hpd pin (evergreen+). - */ -static void dce_v11_0_hpd_set_polarity(struct amdgpu_device *adev, - enum amdgpu_hpd_id hpd) -{ - u32 tmp; - bool connected = dce_v11_0_hpd_sense(adev, hpd); - - if (hpd >= adev->mode_info.num_hpd) - return; - - tmp = RREG32(mmDC_HPD_INT_CONTROL + hpd_offsets[hpd]); - if (connected) - tmp = REG_SET_FIELD(tmp, DC_HPD_INT_CONTROL, DC_HPD_INT_POLARITY, 0); - else - tmp = REG_SET_FIELD(tmp, DC_HPD_INT_CONTROL, DC_HPD_INT_POLARITY, 1); - WREG32(mmDC_HPD_INT_CONTROL + hpd_offsets[hpd], tmp); -} - -/** - * dce_v11_0_hpd_init - hpd setup callback. - * - * @adev: amdgpu_device pointer - * - * Setup the hpd pins used by the card (evergreen+). - * Enable the pin, set the polarity, and enable the hpd interrupts. 
- */ -static void dce_v11_0_hpd_init(struct amdgpu_device *adev) -{ - struct drm_device *dev = adev_to_drm(adev); - struct drm_connector *connector; - struct drm_connector_list_iter iter; - u32 tmp; - - drm_connector_list_iter_begin(dev, &iter); - drm_for_each_connector_iter(connector, &iter) { - struct amdgpu_connector *amdgpu_connector = to_amdgpu_connector(connector); - - if (amdgpu_connector->hpd.hpd >= adev->mode_info.num_hpd) - continue; - - if (connector->connector_type == DRM_MODE_CONNECTOR_eDP || - connector->connector_type == DRM_MODE_CONNECTOR_LVDS) { - /* don't try to enable hpd on eDP or LVDS avoid breaking the - * aux dp channel on imac and help (but not completely fix) - * https://bugzilla.redhat.com/show_bug.cgi?id=726143 - * also avoid interrupt storms during dpms. - */ - tmp = RREG32(mmDC_HPD_INT_CONTROL + hpd_offsets[amdgpu_connector->hpd.hpd]); - tmp = REG_SET_FIELD(tmp, DC_HPD_INT_CONTROL, DC_HPD_INT_EN, 0); - WREG32(mmDC_HPD_INT_CONTROL + hpd_offsets[amdgpu_connector->hpd.hpd], tmp); - continue; - } - - tmp = RREG32(mmDC_HPD_CONTROL + hpd_offsets[amdgpu_connector->hpd.hpd]); - tmp = REG_SET_FIELD(tmp, DC_HPD_CONTROL, DC_HPD_EN, 1); - WREG32(mmDC_HPD_CONTROL + hpd_offsets[amdgpu_connector->hpd.hpd], tmp); - - tmp = RREG32(mmDC_HPD_TOGGLE_FILT_CNTL + hpd_offsets[amdgpu_connector->hpd.hpd]); - tmp = REG_SET_FIELD(tmp, DC_HPD_TOGGLE_FILT_CNTL, - DC_HPD_CONNECT_INT_DELAY, - AMDGPU_HPD_CONNECT_INT_DELAY_IN_MS); - tmp = REG_SET_FIELD(tmp, DC_HPD_TOGGLE_FILT_CNTL, - DC_HPD_DISCONNECT_INT_DELAY, - AMDGPU_HPD_DISCONNECT_INT_DELAY_IN_MS); - WREG32(mmDC_HPD_TOGGLE_FILT_CNTL + hpd_offsets[amdgpu_connector->hpd.hpd], tmp); - - dce_v11_0_hpd_int_ack(adev, amdgpu_connector->hpd.hpd); - dce_v11_0_hpd_set_polarity(adev, amdgpu_connector->hpd.hpd); - amdgpu_irq_get(adev, &adev->hpd_irq, amdgpu_connector->hpd.hpd); - } - drm_connector_list_iter_end(&iter); -} - -/** - * dce_v11_0_hpd_fini - hpd tear down callback. - * - * @adev: amdgpu_device pointer - * - * Tear down the hpd pins used by the card (evergreen+). - * Disable the hpd interrupts. 
- */ -static void dce_v11_0_hpd_fini(struct amdgpu_device *adev) -{ - struct drm_device *dev = adev_to_drm(adev); - struct drm_connector *connector; - struct drm_connector_list_iter iter; - u32 tmp; - - drm_connector_list_iter_begin(dev, &iter); - drm_for_each_connector_iter(connector, &iter) { - struct amdgpu_connector *amdgpu_connector = to_amdgpu_connector(connector); - - if (amdgpu_connector->hpd.hpd >= adev->mode_info.num_hpd) - continue; - - tmp = RREG32(mmDC_HPD_CONTROL + hpd_offsets[amdgpu_connector->hpd.hpd]); - tmp = REG_SET_FIELD(tmp, DC_HPD_CONTROL, DC_HPD_EN, 0); - WREG32(mmDC_HPD_CONTROL + hpd_offsets[amdgpu_connector->hpd.hpd], tmp); - - amdgpu_irq_put(adev, &adev->hpd_irq, amdgpu_connector->hpd.hpd); - } - drm_connector_list_iter_end(&iter); -} - -static u32 dce_v11_0_hpd_get_gpio_reg(struct amdgpu_device *adev) -{ - return mmDC_GPIO_HPD_A; -} - -static bool dce_v11_0_is_display_hung(struct amdgpu_device *adev) -{ - u32 crtc_hung = 0; - u32 crtc_status[6]; - u32 i, j, tmp; - - for (i = 0; i < adev->mode_info.num_crtc; i++) { - tmp = RREG32(mmCRTC_CONTROL + crtc_offsets[i]); - if (REG_GET_FIELD(tmp, CRTC_CONTROL, CRTC_MASTER_EN)) { - crtc_status[i] = RREG32(mmCRTC_STATUS_HV_COUNT + crtc_offsets[i]); - crtc_hung |= (1 << i); - } - } - - for (j = 0; j < 10; j++) { - for (i = 0; i < adev->mode_info.num_crtc; i++) { - if (crtc_hung & (1 << i)) { - tmp = RREG32(mmCRTC_STATUS_HV_COUNT + crtc_offsets[i]); - if (tmp != crtc_status[i]) - crtc_hung &= ~(1 << i); - } - } - if (crtc_hung == 0) - return false; - udelay(100); - } - - return true; -} - -static void dce_v11_0_set_vga_render_state(struct amdgpu_device *adev, - bool render) -{ - u32 tmp; - - /* Lockout access through VGA aperture*/ - tmp = RREG32(mmVGA_HDP_CONTROL); - if (render) - tmp = REG_SET_FIELD(tmp, VGA_HDP_CONTROL, VGA_MEMORY_DISABLE, 0); - else - tmp = REG_SET_FIELD(tmp, VGA_HDP_CONTROL, VGA_MEMORY_DISABLE, 1); - WREG32(mmVGA_HDP_CONTROL, tmp); - - /* disable VGA render */ - tmp = RREG32(mmVGA_RENDER_CONTROL); - if (render) - tmp = REG_SET_FIELD(tmp, VGA_RENDER_CONTROL, VGA_VSTATUS_CNTL, 1); - else - tmp = REG_SET_FIELD(tmp, VGA_RENDER_CONTROL, VGA_VSTATUS_CNTL, 0); - WREG32(mmVGA_RENDER_CONTROL, tmp); -} - -static int dce_v11_0_get_num_crtc (struct amdgpu_device *adev) -{ - int num_crtc = 0; - - switch (adev->asic_type) { - case CHIP_CARRIZO: - num_crtc = 3; - break; - case CHIP_STONEY: - num_crtc = 2; - break; - case CHIP_POLARIS10: - case CHIP_VEGAM: - num_crtc = 6; - break; - case CHIP_POLARIS11: - case CHIP_POLARIS12: - num_crtc = 5; - break; - default: - num_crtc = 0; - } - return num_crtc; -} - -void dce_v11_0_disable_dce(struct amdgpu_device *adev) -{ - /*Disable VGA render and enabled crtc, if has DCE engine*/ - if (amdgpu_atombios_has_dce_engine_info(adev)) { - u32 tmp; - int crtc_enabled, i; - - dce_v11_0_set_vga_render_state(adev, false); - - /*Disable crtc*/ - for (i = 0; i < dce_v11_0_get_num_crtc(adev); i++) { - crtc_enabled = REG_GET_FIELD(RREG32(mmCRTC_CONTROL + crtc_offsets[i]), - CRTC_CONTROL, CRTC_MASTER_EN); - if (crtc_enabled) { - WREG32(mmCRTC_UPDATE_LOCK + crtc_offsets[i], 1); - tmp = RREG32(mmCRTC_CONTROL + crtc_offsets[i]); - tmp = REG_SET_FIELD(tmp, CRTC_CONTROL, CRTC_MASTER_EN, 0); - WREG32(mmCRTC_CONTROL + crtc_offsets[i], tmp); - WREG32(mmCRTC_UPDATE_LOCK + crtc_offsets[i], 0); - } - } - } -} - -static void dce_v11_0_program_fmt(struct drm_encoder *encoder) -{ - struct drm_device *dev = encoder->dev; - struct amdgpu_device *adev = drm_to_adev(dev); - struct amdgpu_encoder *amdgpu_encoder 
= to_amdgpu_encoder(encoder); - struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(encoder->crtc); - struct drm_connector *connector = amdgpu_get_connector_for_encoder(encoder); - int bpc = 0; - u32 tmp = 0; - enum amdgpu_connector_dither dither = AMDGPU_FMT_DITHER_DISABLE; - - if (connector) { - struct amdgpu_connector *amdgpu_connector = to_amdgpu_connector(connector); - bpc = amdgpu_connector_get_monitor_bpc(connector); - dither = amdgpu_connector->dither; - } - - /* LVDS/eDP FMT is set up by atom */ - if (amdgpu_encoder->devices & ATOM_DEVICE_LCD_SUPPORT) - return; - - /* not needed for analog */ - if ((amdgpu_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC1) || - (amdgpu_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC2)) - return; - - if (bpc == 0) - return; - - switch (bpc) { - case 6: - if (dither == AMDGPU_FMT_DITHER_ENABLE) { - /* XXX sort out optimal dither settings */ - tmp = REG_SET_FIELD(tmp, FMT_BIT_DEPTH_CONTROL, FMT_FRAME_RANDOM_ENABLE, 1); - tmp = REG_SET_FIELD(tmp, FMT_BIT_DEPTH_CONTROL, FMT_HIGHPASS_RANDOM_ENABLE, 1); - tmp = REG_SET_FIELD(tmp, FMT_BIT_DEPTH_CONTROL, FMT_SPATIAL_DITHER_EN, 1); - tmp = REG_SET_FIELD(tmp, FMT_BIT_DEPTH_CONTROL, FMT_SPATIAL_DITHER_DEPTH, 0); - } else { - tmp = REG_SET_FIELD(tmp, FMT_BIT_DEPTH_CONTROL, FMT_TRUNCATE_EN, 1); - tmp = REG_SET_FIELD(tmp, FMT_BIT_DEPTH_CONTROL, FMT_TRUNCATE_DEPTH, 0); - } - break; - case 8: - if (dither == AMDGPU_FMT_DITHER_ENABLE) { - /* XXX sort out optimal dither settings */ - tmp = REG_SET_FIELD(tmp, FMT_BIT_DEPTH_CONTROL, FMT_FRAME_RANDOM_ENABLE, 1); - tmp = REG_SET_FIELD(tmp, FMT_BIT_DEPTH_CONTROL, FMT_HIGHPASS_RANDOM_ENABLE, 1); - tmp = REG_SET_FIELD(tmp, FMT_BIT_DEPTH_CONTROL, FMT_RGB_RANDOM_ENABLE, 1); - tmp = REG_SET_FIELD(tmp, FMT_BIT_DEPTH_CONTROL, FMT_SPATIAL_DITHER_EN, 1); - tmp = REG_SET_FIELD(tmp, FMT_BIT_DEPTH_CONTROL, FMT_SPATIAL_DITHER_DEPTH, 1); - } else { - tmp = REG_SET_FIELD(tmp, FMT_BIT_DEPTH_CONTROL, FMT_TRUNCATE_EN, 1); - tmp = REG_SET_FIELD(tmp, FMT_BIT_DEPTH_CONTROL, FMT_TRUNCATE_DEPTH, 1); - } - break; - case 10: - if (dither == AMDGPU_FMT_DITHER_ENABLE) { - /* XXX sort out optimal dither settings */ - tmp = REG_SET_FIELD(tmp, FMT_BIT_DEPTH_CONTROL, FMT_FRAME_RANDOM_ENABLE, 1); - tmp = REG_SET_FIELD(tmp, FMT_BIT_DEPTH_CONTROL, FMT_HIGHPASS_RANDOM_ENABLE, 1); - tmp = REG_SET_FIELD(tmp, FMT_BIT_DEPTH_CONTROL, FMT_RGB_RANDOM_ENABLE, 1); - tmp = REG_SET_FIELD(tmp, FMT_BIT_DEPTH_CONTROL, FMT_SPATIAL_DITHER_EN, 1); - tmp = REG_SET_FIELD(tmp, FMT_BIT_DEPTH_CONTROL, FMT_SPATIAL_DITHER_DEPTH, 2); - } else { - tmp = REG_SET_FIELD(tmp, FMT_BIT_DEPTH_CONTROL, FMT_TRUNCATE_EN, 1); - tmp = REG_SET_FIELD(tmp, FMT_BIT_DEPTH_CONTROL, FMT_TRUNCATE_DEPTH, 2); - } - break; - default: - /* not needed */ - break; - } - - WREG32(mmFMT_BIT_DEPTH_CONTROL + amdgpu_crtc->crtc_offset, tmp); -} - - -/* display watermark setup */ -/** - * dce_v11_0_line_buffer_adjust - Set up the line buffer - * - * @adev: amdgpu_device pointer - * @amdgpu_crtc: the selected display controller - * @mode: the current display mode on the selected display - * controller - * - * Setup up the line buffer allocation for - * the selected display controller (CIK). - * Returns the line buffer size in pixels. - */ -static u32 dce_v11_0_line_buffer_adjust(struct amdgpu_device *adev, - struct amdgpu_crtc *amdgpu_crtc, - struct drm_display_mode *mode) -{ - u32 tmp, buffer_alloc, i, mem_cfg; - u32 pipe_offset = amdgpu_crtc->crtc_id; - /* - * Line Buffer Setup - * There are 6 line buffers, one for each display controllers. 
- * There are 3 partitions per LB. Select the number of partitions - * to enable based on the display width. For display widths larger - * than 4096, you need use to use 2 display controllers and combine - * them using the stereo blender. - */ - if (amdgpu_crtc->base.enabled && mode) { - if (mode->crtc_hdisplay < 1920) { - mem_cfg = 1; - buffer_alloc = 2; - } else if (mode->crtc_hdisplay < 2560) { - mem_cfg = 2; - buffer_alloc = 2; - } else if (mode->crtc_hdisplay < 4096) { - mem_cfg = 0; - buffer_alloc = (adev->flags & AMD_IS_APU) ? 2 : 4; - } else { - DRM_DEBUG_KMS("Mode too big for LB!\n"); - mem_cfg = 0; - buffer_alloc = (adev->flags & AMD_IS_APU) ? 2 : 4; - } - } else { - mem_cfg = 1; - buffer_alloc = 0; - } - - tmp = RREG32(mmLB_MEMORY_CTRL + amdgpu_crtc->crtc_offset); - tmp = REG_SET_FIELD(tmp, LB_MEMORY_CTRL, LB_MEMORY_CONFIG, mem_cfg); - WREG32(mmLB_MEMORY_CTRL + amdgpu_crtc->crtc_offset, tmp); - - tmp = RREG32(mmPIPE0_DMIF_BUFFER_CONTROL + pipe_offset); - tmp = REG_SET_FIELD(tmp, PIPE0_DMIF_BUFFER_CONTROL, DMIF_BUFFERS_ALLOCATED, buffer_alloc); - WREG32(mmPIPE0_DMIF_BUFFER_CONTROL + pipe_offset, tmp); - - for (i = 0; i < adev->usec_timeout; i++) { - tmp = RREG32(mmPIPE0_DMIF_BUFFER_CONTROL + pipe_offset); - if (REG_GET_FIELD(tmp, PIPE0_DMIF_BUFFER_CONTROL, DMIF_BUFFERS_ALLOCATION_COMPLETED)) - break; - udelay(1); - } - - if (amdgpu_crtc->base.enabled && mode) { - switch (mem_cfg) { - case 0: - default: - return 4096 * 2; - case 1: - return 1920 * 2; - case 2: - return 2560 * 2; - } - } - - /* controller not enabled, so no lb used */ - return 0; -} - -/** - * cik_get_number_of_dram_channels - get the number of dram channels - * - * @adev: amdgpu_device pointer - * - * Look up the number of video ram channels (CIK). - * Used for display watermark bandwidth calculations - * Returns the number of dram channels - */ -static u32 cik_get_number_of_dram_channels(struct amdgpu_device *adev) -{ - u32 tmp = RREG32(mmMC_SHARED_CHMAP); - - switch (REG_GET_FIELD(tmp, MC_SHARED_CHMAP, NOOFCHAN)) { - case 0: - default: - return 1; - case 1: - return 2; - case 2: - return 4; - case 3: - return 8; - case 4: - return 3; - case 5: - return 6; - case 6: - return 10; - case 7: - return 12; - case 8: - return 16; - } -} - -struct dce10_wm_params { - u32 dram_channels; /* number of dram channels */ - u32 yclk; /* bandwidth per dram data pin in kHz */ - u32 sclk; /* engine clock in kHz */ - u32 disp_clk; /* display clock in kHz */ - u32 src_width; /* viewport width */ - u32 active_time; /* active display time in ns */ - u32 blank_time; /* blank time in ns */ - bool interlaced; /* mode is interlaced */ - fixed20_12 vsc; /* vertical scale ratio */ - u32 num_heads; /* number of active crtcs */ - u32 bytes_per_pixel; /* bytes per pixel display + overlay */ - u32 lb_size; /* line buffer allocated to pipe */ - u32 vtaps; /* vertical scaler taps */ -}; - -/** - * dce_v11_0_dram_bandwidth - get the dram bandwidth - * - * @wm: watermark calculation data - * - * Calculate the raw dram bandwidth (CIK). 
- * Used for display watermark bandwidth calculations - * Returns the dram bandwidth in MBytes/s - */ -static u32 dce_v11_0_dram_bandwidth(struct dce10_wm_params *wm) -{ - /* Calculate raw DRAM Bandwidth */ - fixed20_12 dram_efficiency; /* 0.7 */ - fixed20_12 yclk, dram_channels, bandwidth; - fixed20_12 a; - - a.full = dfixed_const(1000); - yclk.full = dfixed_const(wm->yclk); - yclk.full = dfixed_div(yclk, a); - dram_channels.full = dfixed_const(wm->dram_channels * 4); - a.full = dfixed_const(10); - dram_efficiency.full = dfixed_const(7); - dram_efficiency.full = dfixed_div(dram_efficiency, a); - bandwidth.full = dfixed_mul(dram_channels, yclk); - bandwidth.full = dfixed_mul(bandwidth, dram_efficiency); - - return dfixed_trunc(bandwidth); -} - -/** - * dce_v11_0_dram_bandwidth_for_display - get the dram bandwidth for display - * - * @wm: watermark calculation data - * - * Calculate the dram bandwidth used for display (CIK). - * Used for display watermark bandwidth calculations - * Returns the dram bandwidth for display in MBytes/s - */ -static u32 dce_v11_0_dram_bandwidth_for_display(struct dce10_wm_params *wm) -{ - /* Calculate DRAM Bandwidth and the part allocated to display. */ - fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */ - fixed20_12 yclk, dram_channels, bandwidth; - fixed20_12 a; - - a.full = dfixed_const(1000); - yclk.full = dfixed_const(wm->yclk); - yclk.full = dfixed_div(yclk, a); - dram_channels.full = dfixed_const(wm->dram_channels * 4); - a.full = dfixed_const(10); - disp_dram_allocation.full = dfixed_const(3); /* XXX worse case value 0.3 */ - disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a); - bandwidth.full = dfixed_mul(dram_channels, yclk); - bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation); - - return dfixed_trunc(bandwidth); -} - -/** - * dce_v11_0_data_return_bandwidth - get the data return bandwidth - * - * @wm: watermark calculation data - * - * Calculate the data return bandwidth used for display (CIK). - * Used for display watermark bandwidth calculations - * Returns the data return bandwidth in MBytes/s - */ -static u32 dce_v11_0_data_return_bandwidth(struct dce10_wm_params *wm) -{ - /* Calculate the display Data return Bandwidth */ - fixed20_12 return_efficiency; /* 0.8 */ - fixed20_12 sclk, bandwidth; - fixed20_12 a; - - a.full = dfixed_const(1000); - sclk.full = dfixed_const(wm->sclk); - sclk.full = dfixed_div(sclk, a); - a.full = dfixed_const(10); - return_efficiency.full = dfixed_const(8); - return_efficiency.full = dfixed_div(return_efficiency, a); - a.full = dfixed_const(32); - bandwidth.full = dfixed_mul(a, sclk); - bandwidth.full = dfixed_mul(bandwidth, return_efficiency); - - return dfixed_trunc(bandwidth); -} - -/** - * dce_v11_0_dmif_request_bandwidth - get the dmif bandwidth - * - * @wm: watermark calculation data - * - * Calculate the dmif bandwidth used for display (CIK). 
- * Used for display watermark bandwidth calculations - * Returns the dmif bandwidth in MBytes/s - */ -static u32 dce_v11_0_dmif_request_bandwidth(struct dce10_wm_params *wm) -{ - /* Calculate the DMIF Request Bandwidth */ - fixed20_12 disp_clk_request_efficiency; /* 0.8 */ - fixed20_12 disp_clk, bandwidth; - fixed20_12 a, b; - - a.full = dfixed_const(1000); - disp_clk.full = dfixed_const(wm->disp_clk); - disp_clk.full = dfixed_div(disp_clk, a); - a.full = dfixed_const(32); - b.full = dfixed_mul(a, disp_clk); - - a.full = dfixed_const(10); - disp_clk_request_efficiency.full = dfixed_const(8); - disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a); - - bandwidth.full = dfixed_mul(b, disp_clk_request_efficiency); - - return dfixed_trunc(bandwidth); -} - -/** - * dce_v11_0_available_bandwidth - get the min available bandwidth - * - * @wm: watermark calculation data - * - * Calculate the min available bandwidth used for display (CIK). - * Used for display watermark bandwidth calculations - * Returns the min available bandwidth in MBytes/s - */ -static u32 dce_v11_0_available_bandwidth(struct dce10_wm_params *wm) -{ - /* Calculate the Available bandwidth. Display can use this temporarily but not in average. */ - u32 dram_bandwidth = dce_v11_0_dram_bandwidth(wm); - u32 data_return_bandwidth = dce_v11_0_data_return_bandwidth(wm); - u32 dmif_req_bandwidth = dce_v11_0_dmif_request_bandwidth(wm); - - return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth)); -} - -/** - * dce_v11_0_average_bandwidth - get the average available bandwidth - * - * @wm: watermark calculation data - * - * Calculate the average available bandwidth used for display (CIK). - * Used for display watermark bandwidth calculations - * Returns the average available bandwidth in MBytes/s - */ -static u32 dce_v11_0_average_bandwidth(struct dce10_wm_params *wm) -{ - /* Calculate the display mode Average Bandwidth - * DisplayMode should contain the source and destination dimensions, - * timing, etc. - */ - fixed20_12 bpp; - fixed20_12 line_time; - fixed20_12 src_width; - fixed20_12 bandwidth; - fixed20_12 a; - - a.full = dfixed_const(1000); - line_time.full = dfixed_const(wm->active_time + wm->blank_time); - line_time.full = dfixed_div(line_time, a); - bpp.full = dfixed_const(wm->bytes_per_pixel); - src_width.full = dfixed_const(wm->src_width); - bandwidth.full = dfixed_mul(src_width, bpp); - bandwidth.full = dfixed_mul(bandwidth, wm->vsc); - bandwidth.full = dfixed_div(bandwidth, line_time); - - return dfixed_trunc(bandwidth); -} - -/** - * dce_v11_0_latency_watermark - get the latency watermark - * - * @wm: watermark calculation data - * - * Calculate the latency watermark (CIK). - * Used for display watermark bandwidth calculations - * Returns the latency watermark in ns - */ -static u32 dce_v11_0_latency_watermark(struct dce10_wm_params *wm) -{ - /* First calculate the latency in ns */ - u32 mc_latency = 2000; /* 2000 ns. 
*/ - u32 available_bandwidth = dce_v11_0_available_bandwidth(wm); - u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth; - u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth; - u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */ - u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) + - (wm->num_heads * cursor_line_pair_return_time); - u32 latency = mc_latency + other_heads_data_return_time + dc_latency; - u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time; - u32 tmp, dmif_size = 12288; - fixed20_12 a, b, c; - - if (wm->num_heads == 0) - return 0; - - a.full = dfixed_const(2); - b.full = dfixed_const(1); - if ((wm->vsc.full > a.full) || - ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) || - (wm->vtaps >= 5) || - ((wm->vsc.full >= a.full) && wm->interlaced)) - max_src_lines_per_dst_line = 4; - else - max_src_lines_per_dst_line = 2; - - a.full = dfixed_const(available_bandwidth); - b.full = dfixed_const(wm->num_heads); - a.full = dfixed_div(a, b); - tmp = div_u64((u64) dmif_size * (u64) wm->disp_clk, mc_latency + 512); - tmp = min(dfixed_trunc(a), tmp); - - lb_fill_bw = min(tmp, wm->disp_clk * wm->bytes_per_pixel / 1000); - - a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel); - b.full = dfixed_const(1000); - c.full = dfixed_const(lb_fill_bw); - b.full = dfixed_div(c, b); - a.full = dfixed_div(a, b); - line_fill_time = dfixed_trunc(a); - - if (line_fill_time < wm->active_time) - return latency; - else - return latency + (line_fill_time - wm->active_time); - -} - -/** - * dce_v11_0_average_bandwidth_vs_dram_bandwidth_for_display - check - * average and available dram bandwidth - * - * @wm: watermark calculation data - * - * Check if the display average bandwidth fits in the display - * dram bandwidth (CIK). - * Used for display watermark bandwidth calculations - * Returns true if the display fits, false if not. - */ -static bool dce_v11_0_average_bandwidth_vs_dram_bandwidth_for_display(struct dce10_wm_params *wm) -{ - if (dce_v11_0_average_bandwidth(wm) <= - (dce_v11_0_dram_bandwidth_for_display(wm) / wm->num_heads)) - return true; - else - return false; -} - -/** - * dce_v11_0_average_bandwidth_vs_available_bandwidth - check - * average and available bandwidth - * - * @wm: watermark calculation data - * - * Check if the display average bandwidth fits in the display - * available bandwidth (CIK). - * Used for display watermark bandwidth calculations - * Returns true if the display fits, false if not. - */ -static bool dce_v11_0_average_bandwidth_vs_available_bandwidth(struct dce10_wm_params *wm) -{ - if (dce_v11_0_average_bandwidth(wm) <= - (dce_v11_0_available_bandwidth(wm) / wm->num_heads)) - return true; - else - return false; -} - -/** - * dce_v11_0_check_latency_hiding - check latency hiding - * - * @wm: watermark calculation data - * - * Check latency hiding (CIK). - * Used for display watermark bandwidth calculations - * Returns true if the display fits, false if not. 
- */ -static bool dce_v11_0_check_latency_hiding(struct dce10_wm_params *wm) -{ - u32 lb_partitions = wm->lb_size / wm->src_width; - u32 line_time = wm->active_time + wm->blank_time; - u32 latency_tolerant_lines; - u32 latency_hiding; - fixed20_12 a; - - a.full = dfixed_const(1); - if (wm->vsc.full > a.full) - latency_tolerant_lines = 1; - else { - if (lb_partitions <= (wm->vtaps + 1)) - latency_tolerant_lines = 1; - else - latency_tolerant_lines = 2; - } - - latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time); - - if (dce_v11_0_latency_watermark(wm) <= latency_hiding) - return true; - else - return false; -} - -/** - * dce_v11_0_program_watermarks - program display watermarks - * - * @adev: amdgpu_device pointer - * @amdgpu_crtc: the selected display controller - * @lb_size: line buffer size - * @num_heads: number of display controllers in use - * - * Calculate and program the display watermarks for the - * selected display controller (CIK). - */ -static void dce_v11_0_program_watermarks(struct amdgpu_device *adev, - struct amdgpu_crtc *amdgpu_crtc, - u32 lb_size, u32 num_heads) -{ - struct drm_display_mode *mode = &amdgpu_crtc->base.mode; - struct dce10_wm_params wm_low, wm_high; - u32 active_time; - u32 line_time = 0; - u32 latency_watermark_a = 0, latency_watermark_b = 0; - u32 tmp, wm_mask, lb_vblank_lead_lines = 0; - - if (amdgpu_crtc->base.enabled && num_heads && mode) { - active_time = (u32) div_u64((u64)mode->crtc_hdisplay * 1000000, - (u32)mode->clock); - line_time = (u32) div_u64((u64)mode->crtc_htotal * 1000000, - (u32)mode->clock); - line_time = min_t(u32, line_time, 65535); - - /* watermark for high clocks */ - if (adev->pm.dpm_enabled) { - wm_high.yclk = - amdgpu_dpm_get_mclk(adev, false) * 10; - wm_high.sclk = - amdgpu_dpm_get_sclk(adev, false) * 10; - } else { - wm_high.yclk = adev->pm.current_mclk * 10; - wm_high.sclk = adev->pm.current_sclk * 10; - } - - wm_high.disp_clk = mode->clock; - wm_high.src_width = mode->crtc_hdisplay; - wm_high.active_time = active_time; - wm_high.blank_time = line_time - wm_high.active_time; - wm_high.interlaced = false; - if (mode->flags & DRM_MODE_FLAG_INTERLACE) - wm_high.interlaced = true; - wm_high.vsc = amdgpu_crtc->vsc; - wm_high.vtaps = 1; - if (amdgpu_crtc->rmx_type != RMX_OFF) - wm_high.vtaps = 2; - wm_high.bytes_per_pixel = 4; /* XXX: get this from fb config */ - wm_high.lb_size = lb_size; - wm_high.dram_channels = cik_get_number_of_dram_channels(adev); - wm_high.num_heads = num_heads; - - /* set for high clocks */ - latency_watermark_a = min_t(u32, dce_v11_0_latency_watermark(&wm_high), 65535); - - /* possibly force display priority to high */ - /* should really do this at mode validation time... 
*/ - if (!dce_v11_0_average_bandwidth_vs_dram_bandwidth_for_display(&wm_high) || - !dce_v11_0_average_bandwidth_vs_available_bandwidth(&wm_high) || - !dce_v11_0_check_latency_hiding(&wm_high) || - (adev->mode_info.disp_priority == 2)) { - DRM_DEBUG_KMS("force priority to high\n"); - } - - /* watermark for low clocks */ - if (adev->pm.dpm_enabled) { - wm_low.yclk = - amdgpu_dpm_get_mclk(adev, true) * 10; - wm_low.sclk = - amdgpu_dpm_get_sclk(adev, true) * 10; - } else { - wm_low.yclk = adev->pm.current_mclk * 10; - wm_low.sclk = adev->pm.current_sclk * 10; - } - - wm_low.disp_clk = mode->clock; - wm_low.src_width = mode->crtc_hdisplay; - wm_low.active_time = active_time; - wm_low.blank_time = line_time - wm_low.active_time; - wm_low.interlaced = false; - if (mode->flags & DRM_MODE_FLAG_INTERLACE) - wm_low.interlaced = true; - wm_low.vsc = amdgpu_crtc->vsc; - wm_low.vtaps = 1; - if (amdgpu_crtc->rmx_type != RMX_OFF) - wm_low.vtaps = 2; - wm_low.bytes_per_pixel = 4; /* XXX: get this from fb config */ - wm_low.lb_size = lb_size; - wm_low.dram_channels = cik_get_number_of_dram_channels(adev); - wm_low.num_heads = num_heads; - - /* set for low clocks */ - latency_watermark_b = min_t(u32, dce_v11_0_latency_watermark(&wm_low), 65535); - - /* possibly force display priority to high */ - /* should really do this at mode validation time... */ - if (!dce_v11_0_average_bandwidth_vs_dram_bandwidth_for_display(&wm_low) || - !dce_v11_0_average_bandwidth_vs_available_bandwidth(&wm_low) || - !dce_v11_0_check_latency_hiding(&wm_low) || - (adev->mode_info.disp_priority == 2)) { - DRM_DEBUG_KMS("force priority to high\n"); - } - lb_vblank_lead_lines = DIV_ROUND_UP(lb_size, mode->crtc_hdisplay); - } - - /* select wm A */ - wm_mask = RREG32(mmDPG_WATERMARK_MASK_CONTROL + amdgpu_crtc->crtc_offset); - tmp = REG_SET_FIELD(wm_mask, DPG_WATERMARK_MASK_CONTROL, URGENCY_WATERMARK_MASK, 1); - WREG32(mmDPG_WATERMARK_MASK_CONTROL + amdgpu_crtc->crtc_offset, tmp); - tmp = RREG32(mmDPG_PIPE_URGENCY_CONTROL + amdgpu_crtc->crtc_offset); - tmp = REG_SET_FIELD(tmp, DPG_PIPE_URGENCY_CONTROL, URGENCY_LOW_WATERMARK, latency_watermark_a); - tmp = REG_SET_FIELD(tmp, DPG_PIPE_URGENCY_CONTROL, URGENCY_HIGH_WATERMARK, line_time); - WREG32(mmDPG_PIPE_URGENCY_CONTROL + amdgpu_crtc->crtc_offset, tmp); - /* select wm B */ - tmp = REG_SET_FIELD(wm_mask, DPG_WATERMARK_MASK_CONTROL, URGENCY_WATERMARK_MASK, 2); - WREG32(mmDPG_WATERMARK_MASK_CONTROL + amdgpu_crtc->crtc_offset, tmp); - tmp = RREG32(mmDPG_PIPE_URGENCY_CONTROL + amdgpu_crtc->crtc_offset); - tmp = REG_SET_FIELD(tmp, DPG_PIPE_URGENCY_CONTROL, URGENCY_LOW_WATERMARK, latency_watermark_b); - tmp = REG_SET_FIELD(tmp, DPG_PIPE_URGENCY_CONTROL, URGENCY_HIGH_WATERMARK, line_time); - WREG32(mmDPG_PIPE_URGENCY_CONTROL + amdgpu_crtc->crtc_offset, tmp); - /* restore original selection */ - WREG32(mmDPG_WATERMARK_MASK_CONTROL + amdgpu_crtc->crtc_offset, wm_mask); - - /* save values for DPM */ - amdgpu_crtc->line_time = line_time; - amdgpu_crtc->wm_high = latency_watermark_a; - amdgpu_crtc->wm_low = latency_watermark_b; - /* Save number of lines the linebuffer leads before the scanout */ - amdgpu_crtc->lb_vblank_lead_lines = lb_vblank_lead_lines; -} - -/** - * dce_v11_0_bandwidth_update - program display watermarks - * - * @adev: amdgpu_device pointer - * - * Calculate and program the display watermarks and line - * buffer allocation (CIK). 
- */ -static void dce_v11_0_bandwidth_update(struct amdgpu_device *adev) -{ - struct drm_display_mode *mode = NULL; - u32 num_heads = 0, lb_size; - int i; - - amdgpu_display_update_priority(adev); - - for (i = 0; i < adev->mode_info.num_crtc; i++) { - if (adev->mode_info.crtcs[i]->base.enabled) - num_heads++; - } - for (i = 0; i < adev->mode_info.num_crtc; i++) { - mode = &adev->mode_info.crtcs[i]->base.mode; - lb_size = dce_v11_0_line_buffer_adjust(adev, adev->mode_info.crtcs[i], mode); - dce_v11_0_program_watermarks(adev, adev->mode_info.crtcs[i], - lb_size, num_heads); - } -} - -static void dce_v11_0_audio_get_connected_pins(struct amdgpu_device *adev) -{ - int i; - u32 offset, tmp; - - for (i = 0; i < adev->mode_info.audio.num_pins; i++) { - offset = adev->mode_info.audio.pin[i].offset; - tmp = RREG32_AUDIO_ENDPT(offset, - ixAZALIA_F0_CODEC_PIN_CONTROL_RESPONSE_CONFIGURATION_DEFAULT); - if (((tmp & - AZALIA_F0_CODEC_PIN_CONTROL_RESPONSE_CONFIGURATION_DEFAULT__PORT_CONNECTIVITY_MASK) >> - AZALIA_F0_CODEC_PIN_CONTROL_RESPONSE_CONFIGURATION_DEFAULT__PORT_CONNECTIVITY__SHIFT) == 1) - adev->mode_info.audio.pin[i].connected = false; - else - adev->mode_info.audio.pin[i].connected = true; - } -} - -static struct amdgpu_audio_pin *dce_v11_0_audio_get_pin(struct amdgpu_device *adev) -{ - int i; - - dce_v11_0_audio_get_connected_pins(adev); - - for (i = 0; i < adev->mode_info.audio.num_pins; i++) { - if (adev->mode_info.audio.pin[i].connected) - return &adev->mode_info.audio.pin[i]; - } - DRM_ERROR("No connected audio pins found!\n"); - return NULL; -} - -static void dce_v11_0_afmt_audio_select_pin(struct drm_encoder *encoder) -{ - struct amdgpu_device *adev = drm_to_adev(encoder->dev); - struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder); - struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv; - u32 tmp; - - if (!dig || !dig->afmt || !dig->afmt->pin) - return; - - tmp = RREG32(mmAFMT_AUDIO_SRC_CONTROL + dig->afmt->offset); - tmp = REG_SET_FIELD(tmp, AFMT_AUDIO_SRC_CONTROL, AFMT_AUDIO_SRC_SELECT, dig->afmt->pin->id); - WREG32(mmAFMT_AUDIO_SRC_CONTROL + dig->afmt->offset, tmp); -} - -static void dce_v11_0_audio_write_latency_fields(struct drm_encoder *encoder, - struct drm_display_mode *mode) -{ - struct drm_device *dev = encoder->dev; - struct amdgpu_device *adev = drm_to_adev(dev); - struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder); - struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv; - struct drm_connector *connector; - struct drm_connector_list_iter iter; - struct amdgpu_connector *amdgpu_connector = NULL; - u32 tmp; - int interlace = 0; - - if (!dig || !dig->afmt || !dig->afmt->pin) - return; - - drm_connector_list_iter_begin(dev, &iter); - drm_for_each_connector_iter(connector, &iter) { - if (connector->encoder == encoder) { - amdgpu_connector = to_amdgpu_connector(connector); - break; - } - } - drm_connector_list_iter_end(&iter); - - if (!amdgpu_connector) { - DRM_ERROR("Couldn't find encoder's connector\n"); - return; - } - - if (mode->flags & DRM_MODE_FLAG_INTERLACE) - interlace = 1; - if (connector->latency_present[interlace]) { - tmp = REG_SET_FIELD(0, AZALIA_F0_CODEC_PIN_CONTROL_RESPONSE_LIPSYNC, - VIDEO_LIPSYNC, connector->video_latency[interlace]); - tmp = REG_SET_FIELD(0, AZALIA_F0_CODEC_PIN_CONTROL_RESPONSE_LIPSYNC, - AUDIO_LIPSYNC, connector->audio_latency[interlace]); - } else { - tmp = REG_SET_FIELD(0, AZALIA_F0_CODEC_PIN_CONTROL_RESPONSE_LIPSYNC, - VIDEO_LIPSYNC, 0); - tmp = REG_SET_FIELD(0, 
AZALIA_F0_CODEC_PIN_CONTROL_RESPONSE_LIPSYNC, - AUDIO_LIPSYNC, 0); - } - WREG32_AUDIO_ENDPT(dig->afmt->pin->offset, - ixAZALIA_F0_CODEC_PIN_CONTROL_RESPONSE_LIPSYNC, tmp); -} - -static void dce_v11_0_audio_write_speaker_allocation(struct drm_encoder *encoder) -{ - struct drm_device *dev = encoder->dev; - struct amdgpu_device *adev = drm_to_adev(dev); - struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder); - struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv; - struct drm_connector *connector; - struct drm_connector_list_iter iter; - struct amdgpu_connector *amdgpu_connector = NULL; - u32 tmp; - u8 *sadb = NULL; - int sad_count; - - if (!dig || !dig->afmt || !dig->afmt->pin) - return; - - drm_connector_list_iter_begin(dev, &iter); - drm_for_each_connector_iter(connector, &iter) { - if (connector->encoder == encoder) { - amdgpu_connector = to_amdgpu_connector(connector); - break; - } - } - drm_connector_list_iter_end(&iter); - - if (!amdgpu_connector) { - DRM_ERROR("Couldn't find encoder's connector\n"); - return; - } - - sad_count = drm_edid_to_speaker_allocation(amdgpu_connector->edid, &sadb); - if (sad_count < 0) { - DRM_ERROR("Couldn't read Speaker Allocation Data Block: %d\n", sad_count); - sad_count = 0; - } - - /* program the speaker allocation */ - tmp = RREG32_AUDIO_ENDPT(dig->afmt->pin->offset, - ixAZALIA_F0_CODEC_PIN_CONTROL_CHANNEL_SPEAKER); - tmp = REG_SET_FIELD(tmp, AZALIA_F0_CODEC_PIN_CONTROL_CHANNEL_SPEAKER, - DP_CONNECTION, 0); - /* set HDMI mode */ - tmp = REG_SET_FIELD(tmp, AZALIA_F0_CODEC_PIN_CONTROL_CHANNEL_SPEAKER, - HDMI_CONNECTION, 1); - if (sad_count) - tmp = REG_SET_FIELD(tmp, AZALIA_F0_CODEC_PIN_CONTROL_CHANNEL_SPEAKER, - SPEAKER_ALLOCATION, sadb[0]); - else - tmp = REG_SET_FIELD(tmp, AZALIA_F0_CODEC_PIN_CONTROL_CHANNEL_SPEAKER, - SPEAKER_ALLOCATION, 5); /* stereo */ - WREG32_AUDIO_ENDPT(dig->afmt->pin->offset, - ixAZALIA_F0_CODEC_PIN_CONTROL_CHANNEL_SPEAKER, tmp); - - kfree(sadb); -} - -static void dce_v11_0_audio_write_sad_regs(struct drm_encoder *encoder) -{ - struct drm_device *dev = encoder->dev; - struct amdgpu_device *adev = drm_to_adev(dev); - struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder); - struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv; - struct drm_connector *connector; - struct drm_connector_list_iter iter; - struct amdgpu_connector *amdgpu_connector = NULL; - struct cea_sad *sads; - int i, sad_count; - - static const u16 eld_reg_to_type[][2] = { - { ixAZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR0, HDMI_AUDIO_CODING_TYPE_PCM }, - { ixAZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR1, HDMI_AUDIO_CODING_TYPE_AC3 }, - { ixAZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR2, HDMI_AUDIO_CODING_TYPE_MPEG1 }, - { ixAZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR3, HDMI_AUDIO_CODING_TYPE_MP3 }, - { ixAZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR4, HDMI_AUDIO_CODING_TYPE_MPEG2 }, - { ixAZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR5, HDMI_AUDIO_CODING_TYPE_AAC_LC }, - { ixAZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR6, HDMI_AUDIO_CODING_TYPE_DTS }, - { ixAZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR7, HDMI_AUDIO_CODING_TYPE_ATRAC }, - { ixAZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR9, HDMI_AUDIO_CODING_TYPE_EAC3 }, - { ixAZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR10, HDMI_AUDIO_CODING_TYPE_DTS_HD }, - { ixAZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR11, HDMI_AUDIO_CODING_TYPE_MLP }, - { ixAZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR13, HDMI_AUDIO_CODING_TYPE_WMA_PRO }, - }; - - if (!dig || !dig->afmt || 
!dig->afmt->pin) - return; - - drm_connector_list_iter_begin(dev, &iter); - drm_for_each_connector_iter(connector, &iter) { - if (connector->encoder == encoder) { - amdgpu_connector = to_amdgpu_connector(connector); - break; - } - } - drm_connector_list_iter_end(&iter); - - if (!amdgpu_connector) { - DRM_ERROR("Couldn't find encoder's connector\n"); - return; - } - - sad_count = drm_edid_to_sad(amdgpu_connector->edid, &sads); - if (sad_count < 0) - DRM_ERROR("Couldn't read SADs: %d\n", sad_count); - if (sad_count <= 0) - return; - BUG_ON(!sads); - - for (i = 0; i < ARRAY_SIZE(eld_reg_to_type); i++) { - u32 tmp = 0; - u8 stereo_freqs = 0; - int max_channels = -1; - int j; - - for (j = 0; j < sad_count; j++) { - struct cea_sad *sad = &sads[j]; - - if (sad->format == eld_reg_to_type[i][1]) { - if (sad->channels > max_channels) { - tmp = REG_SET_FIELD(tmp, AZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR0, - MAX_CHANNELS, sad->channels); - tmp = REG_SET_FIELD(tmp, AZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR0, - DESCRIPTOR_BYTE_2, sad->byte2); - tmp = REG_SET_FIELD(tmp, AZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR0, - SUPPORTED_FREQUENCIES, sad->freq); - max_channels = sad->channels; - } - - if (sad->format == HDMI_AUDIO_CODING_TYPE_PCM) - stereo_freqs |= sad->freq; - else - break; - } - } - - tmp = REG_SET_FIELD(tmp, AZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR0, - SUPPORTED_FREQUENCIES_STEREO, stereo_freqs); - WREG32_AUDIO_ENDPT(dig->afmt->pin->offset, eld_reg_to_type[i][0], tmp); - } - - kfree(sads); -} - -static void dce_v11_0_audio_enable(struct amdgpu_device *adev, - struct amdgpu_audio_pin *pin, - bool enable) -{ - if (!pin) - return; - - WREG32_AUDIO_ENDPT(pin->offset, ixAZALIA_F0_CODEC_PIN_CONTROL_HOT_PLUG_CONTROL, - enable ? AZALIA_F0_CODEC_PIN_CONTROL_HOT_PLUG_CONTROL__AUDIO_ENABLED_MASK : 0); -} - -static const u32 pin_offsets[] = -{ - AUD0_REGISTER_OFFSET, - AUD1_REGISTER_OFFSET, - AUD2_REGISTER_OFFSET, - AUD3_REGISTER_OFFSET, - AUD4_REGISTER_OFFSET, - AUD5_REGISTER_OFFSET, - AUD6_REGISTER_OFFSET, - AUD7_REGISTER_OFFSET, -}; - -static int dce_v11_0_audio_init(struct amdgpu_device *adev) -{ - int i; - - if (!amdgpu_audio) - return 0; - - adev->mode_info.audio.enabled = true; - - switch (adev->asic_type) { - case CHIP_CARRIZO: - case CHIP_STONEY: - adev->mode_info.audio.num_pins = 7; - break; - case CHIP_POLARIS10: - case CHIP_VEGAM: - adev->mode_info.audio.num_pins = 8; - break; - case CHIP_POLARIS11: - case CHIP_POLARIS12: - adev->mode_info.audio.num_pins = 6; - break; - default: - return -EINVAL; - } - - for (i = 0; i < adev->mode_info.audio.num_pins; i++) { - adev->mode_info.audio.pin[i].channels = -1; - adev->mode_info.audio.pin[i].rate = -1; - adev->mode_info.audio.pin[i].bits_per_sample = -1; - adev->mode_info.audio.pin[i].status_bits = 0; - adev->mode_info.audio.pin[i].category_code = 0; - adev->mode_info.audio.pin[i].connected = false; - adev->mode_info.audio.pin[i].offset = pin_offsets[i]; - adev->mode_info.audio.pin[i].id = i; - /* disable audio. 
it will be set up later */ - /* XXX remove once we switch to ip funcs */ - dce_v11_0_audio_enable(adev, &adev->mode_info.audio.pin[i], false); - } - - return 0; -} - -static void dce_v11_0_audio_fini(struct amdgpu_device *adev) -{ - int i; - - if (!amdgpu_audio) - return; - - if (!adev->mode_info.audio.enabled) - return; - - for (i = 0; i < adev->mode_info.audio.num_pins; i++) - dce_v11_0_audio_enable(adev, &adev->mode_info.audio.pin[i], false); - - adev->mode_info.audio.enabled = false; -} - -/* - * update the N and CTS parameters for a given pixel clock rate - */ -static void dce_v11_0_afmt_update_ACR(struct drm_encoder *encoder, uint32_t clock) -{ - struct drm_device *dev = encoder->dev; - struct amdgpu_device *adev = drm_to_adev(dev); - struct amdgpu_afmt_acr acr = amdgpu_afmt_acr(clock); - struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder); - struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv; - u32 tmp; - - tmp = RREG32(mmHDMI_ACR_32_0 + dig->afmt->offset); - tmp = REG_SET_FIELD(tmp, HDMI_ACR_32_0, HDMI_ACR_CTS_32, acr.cts_32khz); - WREG32(mmHDMI_ACR_32_0 + dig->afmt->offset, tmp); - tmp = RREG32(mmHDMI_ACR_32_1 + dig->afmt->offset); - tmp = REG_SET_FIELD(tmp, HDMI_ACR_32_1, HDMI_ACR_N_32, acr.n_32khz); - WREG32(mmHDMI_ACR_32_1 + dig->afmt->offset, tmp); - - tmp = RREG32(mmHDMI_ACR_44_0 + dig->afmt->offset); - tmp = REG_SET_FIELD(tmp, HDMI_ACR_44_0, HDMI_ACR_CTS_44, acr.cts_44_1khz); - WREG32(mmHDMI_ACR_44_0 + dig->afmt->offset, tmp); - tmp = RREG32(mmHDMI_ACR_44_1 + dig->afmt->offset); - tmp = REG_SET_FIELD(tmp, HDMI_ACR_44_1, HDMI_ACR_N_44, acr.n_44_1khz); - WREG32(mmHDMI_ACR_44_1 + dig->afmt->offset, tmp); - - tmp = RREG32(mmHDMI_ACR_48_0 + dig->afmt->offset); - tmp = REG_SET_FIELD(tmp, HDMI_ACR_48_0, HDMI_ACR_CTS_48, acr.cts_48khz); - WREG32(mmHDMI_ACR_48_0 + dig->afmt->offset, tmp); - tmp = RREG32(mmHDMI_ACR_48_1 + dig->afmt->offset); - tmp = REG_SET_FIELD(tmp, HDMI_ACR_48_1, HDMI_ACR_N_48, acr.n_48khz); - WREG32(mmHDMI_ACR_48_1 + dig->afmt->offset, tmp); - -} - -/* - * build a HDMI Video Info Frame - */ -static void dce_v11_0_afmt_update_avi_infoframe(struct drm_encoder *encoder, - void *buffer, size_t size) -{ - struct drm_device *dev = encoder->dev; - struct amdgpu_device *adev = drm_to_adev(dev); - struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder); - struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv; - uint8_t *frame = buffer + 3; - uint8_t *header = buffer; - - WREG32(mmAFMT_AVI_INFO0 + dig->afmt->offset, - frame[0x0] | (frame[0x1] << 8) | (frame[0x2] << 16) | (frame[0x3] << 24)); - WREG32(mmAFMT_AVI_INFO1 + dig->afmt->offset, - frame[0x4] | (frame[0x5] << 8) | (frame[0x6] << 16) | (frame[0x7] << 24)); - WREG32(mmAFMT_AVI_INFO2 + dig->afmt->offset, - frame[0x8] | (frame[0x9] << 8) | (frame[0xA] << 16) | (frame[0xB] << 24)); - WREG32(mmAFMT_AVI_INFO3 + dig->afmt->offset, - frame[0xC] | (frame[0xD] << 8) | (header[1] << 24)); -} - -static void dce_v11_0_audio_set_dto(struct drm_encoder *encoder, u32 clock) -{ - struct drm_device *dev = encoder->dev; - struct amdgpu_device *adev = drm_to_adev(dev); - struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder); - struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv; - struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(encoder->crtc); - u32 dto_phase = 24 * 1000; - u32 dto_modulo = clock; - u32 tmp; - - if (!dig || !dig->afmt) - return; - - /* XXX two dtos; generally use dto0 for hdmi */ - /* Express [24MHz / target pixel clock] as an exact rational - * 
number (coefficient of two integer numbers. DCCG_AUDIO_DTOx_PHASE - * is the numerator, DCCG_AUDIO_DTOx_MODULE is the denominator - */ - tmp = RREG32(mmDCCG_AUDIO_DTO_SOURCE); - tmp = REG_SET_FIELD(tmp, DCCG_AUDIO_DTO_SOURCE, DCCG_AUDIO_DTO0_SOURCE_SEL, - amdgpu_crtc->crtc_id); - WREG32(mmDCCG_AUDIO_DTO_SOURCE, tmp); - WREG32(mmDCCG_AUDIO_DTO0_PHASE, dto_phase); - WREG32(mmDCCG_AUDIO_DTO0_MODULE, dto_modulo); -} - -/* - * update the info frames with the data from the current display mode - */ -static void dce_v11_0_afmt_setmode(struct drm_encoder *encoder, - struct drm_display_mode *mode) -{ - struct drm_device *dev = encoder->dev; - struct amdgpu_device *adev = drm_to_adev(dev); - struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder); - struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv; - struct drm_connector *connector = amdgpu_get_connector_for_encoder(encoder); - u8 buffer[HDMI_INFOFRAME_HEADER_SIZE + HDMI_AVI_INFOFRAME_SIZE]; - struct hdmi_avi_infoframe frame; - ssize_t err; - u32 tmp; - int bpc = 8; - - if (!dig || !dig->afmt) - return; - - /* Silent, r600_hdmi_enable will raise WARN for us */ - if (!dig->afmt->enabled) - return; - - /* hdmi deep color mode general control packets setup, if bpc > 8 */ - if (encoder->crtc) { - struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(encoder->crtc); - bpc = amdgpu_crtc->bpc; - } - - /* disable audio prior to setting up hw */ - dig->afmt->pin = dce_v11_0_audio_get_pin(adev); - dce_v11_0_audio_enable(adev, dig->afmt->pin, false); - - dce_v11_0_audio_set_dto(encoder, mode->clock); - - tmp = RREG32(mmHDMI_VBI_PACKET_CONTROL + dig->afmt->offset); - tmp = REG_SET_FIELD(tmp, HDMI_VBI_PACKET_CONTROL, HDMI_NULL_SEND, 1); - WREG32(mmHDMI_VBI_PACKET_CONTROL + dig->afmt->offset, tmp); /* send null packets when required */ - - WREG32(mmAFMT_AUDIO_CRC_CONTROL + dig->afmt->offset, 0x1000); - - tmp = RREG32(mmHDMI_CONTROL + dig->afmt->offset); - switch (bpc) { - case 0: - case 6: - case 8: - case 16: - default: - tmp = REG_SET_FIELD(tmp, HDMI_CONTROL, HDMI_DEEP_COLOR_ENABLE, 0); - tmp = REG_SET_FIELD(tmp, HDMI_CONTROL, HDMI_DEEP_COLOR_DEPTH, 0); - DRM_DEBUG("%s: Disabling hdmi deep color for %d bpc.\n", - connector->name, bpc); - break; - case 10: - tmp = REG_SET_FIELD(tmp, HDMI_CONTROL, HDMI_DEEP_COLOR_ENABLE, 1); - tmp = REG_SET_FIELD(tmp, HDMI_CONTROL, HDMI_DEEP_COLOR_DEPTH, 1); - DRM_DEBUG("%s: Enabling hdmi deep color 30 for 10 bpc.\n", - connector->name); - break; - case 12: - tmp = REG_SET_FIELD(tmp, HDMI_CONTROL, HDMI_DEEP_COLOR_ENABLE, 1); - tmp = REG_SET_FIELD(tmp, HDMI_CONTROL, HDMI_DEEP_COLOR_DEPTH, 2); - DRM_DEBUG("%s: Enabling hdmi deep color 36 for 12 bpc.\n", - connector->name); - break; - } - WREG32(mmHDMI_CONTROL + dig->afmt->offset, tmp); - - tmp = RREG32(mmHDMI_VBI_PACKET_CONTROL + dig->afmt->offset); - tmp = REG_SET_FIELD(tmp, HDMI_VBI_PACKET_CONTROL, HDMI_NULL_SEND, 1); /* send null packets when required */ - tmp = REG_SET_FIELD(tmp, HDMI_VBI_PACKET_CONTROL, HDMI_GC_SEND, 1); /* send general control packets */ - tmp = REG_SET_FIELD(tmp, HDMI_VBI_PACKET_CONTROL, HDMI_GC_CONT, 1); /* send general control packets every frame */ - WREG32(mmHDMI_VBI_PACKET_CONTROL + dig->afmt->offset, tmp); - - tmp = RREG32(mmHDMI_INFOFRAME_CONTROL0 + dig->afmt->offset); - /* enable audio info frames (frames won't be set until audio is enabled) */ - tmp = REG_SET_FIELD(tmp, HDMI_INFOFRAME_CONTROL0, HDMI_AUDIO_INFO_SEND, 1); - /* required for audio info values to be updated */ - tmp = REG_SET_FIELD(tmp, HDMI_INFOFRAME_CONTROL0, 
HDMI_AUDIO_INFO_CONT, 1); - WREG32(mmHDMI_INFOFRAME_CONTROL0 + dig->afmt->offset, tmp); - - tmp = RREG32(mmAFMT_INFOFRAME_CONTROL0 + dig->afmt->offset); - /* required for audio info values to be updated */ - tmp = REG_SET_FIELD(tmp, AFMT_INFOFRAME_CONTROL0, AFMT_AUDIO_INFO_UPDATE, 1); - WREG32(mmAFMT_INFOFRAME_CONTROL0 + dig->afmt->offset, tmp); - - tmp = RREG32(mmHDMI_INFOFRAME_CONTROL1 + dig->afmt->offset); - /* anything other than 0 */ - tmp = REG_SET_FIELD(tmp, HDMI_INFOFRAME_CONTROL1, HDMI_AUDIO_INFO_LINE, 2); - WREG32(mmHDMI_INFOFRAME_CONTROL1 + dig->afmt->offset, tmp); - - WREG32(mmHDMI_GC + dig->afmt->offset, 0); /* unset HDMI_GC_AVMUTE */ - - tmp = RREG32(mmHDMI_AUDIO_PACKET_CONTROL + dig->afmt->offset); - /* set the default audio delay */ - tmp = REG_SET_FIELD(tmp, HDMI_AUDIO_PACKET_CONTROL, HDMI_AUDIO_DELAY_EN, 1); - /* should be suffient for all audio modes and small enough for all hblanks */ - tmp = REG_SET_FIELD(tmp, HDMI_AUDIO_PACKET_CONTROL, HDMI_AUDIO_PACKETS_PER_LINE, 3); - WREG32(mmHDMI_AUDIO_PACKET_CONTROL + dig->afmt->offset, tmp); - - tmp = RREG32(mmAFMT_AUDIO_PACKET_CONTROL + dig->afmt->offset); - /* allow 60958 channel status fields to be updated */ - tmp = REG_SET_FIELD(tmp, AFMT_AUDIO_PACKET_CONTROL, AFMT_60958_CS_UPDATE, 1); - WREG32(mmAFMT_AUDIO_PACKET_CONTROL + dig->afmt->offset, tmp); - - tmp = RREG32(mmHDMI_ACR_PACKET_CONTROL + dig->afmt->offset); - if (bpc > 8) - /* clear SW CTS value */ - tmp = REG_SET_FIELD(tmp, HDMI_ACR_PACKET_CONTROL, HDMI_ACR_SOURCE, 0); - else - /* select SW CTS value */ - tmp = REG_SET_FIELD(tmp, HDMI_ACR_PACKET_CONTROL, HDMI_ACR_SOURCE, 1); - /* allow hw to sent ACR packets when required */ - tmp = REG_SET_FIELD(tmp, HDMI_ACR_PACKET_CONTROL, HDMI_ACR_AUTO_SEND, 1); - WREG32(mmHDMI_ACR_PACKET_CONTROL + dig->afmt->offset, tmp); - - dce_v11_0_afmt_update_ACR(encoder, mode->clock); - - tmp = RREG32(mmAFMT_60958_0 + dig->afmt->offset); - tmp = REG_SET_FIELD(tmp, AFMT_60958_0, AFMT_60958_CS_CHANNEL_NUMBER_L, 1); - WREG32(mmAFMT_60958_0 + dig->afmt->offset, tmp); - - tmp = RREG32(mmAFMT_60958_1 + dig->afmt->offset); - tmp = REG_SET_FIELD(tmp, AFMT_60958_1, AFMT_60958_CS_CHANNEL_NUMBER_R, 2); - WREG32(mmAFMT_60958_1 + dig->afmt->offset, tmp); - - tmp = RREG32(mmAFMT_60958_2 + dig->afmt->offset); - tmp = REG_SET_FIELD(tmp, AFMT_60958_2, AFMT_60958_CS_CHANNEL_NUMBER_2, 3); - tmp = REG_SET_FIELD(tmp, AFMT_60958_2, AFMT_60958_CS_CHANNEL_NUMBER_3, 4); - tmp = REG_SET_FIELD(tmp, AFMT_60958_2, AFMT_60958_CS_CHANNEL_NUMBER_4, 5); - tmp = REG_SET_FIELD(tmp, AFMT_60958_2, AFMT_60958_CS_CHANNEL_NUMBER_5, 6); - tmp = REG_SET_FIELD(tmp, AFMT_60958_2, AFMT_60958_CS_CHANNEL_NUMBER_6, 7); - tmp = REG_SET_FIELD(tmp, AFMT_60958_2, AFMT_60958_CS_CHANNEL_NUMBER_7, 8); - WREG32(mmAFMT_60958_2 + dig->afmt->offset, tmp); - - dce_v11_0_audio_write_speaker_allocation(encoder); - - WREG32(mmAFMT_AUDIO_PACKET_CONTROL2 + dig->afmt->offset, - (0xff << AFMT_AUDIO_PACKET_CONTROL2__AFMT_AUDIO_CHANNEL_ENABLE__SHIFT)); - - dce_v11_0_afmt_audio_select_pin(encoder); - dce_v11_0_audio_write_sad_regs(encoder); - dce_v11_0_audio_write_latency_fields(encoder, mode); - - err = drm_hdmi_avi_infoframe_from_display_mode(&frame, connector, mode); - if (err < 0) { - DRM_ERROR("failed to setup AVI infoframe: %zd\n", err); - return; - } - - err = hdmi_avi_infoframe_pack(&frame, buffer, sizeof(buffer)); - if (err < 0) { - DRM_ERROR("failed to pack AVI infoframe: %zd\n", err); - return; - } - - dce_v11_0_afmt_update_avi_infoframe(encoder, buffer, sizeof(buffer)); - - tmp = 
RREG32(mmHDMI_INFOFRAME_CONTROL0 + dig->afmt->offset); - /* enable AVI info frames */ - tmp = REG_SET_FIELD(tmp, HDMI_INFOFRAME_CONTROL0, HDMI_AVI_INFO_SEND, 1); - /* required for audio info values to be updated */ - tmp = REG_SET_FIELD(tmp, HDMI_INFOFRAME_CONTROL0, HDMI_AVI_INFO_CONT, 1); - WREG32(mmHDMI_INFOFRAME_CONTROL0 + dig->afmt->offset, tmp); - - tmp = RREG32(mmHDMI_INFOFRAME_CONTROL1 + dig->afmt->offset); - tmp = REG_SET_FIELD(tmp, HDMI_INFOFRAME_CONTROL1, HDMI_AVI_INFO_LINE, 2); - WREG32(mmHDMI_INFOFRAME_CONTROL1 + dig->afmt->offset, tmp); - - tmp = RREG32(mmAFMT_AUDIO_PACKET_CONTROL + dig->afmt->offset); - /* send audio packets */ - tmp = REG_SET_FIELD(tmp, AFMT_AUDIO_PACKET_CONTROL, AFMT_AUDIO_SAMPLE_SEND, 1); - WREG32(mmAFMT_AUDIO_PACKET_CONTROL + dig->afmt->offset, tmp); - - WREG32(mmAFMT_RAMP_CONTROL0 + dig->afmt->offset, 0x00FFFFFF); - WREG32(mmAFMT_RAMP_CONTROL1 + dig->afmt->offset, 0x007FFFFF); - WREG32(mmAFMT_RAMP_CONTROL2 + dig->afmt->offset, 0x00000001); - WREG32(mmAFMT_RAMP_CONTROL3 + dig->afmt->offset, 0x00000001); - - /* enable audio after to setting up hw */ - dce_v11_0_audio_enable(adev, dig->afmt->pin, true); -} - -static void dce_v11_0_afmt_enable(struct drm_encoder *encoder, bool enable) -{ - struct drm_device *dev = encoder->dev; - struct amdgpu_device *adev = drm_to_adev(dev); - struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder); - struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv; - - if (!dig || !dig->afmt) - return; - - /* Silent, r600_hdmi_enable will raise WARN for us */ - if (enable && dig->afmt->enabled) - return; - if (!enable && !dig->afmt->enabled) - return; - - if (!enable && dig->afmt->pin) { - dce_v11_0_audio_enable(adev, dig->afmt->pin, false); - dig->afmt->pin = NULL; - } - - dig->afmt->enabled = enable; - - DRM_DEBUG("%sabling AFMT interface @ 0x%04X for encoder 0x%x\n", - enable ? 
"En" : "Dis", dig->afmt->offset, amdgpu_encoder->encoder_id); -} - -static int dce_v11_0_afmt_init(struct amdgpu_device *adev) -{ - int i; - - for (i = 0; i < adev->mode_info.num_dig; i++) - adev->mode_info.afmt[i] = NULL; - - /* DCE11 has audio blocks tied to DIG encoders */ - for (i = 0; i < adev->mode_info.num_dig; i++) { - adev->mode_info.afmt[i] = kzalloc(sizeof(struct amdgpu_afmt), GFP_KERNEL); - if (adev->mode_info.afmt[i]) { - adev->mode_info.afmt[i]->offset = dig_offsets[i]; - adev->mode_info.afmt[i]->id = i; - } else { - int j; - for (j = 0; j < i; j++) { - kfree(adev->mode_info.afmt[j]); - adev->mode_info.afmt[j] = NULL; - } - return -ENOMEM; - } - } - return 0; -} - -static void dce_v11_0_afmt_fini(struct amdgpu_device *adev) -{ - int i; - - for (i = 0; i < adev->mode_info.num_dig; i++) { - kfree(adev->mode_info.afmt[i]); - adev->mode_info.afmt[i] = NULL; - } -} - -static const u32 vga_control_regs[6] = -{ - mmD1VGA_CONTROL, - mmD2VGA_CONTROL, - mmD3VGA_CONTROL, - mmD4VGA_CONTROL, - mmD5VGA_CONTROL, - mmD6VGA_CONTROL, -}; - -static void dce_v11_0_vga_enable(struct drm_crtc *crtc, bool enable) -{ - struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc); - struct drm_device *dev = crtc->dev; - struct amdgpu_device *adev = drm_to_adev(dev); - u32 vga_control; - - vga_control = RREG32(vga_control_regs[amdgpu_crtc->crtc_id]) & ~1; - if (enable) - WREG32(vga_control_regs[amdgpu_crtc->crtc_id], vga_control | 1); - else - WREG32(vga_control_regs[amdgpu_crtc->crtc_id], vga_control); -} - -static void dce_v11_0_grph_enable(struct drm_crtc *crtc, bool enable) -{ - struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc); - struct drm_device *dev = crtc->dev; - struct amdgpu_device *adev = drm_to_adev(dev); - - if (enable) - WREG32(mmGRPH_ENABLE + amdgpu_crtc->crtc_offset, 1); - else - WREG32(mmGRPH_ENABLE + amdgpu_crtc->crtc_offset, 0); -} - -static int dce_v11_0_crtc_do_set_base(struct drm_crtc *crtc, - struct drm_framebuffer *fb, - int x, int y, int atomic) -{ - struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc); - struct drm_device *dev = crtc->dev; - struct amdgpu_device *adev = drm_to_adev(dev); - struct drm_framebuffer *target_fb; - struct drm_gem_object *obj; - struct amdgpu_bo *abo; - uint64_t fb_location, tiling_flags; - uint32_t fb_format, fb_pitch_pixels; - u32 fb_swap = REG_SET_FIELD(0, GRPH_SWAP_CNTL, GRPH_ENDIAN_SWAP, ENDIAN_NONE); - u32 pipe_config; - u32 tmp, viewport_w, viewport_h; - int r; - bool bypass_lut = false; - - /* no fb bound */ - if (!atomic && !crtc->primary->fb) { - DRM_DEBUG_KMS("No FB bound\n"); - return 0; - } - - if (atomic) - target_fb = fb; - else - target_fb = crtc->primary->fb; - - /* If atomic, assume fb object is pinned & idle & fenced and - * just update base pointers - */ - obj = target_fb->obj[0]; - abo = gem_to_amdgpu_bo(obj); - r = amdgpu_bo_reserve(abo, false); - if (unlikely(r != 0)) - return r; - - if (!atomic) { - abo->flags |= AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS; - r = amdgpu_bo_pin(abo, AMDGPU_GEM_DOMAIN_VRAM); - if (unlikely(r != 0)) { - amdgpu_bo_unreserve(abo); - return -EINVAL; - } - } - fb_location = amdgpu_bo_gpu_offset(abo); - - amdgpu_bo_get_tiling_flags(abo, &tiling_flags); - amdgpu_bo_unreserve(abo); - - pipe_config = AMDGPU_TILING_GET(tiling_flags, PIPE_CONFIG); - - switch (target_fb->format->format) { - case DRM_FORMAT_C8: - fb_format = REG_SET_FIELD(0, GRPH_CONTROL, GRPH_DEPTH, 0); - fb_format = REG_SET_FIELD(fb_format, GRPH_CONTROL, GRPH_FORMAT, 0); - break; - case DRM_FORMAT_XRGB4444: - case DRM_FORMAT_ARGB4444: - fb_format 
= REG_SET_FIELD(0, GRPH_CONTROL, GRPH_DEPTH, 1); - fb_format = REG_SET_FIELD(fb_format, GRPH_CONTROL, GRPH_FORMAT, 2); -#ifdef __BIG_ENDIAN - fb_swap = REG_SET_FIELD(fb_swap, GRPH_SWAP_CNTL, GRPH_ENDIAN_SWAP, - ENDIAN_8IN16); -#endif - break; - case DRM_FORMAT_XRGB1555: - case DRM_FORMAT_ARGB1555: - fb_format = REG_SET_FIELD(0, GRPH_CONTROL, GRPH_DEPTH, 1); - fb_format = REG_SET_FIELD(fb_format, GRPH_CONTROL, GRPH_FORMAT, 0); -#ifdef __BIG_ENDIAN - fb_swap = REG_SET_FIELD(fb_swap, GRPH_SWAP_CNTL, GRPH_ENDIAN_SWAP, - ENDIAN_8IN16); -#endif - break; - case DRM_FORMAT_BGRX5551: - case DRM_FORMAT_BGRA5551: - fb_format = REG_SET_FIELD(0, GRPH_CONTROL, GRPH_DEPTH, 1); - fb_format = REG_SET_FIELD(fb_format, GRPH_CONTROL, GRPH_FORMAT, 5); -#ifdef __BIG_ENDIAN - fb_swap = REG_SET_FIELD(fb_swap, GRPH_SWAP_CNTL, GRPH_ENDIAN_SWAP, - ENDIAN_8IN16); -#endif - break; - case DRM_FORMAT_RGB565: - fb_format = REG_SET_FIELD(0, GRPH_CONTROL, GRPH_DEPTH, 1); - fb_format = REG_SET_FIELD(fb_format, GRPH_CONTROL, GRPH_FORMAT, 1); -#ifdef __BIG_ENDIAN - fb_swap = REG_SET_FIELD(fb_swap, GRPH_SWAP_CNTL, GRPH_ENDIAN_SWAP, - ENDIAN_8IN16); -#endif - break; - case DRM_FORMAT_XRGB8888: - case DRM_FORMAT_ARGB8888: - fb_format = REG_SET_FIELD(0, GRPH_CONTROL, GRPH_DEPTH, 2); - fb_format = REG_SET_FIELD(fb_format, GRPH_CONTROL, GRPH_FORMAT, 0); -#ifdef __BIG_ENDIAN - fb_swap = REG_SET_FIELD(fb_swap, GRPH_SWAP_CNTL, GRPH_ENDIAN_SWAP, - ENDIAN_8IN32); -#endif - break; - case DRM_FORMAT_XRGB2101010: - case DRM_FORMAT_ARGB2101010: - fb_format = REG_SET_FIELD(0, GRPH_CONTROL, GRPH_DEPTH, 2); - fb_format = REG_SET_FIELD(fb_format, GRPH_CONTROL, GRPH_FORMAT, 1); -#ifdef __BIG_ENDIAN - fb_swap = REG_SET_FIELD(fb_swap, GRPH_SWAP_CNTL, GRPH_ENDIAN_SWAP, - ENDIAN_8IN32); -#endif - /* Greater 8 bpc fb needs to bypass hw-lut to retain precision */ - bypass_lut = true; - break; - case DRM_FORMAT_BGRX1010102: - case DRM_FORMAT_BGRA1010102: - fb_format = REG_SET_FIELD(0, GRPH_CONTROL, GRPH_DEPTH, 2); - fb_format = REG_SET_FIELD(fb_format, GRPH_CONTROL, GRPH_FORMAT, 4); -#ifdef __BIG_ENDIAN - fb_swap = REG_SET_FIELD(fb_swap, GRPH_SWAP_CNTL, GRPH_ENDIAN_SWAP, - ENDIAN_8IN32); -#endif - /* Greater 8 bpc fb needs to bypass hw-lut to retain precision */ - bypass_lut = true; - break; - case DRM_FORMAT_XBGR8888: - case DRM_FORMAT_ABGR8888: - fb_format = REG_SET_FIELD(0, GRPH_CONTROL, GRPH_DEPTH, 2); - fb_format = REG_SET_FIELD(fb_format, GRPH_CONTROL, GRPH_FORMAT, 0); - fb_swap = REG_SET_FIELD(fb_swap, GRPH_SWAP_CNTL, GRPH_RED_CROSSBAR, 2); - fb_swap = REG_SET_FIELD(fb_swap, GRPH_SWAP_CNTL, GRPH_BLUE_CROSSBAR, 2); -#ifdef __BIG_ENDIAN - fb_swap = REG_SET_FIELD(fb_swap, GRPH_SWAP_CNTL, GRPH_ENDIAN_SWAP, - ENDIAN_8IN32); -#endif - break; - default: - DRM_ERROR("Unsupported screen format %p4cc\n", - &target_fb->format->format); - return -EINVAL; - } - - if (AMDGPU_TILING_GET(tiling_flags, ARRAY_MODE) == ARRAY_2D_TILED_THIN1) { - unsigned bankw, bankh, mtaspect, tile_split, num_banks; - - bankw = AMDGPU_TILING_GET(tiling_flags, BANK_WIDTH); - bankh = AMDGPU_TILING_GET(tiling_flags, BANK_HEIGHT); - mtaspect = AMDGPU_TILING_GET(tiling_flags, MACRO_TILE_ASPECT); - tile_split = AMDGPU_TILING_GET(tiling_flags, TILE_SPLIT); - num_banks = AMDGPU_TILING_GET(tiling_flags, NUM_BANKS); - - fb_format = REG_SET_FIELD(fb_format, GRPH_CONTROL, GRPH_NUM_BANKS, num_banks); - fb_format = REG_SET_FIELD(fb_format, GRPH_CONTROL, GRPH_ARRAY_MODE, - ARRAY_2D_TILED_THIN1); - fb_format = REG_SET_FIELD(fb_format, GRPH_CONTROL, GRPH_TILE_SPLIT, - tile_split); - fb_format = 
REG_SET_FIELD(fb_format, GRPH_CONTROL, GRPH_BANK_WIDTH, bankw); - fb_format = REG_SET_FIELD(fb_format, GRPH_CONTROL, GRPH_BANK_HEIGHT, bankh); - fb_format = REG_SET_FIELD(fb_format, GRPH_CONTROL, GRPH_MACRO_TILE_ASPECT, - mtaspect); - fb_format = REG_SET_FIELD(fb_format, GRPH_CONTROL, GRPH_MICRO_TILE_MODE, - ADDR_SURF_MICRO_TILING_DISPLAY); - } else if (AMDGPU_TILING_GET(tiling_flags, ARRAY_MODE) == ARRAY_1D_TILED_THIN1) { - fb_format = REG_SET_FIELD(fb_format, GRPH_CONTROL, GRPH_ARRAY_MODE, - ARRAY_1D_TILED_THIN1); - } - - fb_format = REG_SET_FIELD(fb_format, GRPH_CONTROL, GRPH_PIPE_CONFIG, - pipe_config); - - dce_v11_0_vga_enable(crtc, false); - - /* Make sure surface address is updated at vertical blank rather than - * horizontal blank - */ - tmp = RREG32(mmGRPH_FLIP_CONTROL + amdgpu_crtc->crtc_offset); - tmp = REG_SET_FIELD(tmp, GRPH_FLIP_CONTROL, - GRPH_SURFACE_UPDATE_H_RETRACE_EN, 0); - WREG32(mmGRPH_FLIP_CONTROL + amdgpu_crtc->crtc_offset, tmp); - - WREG32(mmGRPH_PRIMARY_SURFACE_ADDRESS_HIGH + amdgpu_crtc->crtc_offset, - upper_32_bits(fb_location)); - WREG32(mmGRPH_SECONDARY_SURFACE_ADDRESS_HIGH + amdgpu_crtc->crtc_offset, - upper_32_bits(fb_location)); - WREG32(mmGRPH_PRIMARY_SURFACE_ADDRESS + amdgpu_crtc->crtc_offset, - (u32)fb_location & GRPH_PRIMARY_SURFACE_ADDRESS__GRPH_PRIMARY_SURFACE_ADDRESS_MASK); - WREG32(mmGRPH_SECONDARY_SURFACE_ADDRESS + amdgpu_crtc->crtc_offset, - (u32) fb_location & GRPH_SECONDARY_SURFACE_ADDRESS__GRPH_SECONDARY_SURFACE_ADDRESS_MASK); - WREG32(mmGRPH_CONTROL + amdgpu_crtc->crtc_offset, fb_format); - WREG32(mmGRPH_SWAP_CNTL + amdgpu_crtc->crtc_offset, fb_swap); - - /* - * The LUT only has 256 slots for indexing by a 8 bpc fb. Bypass the LUT - * for > 8 bpc scanout to avoid truncation of fb indices to 8 msb's, to - * retain the full precision throughout the pipeline. 
- */ - tmp = RREG32(mmGRPH_LUT_10BIT_BYPASS + amdgpu_crtc->crtc_offset); - if (bypass_lut) - tmp = REG_SET_FIELD(tmp, GRPH_LUT_10BIT_BYPASS, GRPH_LUT_10BIT_BYPASS_EN, 1); - else - tmp = REG_SET_FIELD(tmp, GRPH_LUT_10BIT_BYPASS, GRPH_LUT_10BIT_BYPASS_EN, 0); - WREG32(mmGRPH_LUT_10BIT_BYPASS + amdgpu_crtc->crtc_offset, tmp); - - if (bypass_lut) - DRM_DEBUG_KMS("Bypassing hardware LUT due to 10 bit fb scanout.\n"); - - WREG32(mmGRPH_SURFACE_OFFSET_X + amdgpu_crtc->crtc_offset, 0); - WREG32(mmGRPH_SURFACE_OFFSET_Y + amdgpu_crtc->crtc_offset, 0); - WREG32(mmGRPH_X_START + amdgpu_crtc->crtc_offset, 0); - WREG32(mmGRPH_Y_START + amdgpu_crtc->crtc_offset, 0); - WREG32(mmGRPH_X_END + amdgpu_crtc->crtc_offset, target_fb->width); - WREG32(mmGRPH_Y_END + amdgpu_crtc->crtc_offset, target_fb->height); - - fb_pitch_pixels = target_fb->pitches[0] / target_fb->format->cpp[0]; - WREG32(mmGRPH_PITCH + amdgpu_crtc->crtc_offset, fb_pitch_pixels); - - dce_v11_0_grph_enable(crtc, true); - - WREG32(mmLB_DESKTOP_HEIGHT + amdgpu_crtc->crtc_offset, - target_fb->height); - - x &= ~3; - y &= ~1; - WREG32(mmVIEWPORT_START + amdgpu_crtc->crtc_offset, - (x << 16) | y); - viewport_w = crtc->mode.hdisplay; - viewport_h = (crtc->mode.vdisplay + 1) & ~1; - WREG32(mmVIEWPORT_SIZE + amdgpu_crtc->crtc_offset, - (viewport_w << 16) | viewport_h); - - /* set pageflip to happen anywhere in vblank interval */ - WREG32(mmCRTC_MASTER_UPDATE_MODE + amdgpu_crtc->crtc_offset, 0); - - if (!atomic && fb && fb != crtc->primary->fb) { - abo = gem_to_amdgpu_bo(fb->obj[0]); - r = amdgpu_bo_reserve(abo, true); - if (unlikely(r != 0)) - return r; - amdgpu_bo_unpin(abo); - amdgpu_bo_unreserve(abo); - } - - /* Bytes per pixel may have changed */ - dce_v11_0_bandwidth_update(adev); - - return 0; -} - -static void dce_v11_0_set_interleave(struct drm_crtc *crtc, - struct drm_display_mode *mode) -{ - struct drm_device *dev = crtc->dev; - struct amdgpu_device *adev = drm_to_adev(dev); - struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc); - u32 tmp; - - tmp = RREG32(mmLB_DATA_FORMAT + amdgpu_crtc->crtc_offset); - if (mode->flags & DRM_MODE_FLAG_INTERLACE) - tmp = REG_SET_FIELD(tmp, LB_DATA_FORMAT, INTERLEAVE_EN, 1); - else - tmp = REG_SET_FIELD(tmp, LB_DATA_FORMAT, INTERLEAVE_EN, 0); - WREG32(mmLB_DATA_FORMAT + amdgpu_crtc->crtc_offset, tmp); -} - -static void dce_v11_0_crtc_load_lut(struct drm_crtc *crtc) -{ - struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc); - struct drm_device *dev = crtc->dev; - struct amdgpu_device *adev = drm_to_adev(dev); - u16 *r, *g, *b; - int i; - u32 tmp; - - DRM_DEBUG_KMS("%d\n", amdgpu_crtc->crtc_id); - - tmp = RREG32(mmINPUT_CSC_CONTROL + amdgpu_crtc->crtc_offset); - tmp = REG_SET_FIELD(tmp, INPUT_CSC_CONTROL, INPUT_CSC_GRPH_MODE, 0); - WREG32(mmINPUT_CSC_CONTROL + amdgpu_crtc->crtc_offset, tmp); - - tmp = RREG32(mmPRESCALE_GRPH_CONTROL + amdgpu_crtc->crtc_offset); - tmp = REG_SET_FIELD(tmp, PRESCALE_GRPH_CONTROL, GRPH_PRESCALE_BYPASS, 1); - WREG32(mmPRESCALE_GRPH_CONTROL + amdgpu_crtc->crtc_offset, tmp); - - tmp = RREG32(mmINPUT_GAMMA_CONTROL + amdgpu_crtc->crtc_offset); - tmp = REG_SET_FIELD(tmp, INPUT_GAMMA_CONTROL, GRPH_INPUT_GAMMA_MODE, 0); - WREG32(mmINPUT_GAMMA_CONTROL + amdgpu_crtc->crtc_offset, tmp); - - WREG32(mmDC_LUT_CONTROL + amdgpu_crtc->crtc_offset, 0); - - WREG32(mmDC_LUT_BLACK_OFFSET_BLUE + amdgpu_crtc->crtc_offset, 0); - WREG32(mmDC_LUT_BLACK_OFFSET_GREEN + amdgpu_crtc->crtc_offset, 0); - WREG32(mmDC_LUT_BLACK_OFFSET_RED + amdgpu_crtc->crtc_offset, 0); - - WREG32(mmDC_LUT_WHITE_OFFSET_BLUE + 
amdgpu_crtc->crtc_offset, 0xffff); - WREG32(mmDC_LUT_WHITE_OFFSET_GREEN + amdgpu_crtc->crtc_offset, 0xffff); - WREG32(mmDC_LUT_WHITE_OFFSET_RED + amdgpu_crtc->crtc_offset, 0xffff); - - WREG32(mmDC_LUT_RW_MODE + amdgpu_crtc->crtc_offset, 0); - WREG32(mmDC_LUT_WRITE_EN_MASK + amdgpu_crtc->crtc_offset, 0x00000007); - - WREG32(mmDC_LUT_RW_INDEX + amdgpu_crtc->crtc_offset, 0); - r = crtc->gamma_store; - g = r + crtc->gamma_size; - b = g + crtc->gamma_size; - for (i = 0; i < 256; i++) { - WREG32(mmDC_LUT_30_COLOR + amdgpu_crtc->crtc_offset, - ((*r++ & 0xffc0) << 14) | - ((*g++ & 0xffc0) << 4) | - (*b++ >> 6)); - } - - tmp = RREG32(mmDEGAMMA_CONTROL + amdgpu_crtc->crtc_offset); - tmp = REG_SET_FIELD(tmp, DEGAMMA_CONTROL, GRPH_DEGAMMA_MODE, 0); - tmp = REG_SET_FIELD(tmp, DEGAMMA_CONTROL, CURSOR_DEGAMMA_MODE, 0); - tmp = REG_SET_FIELD(tmp, DEGAMMA_CONTROL, CURSOR2_DEGAMMA_MODE, 0); - WREG32(mmDEGAMMA_CONTROL + amdgpu_crtc->crtc_offset, tmp); - - tmp = RREG32(mmGAMUT_REMAP_CONTROL + amdgpu_crtc->crtc_offset); - tmp = REG_SET_FIELD(tmp, GAMUT_REMAP_CONTROL, GRPH_GAMUT_REMAP_MODE, 0); - WREG32(mmGAMUT_REMAP_CONTROL + amdgpu_crtc->crtc_offset, tmp); - - tmp = RREG32(mmREGAMMA_CONTROL + amdgpu_crtc->crtc_offset); - tmp = REG_SET_FIELD(tmp, REGAMMA_CONTROL, GRPH_REGAMMA_MODE, 0); - WREG32(mmREGAMMA_CONTROL + amdgpu_crtc->crtc_offset, tmp); - - tmp = RREG32(mmOUTPUT_CSC_CONTROL + amdgpu_crtc->crtc_offset); - tmp = REG_SET_FIELD(tmp, OUTPUT_CSC_CONTROL, OUTPUT_CSC_GRPH_MODE, 0); - WREG32(mmOUTPUT_CSC_CONTROL + amdgpu_crtc->crtc_offset, tmp); - - /* XXX match this to the depth of the crtc fmt block, move to modeset? */ - WREG32(mmDENORM_CONTROL + amdgpu_crtc->crtc_offset, 0); - /* XXX this only needs to be programmed once per crtc at startup, - * not sure where the best place for it is - */ - tmp = RREG32(mmALPHA_CONTROL + amdgpu_crtc->crtc_offset); - tmp = REG_SET_FIELD(tmp, ALPHA_CONTROL, CURSOR_ALPHA_BLND_ENA, 1); - WREG32(mmALPHA_CONTROL + amdgpu_crtc->crtc_offset, tmp); -} - -static int dce_v11_0_pick_dig_encoder(struct drm_encoder *encoder) -{ - struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder); - struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv; - - switch (amdgpu_encoder->encoder_id) { - case ENCODER_OBJECT_ID_INTERNAL_UNIPHY: - if (dig->linkb) - return 1; - else - return 0; - case ENCODER_OBJECT_ID_INTERNAL_UNIPHY1: - if (dig->linkb) - return 3; - else - return 2; - case ENCODER_OBJECT_ID_INTERNAL_UNIPHY2: - if (dig->linkb) - return 5; - else - return 4; - case ENCODER_OBJECT_ID_INTERNAL_UNIPHY3: - return 6; - default: - DRM_ERROR("invalid encoder_id: 0x%x\n", amdgpu_encoder->encoder_id); - return 0; - } -} - -/** - * dce_v11_0_pick_pll - Allocate a PPLL for use by the crtc. - * - * @crtc: drm crtc - * - * Returns the PPLL (Pixel PLL) to be used by the crtc. For DP monitors - * a single PPLL can be used for all DP crtcs/encoders. For non-DP - * monitors a dedicated PPLL must be used. If a particular board has - * an external DP PLL, return ATOM_PPLL_INVALID to skip PLL programming - * as there is no need to program the PLL itself. If we are not able to - * allocate a PLL, return ATOM_PPLL_INVALID to skip PLL programming to - * avoid messing up an existing monitor. 
- * - * Asic specific PLL information - * - * DCE 10.x - * Tonga - * - PPLL1, PPLL2 are available for all UNIPHY (both DP and non-DP) - * CI - * - PPLL0, PPLL1, PPLL2 are available for all UNIPHY (both DP and non-DP) and DAC - * - */ -static u32 dce_v11_0_pick_pll(struct drm_crtc *crtc) -{ - struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc); - struct drm_device *dev = crtc->dev; - struct amdgpu_device *adev = drm_to_adev(dev); - u32 pll_in_use; - int pll; - - if ((adev->asic_type == CHIP_POLARIS10) || - (adev->asic_type == CHIP_POLARIS11) || - (adev->asic_type == CHIP_POLARIS12) || - (adev->asic_type == CHIP_VEGAM)) { - struct amdgpu_encoder *amdgpu_encoder = - to_amdgpu_encoder(amdgpu_crtc->encoder); - struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv; - - if (ENCODER_MODE_IS_DP(amdgpu_atombios_encoder_get_encoder_mode(amdgpu_crtc->encoder))) - return ATOM_DP_DTO; - - switch (amdgpu_encoder->encoder_id) { - case ENCODER_OBJECT_ID_INTERNAL_UNIPHY: - if (dig->linkb) - return ATOM_COMBOPHY_PLL1; - else - return ATOM_COMBOPHY_PLL0; - case ENCODER_OBJECT_ID_INTERNAL_UNIPHY1: - if (dig->linkb) - return ATOM_COMBOPHY_PLL3; - else - return ATOM_COMBOPHY_PLL2; - case ENCODER_OBJECT_ID_INTERNAL_UNIPHY2: - if (dig->linkb) - return ATOM_COMBOPHY_PLL5; - else - return ATOM_COMBOPHY_PLL4; - default: - DRM_ERROR("invalid encoder_id: 0x%x\n", amdgpu_encoder->encoder_id); - return ATOM_PPLL_INVALID; - } - } - - if (ENCODER_MODE_IS_DP(amdgpu_atombios_encoder_get_encoder_mode(amdgpu_crtc->encoder))) { - if (adev->clock.dp_extclk) - /* skip PPLL programming if using ext clock */ - return ATOM_PPLL_INVALID; - else { - /* use the same PPLL for all DP monitors */ - pll = amdgpu_pll_get_shared_dp_ppll(crtc); - if (pll != ATOM_PPLL_INVALID) - return pll; - } - } else { - /* use the same PPLL for all monitors with the same clock */ - pll = amdgpu_pll_get_shared_nondp_ppll(crtc); - if (pll != ATOM_PPLL_INVALID) - return pll; - } - - /* XXX need to determine what plls are available on each DCE11 part */ - pll_in_use = amdgpu_pll_get_use_mask(crtc); - if (adev->flags & AMD_IS_APU) { - if (!(pll_in_use & (1 << ATOM_PPLL1))) - return ATOM_PPLL1; - if (!(pll_in_use & (1 << ATOM_PPLL0))) - return ATOM_PPLL0; - DRM_ERROR("unable to allocate a PPLL\n"); - return ATOM_PPLL_INVALID; - } else { - if (!(pll_in_use & (1 << ATOM_PPLL2))) - return ATOM_PPLL2; - if (!(pll_in_use & (1 << ATOM_PPLL1))) - return ATOM_PPLL1; - if (!(pll_in_use & (1 << ATOM_PPLL0))) - return ATOM_PPLL0; - DRM_ERROR("unable to allocate a PPLL\n"); - return ATOM_PPLL_INVALID; - } - return ATOM_PPLL_INVALID; -} - -static void dce_v11_0_lock_cursor(struct drm_crtc *crtc, bool lock) -{ - struct amdgpu_device *adev = drm_to_adev(crtc->dev); - struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc); - uint32_t cur_lock; - - cur_lock = RREG32(mmCUR_UPDATE + amdgpu_crtc->crtc_offset); - if (lock) - cur_lock = REG_SET_FIELD(cur_lock, CUR_UPDATE, CURSOR_UPDATE_LOCK, 1); - else - cur_lock = REG_SET_FIELD(cur_lock, CUR_UPDATE, CURSOR_UPDATE_LOCK, 0); - WREG32(mmCUR_UPDATE + amdgpu_crtc->crtc_offset, cur_lock); -} - -static void dce_v11_0_hide_cursor(struct drm_crtc *crtc) -{ - struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc); - struct amdgpu_device *adev = drm_to_adev(crtc->dev); - u32 tmp; - - tmp = RREG32(mmCUR_CONTROL + amdgpu_crtc->crtc_offset); - tmp = REG_SET_FIELD(tmp, CUR_CONTROL, CURSOR_EN, 0); - WREG32(mmCUR_CONTROL + amdgpu_crtc->crtc_offset, tmp); -} - -static void dce_v11_0_show_cursor(struct drm_crtc *crtc) -{ - struct 
amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc); - struct amdgpu_device *adev = drm_to_adev(crtc->dev); - u32 tmp; - - WREG32(mmCUR_SURFACE_ADDRESS_HIGH + amdgpu_crtc->crtc_offset, - upper_32_bits(amdgpu_crtc->cursor_addr)); - WREG32(mmCUR_SURFACE_ADDRESS + amdgpu_crtc->crtc_offset, - lower_32_bits(amdgpu_crtc->cursor_addr)); - - tmp = RREG32(mmCUR_CONTROL + amdgpu_crtc->crtc_offset); - tmp = REG_SET_FIELD(tmp, CUR_CONTROL, CURSOR_EN, 1); - tmp = REG_SET_FIELD(tmp, CUR_CONTROL, CURSOR_MODE, 2); - WREG32(mmCUR_CONTROL + amdgpu_crtc->crtc_offset, tmp); -} - -static int dce_v11_0_cursor_move_locked(struct drm_crtc *crtc, - int x, int y) -{ - struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc); - struct amdgpu_device *adev = drm_to_adev(crtc->dev); - int xorigin = 0, yorigin = 0; - - amdgpu_crtc->cursor_x = x; - amdgpu_crtc->cursor_y = y; - - /* avivo cursor are offset into the total surface */ - x += crtc->x; - y += crtc->y; - DRM_DEBUG("x %d y %d c->x %d c->y %d\n", x, y, crtc->x, crtc->y); - - if (x < 0) { - xorigin = min(-x, amdgpu_crtc->max_cursor_width - 1); - x = 0; - } - if (y < 0) { - yorigin = min(-y, amdgpu_crtc->max_cursor_height - 1); - y = 0; - } - - WREG32(mmCUR_POSITION + amdgpu_crtc->crtc_offset, (x << 16) | y); - WREG32(mmCUR_HOT_SPOT + amdgpu_crtc->crtc_offset, (xorigin << 16) | yorigin); - WREG32(mmCUR_SIZE + amdgpu_crtc->crtc_offset, - ((amdgpu_crtc->cursor_width - 1) << 16) | (amdgpu_crtc->cursor_height - 1)); - - return 0; -} - -static int dce_v11_0_crtc_cursor_move(struct drm_crtc *crtc, - int x, int y) -{ - int ret; - - dce_v11_0_lock_cursor(crtc, true); - ret = dce_v11_0_cursor_move_locked(crtc, x, y); - dce_v11_0_lock_cursor(crtc, false); - - return ret; -} - -static int dce_v11_0_crtc_cursor_set2(struct drm_crtc *crtc, - struct drm_file *file_priv, - uint32_t handle, - uint32_t width, - uint32_t height, - int32_t hot_x, - int32_t hot_y) -{ - struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc); - struct drm_gem_object *obj; - struct amdgpu_bo *aobj; - int ret; - - if (!handle) { - /* turn off cursor */ - dce_v11_0_hide_cursor(crtc); - obj = NULL; - goto unpin; - } - - if ((width > amdgpu_crtc->max_cursor_width) || - (height > amdgpu_crtc->max_cursor_height)) { - DRM_ERROR("bad cursor width or height %d x %d\n", width, height); - return -EINVAL; - } - - obj = drm_gem_object_lookup(file_priv, handle); - if (!obj) { - DRM_ERROR("Cannot find cursor object %x for crtc %d\n", handle, amdgpu_crtc->crtc_id); - return -ENOENT; - } - - aobj = gem_to_amdgpu_bo(obj); - ret = amdgpu_bo_reserve(aobj, false); - if (ret != 0) { - drm_gem_object_put(obj); - return ret; - } - - aobj->flags |= AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS; - ret = amdgpu_bo_pin(aobj, AMDGPU_GEM_DOMAIN_VRAM); - amdgpu_bo_unreserve(aobj); - if (ret) { - DRM_ERROR("Failed to pin new cursor BO (%d)\n", ret); - drm_gem_object_put(obj); - return ret; - } - amdgpu_crtc->cursor_addr = amdgpu_bo_gpu_offset(aobj); - - dce_v11_0_lock_cursor(crtc, true); - - if (width != amdgpu_crtc->cursor_width || - height != amdgpu_crtc->cursor_height || - hot_x != amdgpu_crtc->cursor_hot_x || - hot_y != amdgpu_crtc->cursor_hot_y) { - int x, y; - - x = amdgpu_crtc->cursor_x + amdgpu_crtc->cursor_hot_x - hot_x; - y = amdgpu_crtc->cursor_y + amdgpu_crtc->cursor_hot_y - hot_y; - - dce_v11_0_cursor_move_locked(crtc, x, y); - - amdgpu_crtc->cursor_width = width; - amdgpu_crtc->cursor_height = height; - amdgpu_crtc->cursor_hot_x = hot_x; - amdgpu_crtc->cursor_hot_y = hot_y; - } - - dce_v11_0_show_cursor(crtc); - dce_v11_0_lock_cursor(crtc, 
false); - -unpin: - if (amdgpu_crtc->cursor_bo) { - struct amdgpu_bo *aobj = gem_to_amdgpu_bo(amdgpu_crtc->cursor_bo); - ret = amdgpu_bo_reserve(aobj, true); - if (likely(ret == 0)) { - amdgpu_bo_unpin(aobj); - amdgpu_bo_unreserve(aobj); - } - drm_gem_object_put(amdgpu_crtc->cursor_bo); - } - - amdgpu_crtc->cursor_bo = obj; - return 0; -} - -static void dce_v11_0_cursor_reset(struct drm_crtc *crtc) -{ - struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc); - - if (amdgpu_crtc->cursor_bo) { - dce_v11_0_lock_cursor(crtc, true); - - dce_v11_0_cursor_move_locked(crtc, amdgpu_crtc->cursor_x, - amdgpu_crtc->cursor_y); - - dce_v11_0_show_cursor(crtc); - - dce_v11_0_lock_cursor(crtc, false); - } -} - -static int dce_v11_0_crtc_gamma_set(struct drm_crtc *crtc, u16 *red, u16 *green, - u16 *blue, uint32_t size, - struct drm_modeset_acquire_ctx *ctx) -{ - dce_v11_0_crtc_load_lut(crtc); - - return 0; -} - -static void dce_v11_0_crtc_destroy(struct drm_crtc *crtc) -{ - struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc); - - drm_crtc_cleanup(crtc); - kfree(amdgpu_crtc); -} - -static const struct drm_crtc_funcs dce_v11_0_crtc_funcs = { - .cursor_set2 = dce_v11_0_crtc_cursor_set2, - .cursor_move = dce_v11_0_crtc_cursor_move, - .gamma_set = dce_v11_0_crtc_gamma_set, - .set_config = amdgpu_display_crtc_set_config, - .destroy = dce_v11_0_crtc_destroy, - .page_flip_target = amdgpu_display_crtc_page_flip_target, - .get_vblank_counter = amdgpu_get_vblank_counter_kms, - .enable_vblank = amdgpu_enable_vblank_kms, - .disable_vblank = amdgpu_disable_vblank_kms, - .get_vblank_timestamp = drm_crtc_vblank_helper_get_vblank_timestamp, -}; - -static void dce_v11_0_crtc_dpms(struct drm_crtc *crtc, int mode) -{ - struct drm_device *dev = crtc->dev; - struct amdgpu_device *adev = drm_to_adev(dev); - struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc); - unsigned type; - - switch (mode) { - case DRM_MODE_DPMS_ON: - amdgpu_crtc->enabled = true; - amdgpu_atombios_crtc_enable(crtc, ATOM_ENABLE); - dce_v11_0_vga_enable(crtc, true); - amdgpu_atombios_crtc_blank(crtc, ATOM_DISABLE); - dce_v11_0_vga_enable(crtc, false); - /* Make sure VBLANK and PFLIP interrupts are still enabled */ - type = amdgpu_display_crtc_idx_to_irq_type(adev, - amdgpu_crtc->crtc_id); - amdgpu_irq_update(adev, &adev->crtc_irq, type); - amdgpu_irq_update(adev, &adev->pageflip_irq, type); - drm_crtc_vblank_on(crtc); - dce_v11_0_crtc_load_lut(crtc); - break; - case DRM_MODE_DPMS_STANDBY: - case DRM_MODE_DPMS_SUSPEND: - case DRM_MODE_DPMS_OFF: - drm_crtc_vblank_off(crtc); - if (amdgpu_crtc->enabled) { - dce_v11_0_vga_enable(crtc, true); - amdgpu_atombios_crtc_blank(crtc, ATOM_ENABLE); - dce_v11_0_vga_enable(crtc, false); - } - amdgpu_atombios_crtc_enable(crtc, ATOM_DISABLE); - amdgpu_crtc->enabled = false; - break; - } - /* adjust pm to dpms */ - amdgpu_dpm_compute_clocks(adev); -} - -static void dce_v11_0_crtc_prepare(struct drm_crtc *crtc) -{ - /* disable crtc pair power gating before programming */ - amdgpu_atombios_crtc_powergate(crtc, ATOM_DISABLE); - amdgpu_atombios_crtc_lock(crtc, ATOM_ENABLE); - dce_v11_0_crtc_dpms(crtc, DRM_MODE_DPMS_OFF); -} - -static void dce_v11_0_crtc_commit(struct drm_crtc *crtc) -{ - dce_v11_0_crtc_dpms(crtc, DRM_MODE_DPMS_ON); - amdgpu_atombios_crtc_lock(crtc, ATOM_DISABLE); -} - -static void dce_v11_0_crtc_disable(struct drm_crtc *crtc) -{ - struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc); - struct drm_device *dev = crtc->dev; - struct amdgpu_device *adev = drm_to_adev(dev); - struct amdgpu_atom_ss ss; - int i; - 
- dce_v11_0_crtc_dpms(crtc, DRM_MODE_DPMS_OFF); - if (crtc->primary->fb) { - int r; - struct amdgpu_bo *abo; - - abo = gem_to_amdgpu_bo(crtc->primary->fb->obj[0]); - r = amdgpu_bo_reserve(abo, true); - if (unlikely(r)) - DRM_ERROR("failed to reserve abo before unpin\n"); - else { - amdgpu_bo_unpin(abo); - amdgpu_bo_unreserve(abo); - } - } - /* disable the GRPH */ - dce_v11_0_grph_enable(crtc, false); - - amdgpu_atombios_crtc_powergate(crtc, ATOM_ENABLE); - - for (i = 0; i < adev->mode_info.num_crtc; i++) { - if (adev->mode_info.crtcs[i] && - adev->mode_info.crtcs[i]->enabled && - i != amdgpu_crtc->crtc_id && - amdgpu_crtc->pll_id == adev->mode_info.crtcs[i]->pll_id) { - /* one other crtc is using this pll don't turn - * off the pll - */ - goto done; - } - } - - switch (amdgpu_crtc->pll_id) { - case ATOM_PPLL0: - case ATOM_PPLL1: - case ATOM_PPLL2: - /* disable the ppll */ - amdgpu_atombios_crtc_program_pll(crtc, amdgpu_crtc->crtc_id, amdgpu_crtc->pll_id, - 0, 0, ATOM_DISABLE, 0, 0, 0, 0, 0, false, &ss); - break; - case ATOM_COMBOPHY_PLL0: - case ATOM_COMBOPHY_PLL1: - case ATOM_COMBOPHY_PLL2: - case ATOM_COMBOPHY_PLL3: - case ATOM_COMBOPHY_PLL4: - case ATOM_COMBOPHY_PLL5: - /* disable the ppll */ - amdgpu_atombios_crtc_program_pll(crtc, ATOM_CRTC_INVALID, amdgpu_crtc->pll_id, - 0, 0, ATOM_DISABLE, 0, 0, 0, 0, 0, false, &ss); - break; - default: - break; - } -done: - amdgpu_crtc->pll_id = ATOM_PPLL_INVALID; - amdgpu_crtc->adjusted_clock = 0; - amdgpu_crtc->encoder = NULL; - amdgpu_crtc->connector = NULL; -} - -static int dce_v11_0_crtc_mode_set(struct drm_crtc *crtc, - struct drm_display_mode *mode, - struct drm_display_mode *adjusted_mode, - int x, int y, struct drm_framebuffer *old_fb) -{ - struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc); - struct drm_device *dev = crtc->dev; - struct amdgpu_device *adev = drm_to_adev(dev); - - if (!amdgpu_crtc->adjusted_clock) - return -EINVAL; - - if ((adev->asic_type == CHIP_POLARIS10) || - (adev->asic_type == CHIP_POLARIS11) || - (adev->asic_type == CHIP_POLARIS12) || - (adev->asic_type == CHIP_VEGAM)) { - struct amdgpu_encoder *amdgpu_encoder = - to_amdgpu_encoder(amdgpu_crtc->encoder); - int encoder_mode = - amdgpu_atombios_encoder_get_encoder_mode(amdgpu_crtc->encoder); - - /* SetPixelClock calculates the plls and ss values now */ - amdgpu_atombios_crtc_program_pll(crtc, amdgpu_crtc->crtc_id, - amdgpu_crtc->pll_id, - encoder_mode, amdgpu_encoder->encoder_id, - adjusted_mode->clock, 0, 0, 0, 0, - amdgpu_crtc->bpc, amdgpu_crtc->ss_enabled, &amdgpu_crtc->ss); - } else { - amdgpu_atombios_crtc_set_pll(crtc, adjusted_mode); - } - amdgpu_atombios_crtc_set_dtd_timing(crtc, adjusted_mode); - dce_v11_0_crtc_do_set_base(crtc, old_fb, x, y, 0); - amdgpu_atombios_crtc_overscan_setup(crtc, mode, adjusted_mode); - amdgpu_atombios_crtc_scaler_setup(crtc); - dce_v11_0_cursor_reset(crtc); - /* update the hw version fpr dpm */ - amdgpu_crtc->hw_mode = *adjusted_mode; - - return 0; -} - -static bool dce_v11_0_crtc_mode_fixup(struct drm_crtc *crtc, - const struct drm_display_mode *mode, - struct drm_display_mode *adjusted_mode) -{ - struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc); - struct drm_device *dev = crtc->dev; - struct drm_encoder *encoder; - - /* assign the encoder to the amdgpu crtc to avoid repeated lookups later */ - list_for_each_entry(encoder, &dev->mode_config.encoder_list, head) { - if (encoder->crtc == crtc) { - amdgpu_crtc->encoder = encoder; - amdgpu_crtc->connector = amdgpu_get_connector_for_encoder(encoder); - break; - } - } - if 
((amdgpu_crtc->encoder == NULL) || (amdgpu_crtc->connector == NULL)) { - amdgpu_crtc->encoder = NULL; - amdgpu_crtc->connector = NULL; - return false; - } - if (!amdgpu_display_crtc_scaling_mode_fixup(crtc, mode, adjusted_mode)) - return false; - if (amdgpu_atombios_crtc_prepare_pll(crtc, adjusted_mode)) - return false; - /* pick pll */ - amdgpu_crtc->pll_id = dce_v11_0_pick_pll(crtc); - /* if we can't get a PPLL for a non-DP encoder, fail */ - if ((amdgpu_crtc->pll_id == ATOM_PPLL_INVALID) && - !ENCODER_MODE_IS_DP(amdgpu_atombios_encoder_get_encoder_mode(amdgpu_crtc->encoder))) - return false; - - return true; -} - -static int dce_v11_0_crtc_set_base(struct drm_crtc *crtc, int x, int y, - struct drm_framebuffer *old_fb) -{ - return dce_v11_0_crtc_do_set_base(crtc, old_fb, x, y, 0); -} - -static int dce_v11_0_crtc_set_base_atomic(struct drm_crtc *crtc, - struct drm_framebuffer *fb, - int x, int y, enum mode_set_atomic state) -{ - return dce_v11_0_crtc_do_set_base(crtc, fb, x, y, 1); -} - -static const struct drm_crtc_helper_funcs dce_v11_0_crtc_helper_funcs = { - .dpms = dce_v11_0_crtc_dpms, - .mode_fixup = dce_v11_0_crtc_mode_fixup, - .mode_set = dce_v11_0_crtc_mode_set, - .mode_set_base = dce_v11_0_crtc_set_base, - .mode_set_base_atomic = dce_v11_0_crtc_set_base_atomic, - .prepare = dce_v11_0_crtc_prepare, - .commit = dce_v11_0_crtc_commit, - .disable = dce_v11_0_crtc_disable, - .get_scanout_position = amdgpu_crtc_get_scanout_position, -}; - -static void dce_v11_0_panic_flush(struct drm_plane *plane) -{ - struct drm_framebuffer *fb; - struct amdgpu_crtc *amdgpu_crtc; - struct amdgpu_device *adev; - uint32_t fb_format; - - if (!plane->fb) - return; - - fb = plane->fb; - amdgpu_crtc = to_amdgpu_crtc(plane->crtc); - adev = drm_to_adev(fb->dev); - - /* Disable DC tiling */ - fb_format = RREG32(mmGRPH_CONTROL + amdgpu_crtc->crtc_offset); - fb_format &= ~GRPH_CONTROL__GRPH_ARRAY_MODE_MASK; - WREG32(mmGRPH_CONTROL + amdgpu_crtc->crtc_offset, fb_format); - -} - -static const struct drm_plane_helper_funcs dce_v11_0_drm_primary_plane_helper_funcs = { - .get_scanout_buffer = amdgpu_display_get_scanout_buffer, - .panic_flush = dce_v11_0_panic_flush, -}; - -static int dce_v11_0_crtc_init(struct amdgpu_device *adev, int index) -{ - struct amdgpu_crtc *amdgpu_crtc; - - amdgpu_crtc = kzalloc(sizeof(struct amdgpu_crtc) + - (AMDGPUFB_CONN_LIMIT * sizeof(struct drm_connector *)), GFP_KERNEL); - if (amdgpu_crtc == NULL) - return -ENOMEM; - - drm_crtc_init(adev_to_drm(adev), &amdgpu_crtc->base, &dce_v11_0_crtc_funcs); - - drm_mode_crtc_set_gamma_size(&amdgpu_crtc->base, 256); - amdgpu_crtc->crtc_id = index; - adev->mode_info.crtcs[index] = amdgpu_crtc; - - amdgpu_crtc->max_cursor_width = 128; - amdgpu_crtc->max_cursor_height = 128; - adev_to_drm(adev)->mode_config.cursor_width = amdgpu_crtc->max_cursor_width; - adev_to_drm(adev)->mode_config.cursor_height = amdgpu_crtc->max_cursor_height; - - switch (amdgpu_crtc->crtc_id) { - case 0: - default: - amdgpu_crtc->crtc_offset = CRTC0_REGISTER_OFFSET; - break; - case 1: - amdgpu_crtc->crtc_offset = CRTC1_REGISTER_OFFSET; - break; - case 2: - amdgpu_crtc->crtc_offset = CRTC2_REGISTER_OFFSET; - break; - case 3: - amdgpu_crtc->crtc_offset = CRTC3_REGISTER_OFFSET; - break; - case 4: - amdgpu_crtc->crtc_offset = CRTC4_REGISTER_OFFSET; - break; - case 5: - amdgpu_crtc->crtc_offset = CRTC5_REGISTER_OFFSET; - break; - } - - amdgpu_crtc->pll_id = ATOM_PPLL_INVALID; - amdgpu_crtc->adjusted_clock = 0; - amdgpu_crtc->encoder = NULL; - amdgpu_crtc->connector = NULL; - 
drm_crtc_helper_add(&amdgpu_crtc->base, &dce_v11_0_crtc_helper_funcs); - drm_plane_helper_add(amdgpu_crtc->base.primary, &dce_v11_0_drm_primary_plane_helper_funcs); - - return 0; -} - -static int dce_v11_0_early_init(struct amdgpu_ip_block *ip_block) -{ - struct amdgpu_device *adev = ip_block->adev; - - adev->audio_endpt_rreg = &dce_v11_0_audio_endpt_rreg; - adev->audio_endpt_wreg = &dce_v11_0_audio_endpt_wreg; - - dce_v11_0_set_display_funcs(adev); - - adev->mode_info.num_crtc = dce_v11_0_get_num_crtc(adev); - - switch (adev->asic_type) { - case CHIP_CARRIZO: - adev->mode_info.num_hpd = 6; - adev->mode_info.num_dig = 9; - break; - case CHIP_STONEY: - adev->mode_info.num_hpd = 6; - adev->mode_info.num_dig = 9; - break; - case CHIP_POLARIS10: - case CHIP_VEGAM: - adev->mode_info.num_hpd = 6; - adev->mode_info.num_dig = 6; - break; - case CHIP_POLARIS11: - case CHIP_POLARIS12: - adev->mode_info.num_hpd = 5; - adev->mode_info.num_dig = 5; - break; - default: - /* FIXME: not supported yet */ - return -EINVAL; - } - - dce_v11_0_set_irq_funcs(adev); - - return 0; -} - -static int dce_v11_0_sw_init(struct amdgpu_ip_block *ip_block) -{ - int r, i; - struct amdgpu_device *adev = ip_block->adev; - - for (i = 0; i < adev->mode_info.num_crtc; i++) { - r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, i + 1, &adev->crtc_irq); - if (r) - return r; - } - - for (i = VISLANDS30_IV_SRCID_D1_GRPH_PFLIP; i < 20; i += 2) { - r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, i, &adev->pageflip_irq); - if (r) - return r; - } - - /* HPD hotplug */ - r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_HOTPLUG_DETECT_A, &adev->hpd_irq); - if (r) - return r; - - adev_to_drm(adev)->mode_config.funcs = &amdgpu_mode_funcs; - - adev_to_drm(adev)->mode_config.async_page_flip = true; - - adev_to_drm(adev)->mode_config.max_width = 16384; - adev_to_drm(adev)->mode_config.max_height = 16384; - - adev_to_drm(adev)->mode_config.preferred_depth = 24; - adev_to_drm(adev)->mode_config.prefer_shadow = 1; - - adev_to_drm(adev)->mode_config.fb_modifiers_not_supported = true; - - r = amdgpu_display_modeset_create_props(adev); - if (r) - return r; - - adev_to_drm(adev)->mode_config.max_width = 16384; - adev_to_drm(adev)->mode_config.max_height = 16384; - - - /* allocate crtcs */ - for (i = 0; i < adev->mode_info.num_crtc; i++) { - r = dce_v11_0_crtc_init(adev, i); - if (r) - return r; - } - - if (amdgpu_atombios_get_connector_info_from_object_table(adev)) - amdgpu_display_print_display_setup(adev_to_drm(adev)); - else - return -EINVAL; - - /* setup afmt */ - r = dce_v11_0_afmt_init(adev); - if (r) - return r; - - r = dce_v11_0_audio_init(adev); - if (r) - return r; - - /* Disable vblank IRQs aggressively for power-saving */ - /* XXX: can this be enabled for DC? 
*/ - adev_to_drm(adev)->vblank_disable_immediate = true; - - r = drm_vblank_init(adev_to_drm(adev), adev->mode_info.num_crtc); - if (r) - return r; - - INIT_DELAYED_WORK(&adev->hotplug_work, - amdgpu_display_hotplug_work_func); - - drm_kms_helper_poll_init(adev_to_drm(adev)); - - adev->mode_info.mode_config_initialized = true; - return 0; -} - -static int dce_v11_0_sw_fini(struct amdgpu_ip_block *ip_block) -{ - struct amdgpu_device *adev = ip_block->adev; - - drm_edid_free(adev->mode_info.bios_hardcoded_edid); - - drm_kms_helper_poll_fini(adev_to_drm(adev)); - - dce_v11_0_audio_fini(adev); - - dce_v11_0_afmt_fini(adev); - - drm_mode_config_cleanup(adev_to_drm(adev)); - adev->mode_info.mode_config_initialized = false; - - return 0; -} - -static int dce_v11_0_hw_init(struct amdgpu_ip_block *ip_block) -{ - int i; - struct amdgpu_device *adev = ip_block->adev; - - dce_v11_0_init_golden_registers(adev); - - /* disable vga render */ - dce_v11_0_set_vga_render_state(adev, false); - /* init dig PHYs, disp eng pll */ - amdgpu_atombios_crtc_powergate_init(adev); - amdgpu_atombios_encoder_init_dig(adev); - if ((adev->asic_type == CHIP_POLARIS10) || - (adev->asic_type == CHIP_POLARIS11) || - (adev->asic_type == CHIP_POLARIS12) || - (adev->asic_type == CHIP_VEGAM)) { - amdgpu_atombios_crtc_set_dce_clock(adev, adev->clock.default_dispclk, - DCE_CLOCK_TYPE_DISPCLK, ATOM_GCK_DFS); - amdgpu_atombios_crtc_set_dce_clock(adev, 0, - DCE_CLOCK_TYPE_DPREFCLK, ATOM_GCK_DFS); - } else { - amdgpu_atombios_crtc_set_disp_eng_pll(adev, adev->clock.default_dispclk); - } - - /* initialize hpd */ - dce_v11_0_hpd_init(adev); - - for (i = 0; i < adev->mode_info.audio.num_pins; i++) { - dce_v11_0_audio_enable(adev, &adev->mode_info.audio.pin[i], false); - } - - dce_v11_0_pageflip_interrupt_init(adev); - - return 0; -} - -static int dce_v11_0_hw_fini(struct amdgpu_ip_block *ip_block) -{ - int i; - struct amdgpu_device *adev = ip_block->adev; - - dce_v11_0_hpd_fini(adev); - - for (i = 0; i < adev->mode_info.audio.num_pins; i++) { - dce_v11_0_audio_enable(adev, &adev->mode_info.audio.pin[i], false); - } - - dce_v11_0_pageflip_interrupt_fini(adev); - - flush_delayed_work(&adev->hotplug_work); - - return 0; -} - -static int dce_v11_0_suspend(struct amdgpu_ip_block *ip_block) -{ - struct amdgpu_device *adev = ip_block->adev; - int r; - - r = amdgpu_display_suspend_helper(adev); - if (r) - return r; - - adev->mode_info.bl_level = - amdgpu_atombios_encoder_get_backlight_level_from_reg(adev); - - return dce_v11_0_hw_fini(ip_block); -} - -static int dce_v11_0_resume(struct amdgpu_ip_block *ip_block) -{ - struct amdgpu_device *adev = ip_block->adev; - int ret; - - amdgpu_atombios_encoder_set_backlight_level_to_reg(adev, - adev->mode_info.bl_level); - - ret = dce_v11_0_hw_init(ip_block); - - /* turn on the BL */ - if (adev->mode_info.bl_encoder) { - u8 bl_level = amdgpu_display_backlight_get_level(adev, - adev->mode_info.bl_encoder); - amdgpu_display_backlight_set_level(adev, adev->mode_info.bl_encoder, - bl_level); - } - if (ret) - return ret; - - return amdgpu_display_resume_helper(adev); -} - -static bool dce_v11_0_is_idle(struct amdgpu_ip_block *ip_block) -{ - return true; -} - -static int dce_v11_0_soft_reset(struct amdgpu_ip_block *ip_block) -{ - u32 srbm_soft_reset = 0, tmp; - struct amdgpu_device *adev = ip_block->adev; - - if (dce_v11_0_is_display_hung(adev)) - srbm_soft_reset |= SRBM_SOFT_RESET__SOFT_RESET_DC_MASK; - - if (srbm_soft_reset) { - tmp = RREG32(mmSRBM_SOFT_RESET); - tmp |= srbm_soft_reset; - dev_info(adev->dev, 
"SRBM_SOFT_RESET=0x%08X\n", tmp); - WREG32(mmSRBM_SOFT_RESET, tmp); - tmp = RREG32(mmSRBM_SOFT_RESET); - - udelay(50); - - tmp &= ~srbm_soft_reset; - WREG32(mmSRBM_SOFT_RESET, tmp); - tmp = RREG32(mmSRBM_SOFT_RESET); - - /* Wait a little for things to settle down */ - udelay(50); - } - return 0; -} - -static void dce_v11_0_set_crtc_vblank_interrupt_state(struct amdgpu_device *adev, - int crtc, - enum amdgpu_interrupt_state state) -{ - u32 lb_interrupt_mask; - - if (crtc >= adev->mode_info.num_crtc) { - DRM_DEBUG("invalid crtc %d\n", crtc); - return; - } - - switch (state) { - case AMDGPU_IRQ_STATE_DISABLE: - lb_interrupt_mask = RREG32(mmLB_INTERRUPT_MASK + crtc_offsets[crtc]); - lb_interrupt_mask = REG_SET_FIELD(lb_interrupt_mask, LB_INTERRUPT_MASK, - VBLANK_INTERRUPT_MASK, 0); - WREG32(mmLB_INTERRUPT_MASK + crtc_offsets[crtc], lb_interrupt_mask); - break; - case AMDGPU_IRQ_STATE_ENABLE: - lb_interrupt_mask = RREG32(mmLB_INTERRUPT_MASK + crtc_offsets[crtc]); - lb_interrupt_mask = REG_SET_FIELD(lb_interrupt_mask, LB_INTERRUPT_MASK, - VBLANK_INTERRUPT_MASK, 1); - WREG32(mmLB_INTERRUPT_MASK + crtc_offsets[crtc], lb_interrupt_mask); - break; - default: - break; - } -} - -static void dce_v11_0_set_crtc_vline_interrupt_state(struct amdgpu_device *adev, - int crtc, - enum amdgpu_interrupt_state state) -{ - u32 lb_interrupt_mask; - - if (crtc >= adev->mode_info.num_crtc) { - DRM_DEBUG("invalid crtc %d\n", crtc); - return; - } - - switch (state) { - case AMDGPU_IRQ_STATE_DISABLE: - lb_interrupt_mask = RREG32(mmLB_INTERRUPT_MASK + crtc_offsets[crtc]); - lb_interrupt_mask = REG_SET_FIELD(lb_interrupt_mask, LB_INTERRUPT_MASK, - VLINE_INTERRUPT_MASK, 0); - WREG32(mmLB_INTERRUPT_MASK + crtc_offsets[crtc], lb_interrupt_mask); - break; - case AMDGPU_IRQ_STATE_ENABLE: - lb_interrupt_mask = RREG32(mmLB_INTERRUPT_MASK + crtc_offsets[crtc]); - lb_interrupt_mask = REG_SET_FIELD(lb_interrupt_mask, LB_INTERRUPT_MASK, - VLINE_INTERRUPT_MASK, 1); - WREG32(mmLB_INTERRUPT_MASK + crtc_offsets[crtc], lb_interrupt_mask); - break; - default: - break; - } -} - -static int dce_v11_0_set_hpd_irq_state(struct amdgpu_device *adev, - struct amdgpu_irq_src *source, - unsigned hpd, - enum amdgpu_interrupt_state state) -{ - u32 tmp; - - if (hpd >= adev->mode_info.num_hpd) { - DRM_DEBUG("invalid hpd %d\n", hpd); - return 0; - } - - switch (state) { - case AMDGPU_IRQ_STATE_DISABLE: - tmp = RREG32(mmDC_HPD_INT_CONTROL + hpd_offsets[hpd]); - tmp = REG_SET_FIELD(tmp, DC_HPD_INT_CONTROL, DC_HPD_INT_EN, 0); - WREG32(mmDC_HPD_INT_CONTROL + hpd_offsets[hpd], tmp); - break; - case AMDGPU_IRQ_STATE_ENABLE: - tmp = RREG32(mmDC_HPD_INT_CONTROL + hpd_offsets[hpd]); - tmp = REG_SET_FIELD(tmp, DC_HPD_INT_CONTROL, DC_HPD_INT_EN, 1); - WREG32(mmDC_HPD_INT_CONTROL + hpd_offsets[hpd], tmp); - break; - default: - break; - } - - return 0; -} - -static int dce_v11_0_set_crtc_irq_state(struct amdgpu_device *adev, - struct amdgpu_irq_src *source, - unsigned type, - enum amdgpu_interrupt_state state) -{ - switch (type) { - case AMDGPU_CRTC_IRQ_VBLANK1: - dce_v11_0_set_crtc_vblank_interrupt_state(adev, 0, state); - break; - case AMDGPU_CRTC_IRQ_VBLANK2: - dce_v11_0_set_crtc_vblank_interrupt_state(adev, 1, state); - break; - case AMDGPU_CRTC_IRQ_VBLANK3: - dce_v11_0_set_crtc_vblank_interrupt_state(adev, 2, state); - break; - case AMDGPU_CRTC_IRQ_VBLANK4: - dce_v11_0_set_crtc_vblank_interrupt_state(adev, 3, state); - break; - case AMDGPU_CRTC_IRQ_VBLANK5: - dce_v11_0_set_crtc_vblank_interrupt_state(adev, 4, state); - break; - case AMDGPU_CRTC_IRQ_VBLANK6: - 
dce_v11_0_set_crtc_vblank_interrupt_state(adev, 5, state); - break; - case AMDGPU_CRTC_IRQ_VLINE1: - dce_v11_0_set_crtc_vline_interrupt_state(adev, 0, state); - break; - case AMDGPU_CRTC_IRQ_VLINE2: - dce_v11_0_set_crtc_vline_interrupt_state(adev, 1, state); - break; - case AMDGPU_CRTC_IRQ_VLINE3: - dce_v11_0_set_crtc_vline_interrupt_state(adev, 2, state); - break; - case AMDGPU_CRTC_IRQ_VLINE4: - dce_v11_0_set_crtc_vline_interrupt_state(adev, 3, state); - break; - case AMDGPU_CRTC_IRQ_VLINE5: - dce_v11_0_set_crtc_vline_interrupt_state(adev, 4, state); - break; - case AMDGPU_CRTC_IRQ_VLINE6: - dce_v11_0_set_crtc_vline_interrupt_state(adev, 5, state); - break; - default: - break; - } - return 0; -} - -static int dce_v11_0_set_pageflip_irq_state(struct amdgpu_device *adev, - struct amdgpu_irq_src *src, - unsigned type, - enum amdgpu_interrupt_state state) -{ - u32 reg; - - if (type >= adev->mode_info.num_crtc) { - DRM_ERROR("invalid pageflip crtc %d\n", type); - return -EINVAL; - } - - reg = RREG32(mmGRPH_INTERRUPT_CONTROL + crtc_offsets[type]); - if (state == AMDGPU_IRQ_STATE_DISABLE) - WREG32(mmGRPH_INTERRUPT_CONTROL + crtc_offsets[type], - reg & ~GRPH_INTERRUPT_CONTROL__GRPH_PFLIP_INT_MASK_MASK); - else - WREG32(mmGRPH_INTERRUPT_CONTROL + crtc_offsets[type], - reg | GRPH_INTERRUPT_CONTROL__GRPH_PFLIP_INT_MASK_MASK); - - return 0; -} - -static int dce_v11_0_pageflip_irq(struct amdgpu_device *adev, - struct amdgpu_irq_src *source, - struct amdgpu_iv_entry *entry) -{ - unsigned long flags; - unsigned crtc_id; - struct amdgpu_crtc *amdgpu_crtc; - struct amdgpu_flip_work *works; - - crtc_id = (entry->src_id - 8) >> 1; - amdgpu_crtc = adev->mode_info.crtcs[crtc_id]; - - if (crtc_id >= adev->mode_info.num_crtc) { - DRM_ERROR("invalid pageflip crtc %d\n", crtc_id); - return -EINVAL; - } - - if (RREG32(mmGRPH_INTERRUPT_STATUS + crtc_offsets[crtc_id]) & - GRPH_INTERRUPT_STATUS__GRPH_PFLIP_INT_OCCURRED_MASK) - WREG32(mmGRPH_INTERRUPT_STATUS + crtc_offsets[crtc_id], - GRPH_INTERRUPT_STATUS__GRPH_PFLIP_INT_CLEAR_MASK); - - /* IRQ could occur when in initial stage */ - if(amdgpu_crtc == NULL) - return 0; - - spin_lock_irqsave(&adev_to_drm(adev)->event_lock, flags); - works = amdgpu_crtc->pflip_works; - if (amdgpu_crtc->pflip_status != AMDGPU_FLIP_SUBMITTED){ - DRM_DEBUG_DRIVER("amdgpu_crtc->pflip_status = %d != " - "AMDGPU_FLIP_SUBMITTED(%d)\n", - amdgpu_crtc->pflip_status, - AMDGPU_FLIP_SUBMITTED); - spin_unlock_irqrestore(&adev_to_drm(adev)->event_lock, flags); - return 0; - } - - /* page flip completed. 
clean up */ - amdgpu_crtc->pflip_status = AMDGPU_FLIP_NONE; - amdgpu_crtc->pflip_works = NULL; - - /* wakeup usersapce */ - if(works->event) - drm_crtc_send_vblank_event(&amdgpu_crtc->base, works->event); - - spin_unlock_irqrestore(&adev_to_drm(adev)->event_lock, flags); - - drm_crtc_vblank_put(&amdgpu_crtc->base); - schedule_work(&works->unpin_work); - - return 0; -} - -static void dce_v11_0_hpd_int_ack(struct amdgpu_device *adev, - int hpd) -{ - u32 tmp; - - if (hpd >= adev->mode_info.num_hpd) { - DRM_DEBUG("invalid hpd %d\n", hpd); - return; - } - - tmp = RREG32(mmDC_HPD_INT_CONTROL + hpd_offsets[hpd]); - tmp = REG_SET_FIELD(tmp, DC_HPD_INT_CONTROL, DC_HPD_INT_ACK, 1); - WREG32(mmDC_HPD_INT_CONTROL + hpd_offsets[hpd], tmp); -} - -static void dce_v11_0_crtc_vblank_int_ack(struct amdgpu_device *adev, - int crtc) -{ - u32 tmp; - - if (crtc < 0 || crtc >= adev->mode_info.num_crtc) { - DRM_DEBUG("invalid crtc %d\n", crtc); - return; - } - - tmp = RREG32(mmLB_VBLANK_STATUS + crtc_offsets[crtc]); - tmp = REG_SET_FIELD(tmp, LB_VBLANK_STATUS, VBLANK_ACK, 1); - WREG32(mmLB_VBLANK_STATUS + crtc_offsets[crtc], tmp); -} - -static void dce_v11_0_crtc_vline_int_ack(struct amdgpu_device *adev, - int crtc) -{ - u32 tmp; - - if (crtc < 0 || crtc >= adev->mode_info.num_crtc) { - DRM_DEBUG("invalid crtc %d\n", crtc); - return; - } - - tmp = RREG32(mmLB_VLINE_STATUS + crtc_offsets[crtc]); - tmp = REG_SET_FIELD(tmp, LB_VLINE_STATUS, VLINE_ACK, 1); - WREG32(mmLB_VLINE_STATUS + crtc_offsets[crtc], tmp); -} - -static int dce_v11_0_crtc_irq(struct amdgpu_device *adev, - struct amdgpu_irq_src *source, - struct amdgpu_iv_entry *entry) -{ - unsigned crtc = entry->src_id - 1; - uint32_t disp_int = RREG32(interrupt_status_offsets[crtc].reg); - unsigned int irq_type = amdgpu_display_crtc_idx_to_irq_type(adev, - crtc); - - switch (entry->src_data[0]) { - case 0: /* vblank */ - if (disp_int & interrupt_status_offsets[crtc].vblank) - dce_v11_0_crtc_vblank_int_ack(adev, crtc); - else - DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); - - if (amdgpu_irq_enabled(adev, source, irq_type)) { - drm_handle_vblank(adev_to_drm(adev), crtc); - } - DRM_DEBUG("IH: D%d vblank\n", crtc + 1); - - break; - case 1: /* vline */ - if (disp_int & interrupt_status_offsets[crtc].vline) - dce_v11_0_crtc_vline_int_ack(adev, crtc); - else - DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); - - DRM_DEBUG("IH: D%d vline\n", crtc + 1); - - break; - default: - DRM_DEBUG("Unhandled interrupt: %d %d\n", entry->src_id, entry->src_data[0]); - break; - } - - return 0; -} - -static int dce_v11_0_hpd_irq(struct amdgpu_device *adev, - struct amdgpu_irq_src *source, - struct amdgpu_iv_entry *entry) -{ - uint32_t disp_int, mask; - unsigned hpd; - - if (entry->src_data[0] >= adev->mode_info.num_hpd) { - DRM_DEBUG("Unhandled interrupt: %d %d\n", entry->src_id, entry->src_data[0]); - return 0; - } - - hpd = entry->src_data[0]; - disp_int = RREG32(interrupt_status_offsets[hpd].reg); - mask = interrupt_status_offsets[hpd].hpd; - - if (disp_int & mask) { - dce_v11_0_hpd_int_ack(adev, hpd); - schedule_delayed_work(&adev->hotplug_work, 0); - DRM_DEBUG("IH: HPD%d\n", hpd + 1); - } - - return 0; -} - -static int dce_v11_0_set_clockgating_state(struct amdgpu_ip_block *ip_block, - enum amd_clockgating_state state) -{ - return 0; -} - -static int dce_v11_0_set_powergating_state(struct amdgpu_ip_block *ip_block, - enum amd_powergating_state state) -{ - return 0; -} - -static const struct amd_ip_funcs dce_v11_0_ip_funcs = { - .name = "dce_v11_0", - .early_init = 
dce_v11_0_early_init, - .sw_init = dce_v11_0_sw_init, - .sw_fini = dce_v11_0_sw_fini, - .hw_init = dce_v11_0_hw_init, - .hw_fini = dce_v11_0_hw_fini, - .suspend = dce_v11_0_suspend, - .resume = dce_v11_0_resume, - .is_idle = dce_v11_0_is_idle, - .soft_reset = dce_v11_0_soft_reset, - .set_clockgating_state = dce_v11_0_set_clockgating_state, - .set_powergating_state = dce_v11_0_set_powergating_state, -}; - -static void dce_v11_0_encoder_mode_set(struct drm_encoder *encoder, - struct drm_display_mode *mode, - struct drm_display_mode *adjusted_mode) -{ - struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder); - - amdgpu_encoder->pixel_clock = adjusted_mode->clock; - - /* need to call this here rather than in prepare() since we need some crtc info */ - amdgpu_atombios_encoder_dpms(encoder, DRM_MODE_DPMS_OFF); - - /* set scaler clears this on some chips */ - dce_v11_0_set_interleave(encoder->crtc, mode); - - if (amdgpu_atombios_encoder_get_encoder_mode(encoder) == ATOM_ENCODER_MODE_HDMI) { - dce_v11_0_afmt_enable(encoder, true); - dce_v11_0_afmt_setmode(encoder, adjusted_mode); - } -} - -static void dce_v11_0_encoder_prepare(struct drm_encoder *encoder) -{ - struct amdgpu_device *adev = drm_to_adev(encoder->dev); - struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder); - struct drm_connector *connector = amdgpu_get_connector_for_encoder(encoder); - - if ((amdgpu_encoder->active_device & - (ATOM_DEVICE_DFP_SUPPORT | ATOM_DEVICE_LCD_SUPPORT)) || - (amdgpu_encoder_get_dp_bridge_encoder_id(encoder) != - ENCODER_OBJECT_ID_NONE)) { - struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv; - if (dig) { - dig->dig_encoder = dce_v11_0_pick_dig_encoder(encoder); - if (amdgpu_encoder->active_device & ATOM_DEVICE_DFP_SUPPORT) - dig->afmt = adev->mode_info.afmt[dig->dig_encoder]; - } - } - - amdgpu_atombios_scratch_regs_lock(adev, true); - - if (connector) { - struct amdgpu_connector *amdgpu_connector = to_amdgpu_connector(connector); - - /* select the clock/data port if it uses a router */ - if (amdgpu_connector->router.cd_valid) - amdgpu_i2c_router_select_cd_port(amdgpu_connector); - - /* turn eDP panel on for mode set */ - if (connector->connector_type == DRM_MODE_CONNECTOR_eDP) - amdgpu_atombios_encoder_set_edp_panel_power(connector, - ATOM_TRANSMITTER_ACTION_POWER_ON); - } - - /* this is needed for the pll/ss setup to work correctly in some cases */ - amdgpu_atombios_encoder_set_crtc_source(encoder); - /* set up the FMT blocks */ - dce_v11_0_program_fmt(encoder); -} - -static void dce_v11_0_encoder_commit(struct drm_encoder *encoder) -{ - struct drm_device *dev = encoder->dev; - struct amdgpu_device *adev = drm_to_adev(dev); - - /* need to call this here as we need the crtc set up */ - amdgpu_atombios_encoder_dpms(encoder, DRM_MODE_DPMS_ON); - amdgpu_atombios_scratch_regs_lock(adev, false); -} - -static void dce_v11_0_encoder_disable(struct drm_encoder *encoder) -{ - struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder); - struct amdgpu_encoder_atom_dig *dig; - - amdgpu_atombios_encoder_dpms(encoder, DRM_MODE_DPMS_OFF); - - if (amdgpu_atombios_encoder_is_digital(encoder)) { - if (amdgpu_atombios_encoder_get_encoder_mode(encoder) == ATOM_ENCODER_MODE_HDMI) - dce_v11_0_afmt_enable(encoder, false); - dig = amdgpu_encoder->enc_priv; - dig->dig_encoder = -1; - } - amdgpu_encoder->active_device = 0; -} - -/* these are handled by the primary encoders */ -static void dce_v11_0_ext_prepare(struct drm_encoder *encoder) -{ - -} - -static void 
dce_v11_0_ext_commit(struct drm_encoder *encoder) -{ - -} - -static void -dce_v11_0_ext_mode_set(struct drm_encoder *encoder, - struct drm_display_mode *mode, - struct drm_display_mode *adjusted_mode) -{ - -} - -static void dce_v11_0_ext_disable(struct drm_encoder *encoder) -{ - -} - -static void -dce_v11_0_ext_dpms(struct drm_encoder *encoder, int mode) -{ - -} - -static const struct drm_encoder_helper_funcs dce_v11_0_ext_helper_funcs = { - .dpms = dce_v11_0_ext_dpms, - .prepare = dce_v11_0_ext_prepare, - .mode_set = dce_v11_0_ext_mode_set, - .commit = dce_v11_0_ext_commit, - .disable = dce_v11_0_ext_disable, - /* no detect for TMDS/LVDS yet */ -}; - -static const struct drm_encoder_helper_funcs dce_v11_0_dig_helper_funcs = { - .dpms = amdgpu_atombios_encoder_dpms, - .mode_fixup = amdgpu_atombios_encoder_mode_fixup, - .prepare = dce_v11_0_encoder_prepare, - .mode_set = dce_v11_0_encoder_mode_set, - .commit = dce_v11_0_encoder_commit, - .disable = dce_v11_0_encoder_disable, - .detect = amdgpu_atombios_encoder_dig_detect, -}; - -static const struct drm_encoder_helper_funcs dce_v11_0_dac_helper_funcs = { - .dpms = amdgpu_atombios_encoder_dpms, - .mode_fixup = amdgpu_atombios_encoder_mode_fixup, - .prepare = dce_v11_0_encoder_prepare, - .mode_set = dce_v11_0_encoder_mode_set, - .commit = dce_v11_0_encoder_commit, - .detect = amdgpu_atombios_encoder_dac_detect, -}; - -static void dce_v11_0_encoder_destroy(struct drm_encoder *encoder) -{ - struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder); - if (amdgpu_encoder->devices & (ATOM_DEVICE_LCD_SUPPORT)) - amdgpu_atombios_encoder_fini_backlight(amdgpu_encoder); - kfree(amdgpu_encoder->enc_priv); - drm_encoder_cleanup(encoder); - kfree(amdgpu_encoder); -} - -static const struct drm_encoder_funcs dce_v11_0_encoder_funcs = { - .destroy = dce_v11_0_encoder_destroy, -}; - -static void dce_v11_0_encoder_add(struct amdgpu_device *adev, - uint32_t encoder_enum, - uint32_t supported_device, - u16 caps) -{ - struct drm_device *dev = adev_to_drm(adev); - struct drm_encoder *encoder; - struct amdgpu_encoder *amdgpu_encoder; - - /* see if we already added it */ - list_for_each_entry(encoder, &dev->mode_config.encoder_list, head) { - amdgpu_encoder = to_amdgpu_encoder(encoder); - if (amdgpu_encoder->encoder_enum == encoder_enum) { - amdgpu_encoder->devices |= supported_device; - return; - } - - } - - /* add a new one */ - amdgpu_encoder = kzalloc(sizeof(struct amdgpu_encoder), GFP_KERNEL); - if (!amdgpu_encoder) - return; - - encoder = &amdgpu_encoder->base; - switch (adev->mode_info.num_crtc) { - case 1: - encoder->possible_crtcs = 0x1; - break; - case 2: - default: - encoder->possible_crtcs = 0x3; - break; - case 3: - encoder->possible_crtcs = 0x7; - break; - case 4: - encoder->possible_crtcs = 0xf; - break; - case 5: - encoder->possible_crtcs = 0x1f; - break; - case 6: - encoder->possible_crtcs = 0x3f; - break; - } - - amdgpu_encoder->enc_priv = NULL; - - amdgpu_encoder->encoder_enum = encoder_enum; - amdgpu_encoder->encoder_id = (encoder_enum & OBJECT_ID_MASK) >> OBJECT_ID_SHIFT; - amdgpu_encoder->devices = supported_device; - amdgpu_encoder->rmx_type = RMX_OFF; - amdgpu_encoder->underscan_type = UNDERSCAN_OFF; - amdgpu_encoder->is_ext_encoder = false; - amdgpu_encoder->caps = caps; - - switch (amdgpu_encoder->encoder_id) { - case ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC1: - case ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC2: - drm_encoder_init(dev, encoder, &dce_v11_0_encoder_funcs, - DRM_MODE_ENCODER_DAC, NULL); - drm_encoder_helper_add(encoder, 
&dce_v11_0_dac_helper_funcs); - break; - case ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DVO1: - case ENCODER_OBJECT_ID_INTERNAL_UNIPHY: - case ENCODER_OBJECT_ID_INTERNAL_UNIPHY1: - case ENCODER_OBJECT_ID_INTERNAL_UNIPHY2: - case ENCODER_OBJECT_ID_INTERNAL_UNIPHY3: - if (amdgpu_encoder->devices & (ATOM_DEVICE_LCD_SUPPORT)) { - amdgpu_encoder->rmx_type = RMX_FULL; - drm_encoder_init(dev, encoder, &dce_v11_0_encoder_funcs, - DRM_MODE_ENCODER_LVDS, NULL); - amdgpu_encoder->enc_priv = amdgpu_atombios_encoder_get_lcd_info(amdgpu_encoder); - } else if (amdgpu_encoder->devices & (ATOM_DEVICE_CRT_SUPPORT)) { - drm_encoder_init(dev, encoder, &dce_v11_0_encoder_funcs, - DRM_MODE_ENCODER_DAC, NULL); - amdgpu_encoder->enc_priv = amdgpu_atombios_encoder_get_dig_info(amdgpu_encoder); - } else { - drm_encoder_init(dev, encoder, &dce_v11_0_encoder_funcs, - DRM_MODE_ENCODER_TMDS, NULL); - amdgpu_encoder->enc_priv = amdgpu_atombios_encoder_get_dig_info(amdgpu_encoder); - } - drm_encoder_helper_add(encoder, &dce_v11_0_dig_helper_funcs); - break; - case ENCODER_OBJECT_ID_SI170B: - case ENCODER_OBJECT_ID_CH7303: - case ENCODER_OBJECT_ID_EXTERNAL_SDVOA: - case ENCODER_OBJECT_ID_EXTERNAL_SDVOB: - case ENCODER_OBJECT_ID_TITFP513: - case ENCODER_OBJECT_ID_VT1623: - case ENCODER_OBJECT_ID_HDMI_SI1930: - case ENCODER_OBJECT_ID_TRAVIS: - case ENCODER_OBJECT_ID_NUTMEG: - /* these are handled by the primary encoders */ - amdgpu_encoder->is_ext_encoder = true; - if (amdgpu_encoder->devices & (ATOM_DEVICE_LCD_SUPPORT)) - drm_encoder_init(dev, encoder, &dce_v11_0_encoder_funcs, - DRM_MODE_ENCODER_LVDS, NULL); - else if (amdgpu_encoder->devices & (ATOM_DEVICE_CRT_SUPPORT)) - drm_encoder_init(dev, encoder, &dce_v11_0_encoder_funcs, - DRM_MODE_ENCODER_DAC, NULL); - else - drm_encoder_init(dev, encoder, &dce_v11_0_encoder_funcs, - DRM_MODE_ENCODER_TMDS, NULL); - drm_encoder_helper_add(encoder, &dce_v11_0_ext_helper_funcs); - break; - } -} - -static const struct amdgpu_display_funcs dce_v11_0_display_funcs = { - .bandwidth_update = &dce_v11_0_bandwidth_update, - .vblank_get_counter = &dce_v11_0_vblank_get_counter, - .backlight_set_level = &amdgpu_atombios_encoder_set_backlight_level, - .backlight_get_level = &amdgpu_atombios_encoder_get_backlight_level, - .hpd_sense = &dce_v11_0_hpd_sense, - .hpd_set_polarity = &dce_v11_0_hpd_set_polarity, - .hpd_get_gpio_reg = &dce_v11_0_hpd_get_gpio_reg, - .page_flip = &dce_v11_0_page_flip, - .page_flip_get_scanoutpos = &dce_v11_0_crtc_get_scanoutpos, - .add_encoder = &dce_v11_0_encoder_add, - .add_connector = &amdgpu_connector_add, -}; - -static void dce_v11_0_set_display_funcs(struct amdgpu_device *adev) -{ - adev->mode_info.funcs = &dce_v11_0_display_funcs; -} - -static const struct amdgpu_irq_src_funcs dce_v11_0_crtc_irq_funcs = { - .set = dce_v11_0_set_crtc_irq_state, - .process = dce_v11_0_crtc_irq, -}; - -static const struct amdgpu_irq_src_funcs dce_v11_0_pageflip_irq_funcs = { - .set = dce_v11_0_set_pageflip_irq_state, - .process = dce_v11_0_pageflip_irq, -}; - -static const struct amdgpu_irq_src_funcs dce_v11_0_hpd_irq_funcs = { - .set = dce_v11_0_set_hpd_irq_state, - .process = dce_v11_0_hpd_irq, -}; - -static void dce_v11_0_set_irq_funcs(struct amdgpu_device *adev) -{ - if (adev->mode_info.num_crtc > 0) - adev->crtc_irq.num_types = AMDGPU_CRTC_IRQ_VLINE1 + adev->mode_info.num_crtc; - else - adev->crtc_irq.num_types = 0; - adev->crtc_irq.funcs = &dce_v11_0_crtc_irq_funcs; - - adev->pageflip_irq.num_types = adev->mode_info.num_crtc; - adev->pageflip_irq.funcs = &dce_v11_0_pageflip_irq_funcs; 
- - adev->hpd_irq.num_types = adev->mode_info.num_hpd; - adev->hpd_irq.funcs = &dce_v11_0_hpd_irq_funcs; -} - -const struct amdgpu_ip_block_version dce_v11_0_ip_block = -{ - .type = AMD_IP_BLOCK_TYPE_DCE, - .major = 11, - .minor = 0, - .rev = 0, - .funcs = &dce_v11_0_ip_funcs, -}; - -const struct amdgpu_ip_block_version dce_v11_2_ip_block = -{ - .type = AMD_IP_BLOCK_TYPE_DCE, - .major = 11, - .minor = 2, - .rev = 0, - .funcs = &dce_v11_0_ip_funcs, -}; diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c index 276c025c4c03..acc887a58518 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c @@ -1034,7 +1034,6 @@ static void dce_v6_0_program_watermarks(struct amdgpu_device *adev, /* save values for DPM */ amdgpu_crtc->line_time = line_time; - amdgpu_crtc->wm_high = latency_watermark_a; /* Save number of lines the linebuffer leads before the scanout */ amdgpu_crtc->lb_vblank_lead_lines = lb_vblank_lead_lines; @@ -1451,17 +1450,12 @@ static int dce_v6_0_audio_init(struct amdgpu_device *adev) static void dce_v6_0_audio_fini(struct amdgpu_device *adev) { - int i; - if (!amdgpu_audio) return; if (!adev->mode_info.audio.enabled) return; - for (i = 0; i < adev->mode_info.audio.num_pins; i++) - dce_v6_0_audio_enable(adev, &adev->mode_info.audio.pin[i], false); - adev->mode_info.audio.enabled = false; } diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c index e62ccf9eb73d..2ccd6aad8dd6 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c @@ -1096,8 +1096,7 @@ static void dce_v8_0_program_watermarks(struct amdgpu_device *adev, /* save values for DPM */ amdgpu_crtc->line_time = line_time; - amdgpu_crtc->wm_high = latency_watermark_a; - amdgpu_crtc->wm_low = latency_watermark_b; + /* Save number of lines the linebuffer leads before the scanout */ amdgpu_crtc->lb_vblank_lead_lines = lb_vblank_lead_lines; } @@ -1443,17 +1442,12 @@ static int dce_v8_0_audio_init(struct amdgpu_device *adev) static void dce_v8_0_audio_fini(struct amdgpu_device *adev) { - int i; - if (!amdgpu_audio) return; if (!adev->mode_info.audio.enabled) return; - for (i = 0; i < adev->mode_info.audio.num_pins; i++) - dce_v8_0_audio_enable(adev, &adev->mode_info.audio.pin[i], false); - adev->mode_info.audio.enabled = false; } diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c index 75ea071744eb..8841d7213de4 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c @@ -4075,7 +4075,7 @@ static int gfx_v10_0_ring_test_ib(struct amdgpu_ring *ring, long timeout) struct dma_fence *f = NULL; unsigned int index; uint64_t gpu_addr; - volatile uint32_t *cpu_ptr; + uint32_t *cpu_ptr; long r; memset(&ib, 0, sizeof(ib)); @@ -4322,8 +4322,7 @@ static u32 gfx_v10_0_get_csb_size(struct amdgpu_device *adev) return count; } -static void gfx_v10_0_get_csb_buffer(struct amdgpu_device *adev, - volatile u32 *buffer) +static void gfx_v10_0_get_csb_buffer(struct amdgpu_device *adev, u32 *buffer) { u32 count = 0; int ctx_reg_offset; @@ -4952,11 +4951,15 @@ static int gfx_v10_0_sw_init(struct amdgpu_ip_block *ip_block) } } } - /* TODO: Add queue reset mask when FW fully supports it */ + adev->gfx.gfx_supported_reset = amdgpu_get_soft_full_reset_mask(&adev->gfx.gfx_ring[0]); adev->gfx.compute_supported_reset = amdgpu_get_soft_full_reset_mask(&adev->gfx.compute_ring[0]); + if (!amdgpu_sriov_vf(adev)) { + 
adev->gfx.compute_supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE; + adev->gfx.gfx_supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE; + } r = amdgpu_gfx_kiq_init(adev, GFX10_MEC_HPD_SIZE, 0); if (r) { @@ -7664,19 +7667,17 @@ static int gfx_v10_0_soft_reset(struct amdgpu_ip_block *ip_block) /* Disable MEC parsing/prefetching */ gfx_v10_0_cp_compute_enable(adev, false); - if (grbm_soft_reset) { - tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET); - tmp |= grbm_soft_reset; - dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp); - WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp); - tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET); + tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET); + tmp |= grbm_soft_reset; + dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp); + WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp); + tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET); - udelay(50); + udelay(50); - tmp &= ~grbm_soft_reset; - WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp); - tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET); - } + tmp &= ~grbm_soft_reset; + WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp); + tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET); /* Wait a little for things to settle down */ udelay(50); @@ -9046,21 +9047,6 @@ static void gfx_v10_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring, ref, mask); } -static void gfx_v10_0_ring_soft_recovery(struct amdgpu_ring *ring, - unsigned int vmid) -{ - struct amdgpu_device *adev = ring->adev; - uint32_t value = 0; - - value = REG_SET_FIELD(value, SQ_CMD, CMD, 0x03); - value = REG_SET_FIELD(value, SQ_CMD, MODE, 0x01); - value = REG_SET_FIELD(value, SQ_CMD, CHECK_VMID, 1); - value = REG_SET_FIELD(value, SQ_CMD, VM_ID, vmid); - amdgpu_gfx_rlc_enter_safe_mode(adev, 0); - WREG32_SOC15(GC, 0, mmSQ_CMD, value); - amdgpu_gfx_rlc_exit_safe_mode(adev, 0); -} - static void gfx_v10_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev, uint32_t me, uint32_t pipe, @@ -9522,7 +9508,9 @@ static void gfx_v10_ring_insert_nop(struct amdgpu_ring *ring, uint32_t num_nop) amdgpu_ring_insert_nop(ring, num_nop - 1); } -static int gfx_v10_0_reset_kgq(struct amdgpu_ring *ring, unsigned int vmid) +static int gfx_v10_0_reset_kgq(struct amdgpu_ring *ring, + unsigned int vmid, + struct amdgpu_fence *timedout_fence) { struct amdgpu_device *adev = ring->adev; struct amdgpu_kiq *kiq = &adev->gfx.kiq[0]; @@ -9532,15 +9520,14 @@ static int gfx_v10_0_reset_kgq(struct amdgpu_ring *ring, unsigned int vmid) u64 addr; int r; - if (amdgpu_sriov_vf(adev)) - return -EINVAL; - if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues) return -EINVAL; + amdgpu_ring_reset_helper_begin(ring, timedout_fence); + spin_lock_irqsave(&kiq->ring_lock, flags); - if (amdgpu_ring_alloc(kiq_ring, 5 + 7 + 7 + kiq->pmf->map_queues_size)) { + if (amdgpu_ring_alloc(kiq_ring, 5 + 7 + 7)) { spin_unlock_irqrestore(&kiq->ring_lock, flags); return -ENOMEM; } @@ -9560,12 +9547,9 @@ static int gfx_v10_0_reset_kgq(struct amdgpu_ring *ring, unsigned int vmid) 0, 1, 0x20); gfx_v10_0_ring_emit_reg_wait(kiq_ring, SOC15_REG_OFFSET(GC, 0, mmCP_VMID_RESET), 0, 0xffffffff); - kiq->pmf->kiq_map_queues(kiq_ring, ring); amdgpu_ring_commit(kiq_ring); - - spin_unlock_irqrestore(&kiq->ring_lock, flags); - r = amdgpu_ring_test_ring(kiq_ring); + spin_unlock_irqrestore(&kiq->ring_lock, flags); if (r) return r; @@ -9575,11 +9559,25 @@ static int gfx_v10_0_reset_kgq(struct amdgpu_ring *ring, unsigned int vmid) return r; } - return amdgpu_ring_test_ring(ring); + spin_lock_irqsave(&kiq->ring_lock, flags); + + if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->map_queues_size)) { + 
spin_unlock_irqrestore(&kiq->ring_lock, flags); + return -ENOMEM; + } + kiq->pmf->kiq_map_queues(kiq_ring, ring); + amdgpu_ring_commit(kiq_ring); + r = amdgpu_ring_test_ring(kiq_ring); + spin_unlock_irqrestore(&kiq->ring_lock, flags); + if (r) + return r; + + return amdgpu_ring_reset_helper_end(ring, timedout_fence); } static int gfx_v10_0_reset_kcq(struct amdgpu_ring *ring, - unsigned int vmid) + unsigned int vmid, + struct amdgpu_fence *timedout_fence) { struct amdgpu_device *adev = ring->adev; struct amdgpu_kiq *kiq = &adev->gfx.kiq[0]; @@ -9587,12 +9585,11 @@ static int gfx_v10_0_reset_kcq(struct amdgpu_ring *ring, unsigned long flags; int i, r; - if (amdgpu_sriov_vf(adev)) - return -EINVAL; - if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues) return -EINVAL; + amdgpu_ring_reset_helper_begin(ring, timedout_fence); + spin_lock_irqsave(&kiq->ring_lock, flags); if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->unmap_queues_size)) { @@ -9603,9 +9600,8 @@ static int gfx_v10_0_reset_kcq(struct amdgpu_ring *ring, kiq->pmf->kiq_unmap_queues(kiq_ring, ring, RESET_QUEUES, 0, 0); amdgpu_ring_commit(kiq_ring); - spin_unlock_irqrestore(&kiq->ring_lock, flags); - r = amdgpu_ring_test_ring(kiq_ring); + spin_unlock_irqrestore(&kiq->ring_lock, flags); if (r) return r; @@ -9641,13 +9637,12 @@ static int gfx_v10_0_reset_kcq(struct amdgpu_ring *ring, } kiq->pmf->kiq_map_queues(kiq_ring, ring); amdgpu_ring_commit(kiq_ring); - spin_unlock_irqrestore(&kiq->ring_lock, flags); - r = amdgpu_ring_test_ring(kiq_ring); + spin_unlock_irqrestore(&kiq->ring_lock, flags); if (r) return r; - return amdgpu_ring_test_ring(ring); + return amdgpu_ring_reset_helper_end(ring, timedout_fence); } static void gfx_v10_ip_print(struct amdgpu_ip_block *ip_block, struct drm_printer *p) @@ -9882,7 +9877,6 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_gfx = { .emit_wreg = gfx_v10_0_ring_emit_wreg, .emit_reg_wait = gfx_v10_0_ring_emit_reg_wait, .emit_reg_write_reg_wait = gfx_v10_0_ring_emit_reg_write_reg_wait, - .soft_recovery = gfx_v10_0_ring_soft_recovery, .emit_mem_sync = gfx_v10_0_emit_mem_sync, .reset = gfx_v10_0_reset_kgq, .emit_cleaner_shader = gfx_v10_0_ring_emit_cleaner_shader, @@ -9923,7 +9917,6 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_compute = { .emit_wreg = gfx_v10_0_ring_emit_wreg, .emit_reg_wait = gfx_v10_0_ring_emit_reg_wait, .emit_reg_write_reg_wait = gfx_v10_0_ring_emit_reg_write_reg_wait, - .soft_recovery = gfx_v10_0_ring_soft_recovery, .emit_mem_sync = gfx_v10_0_emit_mem_sync, .reset = gfx_v10_0_reset_kcq, .emit_cleaner_shader = gfx_v10_0_ring_emit_cleaner_shader, diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c index afd6d59164bf..d61eb9f187c6 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c @@ -85,6 +85,7 @@ MODULE_FIRMWARE("amdgpu/gc_11_0_0_pfp.bin"); MODULE_FIRMWARE("amdgpu/gc_11_0_0_me.bin"); MODULE_FIRMWARE("amdgpu/gc_11_0_0_mec.bin"); MODULE_FIRMWARE("amdgpu/gc_11_0_0_rlc.bin"); +MODULE_FIRMWARE("amdgpu/gc_11_0_0_rlc_kicker.bin"); MODULE_FIRMWARE("amdgpu/gc_11_0_0_rlc_1.bin"); MODULE_FIRMWARE("amdgpu/gc_11_0_0_toc.bin"); MODULE_FIRMWARE("amdgpu/gc_11_0_1_pfp.bin"); @@ -602,7 +603,7 @@ static int gfx_v11_0_ring_test_ib(struct amdgpu_ring *ring, long timeout) struct dma_fence *f = NULL; unsigned index; uint64_t gpu_addr; - volatile uint32_t *cpu_ptr; + uint32_t *cpu_ptr; long r; /* MES KIQ fw hasn't indirect buffer support for now */ @@ -759,6 +760,10 @@ static int gfx_v11_0_init_microcode(struct 
amdgpu_device *adev) err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw, AMDGPU_UCODE_REQUIRED, "amdgpu/gc_11_0_0_rlc_1.bin"); + else if (amdgpu_is_kicker_fw(adev)) + err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw, + AMDGPU_UCODE_REQUIRED, + "amdgpu/%s_rlc_kicker.bin", ucode_prefix); else err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw, AMDGPU_UCODE_REQUIRED, @@ -845,8 +850,7 @@ static u32 gfx_v11_0_get_csb_size(struct amdgpu_device *adev) return count; } -static void gfx_v11_0_get_csb_buffer(struct amdgpu_device *adev, - volatile u32 *buffer) +static void gfx_v11_0_get_csb_buffer(struct amdgpu_device *adev, u32 *buffer) { u32 count = 0; int ctx_reg_offset; @@ -1607,9 +1611,9 @@ static int gfx_v11_0_sw_init(struct amdgpu_ip_block *ip_block) case IP_VERSION(11, 0, 2): case IP_VERSION(11, 0, 3): if (!adev->gfx.disable_uq && - adev->gfx.me_fw_version >= 2390 && - adev->gfx.pfp_fw_version >= 2530 && - adev->gfx.mec_fw_version >= 2600 && + adev->gfx.me_fw_version >= 2420 && + adev->gfx.pfp_fw_version >= 2580 && + adev->gfx.mec_fw_version >= 2650 && adev->mes.fw_version[0] >= 120) { adev->userq_funcs[AMDGPU_HW_IP_GFX] = &userq_mes_funcs; adev->userq_funcs[AMDGPU_HW_IP_COMPUTE] = &userq_mes_funcs; @@ -1649,6 +1653,21 @@ static int gfx_v11_0_sw_init(struct amdgpu_ip_block *ip_block) } } break; + case IP_VERSION(11, 0, 1): + case IP_VERSION(11, 0, 4): + adev->gfx.cleaner_shader_ptr = gfx_11_0_3_cleaner_shader_hex; + adev->gfx.cleaner_shader_size = sizeof(gfx_11_0_3_cleaner_shader_hex); + if (adev->gfx.pfp_fw_version >= 102 && + adev->gfx.mec_fw_version >= 66 && + adev->mes.fw_version[0] >= 128) { + adev->gfx.enable_cleaner_shader = true; + r = amdgpu_gfx_cleaner_shader_sw_init(adev, adev->gfx.cleaner_shader_size); + if (r) { + adev->gfx.enable_cleaner_shader = false; + dev_err(adev->dev, "Failed to initialize cleaner shader\n"); + } + } + break; case IP_VERSION(11, 5, 0): case IP_VERSION(11, 5, 1): adev->gfx.cleaner_shader_ptr = gfx_11_0_3_cleaner_shader_hex; @@ -1801,12 +1820,17 @@ static int gfx_v11_0_sw_init(struct amdgpu_ip_block *ip_block) case IP_VERSION(11, 0, 2): case IP_VERSION(11, 0, 3): if ((adev->gfx.me_fw_version >= 2280) && - (adev->gfx.mec_fw_version >= 2410)) { - adev->gfx.compute_supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE; - adev->gfx.gfx_supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE; + (adev->gfx.mec_fw_version >= 2410) && + !amdgpu_sriov_vf(adev)) { + adev->gfx.compute_supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE; + adev->gfx.gfx_supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE; } break; default: + if (!amdgpu_sriov_vf(adev)) { + adev->gfx.compute_supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE; + adev->gfx.gfx_supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE; + } break; } @@ -4119,6 +4143,8 @@ static int gfx_v11_0_gfx_mqd_init(struct amdgpu_device *adev, void *m, #endif if (prop->tmz_queue) tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_CNTL, TMZ_MATCH, 1); + if (!prop->kernel_queue) + tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_CNTL, RB_NON_PRIV, 1); mqd->cp_gfx_hqd_cntl = tmp; /* set up cp_doorbell_control */ @@ -4271,8 +4297,10 @@ static int gfx_v11_0_compute_mqd_init(struct amdgpu_device *adev, void *m, tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 1); tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, TUNNEL_DISPATCH, prop->allow_tunneling); - tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1); - tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1); + if (prop->kernel_queue) { + tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1); + tmp = REG_SET_FIELD(tmp, 
CP_HQD_PQ_CONTROL, KMD_QUEUE, 1); + } if (prop->tmz_queue) tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, TMZ, 1); mqd->cp_hqd_pq_control = tmp; @@ -4629,8 +4657,7 @@ static int gfx_v11_0_gfxhub_enable(struct amdgpu_device *adev) amdgpu_device_flush_hdp(adev, NULL); - value = (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_ALWAYS) ? - false : true; + value = amdgpu_vm_fault_stop != AMDGPU_VM_FAULT_STOP_ALWAYS; adev->gfxhub.funcs->set_fault_enable_default(adev, value); /* TODO investigate why this and the hdp flush above is needed, @@ -5835,8 +5862,6 @@ static void gfx_v11_0_ring_emit_ib_gfx(struct amdgpu_ring *ring, unsigned vmid = AMDGPU_JOB_GET_VMID(job); u32 header, control = 0; - BUG_ON(ib->flags & AMDGPU_IB_FLAG_CE); - header = PACKET3(PACKET3_INDIRECT_BUFFER, 2); control |= ib->length_dw | (vmid << 24); @@ -6278,21 +6303,6 @@ static void gfx_v11_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring, ref, mask, 0x20); } -static void gfx_v11_0_ring_soft_recovery(struct amdgpu_ring *ring, - unsigned vmid) -{ - struct amdgpu_device *adev = ring->adev; - uint32_t value = 0; - - value = REG_SET_FIELD(value, SQ_CMD, CMD, 0x03); - value = REG_SET_FIELD(value, SQ_CMD, MODE, 0x01); - value = REG_SET_FIELD(value, SQ_CMD, CHECK_VMID, 1); - value = REG_SET_FIELD(value, SQ_CMD, VM_ID, vmid); - amdgpu_gfx_rlc_enter_safe_mode(adev, 0); - WREG32_SOC15(GC, 0, regSQ_CMD, value); - amdgpu_gfx_rlc_exit_safe_mode(adev, 0); -} - static void gfx_v11_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev, uint32_t me, uint32_t pipe, @@ -6806,13 +6816,14 @@ static int gfx_v11_reset_gfx_pipe(struct amdgpu_ring *ring) return 0; } -static int gfx_v11_0_reset_kgq(struct amdgpu_ring *ring, unsigned int vmid) +static int gfx_v11_0_reset_kgq(struct amdgpu_ring *ring, + unsigned int vmid, + struct amdgpu_fence *timedout_fence) { struct amdgpu_device *adev = ring->adev; int r; - if (amdgpu_sriov_vf(adev)) - return -EINVAL; + amdgpu_ring_reset_helper_begin(ring, timedout_fence); r = amdgpu_mes_reset_legacy_queue(ring->adev, ring, vmid, false); if (r) { @@ -6835,7 +6846,7 @@ static int gfx_v11_0_reset_kgq(struct amdgpu_ring *ring, unsigned int vmid) return r; } - return amdgpu_ring_test_ring(ring); + return amdgpu_ring_reset_helper_end(ring, timedout_fence); } static int gfx_v11_0_reset_compute_pipe(struct amdgpu_ring *ring) @@ -6968,13 +6979,14 @@ static int gfx_v11_0_reset_compute_pipe(struct amdgpu_ring *ring) return 0; } -static int gfx_v11_0_reset_kcq(struct amdgpu_ring *ring, unsigned int vmid) +static int gfx_v11_0_reset_kcq(struct amdgpu_ring *ring, + unsigned int vmid, + struct amdgpu_fence *timedout_fence) { struct amdgpu_device *adev = ring->adev; int r = 0; - if (amdgpu_sriov_vf(adev)) - return -EINVAL; + amdgpu_ring_reset_helper_begin(ring, timedout_fence); r = amdgpu_mes_reset_legacy_queue(ring->adev, ring, vmid, true); if (r) { @@ -6995,7 +7007,7 @@ static int gfx_v11_0_reset_kcq(struct amdgpu_ring *ring, unsigned int vmid) return r; } - return amdgpu_ring_test_ring(ring); + return amdgpu_ring_reset_helper_end(ring, timedout_fence); } static void gfx_v11_ip_print(struct amdgpu_ip_block *ip_block, struct drm_printer *p) @@ -7231,7 +7243,6 @@ static const struct amdgpu_ring_funcs gfx_v11_0_ring_funcs_gfx = { .emit_wreg = gfx_v11_0_ring_emit_wreg, .emit_reg_wait = gfx_v11_0_ring_emit_reg_wait, .emit_reg_write_reg_wait = gfx_v11_0_ring_emit_reg_write_reg_wait, - .soft_recovery = gfx_v11_0_ring_soft_recovery, .emit_mem_sync = gfx_v11_0_emit_mem_sync, .reset = gfx_v11_0_reset_kgq, .emit_cleaner_shader = 
gfx_v11_0_ring_emit_cleaner_shader, @@ -7273,7 +7284,6 @@ static const struct amdgpu_ring_funcs gfx_v11_0_ring_funcs_compute = { .emit_wreg = gfx_v11_0_ring_emit_wreg, .emit_reg_wait = gfx_v11_0_ring_emit_reg_wait, .emit_reg_write_reg_wait = gfx_v11_0_ring_emit_reg_write_reg_wait, - .soft_recovery = gfx_v11_0_ring_soft_recovery, .emit_mem_sync = gfx_v11_0_emit_mem_sync, .reset = gfx_v11_0_reset_kcq, .emit_cleaner_shader = gfx_v11_0_ring_emit_cleaner_shader, diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c index 1234c8d64e20..93fde0f9af87 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c @@ -79,6 +79,7 @@ MODULE_FIRMWARE("amdgpu/gc_12_0_1_pfp.bin"); MODULE_FIRMWARE("amdgpu/gc_12_0_1_me.bin"); MODULE_FIRMWARE("amdgpu/gc_12_0_1_mec.bin"); MODULE_FIRMWARE("amdgpu/gc_12_0_1_rlc.bin"); +MODULE_FIRMWARE("amdgpu/gc_12_0_1_rlc_kicker.bin"); MODULE_FIRMWARE("amdgpu/gc_12_0_1_toc.bin"); static const struct amdgpu_hwip_reg_entry gc_reg_list_12_0[] = { @@ -496,7 +497,7 @@ static int gfx_v12_0_ring_test_ib(struct amdgpu_ring *ring, long timeout) struct dma_fence *f = NULL; unsigned index; uint64_t gpu_addr; - volatile uint32_t *cpu_ptr; + uint32_t *cpu_ptr; long r; /* MES KIQ fw hasn't indirect buffer support for now */ @@ -586,7 +587,7 @@ out: static int gfx_v12_0_init_microcode(struct amdgpu_device *adev) { - char ucode_prefix[15]; + char ucode_prefix[30]; int err; const struct rlc_firmware_header_v2_0 *rlc_hdr; uint16_t version_major; @@ -613,9 +614,14 @@ static int gfx_v12_0_init_microcode(struct amdgpu_device *adev) amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_ME_P0_STACK); if (!amdgpu_sriov_vf(adev)) { - err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw, - AMDGPU_UCODE_REQUIRED, - "amdgpu/%s_rlc.bin", ucode_prefix); + if (amdgpu_is_kicker_fw(adev)) + err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw, + AMDGPU_UCODE_REQUIRED, + "amdgpu/%s_rlc_kicker.bin", ucode_prefix); + else + err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw, + AMDGPU_UCODE_REQUIRED, + "amdgpu/%s_rlc.bin", ucode_prefix); if (err) goto out; rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data; @@ -679,8 +685,7 @@ static u32 gfx_v12_0_get_csb_size(struct amdgpu_device *adev) return count; } -static void gfx_v12_0_get_csb_buffer(struct amdgpu_device *adev, - volatile u32 *buffer) +static void gfx_v12_0_get_csb_buffer(struct amdgpu_device *adev, u32 *buffer) { u32 count = 0, clustercount = 0, i; const struct cs_section_def *sect = NULL; @@ -1542,10 +1547,14 @@ static int gfx_v12_0_sw_init(struct amdgpu_ip_block *ip_block) case IP_VERSION(12, 0, 0): case IP_VERSION(12, 0, 1): if ((adev->gfx.me_fw_version >= 2660) && - (adev->gfx.mec_fw_version >= 2920)) { - adev->gfx.compute_supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE; - adev->gfx.gfx_supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE; + (adev->gfx.mec_fw_version >= 2920) && + !amdgpu_sriov_vf(adev)) { + adev->gfx.compute_supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE; + adev->gfx.gfx_supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE; } + break; + default: + break; } if (!adev->enable_mes_kiq) { @@ -3016,6 +3025,8 @@ static int gfx_v12_0_gfx_mqd_init(struct amdgpu_device *adev, void *m, #endif if (prop->tmz_queue) tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_CNTL, TMZ_MATCH, 1); + if (!prop->kernel_queue) + tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_CNTL, RB_NON_PRIV, 1); mqd->cp_gfx_hqd_cntl = tmp; /* set up cp_doorbell_control */ @@ -3165,8 +3176,10 @@ static int 
gfx_v12_0_compute_mqd_init(struct amdgpu_device *adev, void *m, (order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1)); tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 1); tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, TUNNEL_DISPATCH, 0); - tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1); - tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1); + if (prop->kernel_queue) { + tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1); + tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1); + } if (prop->tmz_queue) tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, TMZ, 1); mqd->cp_hqd_pq_control = tmp; @@ -3510,8 +3523,7 @@ static int gfx_v12_0_gfxhub_enable(struct amdgpu_device *adev) amdgpu_device_flush_hdp(adev, NULL); - value = (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_ALWAYS) ? - false : true; + value = amdgpu_vm_fault_stop != AMDGPU_VM_FAULT_STOP_ALWAYS; adev->gfxhub.funcs->set_fault_enable_default(adev, value); /* TODO investigate why this and the hdp flush above is needed, @@ -4407,8 +4419,6 @@ static void gfx_v12_0_ring_emit_ib_gfx(struct amdgpu_ring *ring, unsigned vmid = AMDGPU_JOB_GET_VMID(job); u32 header, control = 0; - BUG_ON(ib->flags & AMDGPU_IB_FLAG_CE); - header = PACKET3(PACKET3_INDIRECT_BUFFER, 2); control |= ib->length_dw | (vmid << 24); @@ -4690,21 +4700,6 @@ static void gfx_v12_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring, ref, mask, 0x20); } -static void gfx_v12_0_ring_soft_recovery(struct amdgpu_ring *ring, - unsigned vmid) -{ - struct amdgpu_device *adev = ring->adev; - uint32_t value = 0; - - value = REG_SET_FIELD(value, SQ_CMD, CMD, 0x03); - value = REG_SET_FIELD(value, SQ_CMD, MODE, 0x01); - value = REG_SET_FIELD(value, SQ_CMD, CHECK_VMID, 1); - value = REG_SET_FIELD(value, SQ_CMD, VM_ID, vmid); - amdgpu_gfx_rlc_enter_safe_mode(adev, 0); - WREG32_SOC15(GC, 0, regSQ_CMD, value); - amdgpu_gfx_rlc_exit_safe_mode(adev, 0); -} - static void gfx_v12_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev, uint32_t me, uint32_t pipe, @@ -5307,13 +5302,14 @@ static int gfx_v12_reset_gfx_pipe(struct amdgpu_ring *ring) return 0; } -static int gfx_v12_0_reset_kgq(struct amdgpu_ring *ring, unsigned int vmid) +static int gfx_v12_0_reset_kgq(struct amdgpu_ring *ring, + unsigned int vmid, + struct amdgpu_fence *timedout_fence) { struct amdgpu_device *adev = ring->adev; int r; - if (amdgpu_sriov_vf(adev)) - return -EINVAL; + amdgpu_ring_reset_helper_begin(ring, timedout_fence); r = amdgpu_mes_reset_legacy_queue(ring->adev, ring, vmid, false); if (r) { @@ -5335,7 +5331,7 @@ static int gfx_v12_0_reset_kgq(struct amdgpu_ring *ring, unsigned int vmid) return r; } - return amdgpu_ring_test_ring(ring); + return amdgpu_ring_reset_helper_end(ring, timedout_fence); } static int gfx_v12_0_reset_compute_pipe(struct amdgpu_ring *ring) @@ -5421,13 +5417,14 @@ static int gfx_v12_0_reset_compute_pipe(struct amdgpu_ring *ring) return 0; } -static int gfx_v12_0_reset_kcq(struct amdgpu_ring *ring, unsigned int vmid) +static int gfx_v12_0_reset_kcq(struct amdgpu_ring *ring, + unsigned int vmid, + struct amdgpu_fence *timedout_fence) { struct amdgpu_device *adev = ring->adev; int r; - if (amdgpu_sriov_vf(adev)) - return -EINVAL; + amdgpu_ring_reset_helper_begin(ring, timedout_fence); r = amdgpu_mes_reset_legacy_queue(ring->adev, ring, vmid, true); if (r) { @@ -5448,7 +5445,7 @@ static int gfx_v12_0_reset_kcq(struct amdgpu_ring *ring, unsigned int vmid) return r; } - return amdgpu_ring_test_ring(ring); + return amdgpu_ring_reset_helper_end(ring, timedout_fence); } 
static void gfx_v12_0_ring_begin_use(struct amdgpu_ring *ring) @@ -5526,7 +5523,6 @@ static const struct amdgpu_ring_funcs gfx_v12_0_ring_funcs_gfx = { .emit_wreg = gfx_v12_0_ring_emit_wreg, .emit_reg_wait = gfx_v12_0_ring_emit_reg_wait, .emit_reg_write_reg_wait = gfx_v12_0_ring_emit_reg_write_reg_wait, - .soft_recovery = gfx_v12_0_ring_soft_recovery, .emit_mem_sync = gfx_v12_0_emit_mem_sync, .reset = gfx_v12_0_reset_kgq, .emit_cleaner_shader = gfx_v12_0_ring_emit_cleaner_shader, @@ -5565,7 +5561,6 @@ static const struct amdgpu_ring_funcs gfx_v12_0_ring_funcs_compute = { .emit_wreg = gfx_v12_0_ring_emit_wreg, .emit_reg_wait = gfx_v12_0_ring_emit_reg_wait, .emit_reg_write_reg_wait = gfx_v12_0_ring_emit_reg_write_reg_wait, - .soft_recovery = gfx_v12_0_ring_soft_recovery, .emit_mem_sync = gfx_v12_0_emit_mem_sync, .reset = gfx_v12_0_reset_kcq, .emit_cleaner_shader = gfx_v12_0_ring_emit_cleaner_shader, diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c index 70d7a1f434c4..7693b7953426 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c @@ -86,7 +86,7 @@ MODULE_FIRMWARE("amdgpu/hainan_ce.bin"); MODULE_FIRMWARE("amdgpu/hainan_rlc.bin"); static u32 gfx_v6_0_get_csb_size(struct amdgpu_device *adev); -static void gfx_v6_0_get_csb_buffer(struct amdgpu_device *adev, volatile u32 *buffer); +static void gfx_v6_0_get_csb_buffer(struct amdgpu_device *adev, u32 *buffer); //static void gfx_v6_0_init_cp_pg_table(struct amdgpu_device *adev); static void gfx_v6_0_init_pg(struct amdgpu_device *adev); @@ -2354,7 +2354,7 @@ static void gfx_v6_0_ring_emit_wreg(struct amdgpu_ring *ring, static int gfx_v6_0_rlc_init(struct amdgpu_device *adev) { const u32 *src_ptr; - volatile u32 *dst_ptr; + u32 *dst_ptr; u32 dws; u64 reg_list_mc_addr; const struct cs_section_def *cs_data; @@ -2855,8 +2855,7 @@ static u32 gfx_v6_0_get_csb_size(struct amdgpu_device *adev) return count; } -static void gfx_v6_0_get_csb_buffer(struct amdgpu_device *adev, - volatile u32 *buffer) +static void gfx_v6_0_get_csb_buffer(struct amdgpu_device *adev, u32 *buffer) { u32 count = 0; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c index da0534ff1271..5976ed55d9db 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c @@ -883,7 +883,7 @@ static const u32 kalindi_rlc_save_restore_register_list[] = { }; static u32 gfx_v7_0_get_csb_size(struct amdgpu_device *adev); -static void gfx_v7_0_get_csb_buffer(struct amdgpu_device *adev, volatile u32 *buffer); +static void gfx_v7_0_get_csb_buffer(struct amdgpu_device *adev, u32 *buffer); static void gfx_v7_0_init_pg(struct amdgpu_device *adev); static void gfx_v7_0_get_cu_info(struct amdgpu_device *adev); @@ -3882,8 +3882,7 @@ static u32 gfx_v7_0_get_csb_size(struct amdgpu_device *adev) return count; } -static void gfx_v7_0_get_csb_buffer(struct amdgpu_device *adev, - volatile u32 *buffer) +static void gfx_v7_0_get_csb_buffer(struct amdgpu_device *adev, u32 *buffer) { u32 count = 0; @@ -4884,76 +4883,6 @@ static void gfx_v7_0_emit_mem_sync_compute(struct amdgpu_ring *ring) amdgpu_ring_write(ring, 0x0000000A); /* poll interval */ } -static void gfx_v7_0_wait_reg_mem(struct amdgpu_ring *ring, int eng_sel, - int mem_space, int opt, uint32_t addr0, - uint32_t addr1, uint32_t ref, uint32_t mask, - uint32_t inv) -{ - amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5)); - amdgpu_ring_write(ring, - /* memory (1) or register (0) */ - 
(WAIT_REG_MEM_MEM_SPACE(mem_space) | - WAIT_REG_MEM_OPERATION(opt) | /* wait */ - WAIT_REG_MEM_FUNCTION(3) | /* equal */ - WAIT_REG_MEM_ENGINE(eng_sel))); - - if (mem_space) - BUG_ON(addr0 & 0x3); /* Dword align */ - amdgpu_ring_write(ring, addr0); - amdgpu_ring_write(ring, addr1); - amdgpu_ring_write(ring, ref); - amdgpu_ring_write(ring, mask); - amdgpu_ring_write(ring, inv); /* poll interval */ -} - -static void gfx_v7_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg, - uint32_t val, uint32_t mask) -{ - gfx_v7_0_wait_reg_mem(ring, 0, 0, 0, reg, 0, val, mask, 0x20); -} - -static int gfx_v7_0_reset_kgq(struct amdgpu_ring *ring, unsigned int vmid) -{ - struct amdgpu_device *adev = ring->adev; - struct amdgpu_kiq *kiq = &adev->gfx.kiq[0]; - struct amdgpu_ring *kiq_ring = &kiq->ring; - unsigned long flags; - u32 tmp; - int r; - - if (amdgpu_sriov_vf(adev)) - return -EINVAL; - - if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues) - return -EINVAL; - - spin_lock_irqsave(&kiq->ring_lock, flags); - - if (amdgpu_ring_alloc(kiq_ring, 5)) { - spin_unlock_irqrestore(&kiq->ring_lock, flags); - return -ENOMEM; - } - - tmp = REG_SET_FIELD(0, CP_VMID_RESET, RESET_REQUEST, 1 << vmid); - gfx_v7_0_ring_emit_wreg(kiq_ring, mmCP_VMID_RESET, tmp); - amdgpu_ring_commit(kiq_ring); - - spin_unlock_irqrestore(&kiq->ring_lock, flags); - - r = amdgpu_ring_test_ring(kiq_ring); - if (r) - return r; - - if (amdgpu_ring_alloc(ring, 7 + 12 + 5)) - return -ENOMEM; - gfx_v7_0_ring_emit_fence_gfx(ring, ring->fence_drv.gpu_addr, - ring->fence_drv.sync_seq, AMDGPU_FENCE_FLAG_EXEC); - gfx_v7_0_ring_emit_reg_wait(ring, mmCP_VMID_RESET, 0, 0xffff); - gfx_v7_0_ring_emit_wreg(ring, mmCP_VMID_RESET, 0); - - return amdgpu_ring_test_ring(ring); -} - static const struct amd_ip_funcs gfx_v7_0_ip_funcs = { .name = "gfx_v7_0", .early_init = gfx_v7_0_early_init, @@ -5003,7 +4932,6 @@ static const struct amdgpu_ring_funcs gfx_v7_0_ring_funcs_gfx = { .emit_wreg = gfx_v7_0_ring_emit_wreg, .soft_recovery = gfx_v7_0_ring_soft_recovery, .emit_mem_sync = gfx_v7_0_emit_mem_sync, - .reset = gfx_v7_0_reset_kgq, }; static const struct amdgpu_ring_funcs gfx_v7_0_ring_funcs_compute = { diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c index 5ee2237d8ee8..0856ff65288c 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c @@ -1220,8 +1220,7 @@ out: return err; } -static void gfx_v8_0_get_csb_buffer(struct amdgpu_device *adev, - volatile u32 *buffer) +static void gfx_v8_0_get_csb_buffer(struct amdgpu_device *adev, u32 *buffer) { u32 count = 0; @@ -4640,6 +4639,7 @@ static int gfx_v8_0_kcq_init_queue(struct amdgpu_ring *ring) memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct vi_mqd_allocation)); /* reset ring buffer */ ring->wptr = 0; + atomic64_set((atomic64_t *)ring->wptr_cpu_addr, 0); amdgpu_ring_clear_ring(ring); } return 0; @@ -6339,34 +6339,6 @@ static void gfx_v8_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg, amdgpu_ring_write(ring, val); } -static void gfx_v8_0_wait_reg_mem(struct amdgpu_ring *ring, int eng_sel, - int mem_space, int opt, uint32_t addr0, - uint32_t addr1, uint32_t ref, uint32_t mask, - uint32_t inv) -{ - amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5)); - amdgpu_ring_write(ring, - /* memory (1) or register (0) */ - (WAIT_REG_MEM_MEM_SPACE(mem_space) | - WAIT_REG_MEM_OPERATION(opt) | /* wait */ - WAIT_REG_MEM_FUNCTION(3) | /* equal */ - WAIT_REG_MEM_ENGINE(eng_sel))); - - if (mem_space) - BUG_ON(addr0 & 0x3); /* Dword 
align */ - amdgpu_ring_write(ring, addr0); - amdgpu_ring_write(ring, addr1); - amdgpu_ring_write(ring, ref); - amdgpu_ring_write(ring, mask); - amdgpu_ring_write(ring, inv); /* poll interval */ -} - -static void gfx_v8_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg, - uint32_t val, uint32_t mask) -{ - gfx_v8_0_wait_reg_mem(ring, 0, 0, 0, reg, 0, val, mask, 0x20); -} - static void gfx_v8_0_ring_soft_recovery(struct amdgpu_ring *ring, unsigned vmid) { struct amdgpu_device *adev = ring->adev; @@ -6843,48 +6815,6 @@ static void gfx_v8_0_emit_wave_limit(struct amdgpu_ring *ring, bool enable) } -static int gfx_v8_0_reset_kgq(struct amdgpu_ring *ring, unsigned int vmid) -{ - struct amdgpu_device *adev = ring->adev; - struct amdgpu_kiq *kiq = &adev->gfx.kiq[0]; - struct amdgpu_ring *kiq_ring = &kiq->ring; - unsigned long flags; - u32 tmp; - int r; - - if (amdgpu_sriov_vf(adev)) - return -EINVAL; - - if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues) - return -EINVAL; - - spin_lock_irqsave(&kiq->ring_lock, flags); - - if (amdgpu_ring_alloc(kiq_ring, 5)) { - spin_unlock_irqrestore(&kiq->ring_lock, flags); - return -ENOMEM; - } - - tmp = REG_SET_FIELD(0, CP_VMID_RESET, RESET_REQUEST, 1 << vmid); - gfx_v8_0_ring_emit_wreg(kiq_ring, mmCP_VMID_RESET, tmp); - amdgpu_ring_commit(kiq_ring); - - spin_unlock_irqrestore(&kiq->ring_lock, flags); - - r = amdgpu_ring_test_ring(kiq_ring); - if (r) - return r; - - if (amdgpu_ring_alloc(ring, 7 + 12 + 5)) - return -ENOMEM; - gfx_v8_0_ring_emit_fence_gfx(ring, ring->fence_drv.gpu_addr, - ring->fence_drv.sync_seq, AMDGPU_FENCE_FLAG_EXEC); - gfx_v8_0_ring_emit_reg_wait(ring, mmCP_VMID_RESET, 0, 0xffff); - gfx_v8_0_ring_emit_wreg(ring, mmCP_VMID_RESET, 0); - - return amdgpu_ring_test_ring(ring); -} - static const struct amd_ip_funcs gfx_v8_0_ip_funcs = { .name = "gfx_v8_0", .early_init = gfx_v8_0_early_init, @@ -6950,7 +6880,6 @@ static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = { .emit_wreg = gfx_v8_0_ring_emit_wreg, .soft_recovery = gfx_v8_0_ring_soft_recovery, .emit_mem_sync = gfx_v8_0_emit_mem_sync, - .reset = gfx_v8_0_reset_kgq, }; static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = { diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index d377a7c57d5e..dd19a97436db 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -1648,8 +1648,7 @@ static u32 gfx_v9_0_get_csb_size(struct amdgpu_device *adev) return count; } -static void gfx_v9_0_get_csb_buffer(struct amdgpu_device *adev, - volatile u32 *buffer) +static void gfx_v9_0_get_csb_buffer(struct amdgpu_device *adev, u32 *buffer) { u32 count = 0; @@ -2235,6 +2234,25 @@ static int gfx_v9_0_sw_init(struct amdgpu_ip_block *ip_block) } switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { + case IP_VERSION(9, 0, 1): + case IP_VERSION(9, 2, 1): + case IP_VERSION(9, 4, 0): + case IP_VERSION(9, 2, 2): + case IP_VERSION(9, 1, 0): + case IP_VERSION(9, 3, 0): + adev->gfx.cleaner_shader_ptr = gfx_9_4_2_cleaner_shader_hex; + adev->gfx.cleaner_shader_size = sizeof(gfx_9_4_2_cleaner_shader_hex); + if (adev->gfx.me_fw_version >= 167 && + adev->gfx.pfp_fw_version >= 196 && + adev->gfx.mec_fw_version >= 474) { + adev->gfx.enable_cleaner_shader = true; + r = amdgpu_gfx_cleaner_shader_sw_init(adev, adev->gfx.cleaner_shader_size); + if (r) { + adev->gfx.enable_cleaner_shader = false; + dev_err(adev->dev, "Failed to initialize cleaner shader\n"); + } + } + break; case IP_VERSION(9, 4, 2): adev->gfx.cleaner_shader_ptr = 
gfx_9_4_2_cleaner_shader_hex; adev->gfx.cleaner_shader_size = sizeof(gfx_9_4_2_cleaner_shader_hex); @@ -2391,6 +2409,8 @@ static int gfx_v9_0_sw_init(struct amdgpu_ip_block *ip_block) amdgpu_get_soft_full_reset_mask(&adev->gfx.gfx_ring[0]); adev->gfx.compute_supported_reset = amdgpu_get_soft_full_reset_mask(&adev->gfx.compute_ring[0]); + if (!amdgpu_sriov_vf(adev)) + adev->gfx.compute_supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE; r = amdgpu_gfx_kiq_init(adev, GFX9_MEC_HPD_SIZE, 0); if (r) { @@ -2629,6 +2649,9 @@ static void gfx_v9_0_init_sq_config(struct amdgpu_device *adev) !READ_ONCE(adev->barrier_has_auto_waitcnt)); WREG32_SOC15(GC, 0, mmSQ_CONFIG, tmp); break; + case IP_VERSION(9, 4, 2): + gfx_v9_4_2_init_sq(adev); + break; default: break; } @@ -4151,19 +4174,17 @@ static int gfx_v9_0_soft_reset(struct amdgpu_ip_block *ip_block) /* Disable MEC parsing/prefetching */ gfx_v9_0_cp_compute_enable(adev, false); - if (grbm_soft_reset) { - tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET); - tmp |= grbm_soft_reset; - dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp); - WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp); - tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET); + tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET); + tmp |= grbm_soft_reset; + dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp); + WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp); + tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET); - udelay(50); + udelay(50); - tmp &= ~grbm_soft_reset; - WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp); - tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET); - } + tmp &= ~grbm_soft_reset; + WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp); + tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET); /* Wait a little for things to settle down */ udelay(50); @@ -7152,53 +7173,9 @@ static void gfx_v9_ring_insert_nop(struct amdgpu_ring *ring, uint32_t num_nop) amdgpu_ring_insert_nop(ring, num_nop - 1); } -static int gfx_v9_0_reset_kgq(struct amdgpu_ring *ring, unsigned int vmid) -{ - struct amdgpu_device *adev = ring->adev; - struct amdgpu_kiq *kiq = &adev->gfx.kiq[0]; - struct amdgpu_ring *kiq_ring = &kiq->ring; - unsigned long flags; - u32 tmp; - int r; - - if (amdgpu_sriov_vf(adev)) - return -EINVAL; - - if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues) - return -EINVAL; - - spin_lock_irqsave(&kiq->ring_lock, flags); - - if (amdgpu_ring_alloc(kiq_ring, 5)) { - spin_unlock_irqrestore(&kiq->ring_lock, flags); - return -ENOMEM; - } - - tmp = REG_SET_FIELD(0, CP_VMID_RESET, RESET_REQUEST, 1 << vmid); - gfx_v9_0_ring_emit_wreg(kiq_ring, - SOC15_REG_OFFSET(GC, 0, mmCP_VMID_RESET), tmp); - amdgpu_ring_commit(kiq_ring); - - spin_unlock_irqrestore(&kiq->ring_lock, flags); - - r = amdgpu_ring_test_ring(kiq_ring); - if (r) - return r; - - if (amdgpu_ring_alloc(ring, 7 + 7 + 5)) - return -ENOMEM; - gfx_v9_0_ring_emit_fence(ring, ring->fence_drv.gpu_addr, - ring->fence_drv.sync_seq, AMDGPU_FENCE_FLAG_EXEC); - gfx_v9_0_ring_emit_reg_wait(ring, - SOC15_REG_OFFSET(GC, 0, mmCP_VMID_RESET), 0, 0xffff); - gfx_v9_0_ring_emit_wreg(ring, - SOC15_REG_OFFSET(GC, 0, mmCP_VMID_RESET), 0); - - return amdgpu_ring_test_ring(ring); -} - static int gfx_v9_0_reset_kcq(struct amdgpu_ring *ring, - unsigned int vmid) + unsigned int vmid, + struct amdgpu_fence *timedout_fence) { struct amdgpu_device *adev = ring->adev; struct amdgpu_kiq *kiq = &adev->gfx.kiq[0]; @@ -7206,12 +7183,11 @@ static int gfx_v9_0_reset_kcq(struct amdgpu_ring *ring, unsigned long flags; int i, r; - if (amdgpu_sriov_vf(adev)) - return -EINVAL; - if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues) return -EINVAL; 
+ amdgpu_ring_reset_helper_begin(ring, timedout_fence); + spin_lock_irqsave(&kiq->ring_lock, flags); if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->unmap_queues_size)) { @@ -7261,13 +7237,13 @@ static int gfx_v9_0_reset_kcq(struct amdgpu_ring *ring, } kiq->pmf->kiq_map_queues(kiq_ring, ring); amdgpu_ring_commit(kiq_ring); - spin_unlock_irqrestore(&kiq->ring_lock, flags); r = amdgpu_ring_test_ring(kiq_ring); + spin_unlock_irqrestore(&kiq->ring_lock, flags); if (r) { DRM_ERROR("fail to remap queue\n"); return r; } - return amdgpu_ring_test_ring(ring); + return amdgpu_ring_reset_helper_end(ring, timedout_fence); } static void gfx_v9_ip_print(struct amdgpu_ip_block *ip_block, struct drm_printer *p) @@ -7477,7 +7453,6 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = { .emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait, .soft_recovery = gfx_v9_0_ring_soft_recovery, .emit_mem_sync = gfx_v9_0_emit_mem_sync, - .reset = gfx_v9_0_reset_kgq, .emit_cleaner_shader = gfx_v9_0_ring_emit_cleaner_shader, .begin_use = amdgpu_gfx_enforce_isolation_ring_begin_use, .end_use = amdgpu_gfx_enforce_isolation_ring_end_use, diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.c index c48cd47b531f..8058ea91ecaf 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.c @@ -748,6 +748,18 @@ void gfx_v9_4_2_init_golden_registers(struct amdgpu_device *adev, } } +void gfx_v9_4_2_init_sq(struct amdgpu_device *adev) +{ + uint32_t data; + + if (adev->gfx.mec_fw_version >= 98) { + adev->gmc.xnack_flags |= AMDGPU_GMC_XNACK_FLAG_CHAIN; + data = RREG32_SOC15(GC, 0, regSQ_CONFIG1); + data = REG_SET_FIELD(data, SQ_CONFIG1, DISABLE_XNACK_CHECK_IN_RETRY_DISABLE, 1); + WREG32_SOC15(GC, 0, regSQ_CONFIG1, data); + } +} + void gfx_v9_4_2_debug_trap_config_init(struct amdgpu_device *adev, uint32_t first_vmid, uint32_t last_vmid) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.h b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.h index 7584624b641c..a603724c1dfc 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.h +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.h @@ -28,6 +28,7 @@ void gfx_v9_4_2_debug_trap_config_init(struct amdgpu_device *adev, uint32_t first_vmid, uint32_t last_vmid); void gfx_v9_4_2_init_golden_registers(struct amdgpu_device *adev, uint32_t die_id); +void gfx_v9_4_2_init_sq(struct amdgpu_device *adev); void gfx_v9_4_2_set_power_brake_sequence(struct amdgpu_device *adev); int gfx_v9_4_2_do_edc_gpr_workarounds(struct amdgpu_device *adev); diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c index c233edf60569..77f9d5b9a556 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c @@ -1148,13 +1148,15 @@ static int gfx_v9_4_3_sw_init(struct amdgpu_ip_block *ip_block) switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { case IP_VERSION(9, 4, 3): case IP_VERSION(9, 4, 4): - if (adev->gfx.mec_fw_version >= 155) { + if ((adev->gfx.mec_fw_version >= 155) && + !amdgpu_sriov_vf(adev)) { adev->gfx.compute_supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE; adev->gfx.compute_supported_reset |= AMDGPU_RESET_TYPE_PER_PIPE; } break; case IP_VERSION(9, 5, 0): - if (adev->gfx.mec_fw_version >= 21) { + if ((adev->gfx.mec_fw_version >= 21) && + !amdgpu_sriov_vf(adev)) { adev->gfx.compute_supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE; adev->gfx.compute_supported_reset |= AMDGPU_RESET_TYPE_PER_PIPE; } @@ -1349,7 +1351,9 @@ static void gfx_v9_4_3_constants_init(struct 
amdgpu_device *adev) switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { /* ToDo: GC 9.4.4 */ case IP_VERSION(9, 4, 3): - if (adev->gfx.mec_fw_version >= 184) + if (adev->gfx.mec_fw_version >= 184 && + (amdgpu_sriov_reg_access_sq_config(adev) || + !amdgpu_sriov_vf(adev))) adev->gmc.xnack_flags |= AMDGPU_GMC_XNACK_FLAG_CHAIN; break; case IP_VERSION(9, 5, 0): @@ -2457,19 +2461,17 @@ static int gfx_v9_4_3_soft_reset(struct amdgpu_ip_block *ip_block) /* Disable MEC parsing/prefetching */ gfx_v9_4_3_xcc_cp_compute_enable(adev, false, 0); - if (grbm_soft_reset) { - tmp = RREG32_SOC15(GC, GET_INST(GC, 0), regGRBM_SOFT_RESET); - tmp |= grbm_soft_reset; - dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp); - WREG32_SOC15(GC, GET_INST(GC, 0), regGRBM_SOFT_RESET, tmp); - tmp = RREG32_SOC15(GC, GET_INST(GC, 0), regGRBM_SOFT_RESET); + tmp = RREG32_SOC15(GC, GET_INST(GC, 0), regGRBM_SOFT_RESET); + tmp |= grbm_soft_reset; + dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp); + WREG32_SOC15(GC, GET_INST(GC, 0), regGRBM_SOFT_RESET, tmp); + tmp = RREG32_SOC15(GC, GET_INST(GC, 0), regGRBM_SOFT_RESET); - udelay(50); + udelay(50); - tmp &= ~grbm_soft_reset; - WREG32_SOC15(GC, GET_INST(GC, 0), regGRBM_SOFT_RESET, tmp); - tmp = RREG32_SOC15(GC, GET_INST(GC, 0), regGRBM_SOFT_RESET); - } + tmp &= ~grbm_soft_reset; + WREG32_SOC15(GC, GET_INST(GC, 0), regGRBM_SOFT_RESET, tmp); + tmp = RREG32_SOC15(GC, GET_INST(GC, 0), regGRBM_SOFT_RESET); /* Wait a little for things to settle down */ udelay(50); @@ -3552,20 +3554,21 @@ static int gfx_v9_4_3_reset_hw_pipe(struct amdgpu_ring *ring) } static int gfx_v9_4_3_reset_kcq(struct amdgpu_ring *ring, - unsigned int vmid) + unsigned int vmid, + struct amdgpu_fence *timedout_fence) { struct amdgpu_device *adev = ring->adev; struct amdgpu_kiq *kiq = &adev->gfx.kiq[ring->xcc_id]; struct amdgpu_ring *kiq_ring = &kiq->ring; + int reset_mode = AMDGPU_RESET_TYPE_PER_QUEUE; unsigned long flags; int r; - if (amdgpu_sriov_vf(adev)) - return -EINVAL; - if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues) return -EINVAL; + amdgpu_ring_reset_helper_begin(ring, timedout_fence); + spin_lock_irqsave(&kiq->ring_lock, flags); if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->unmap_queues_size)) { @@ -3591,8 +3594,11 @@ static int gfx_v9_4_3_reset_kcq(struct amdgpu_ring *ring, dev_err(adev->dev, "fail to wait on hqd deactive and will try pipe reset\n"); pipe_reset: - if(r) { + if (r) { + if (!(adev->gfx.compute_supported_reset & AMDGPU_RESET_TYPE_PER_PIPE)) + return -EOPNOTSUPP; r = gfx_v9_4_3_reset_hw_pipe(ring); + reset_mode = AMDGPU_RESET_TYPE_PER_PIPE; dev_info(adev->dev, "ring: %s pipe reset :%s\n", ring->name, r ? 
"failed" : "successfully"); if (r) @@ -3612,14 +3618,24 @@ pipe_reset: } kiq->pmf->kiq_map_queues(kiq_ring, ring); amdgpu_ring_commit(kiq_ring); - spin_unlock_irqrestore(&kiq->ring_lock, flags); - r = amdgpu_ring_test_ring(kiq_ring); + spin_unlock_irqrestore(&kiq->ring_lock, flags); if (r) { + if (reset_mode == AMDGPU_RESET_TYPE_PER_QUEUE) + goto pipe_reset; + dev_err(adev->dev, "fail to remap queue\n"); return r; } - return amdgpu_ring_test_ring(ring); + + if (reset_mode == AMDGPU_RESET_TYPE_PER_QUEUE) { + r = amdgpu_ring_test_ring(ring); + if (r) + goto pipe_reset; + } + + + return amdgpu_ring_reset_helper_end(ring, timedout_fence); } enum amdgpu_gfx_cp_ras_mem_id { diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_2.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_2.c index cb25f7f0dfc1..6c03bf9f1ae8 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_2.c +++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_2.c @@ -74,6 +74,8 @@ static void gfxhub_v1_2_setup_vm_pt_regs(struct amdgpu_device *adev, static void gfxhub_v1_2_xcc_init_gart_aperture_regs(struct amdgpu_device *adev, uint32_t xcc_mask) { + uint64_t gart_start = amdgpu_virt_xgmi_migrate_enabled(adev) ? + adev->gmc.vram_start : adev->gmc.fb_start; uint64_t pt_base; int i; @@ -91,10 +93,10 @@ static void gfxhub_v1_2_xcc_init_gart_aperture_regs(struct amdgpu_device *adev, if (adev->gmc.pdb0_bo) { WREG32_SOC15(GC, GET_INST(GC, i), regVM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32, - (u32)(adev->gmc.fb_start >> 12)); + (u32)(gart_start >> 12)); WREG32_SOC15(GC, GET_INST(GC, i), regVM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32, - (u32)(adev->gmc.fb_start >> 44)); + (u32)(gart_start >> 44)); WREG32_SOC15(GC, GET_INST(GC, i), regVM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32, @@ -180,7 +182,7 @@ gfxhub_v1_2_xcc_init_system_aperture_regs(struct amdgpu_device *adev, /* In the case squeezing vram into GART aperture, we don't use * FB aperture and AGP aperture. Disable them. 
*/ - if (adev->gmc.pdb0_bo) { + if (adev->gmc.pdb0_bo && adev->gmc.xgmi.connected_to_cpu) { WREG32_SOC15(GC, GET_INST(GC, i), regMC_VM_FB_LOCATION_TOP, 0); WREG32_SOC15(GC, GET_INST(GC, i), regMC_VM_FB_LOCATION_BASE, 0x00FFFFFF); WREG32_SOC15(GC, GET_INST(GC, i), regMC_VM_AGP_TOP, 0); diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c index 7923f491cf73..d7499be8c4bf 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c @@ -466,24 +466,6 @@ static void gmc_v10_0_emit_pasid_mapping(struct amdgpu_ring *ring, unsigned int * 0 valid */ -static uint64_t gmc_v10_0_map_mtype(struct amdgpu_device *adev, uint32_t flags) -{ - switch (flags) { - case AMDGPU_VM_MTYPE_DEFAULT: - return AMDGPU_PTE_MTYPE_NV10(0ULL, MTYPE_NC); - case AMDGPU_VM_MTYPE_NC: - return AMDGPU_PTE_MTYPE_NV10(0ULL, MTYPE_NC); - case AMDGPU_VM_MTYPE_WC: - return AMDGPU_PTE_MTYPE_NV10(0ULL, MTYPE_WC); - case AMDGPU_VM_MTYPE_CC: - return AMDGPU_PTE_MTYPE_NV10(0ULL, MTYPE_CC); - case AMDGPU_VM_MTYPE_UC: - return AMDGPU_PTE_MTYPE_NV10(0ULL, MTYPE_UC); - default: - return AMDGPU_PTE_MTYPE_NV10(0ULL, MTYPE_NC); - } -} - static void gmc_v10_0_get_vm_pde(struct amdgpu_device *adev, int level, uint64_t *addr, uint64_t *flags) { @@ -508,21 +490,39 @@ static void gmc_v10_0_get_vm_pde(struct amdgpu_device *adev, int level, } static void gmc_v10_0_get_vm_pte(struct amdgpu_device *adev, - struct amdgpu_bo_va_mapping *mapping, + struct amdgpu_vm *vm, + struct amdgpu_bo *bo, + uint32_t vm_flags, uint64_t *flags) { - struct amdgpu_bo *bo = mapping->bo_va->base.bo; - - *flags &= ~AMDGPU_PTE_EXECUTABLE; - *flags |= mapping->flags & AMDGPU_PTE_EXECUTABLE; + if (vm_flags & AMDGPU_VM_PAGE_EXECUTABLE) + *flags |= AMDGPU_PTE_EXECUTABLE; + else + *flags &= ~AMDGPU_PTE_EXECUTABLE; - *flags &= ~AMDGPU_PTE_MTYPE_NV10_MASK; - *flags |= (mapping->flags & AMDGPU_PTE_MTYPE_NV10_MASK); + switch (vm_flags & AMDGPU_VM_MTYPE_MASK) { + case AMDGPU_VM_MTYPE_DEFAULT: + case AMDGPU_VM_MTYPE_NC: + default: + *flags = AMDGPU_PTE_MTYPE_NV10(*flags, MTYPE_NC); + break; + case AMDGPU_VM_MTYPE_WC: + *flags = AMDGPU_PTE_MTYPE_NV10(*flags, MTYPE_WC); + break; + case AMDGPU_VM_MTYPE_CC: + *flags = AMDGPU_PTE_MTYPE_NV10(*flags, MTYPE_CC); + break; + case AMDGPU_VM_MTYPE_UC: + *flags = AMDGPU_PTE_MTYPE_NV10(*flags, MTYPE_UC); + break; + } - *flags &= ~AMDGPU_PTE_NOALLOC; - *flags |= (mapping->flags & AMDGPU_PTE_NOALLOC); + if (vm_flags & AMDGPU_VM_PAGE_NOALLOC) + *flags |= AMDGPU_PTE_NOALLOC; + else + *flags &= ~AMDGPU_PTE_NOALLOC; - if (mapping->flags & AMDGPU_PTE_PRT) { + if (vm_flags & AMDGPU_VM_PAGE_PRT) { *flags |= AMDGPU_PTE_PRT; *flags |= AMDGPU_PTE_SNOOPED; *flags |= AMDGPU_PTE_LOG; @@ -563,7 +563,6 @@ static const struct amdgpu_gmc_funcs gmc_v10_0_gmc_funcs = { .flush_gpu_tlb_pasid = gmc_v10_0_flush_gpu_tlb_pasid, .emit_flush_gpu_tlb = gmc_v10_0_emit_flush_gpu_tlb, .emit_pasid_mapping = gmc_v10_0_emit_pasid_mapping, - .map_mtype = gmc_v10_0_map_mtype, .get_vm_pde = gmc_v10_0_get_vm_pde, .get_vm_pte = gmc_v10_0_get_vm_pte, .get_vbios_fb_size = gmc_v10_0_get_vbios_fb_size, @@ -964,8 +963,7 @@ static int gmc_v10_0_gart_enable(struct amdgpu_device *adev) /* Flush HDP after it is initialized */ amdgpu_device_flush_hdp(adev, NULL); - value = (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_ALWAYS) ? 
- false : true; + value = amdgpu_vm_fault_stop != AMDGPU_VM_FAULT_STOP_ALWAYS; if (!adev->in_s0ix) adev->gfxhub.funcs->set_fault_enable_default(adev, value); diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c index f15d691e9a20..7bc389d9f5c4 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c @@ -430,24 +430,6 @@ static void gmc_v11_0_emit_pasid_mapping(struct amdgpu_ring *ring, unsigned int * 0 valid */ -static uint64_t gmc_v11_0_map_mtype(struct amdgpu_device *adev, uint32_t flags) -{ - switch (flags) { - case AMDGPU_VM_MTYPE_DEFAULT: - return AMDGPU_PTE_MTYPE_NV10(0ULL, MTYPE_NC); - case AMDGPU_VM_MTYPE_NC: - return AMDGPU_PTE_MTYPE_NV10(0ULL, MTYPE_NC); - case AMDGPU_VM_MTYPE_WC: - return AMDGPU_PTE_MTYPE_NV10(0ULL, MTYPE_WC); - case AMDGPU_VM_MTYPE_CC: - return AMDGPU_PTE_MTYPE_NV10(0ULL, MTYPE_CC); - case AMDGPU_VM_MTYPE_UC: - return AMDGPU_PTE_MTYPE_NV10(0ULL, MTYPE_UC); - default: - return AMDGPU_PTE_MTYPE_NV10(0ULL, MTYPE_NC); - } -} - static void gmc_v11_0_get_vm_pde(struct amdgpu_device *adev, int level, uint64_t *addr, uint64_t *flags) { @@ -472,21 +454,39 @@ static void gmc_v11_0_get_vm_pde(struct amdgpu_device *adev, int level, } static void gmc_v11_0_get_vm_pte(struct amdgpu_device *adev, - struct amdgpu_bo_va_mapping *mapping, + struct amdgpu_vm *vm, + struct amdgpu_bo *bo, + uint32_t vm_flags, uint64_t *flags) { - struct amdgpu_bo *bo = mapping->bo_va->base.bo; - - *flags &= ~AMDGPU_PTE_EXECUTABLE; - *flags |= mapping->flags & AMDGPU_PTE_EXECUTABLE; + if (vm_flags & AMDGPU_VM_PAGE_EXECUTABLE) + *flags |= AMDGPU_PTE_EXECUTABLE; + else + *flags &= ~AMDGPU_PTE_EXECUTABLE; - *flags &= ~AMDGPU_PTE_MTYPE_NV10_MASK; - *flags |= (mapping->flags & AMDGPU_PTE_MTYPE_NV10_MASK); + switch (vm_flags & AMDGPU_VM_MTYPE_MASK) { + case AMDGPU_VM_MTYPE_DEFAULT: + case AMDGPU_VM_MTYPE_NC: + default: + *flags = AMDGPU_PTE_MTYPE_NV10(*flags, MTYPE_NC); + break; + case AMDGPU_VM_MTYPE_WC: + *flags = AMDGPU_PTE_MTYPE_NV10(*flags, MTYPE_WC); + break; + case AMDGPU_VM_MTYPE_CC: + *flags = AMDGPU_PTE_MTYPE_NV10(*flags, MTYPE_CC); + break; + case AMDGPU_VM_MTYPE_UC: + *flags = AMDGPU_PTE_MTYPE_NV10(*flags, MTYPE_UC); + break; + } - *flags &= ~AMDGPU_PTE_NOALLOC; - *flags |= (mapping->flags & AMDGPU_PTE_NOALLOC); + if (vm_flags & AMDGPU_VM_PAGE_NOALLOC) + *flags |= AMDGPU_PTE_NOALLOC; + else + *flags &= ~AMDGPU_PTE_NOALLOC; - if (mapping->flags & AMDGPU_PTE_PRT) { + if (vm_flags & AMDGPU_VM_PAGE_PRT) { *flags |= AMDGPU_PTE_PRT; *flags |= AMDGPU_PTE_SNOOPED; *flags |= AMDGPU_PTE_LOG; @@ -527,7 +527,6 @@ static const struct amdgpu_gmc_funcs gmc_v11_0_gmc_funcs = { .flush_gpu_tlb_pasid = gmc_v11_0_flush_gpu_tlb_pasid, .emit_flush_gpu_tlb = gmc_v11_0_emit_flush_gpu_tlb, .emit_pasid_mapping = gmc_v11_0_emit_pasid_mapping, - .map_mtype = gmc_v11_0_map_mtype, .get_vm_pde = gmc_v11_0_get_vm_pde, .get_vm_pte = gmc_v11_0_get_vm_pte, .get_vbios_fb_size = gmc_v11_0_get_vbios_fb_size, @@ -906,8 +905,7 @@ static int gmc_v11_0_gart_enable(struct amdgpu_device *adev) /* Flush HDP after it is initialized */ amdgpu_device_flush_hdp(adev, NULL); - value = (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_ALWAYS) ? 
- false : true; + value = amdgpu_vm_fault_stop != AMDGPU_VM_FAULT_STOP_ALWAYS; adev->mmhub.funcs->set_fault_enable_default(adev, value); gmc_v11_0_flush_gpu_tlb(adev, 0, AMDGPU_MMHUB0(0), 0); diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c index de763105fdfd..f4a19357ccbc 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c @@ -336,6 +336,22 @@ static void gmc_v12_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev, uint16_t queried; int vmid, i; + if (adev->enable_uni_mes && adev->mes.ring[AMDGPU_MES_SCHED_PIPE].sched.ready && + (adev->mes.sched_version & AMDGPU_MES_VERSION_MASK) >= 0x84) { + struct mes_inv_tlbs_pasid_input input = {0}; + input.pasid = pasid; + input.flush_type = flush_type; + input.hub_id = AMDGPU_GFXHUB(0); + /* MES will invalidate all gc_hub for the device from master */ + adev->mes.funcs->invalidate_tlbs_pasid(&adev->mes, &input); + if (all_hub) { + /* Only need to invalidate mm_hub now, gfx12 only support one mmhub */ + input.hub_id = AMDGPU_MMHUB0(0); + adev->mes.funcs->invalidate_tlbs_pasid(&adev->mes, &input); + } + return; + } + for (vmid = 1; vmid < 16; vmid++) { bool valid; @@ -453,20 +469,6 @@ static void gmc_v12_0_emit_pasid_mapping(struct amdgpu_ring *ring, unsigned vmid * 0 valid */ -static uint64_t gmc_v12_0_map_mtype(struct amdgpu_device *adev, uint32_t flags) -{ - switch (flags) { - case AMDGPU_VM_MTYPE_DEFAULT: - return AMDGPU_PTE_MTYPE_GFX12(0ULL, MTYPE_NC); - case AMDGPU_VM_MTYPE_NC: - return AMDGPU_PTE_MTYPE_GFX12(0ULL, MTYPE_NC); - case AMDGPU_VM_MTYPE_UC: - return AMDGPU_PTE_MTYPE_GFX12(0ULL, MTYPE_UC); - default: - return AMDGPU_PTE_MTYPE_GFX12(0ULL, MTYPE_NC); - } -} - static void gmc_v12_0_get_vm_pde(struct amdgpu_device *adev, int level, uint64_t *addr, uint64_t *flags) { @@ -490,18 +492,35 @@ static void gmc_v12_0_get_vm_pde(struct amdgpu_device *adev, int level, } static void gmc_v12_0_get_vm_pte(struct amdgpu_device *adev, - struct amdgpu_bo_va_mapping *mapping, + struct amdgpu_vm *vm, + struct amdgpu_bo *bo, + uint32_t vm_flags, uint64_t *flags) { - struct amdgpu_bo *bo = mapping->bo_va->base.bo; + if (vm_flags & AMDGPU_VM_PAGE_EXECUTABLE) + *flags |= AMDGPU_PTE_EXECUTABLE; + else + *flags &= ~AMDGPU_PTE_EXECUTABLE; - *flags &= ~AMDGPU_PTE_EXECUTABLE; - *flags |= mapping->flags & AMDGPU_PTE_EXECUTABLE; + switch (vm_flags & AMDGPU_VM_MTYPE_MASK) { + case AMDGPU_VM_MTYPE_DEFAULT: + *flags = AMDGPU_PTE_MTYPE_GFX12(*flags, MTYPE_NC); + break; + case AMDGPU_VM_MTYPE_NC: + default: + *flags = AMDGPU_PTE_MTYPE_GFX12(*flags, MTYPE_NC); + break; + case AMDGPU_VM_MTYPE_UC: + *flags = AMDGPU_PTE_MTYPE_GFX12(*flags, MTYPE_UC); + break; + } - *flags &= ~AMDGPU_PTE_MTYPE_GFX12_MASK; - *flags |= (mapping->flags & AMDGPU_PTE_MTYPE_GFX12_MASK); + if (vm_flags & AMDGPU_VM_PAGE_NOALLOC) + *flags |= AMDGPU_PTE_NOALLOC; + else + *flags &= ~AMDGPU_PTE_NOALLOC; - if (mapping->flags & AMDGPU_PTE_PRT_GFX12) { + if (vm_flags & AMDGPU_VM_PAGE_PRT) { *flags |= AMDGPU_PTE_PRT_GFX12; *flags |= AMDGPU_PTE_SNOOPED; *flags |= AMDGPU_PTE_SYSTEM; @@ -543,7 +562,6 @@ static const struct amdgpu_gmc_funcs gmc_v12_0_gmc_funcs = { .flush_gpu_tlb_pasid = gmc_v12_0_flush_gpu_tlb_pasid, .emit_flush_gpu_tlb = gmc_v12_0_emit_flush_gpu_tlb, .emit_pasid_mapping = gmc_v12_0_emit_pasid_mapping, - .map_mtype = gmc_v12_0_map_mtype, .get_vm_pde = gmc_v12_0_get_vm_pde, .get_vm_pte = gmc_v12_0_get_vm_pte, .get_vbios_fb_size = gmc_v12_0_get_vbios_fb_size, @@ -876,8 +894,7 @@ static int 
gmc_v12_0_gart_enable(struct amdgpu_device *adev) /* Flush HDP after it is initialized */ amdgpu_device_flush_hdp(adev, NULL); - value = (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_ALWAYS) ? - false : true; + value = amdgpu_vm_fault_stop != AMDGPU_VM_FAULT_STOP_ALWAYS; adev->mmhub.funcs->set_fault_enable_default(adev, value); gmc_v12_0_flush_gpu_tlb(adev, 0, AMDGPU_MMHUB0(0), 0); diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c index 8030fcd64210..f6ad7911f1e6 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c @@ -382,7 +382,9 @@ static void gmc_v6_0_get_vm_pde(struct amdgpu_device *adev, int level, } static void gmc_v6_0_get_vm_pte(struct amdgpu_device *adev, - struct amdgpu_bo_va_mapping *mapping, + struct amdgpu_vm *vm, + struct amdgpu_bo *bo, + uint32_t vm_flags, uint64_t *flags) { *flags &= ~AMDGPU_PTE_EXECUTABLE; diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c index a8d5795084fc..0e5e54d0a9a5 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c @@ -504,7 +504,9 @@ static void gmc_v7_0_get_vm_pde(struct amdgpu_device *adev, int level, } static void gmc_v7_0_get_vm_pte(struct amdgpu_device *adev, - struct amdgpu_bo_va_mapping *mapping, + struct amdgpu_vm *vm, + struct amdgpu_bo *bo, + uint32_t vm_flags, uint64_t *flags) { *flags &= ~AMDGPU_PTE_EXECUTABLE; @@ -1066,7 +1068,7 @@ static int gmc_v7_0_sw_init(struct amdgpu_ip_block *ip_block) GFP_KERNEL); if (!adev->gmc.vm_fault_info) return -ENOMEM; - atomic_set(&adev->gmc.vm_fault_info_updated, 0); + atomic_set_release(&adev->gmc.vm_fault_info_updated, 0); return 0; } @@ -1288,7 +1290,7 @@ static int gmc_v7_0_process_interrupt(struct amdgpu_device *adev, vmid = REG_GET_FIELD(status, VM_CONTEXT1_PROTECTION_FAULT_STATUS, VMID); if (amdgpu_amdkfd_is_kfd_vmid(adev, vmid) - && !atomic_read(&adev->gmc.vm_fault_info_updated)) { + && !atomic_read_acquire(&adev->gmc.vm_fault_info_updated)) { struct kfd_vm_fault_info *info = adev->gmc.vm_fault_info; u32 protections = REG_GET_FIELD(status, VM_CONTEXT1_PROTECTION_FAULT_STATUS, @@ -1304,8 +1306,7 @@ static int gmc_v7_0_process_interrupt(struct amdgpu_device *adev, info->prot_read = protections & 0x8 ? true : false; info->prot_write = protections & 0x10 ? true : false; info->prot_exec = protections & 0x20 ? 
true : false; - mb(); - atomic_set(&adev->gmc.vm_fault_info_updated, 1); + atomic_set_release(&adev->gmc.vm_fault_info_updated, 1); } return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c index b45fa0cea9d2..e1509480dfc2 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c @@ -716,11 +716,15 @@ static void gmc_v8_0_get_vm_pde(struct amdgpu_device *adev, int level, } static void gmc_v8_0_get_vm_pte(struct amdgpu_device *adev, - struct amdgpu_bo_va_mapping *mapping, + struct amdgpu_vm *vm, + struct amdgpu_bo *bo, + uint32_t vm_flags, uint64_t *flags) { - *flags &= ~AMDGPU_PTE_EXECUTABLE; - *flags |= mapping->flags & AMDGPU_PTE_EXECUTABLE; + if (vm_flags & AMDGPU_VM_PAGE_EXECUTABLE) + *flags |= AMDGPU_PTE_EXECUTABLE; + else + *flags &= ~AMDGPU_PTE_EXECUTABLE; *flags &= ~AMDGPU_PTE_PRT; } @@ -1179,7 +1183,7 @@ static int gmc_v8_0_sw_init(struct amdgpu_ip_block *ip_block) GFP_KERNEL); if (!adev->gmc.vm_fault_info) return -ENOMEM; - atomic_set(&adev->gmc.vm_fault_info_updated, 0); + atomic_set_release(&adev->gmc.vm_fault_info_updated, 0); return 0; } @@ -1474,7 +1478,7 @@ static int gmc_v8_0_process_interrupt(struct amdgpu_device *adev, vmid = REG_GET_FIELD(status, VM_CONTEXT1_PROTECTION_FAULT_STATUS, VMID); if (amdgpu_amdkfd_is_kfd_vmid(adev, vmid) - && !atomic_read(&adev->gmc.vm_fault_info_updated)) { + && !atomic_read_acquire(&adev->gmc.vm_fault_info_updated)) { struct kfd_vm_fault_info *info = adev->gmc.vm_fault_info; u32 protections = REG_GET_FIELD(status, VM_CONTEXT1_PROTECTION_FAULT_STATUS, @@ -1490,8 +1494,7 @@ static int gmc_v8_0_process_interrupt(struct amdgpu_device *adev, info->prot_read = protections & 0x8 ? true : false; info->prot_write = protections & 0x10 ? true : false; info->prot_exec = protections & 0x20 ? true : false; - mb(); - atomic_set(&adev->gmc.vm_fault_info_updated, 1); + atomic_set_release(&adev->gmc.vm_fault_info_updated, 1); } return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index 78f65aea03f8..0d1dd587db5f 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -78,8 +78,6 @@ #define mmHUBP0_DCSURF_PRI_VIEWPORT_DIMENSION_DCN2 0x05ea #define mmHUBP0_DCSURF_PRI_VIEWPORT_DIMENSION_DCN2_BASE_IDX 2 -#define MAX_MEM_RANGES 8 - static const char * const gfxhub_client_ids[] = { "CB", "DB", @@ -411,11 +409,6 @@ static const uint32_t ecc_umc_mcumc_ctrl_mask_addrs[] = { (0x001d43e0 + 0x00001800), }; -static inline bool gmc_v9_0_is_multi_chiplet(struct amdgpu_device *adev) -{ - return !!adev->aid_mask; -} - static int gmc_v9_0_ecc_interrupt_state(struct amdgpu_device *adev, struct amdgpu_irq_src *src, unsigned int type, @@ -649,7 +642,7 @@ static int gmc_v9_0_process_interrupt(struct amdgpu_device *adev, addr, entry->client_id, soc15_ih_clientid_name[entry->client_id]); - if (gmc_v9_0_is_multi_chiplet(adev)) + if (amdgpu_is_multi_aid(adev)) dev_err(adev->dev, " cookie node_id %d fault from die %s%d%s\n", node_id, node_id % 4 == 3 ? "RSV" : "AID", node_id / 4, node_id % 4 == 1 ? ".XCD0" : node_id % 4 == 2 ? 
".XCD1" : ""); @@ -798,7 +791,7 @@ static bool gmc_v9_0_use_invalidate_semaphore(struct amdgpu_device *adev, uint32_t vmhub) { if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 2) || - gmc_v9_0_is_multi_chiplet(adev)) + amdgpu_is_multi_aid(adev)) return false; return ((vmhub == AMDGPU_MMHUB0(0) || @@ -1080,27 +1073,6 @@ static void gmc_v9_0_emit_pasid_mapping(struct amdgpu_ring *ring, unsigned int v * 0 valid */ -static uint64_t gmc_v9_0_map_mtype(struct amdgpu_device *adev, uint32_t flags) - -{ - switch (flags) { - case AMDGPU_VM_MTYPE_DEFAULT: - return AMDGPU_PTE_MTYPE_VG10(0ULL, MTYPE_NC); - case AMDGPU_VM_MTYPE_NC: - return AMDGPU_PTE_MTYPE_VG10(0ULL, MTYPE_NC); - case AMDGPU_VM_MTYPE_WC: - return AMDGPU_PTE_MTYPE_VG10(0ULL, MTYPE_WC); - case AMDGPU_VM_MTYPE_RW: - return AMDGPU_PTE_MTYPE_VG10(0ULL, MTYPE_RW); - case AMDGPU_VM_MTYPE_CC: - return AMDGPU_PTE_MTYPE_VG10(0ULL, MTYPE_CC); - case AMDGPU_VM_MTYPE_UC: - return AMDGPU_PTE_MTYPE_VG10(0ULL, MTYPE_UC); - default: - return AMDGPU_PTE_MTYPE_VG10(0ULL, MTYPE_NC); - } -} - static void gmc_v9_0_get_vm_pde(struct amdgpu_device *adev, int level, uint64_t *addr, uint64_t *flags) { @@ -1128,8 +1100,9 @@ static void gmc_v9_0_get_vm_pde(struct amdgpu_device *adev, int level, } static void gmc_v9_0_get_coherence_flags(struct amdgpu_device *adev, + struct amdgpu_vm *vm, struct amdgpu_bo *bo, - struct amdgpu_bo_va_mapping *mapping, + uint32_t vm_flags, uint64_t *flags) { struct amdgpu_device *bo_adev = amdgpu_ttm_adev(bo->tbo.bdev); @@ -1139,7 +1112,6 @@ static void gmc_v9_0_get_coherence_flags(struct amdgpu_device *adev, AMDGPU_GEM_CREATE_EXT_COHERENT); bool ext_coherent = bo->flags & AMDGPU_GEM_CREATE_EXT_COHERENT; bool uncached = bo->flags & AMDGPU_GEM_CREATE_UNCACHED; - struct amdgpu_vm *vm = mapping->bo_va->base.vm; unsigned int mtype_local, mtype; uint32_t gc_ip_version = amdgpu_ip_version(adev, GC_HWIP, 0); bool snoop = false; @@ -1169,7 +1141,7 @@ static void gmc_v9_0_get_coherence_flags(struct amdgpu_device *adev, mtype = MTYPE_UC; else mtype = MTYPE_NC; - if (mapping->bo_va->is_xgmi) + if (amdgpu_xgmi_same_hive(adev, bo_adev)) snoop = true; } } else { @@ -1244,24 +1216,43 @@ static void gmc_v9_0_get_coherence_flags(struct amdgpu_device *adev, } static void gmc_v9_0_get_vm_pte(struct amdgpu_device *adev, - struct amdgpu_bo_va_mapping *mapping, + struct amdgpu_vm *vm, + struct amdgpu_bo *bo, + uint32_t vm_flags, uint64_t *flags) { - struct amdgpu_bo *bo = mapping->bo_va->base.bo; - - *flags &= ~AMDGPU_PTE_EXECUTABLE; - *flags |= mapping->flags & AMDGPU_PTE_EXECUTABLE; + if (vm_flags & AMDGPU_VM_PAGE_EXECUTABLE) + *flags |= AMDGPU_PTE_EXECUTABLE; + else + *flags &= ~AMDGPU_PTE_EXECUTABLE; - *flags &= ~AMDGPU_PTE_MTYPE_VG10_MASK; - *flags |= mapping->flags & AMDGPU_PTE_MTYPE_VG10_MASK; + switch (vm_flags & AMDGPU_VM_MTYPE_MASK) { + case AMDGPU_VM_MTYPE_DEFAULT: + case AMDGPU_VM_MTYPE_NC: + default: + *flags = AMDGPU_PTE_MTYPE_VG10(*flags, MTYPE_NC); + break; + case AMDGPU_VM_MTYPE_WC: + *flags |= AMDGPU_PTE_MTYPE_VG10(*flags, MTYPE_WC); + break; + case AMDGPU_VM_MTYPE_RW: + *flags |= AMDGPU_PTE_MTYPE_VG10(*flags, MTYPE_RW); + break; + case AMDGPU_VM_MTYPE_CC: + *flags |= AMDGPU_PTE_MTYPE_VG10(*flags, MTYPE_CC); + break; + case AMDGPU_VM_MTYPE_UC: + *flags |= AMDGPU_PTE_MTYPE_VG10(*flags, MTYPE_UC); + break; + } - if (mapping->flags & AMDGPU_PTE_PRT) { + if (vm_flags & AMDGPU_VM_PAGE_PRT) { *flags |= AMDGPU_PTE_PRT; *flags &= ~AMDGPU_PTE_VALID; } if ((*flags & AMDGPU_PTE_VALID) && bo) - gmc_v9_0_get_coherence_flags(adev, bo, 
mapping, flags); + gmc_v9_0_get_coherence_flags(adev, vm, bo, vm_flags, flags); } static void gmc_v9_0_override_vm_pte_flags(struct amdgpu_device *adev, @@ -1382,46 +1373,6 @@ static unsigned int gmc_v9_0_get_vbios_fb_size(struct amdgpu_device *adev) return size; } -static enum amdgpu_memory_partition -gmc_v9_0_get_memory_partition(struct amdgpu_device *adev, u32 *supp_modes) -{ - enum amdgpu_memory_partition mode = UNKNOWN_MEMORY_PARTITION_MODE; - - if (adev->nbio.funcs->get_memory_partition_mode) - mode = adev->nbio.funcs->get_memory_partition_mode(adev, - supp_modes); - - return mode; -} - -static enum amdgpu_memory_partition -gmc_v9_0_query_vf_memory_partition(struct amdgpu_device *adev) -{ - switch (adev->gmc.num_mem_partitions) { - case 0: - return UNKNOWN_MEMORY_PARTITION_MODE; - case 1: - return AMDGPU_NPS1_PARTITION_MODE; - case 2: - return AMDGPU_NPS2_PARTITION_MODE; - case 4: - return AMDGPU_NPS4_PARTITION_MODE; - default: - return AMDGPU_NPS1_PARTITION_MODE; - } - - return AMDGPU_NPS1_PARTITION_MODE; -} - -static enum amdgpu_memory_partition -gmc_v9_0_query_memory_partition(struct amdgpu_device *adev) -{ - if (amdgpu_sriov_vf(adev)) - return gmc_v9_0_query_vf_memory_partition(adev); - - return gmc_v9_0_get_memory_partition(adev, NULL); -} - static bool gmc_v9_0_need_reset_on_init(struct amdgpu_device *adev) { if (adev->nbio.funcs && adev->nbio.funcs->is_nps_switch_requested && @@ -1438,12 +1389,11 @@ static const struct amdgpu_gmc_funcs gmc_v9_0_gmc_funcs = { .flush_gpu_tlb_pasid = gmc_v9_0_flush_gpu_tlb_pasid, .emit_flush_gpu_tlb = gmc_v9_0_emit_flush_gpu_tlb, .emit_pasid_mapping = gmc_v9_0_emit_pasid_mapping, - .map_mtype = gmc_v9_0_map_mtype, .get_vm_pde = gmc_v9_0_get_vm_pde, .get_vm_pte = gmc_v9_0_get_vm_pte, .override_vm_pte_flags = gmc_v9_0_override_vm_pte_flags, .get_vbios_fb_size = gmc_v9_0_get_vbios_fb_size, - .query_mem_partition_mode = &gmc_v9_0_query_memory_partition, + .query_mem_partition_mode = &amdgpu_gmc_query_memory_partition, .request_mem_partition_mode = &amdgpu_gmc_request_memory_partition, .need_reset_on_init = &gmc_v9_0_need_reset_on_init, }; @@ -1550,7 +1500,7 @@ static void gmc_v9_0_set_mmhub_ras_funcs(struct amdgpu_device *adev) static void gmc_v9_0_set_gfxhub_funcs(struct amdgpu_device *adev) { - if (gmc_v9_0_is_multi_chiplet(adev)) + if (amdgpu_is_multi_aid(adev)) adev->gfxhub.funcs = &gfxhub_v1_2_funcs; else adev->gfxhub.funcs = &gfxhub_v1_0_funcs; @@ -1596,7 +1546,7 @@ static void gmc_v9_0_init_nps_details(struct amdgpu_device *adev) if (amdgpu_sriov_vf(adev) || (adev->flags & AMD_IS_APU)) return; - mode = gmc_v9_0_get_memory_partition(adev, &supp_modes); + mode = amdgpu_gmc_get_memory_partition(adev, &supp_modes); /* Mode detected by hardware and supported modes available */ if ((mode != UNKNOWN_MEMORY_PARTITION_MODE) && supp_modes) { @@ -1632,7 +1582,7 @@ static int gmc_v9_0_early_init(struct amdgpu_ip_block *ip_block) */ if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 0) || amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 1) || - gmc_v9_0_is_multi_chiplet(adev)) + amdgpu_is_multi_aid(adev)) adev->gmc.xgmi.supported = true; if (amdgpu_ip_version(adev, XGMI_HWIP, 0) == IP_VERSION(6, 1, 0)) { @@ -1719,7 +1669,7 @@ static void gmc_v9_0_vram_gtt_location(struct amdgpu_device *adev, /* add the xgmi offset of the physical node */ base += adev->gmc.xgmi.physical_node_id * adev->gmc.xgmi.node_segment_size; - if (adev->gmc.xgmi.connected_to_cpu) { + if (amdgpu_gmc_is_pdb0_enabled(adev)) { amdgpu_gmc_sysvm_location(adev, mc); } else { 
amdgpu_gmc_vram_location(adev, mc, base); @@ -1834,7 +1784,7 @@ static int gmc_v9_0_gart_init(struct amdgpu_device *adev) return 0; } - if (adev->gmc.xgmi.connected_to_cpu) { + if (amdgpu_gmc_is_pdb0_enabled(adev)) { adev->gmc.vmid0_page_table_depth = 1; adev->gmc.vmid0_page_table_block_size = 12; } else { @@ -1860,7 +1810,7 @@ static int gmc_v9_0_gart_init(struct amdgpu_device *adev) if (r) return r; - if (adev->gmc.xgmi.connected_to_cpu) + if (amdgpu_gmc_is_pdb0_enabled(adev)) r = amdgpu_gmc_pdb0_alloc(adev); } @@ -1882,195 +1832,21 @@ static void gmc_v9_0_save_registers(struct amdgpu_device *adev) adev->gmc.sdpif_register = RREG32_SOC15(DCE, 0, mmDCHUBBUB_SDPIF_MMIO_CNTRL_0); } -static bool gmc_v9_0_validate_partition_info(struct amdgpu_device *adev) -{ - enum amdgpu_memory_partition mode; - u32 supp_modes; - bool valid; - - mode = gmc_v9_0_get_memory_partition(adev, &supp_modes); - - /* Mode detected by hardware not present in supported modes */ - if ((mode != UNKNOWN_MEMORY_PARTITION_MODE) && - !(BIT(mode - 1) & supp_modes)) - return false; - - switch (mode) { - case UNKNOWN_MEMORY_PARTITION_MODE: - case AMDGPU_NPS1_PARTITION_MODE: - valid = (adev->gmc.num_mem_partitions == 1); - break; - case AMDGPU_NPS2_PARTITION_MODE: - valid = (adev->gmc.num_mem_partitions == 2); - break; - case AMDGPU_NPS4_PARTITION_MODE: - valid = (adev->gmc.num_mem_partitions == 3 || - adev->gmc.num_mem_partitions == 4); - break; - default: - valid = false; - } - - return valid; -} - -static bool gmc_v9_0_is_node_present(int *node_ids, int num_ids, int nid) -{ - int i; - - /* Check if node with id 'nid' is present in 'node_ids' array */ - for (i = 0; i < num_ids; ++i) - if (node_ids[i] == nid) - return true; - - return false; -} - -static void -gmc_v9_0_init_acpi_mem_ranges(struct amdgpu_device *adev, - struct amdgpu_mem_partition_info *mem_ranges) -{ - struct amdgpu_numa_info numa_info; - int node_ids[MAX_MEM_RANGES]; - int num_ranges = 0, ret; - int num_xcc, xcc_id; - uint32_t xcc_mask; - - num_xcc = NUM_XCC(adev->gfx.xcc_mask); - xcc_mask = (1U << num_xcc) - 1; - - for_each_inst(xcc_id, xcc_mask) { - ret = amdgpu_acpi_get_mem_info(adev, xcc_id, &numa_info); - if (ret) - continue; - - if (numa_info.nid == NUMA_NO_NODE) { - mem_ranges[0].size = numa_info.size; - mem_ranges[0].numa.node = numa_info.nid; - num_ranges = 1; - break; - } - - if (gmc_v9_0_is_node_present(node_ids, num_ranges, - numa_info.nid)) - continue; - - node_ids[num_ranges] = numa_info.nid; - mem_ranges[num_ranges].numa.node = numa_info.nid; - mem_ranges[num_ranges].size = numa_info.size; - ++num_ranges; - } - - adev->gmc.num_mem_partitions = num_ranges; -} - -static void -gmc_v9_0_init_sw_mem_ranges(struct amdgpu_device *adev, - struct amdgpu_mem_partition_info *mem_ranges) -{ - enum amdgpu_memory_partition mode; - u32 start_addr = 0, size; - int i, r, l; - - mode = gmc_v9_0_query_memory_partition(adev); - - switch (mode) { - case UNKNOWN_MEMORY_PARTITION_MODE: - adev->gmc.num_mem_partitions = 0; - break; - case AMDGPU_NPS1_PARTITION_MODE: - adev->gmc.num_mem_partitions = 1; - break; - case AMDGPU_NPS2_PARTITION_MODE: - adev->gmc.num_mem_partitions = 2; - break; - case AMDGPU_NPS4_PARTITION_MODE: - if (adev->flags & AMD_IS_APU) - adev->gmc.num_mem_partitions = 3; - else - adev->gmc.num_mem_partitions = 4; - break; - default: - adev->gmc.num_mem_partitions = 1; - break; - } - - /* Use NPS range info, if populated */ - r = amdgpu_gmc_get_nps_memranges(adev, mem_ranges, - &adev->gmc.num_mem_partitions); - if (!r) { - l = 0; - for (i = 1; i < 
adev->gmc.num_mem_partitions; ++i) { - if (mem_ranges[i].range.lpfn > - mem_ranges[i - 1].range.lpfn) - l = i; - } - - } else { - if (!adev->gmc.num_mem_partitions) { - dev_err(adev->dev, - "Not able to detect NPS mode, fall back to NPS1"); - adev->gmc.num_mem_partitions = 1; - } - /* Fallback to sw based calculation */ - size = (adev->gmc.real_vram_size + SZ_16M) >> AMDGPU_GPU_PAGE_SHIFT; - size /= adev->gmc.num_mem_partitions; - - for (i = 0; i < adev->gmc.num_mem_partitions; ++i) { - mem_ranges[i].range.fpfn = start_addr; - mem_ranges[i].size = - ((u64)size << AMDGPU_GPU_PAGE_SHIFT); - mem_ranges[i].range.lpfn = start_addr + size - 1; - start_addr += size; - } - - l = adev->gmc.num_mem_partitions - 1; - } - - /* Adjust the last one */ - mem_ranges[l].range.lpfn = - (adev->gmc.real_vram_size >> AMDGPU_GPU_PAGE_SHIFT) - 1; - mem_ranges[l].size = - adev->gmc.real_vram_size - - ((u64)mem_ranges[l].range.fpfn << AMDGPU_GPU_PAGE_SHIFT); -} - -static int gmc_v9_0_init_mem_ranges(struct amdgpu_device *adev) -{ - bool valid; - - adev->gmc.mem_partitions = kcalloc(MAX_MEM_RANGES, - sizeof(struct amdgpu_mem_partition_info), - GFP_KERNEL); - if (!adev->gmc.mem_partitions) - return -ENOMEM; - - /* TODO : Get the range from PSP/Discovery for dGPU */ - if (adev->gmc.is_app_apu) - gmc_v9_0_init_acpi_mem_ranges(adev, adev->gmc.mem_partitions); - else - gmc_v9_0_init_sw_mem_ranges(adev, adev->gmc.mem_partitions); - - if (amdgpu_sriov_vf(adev)) - valid = true; - else - valid = gmc_v9_0_validate_partition_info(adev); - if (!valid) { - /* TODO: handle invalid case */ - dev_WARN(adev->dev, - "Mem ranges not matching with hardware config"); - } - - return 0; -} - static void gmc_v9_4_3_init_vram_info(struct amdgpu_device *adev) { + static const u32 regBIF_BIOS_SCRATCH_4 = 0x50; + u32 vram_info; + adev->gmc.vram_type = AMDGPU_VRAM_TYPE_HBM; adev->gmc.vram_width = 128 * 64; if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 5, 0)) adev->gmc.vram_type = AMDGPU_VRAM_TYPE_HBM3E; + + if (!(adev->flags & AMD_IS_APU) && !amdgpu_sriov_vf(adev)) { + vram_info = RREG32(regBIF_BIOS_SCRATCH_4); + adev->gmc.vram_vendor = vram_info & 0xF; + } } static int gmc_v9_0_sw_init(struct amdgpu_ip_block *ip_block) @@ -2085,7 +1861,7 @@ static int gmc_v9_0_sw_init(struct amdgpu_ip_block *ip_block) spin_lock_init(&adev->gmc.invalidate_lock); - if (gmc_v9_0_is_multi_chiplet(adev)) { + if (amdgpu_is_multi_aid(adev)) { gmc_v9_4_3_init_vram_info(adev); } else if (!adev->bios) { if (adev->flags & AMD_IS_APU) { @@ -2235,8 +2011,8 @@ static int gmc_v9_0_sw_init(struct amdgpu_ip_block *ip_block) amdgpu_gmc_get_vbios_allocations(adev); - if (gmc_v9_0_is_multi_chiplet(adev)) { - r = gmc_v9_0_init_mem_ranges(adev); + if (amdgpu_is_multi_aid(adev)) { + r = amdgpu_gmc_init_mem_ranges(adev); if (r) return r; } @@ -2264,7 +2040,7 @@ static int gmc_v9_0_sw_init(struct amdgpu_ip_block *ip_block) adev->vm_manager.first_kfd_vmid = (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 1) || amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 2) || - gmc_v9_0_is_multi_chiplet(adev)) ? + amdgpu_is_multi_aid(adev)) ? 
3 : 8; @@ -2276,7 +2052,7 @@ static int gmc_v9_0_sw_init(struct amdgpu_ip_block *ip_block) if (r) return r; - if (gmc_v9_0_is_multi_chiplet(adev)) + if (amdgpu_is_multi_aid(adev)) amdgpu_gmc_sysfs_init(adev); return 0; @@ -2286,7 +2062,7 @@ static int gmc_v9_0_sw_fini(struct amdgpu_ip_block *ip_block) { struct amdgpu_device *adev = ip_block->adev; - if (gmc_v9_0_is_multi_chiplet(adev)) + if (amdgpu_is_multi_aid(adev)) amdgpu_gmc_sysfs_fini(adev); amdgpu_gmc_ras_fini(adev); @@ -2360,7 +2136,7 @@ static int gmc_v9_0_gart_enable(struct amdgpu_device *adev) { int r; - if (adev->gmc.xgmi.connected_to_cpu) + if (amdgpu_gmc_is_pdb0_enabled(adev)) amdgpu_gmc_init_pdb0(adev); if (adev->gart.bo == NULL) { @@ -2518,7 +2294,7 @@ static int gmc_v9_0_resume(struct amdgpu_ip_block *ip_block) * information again. */ if (adev->gmc.reset_flags & AMDGPU_GMC_INIT_RESET_NPS) { - gmc_v9_0_init_sw_mem_ranges(adev, adev->gmc.mem_partitions); + amdgpu_gmc_init_sw_mem_ranges(adev, adev->gmc.mem_partitions); adev->gmc.reset_flags &= ~AMDGPU_GMC_INIT_RESET_NPS; } diff --git a/drivers/gpu/drm/amd/amdgpu/ih_v6_0.c b/drivers/gpu/drm/amd/amdgpu/ih_v6_0.c index 5900b560b7de..333e9c30c091 100644 --- a/drivers/gpu/drm/amd/amdgpu/ih_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/ih_v6_0.c @@ -587,8 +587,7 @@ static int ih_v6_0_sw_init(struct amdgpu_ip_block *ip_block) /* use gpu virtual address for ih ring * until ih_checken is programmed to allow * use bus address for ih ring by psp bl */ - use_bus_addr = - (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) ? false : true; + use_bus_addr = adev->firmware.load_type != AMDGPU_FW_LOAD_PSP; r = amdgpu_ih_ring_init(adev, &adev->irq.ih, IH_RING_SIZE, use_bus_addr); if (r) return r; diff --git a/drivers/gpu/drm/amd/amdgpu/ih_v6_1.c b/drivers/gpu/drm/amd/amdgpu/ih_v6_1.c index 068ed849dbad..95b3f4e55ec3 100644 --- a/drivers/gpu/drm/amd/amdgpu/ih_v6_1.c +++ b/drivers/gpu/drm/amd/amdgpu/ih_v6_1.c @@ -562,8 +562,7 @@ static int ih_v6_1_sw_init(struct amdgpu_ip_block *ip_block) /* use gpu virtual address for ih ring * until ih_checken is programmed to allow * use bus address for ih ring by psp bl */ - use_bus_addr = - (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) ? false : true; + use_bus_addr = adev->firmware.load_type != AMDGPU_FW_LOAD_PSP; r = amdgpu_ih_ring_init(adev, &adev->irq.ih, 256 * 1024, use_bus_addr); if (r) return r; diff --git a/drivers/gpu/drm/amd/amdgpu/ih_v7_0.c b/drivers/gpu/drm/amd/amdgpu/ih_v7_0.c index 40a3530e0453..b32ea4129c61 100644 --- a/drivers/gpu/drm/amd/amdgpu/ih_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/ih_v7_0.c @@ -552,8 +552,7 @@ static int ih_v7_0_sw_init(struct amdgpu_ip_block *ip_block) /* use gpu virtual address for ih ring * until ih_checken is programmed to allow * use bus address for ih ring by psp bl */ - use_bus_addr = - (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) ? 
false : true; + use_bus_addr = adev->firmware.load_type != AMDGPU_FW_LOAD_PSP; r = amdgpu_ih_ring_init(adev, &adev->irq.ih, 256 * 1024, use_bus_addr); if (r) return r; diff --git a/drivers/gpu/drm/amd/amdgpu/imu_v11_0.c b/drivers/gpu/drm/amd/amdgpu/imu_v11_0.c index cfa91d709d49..cc626036ed9c 100644 --- a/drivers/gpu/drm/amd/amdgpu/imu_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/imu_v11_0.c @@ -32,6 +32,7 @@ #include "gc/gc_11_0_0_sh_mask.h" MODULE_FIRMWARE("amdgpu/gc_11_0_0_imu.bin"); +MODULE_FIRMWARE("amdgpu/gc_11_0_0_imu_kicker.bin"); MODULE_FIRMWARE("amdgpu/gc_11_0_1_imu.bin"); MODULE_FIRMWARE("amdgpu/gc_11_0_2_imu.bin"); MODULE_FIRMWARE("amdgpu/gc_11_0_3_imu.bin"); @@ -51,8 +52,12 @@ static int imu_v11_0_init_microcode(struct amdgpu_device *adev) DRM_DEBUG("\n"); amdgpu_ucode_ip_version_decode(adev, GC_HWIP, ucode_prefix, sizeof(ucode_prefix)); - err = amdgpu_ucode_request(adev, &adev->gfx.imu_fw, AMDGPU_UCODE_REQUIRED, - "amdgpu/%s_imu.bin", ucode_prefix); + if (amdgpu_is_kicker_fw(adev)) + err = amdgpu_ucode_request(adev, &adev->gfx.imu_fw, AMDGPU_UCODE_REQUIRED, + "amdgpu/%s_imu_kicker.bin", ucode_prefix); + else + err = amdgpu_ucode_request(adev, &adev->gfx.imu_fw, AMDGPU_UCODE_REQUIRED, + "amdgpu/%s_imu.bin", ucode_prefix); if (err) goto out; diff --git a/drivers/gpu/drm/amd/amdgpu/imu_v12_0.c b/drivers/gpu/drm/amd/amdgpu/imu_v12_0.c index df898dbb746e..58cd87db8061 100644 --- a/drivers/gpu/drm/amd/amdgpu/imu_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/imu_v12_0.c @@ -34,12 +34,13 @@ MODULE_FIRMWARE("amdgpu/gc_12_0_0_imu.bin"); MODULE_FIRMWARE("amdgpu/gc_12_0_1_imu.bin"); +MODULE_FIRMWARE("amdgpu/gc_12_0_1_imu_kicker.bin"); #define TRANSFER_RAM_MASK 0x001c0000 static int imu_v12_0_init_microcode(struct amdgpu_device *adev) { - char ucode_prefix[15]; + char ucode_prefix[30]; int err; const struct imu_firmware_header_v1_0 *imu_hdr; struct amdgpu_firmware_info *info = NULL; @@ -47,8 +48,12 @@ static int imu_v12_0_init_microcode(struct amdgpu_device *adev) DRM_DEBUG("\n"); amdgpu_ucode_ip_version_decode(adev, GC_HWIP, ucode_prefix, sizeof(ucode_prefix)); - err = amdgpu_ucode_request(adev, &adev->gfx.imu_fw, AMDGPU_UCODE_REQUIRED, - "amdgpu/%s_imu.bin", ucode_prefix); + if (amdgpu_is_kicker_fw(adev)) + err = amdgpu_ucode_request(adev, &adev->gfx.imu_fw, AMDGPU_UCODE_REQUIRED, + "amdgpu/%s_imu_kicker.bin", ucode_prefix); + else + err = amdgpu_ucode_request(adev, &adev->gfx.imu_fw, AMDGPU_UCODE_REQUIRED, + "amdgpu/%s_imu.bin", ucode_prefix); if (err) goto out; @@ -362,7 +367,7 @@ static void program_imu_rlc_ram(struct amdgpu_device *adev, static void imu_v12_0_program_rlc_ram(struct amdgpu_device *adev) { u32 reg_data, size = 0; - const u32 *data; + const u32 *data = NULL; int r = -EINVAL; WREG32_SOC15(GC, 0, regGFX_IMU_RLC_RAM_INDEX, 0x2); diff --git a/drivers/gpu/drm/amd/amdgpu/isp_v4_1_1.c b/drivers/gpu/drm/amd/amdgpu/isp_v4_1_1.c index 574880d67009..4258d3e0b706 100644 --- a/drivers/gpu/drm/amd/amdgpu/isp_v4_1_1.c +++ b/drivers/gpu/drm/amd/amdgpu/isp_v4_1_1.c @@ -29,6 +29,14 @@ #include "amdgpu.h" #include "isp_v4_1_1.h" +MODULE_FIRMWARE("amdgpu/isp_4_1_1.bin"); + +#define ISP_PERFORMANCE_STATE_LOW 0 +#define ISP_PERFORMANCE_STATE_HIGH 1 + +#define ISP_HIGH_PERFORMANC_XCLK 788 +#define ISP_HIGH_PERFORMANC_ICLK 788 + static const unsigned int isp_4_1_1_int_srcid[MAX_ISP411_INT_SRC] = { ISP_4_1__SRCID__ISP_RINGBUFFER_WPT9, ISP_4_1__SRCID__ISP_RINGBUFFER_WPT10, @@ -56,17 +64,137 @@ static struct gpiod_lookup_table isp_sensor_gpio_table = { }, }; +static int isp_poweroff(struct 
generic_pm_domain *genpd) +{ + struct amdgpu_isp *isp = container_of(genpd, struct amdgpu_isp, ispgpd); + struct amdgpu_device *adev = isp->adev; + + return amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_ISP, true, 0); +} + +static int isp_poweron(struct generic_pm_domain *genpd) +{ + struct amdgpu_isp *isp = container_of(genpd, struct amdgpu_isp, ispgpd); + struct amdgpu_device *adev = isp->adev; + + return amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_ISP, false, 0); +} + +static int isp_set_performance_state(struct generic_pm_domain *genpd, + unsigned int state) +{ + struct amdgpu_isp *isp = container_of(genpd, struct amdgpu_isp, ispgpd); + struct amdgpu_device *adev = isp->adev; + u32 iclk, xclk; + int ret; + + switch (state) { + case ISP_PERFORMANCE_STATE_HIGH: + xclk = ISP_HIGH_PERFORMANC_XCLK; + iclk = ISP_HIGH_PERFORMANC_ICLK; + break; + case ISP_PERFORMANCE_STATE_LOW: + /* isp runs at default lowest clock-rate on power-on, do nothing */ + return 0; + default: + return -EINVAL; + } + + ret = amdgpu_dpm_set_soft_freq_range(adev, PP_ISPXCLK, xclk, 0); + if (ret) { + drm_err(&adev->ddev, "failed to set xclk %u to %u: %d\n", + xclk, state, ret); + return ret; + } + + ret = amdgpu_dpm_set_soft_freq_range(adev, PP_ISPICLK, iclk, 0); + if (ret) { + drm_err(&adev->ddev, "failed to set iclk %u to %u: %d\n", + iclk, state, ret); + return ret; + } + + return 0; +} + +static int isp_genpd_add_device(struct device *dev, void *data) +{ + struct generic_pm_domain *gpd = data; + struct platform_device *pdev = container_of(dev, struct platform_device, dev); + struct amdgpu_isp *isp = container_of(gpd, struct amdgpu_isp, ispgpd); + struct amdgpu_device *adev = isp->adev; + int ret; + + if (!pdev) + return -EINVAL; + + if (!dev->type->name) { + drm_dbg(&adev->ddev, "Invalid device type to add\n"); + goto exit; + } + + if (strcmp(dev->type->name, "mfd_device")) { + drm_dbg(&adev->ddev, "Invalid isp mfd device %s to add\n", pdev->mfd_cell->name); + goto exit; + } + + ret = pm_genpd_add_device(gpd, dev); + if (ret) { + drm_err(&adev->ddev, "Failed to add dev %s to genpd %d\n", + pdev->mfd_cell->name, ret); + return -ENODEV; + } + +exit: + /* Continue to add */ + return 0; +} + +static int isp_genpd_remove_device(struct device *dev, void *data) +{ + struct generic_pm_domain *gpd = data; + struct platform_device *pdev = container_of(dev, struct platform_device, dev); + struct amdgpu_isp *isp = container_of(gpd, struct amdgpu_isp, ispgpd); + struct amdgpu_device *adev = isp->adev; + int ret; + + if (!pdev) + return -EINVAL; + + if (!dev->type->name) { + drm_dbg(&adev->ddev, "Invalid device type to remove\n"); + goto exit; + } + + if (strcmp(dev->type->name, "mfd_device")) { + drm_dbg(&adev->ddev, "Invalid isp mfd device %s to remove\n", + pdev->mfd_cell->name); + goto exit; + } + + ret = pm_genpd_remove_device(dev); + if (ret) { + drm_err(&adev->ddev, "Failed to remove dev from genpd %d\n", ret); + return -ENODEV; + } + +exit: + /* Continue to remove */ + return 0; +} + static int isp_v4_1_1_hw_init(struct amdgpu_isp *isp) { + const struct software_node *amd_camera_node, *isp4_node; struct amdgpu_device *adev = isp->adev; + struct acpi_device *acpi_dev; int idx, int_idx, num_res, r; - u8 isp_dev_hid[ACPI_ID_LEN]; u64 isp_base; if (adev->rmmio_size == 0 || adev->rmmio_size < 0x5289) return -EINVAL; - r = amdgpu_acpi_get_isp4_dev_hid(&isp_dev_hid); + r = amdgpu_acpi_get_isp4_dev(&acpi_dev); if (r) { drm_dbg(&adev->ddev, "Invalid isp platform detected (%d)", r); /* allow GPU init to 
progress */ @@ -74,18 +202,28 @@ static int isp_v4_1_1_hw_init(struct amdgpu_isp *isp) } /* add GPIO resources required for OMNI5C10 sensor */ - if (!strcmp("OMNI5C10", isp_dev_hid)) { + if (!strcmp("OMNI5C10", acpi_device_hid(acpi_dev))) { gpiod_add_lookup_table(&isp_gpio_table); gpiod_add_lookup_table(&isp_sensor_gpio_table); } isp_base = adev->rmmio_base; + isp->ispgpd.name = "ISP_v_4_1_1"; + isp->ispgpd.power_off = isp_poweroff; + isp->ispgpd.power_on = isp_poweron; + isp->ispgpd.set_performance_state = isp_set_performance_state; + + r = pm_genpd_init(&isp->ispgpd, NULL, true); + if (r) { + drm_err(&adev->ddev, "failed to initialize genpd (%d)\n", r); + return -EINVAL; + } + isp->isp_cell = kcalloc(3, sizeof(struct mfd_cell), GFP_KERNEL); if (!isp->isp_cell) { r = -ENOMEM; - drm_err(&adev->ddev, - "%s: isp mfd cell alloc failed\n", __func__); + drm_err(&adev->ddev, "isp mfd cell alloc failed (%d)\n", r); goto failure; } @@ -95,19 +233,20 @@ static int isp_v4_1_1_hw_init(struct amdgpu_isp *isp) GFP_KERNEL); if (!isp->isp_res) { r = -ENOMEM; - drm_err(&adev->ddev, - "%s: isp mfd res alloc failed\n", __func__); + drm_err(&adev->ddev, "isp mfd resource alloc failed (%d)\n", r); goto failure; } isp->isp_pdata = kzalloc(sizeof(*isp->isp_pdata), GFP_KERNEL); if (!isp->isp_pdata) { r = -ENOMEM; - drm_err(&adev->ddev, - "%s: isp platform data alloc failed\n", __func__); + drm_err(&adev->ddev, "isp platform data alloc failed (%d)\n", r); goto failure; } + amd_camera_node = (const struct software_node *)acpi_dev->driver_data; + isp4_node = software_node_find_by_name(amd_camera_node, "isp4"); + /* initialize isp platform data */ isp->isp_pdata->adev = (void *)adev; isp->isp_pdata->asic_type = adev->asic_type; @@ -136,14 +275,14 @@ static int isp_v4_1_1_hw_init(struct amdgpu_isp *isp) isp->isp_cell[0].num_resources = num_res; isp->isp_cell[0].resources = &isp->isp_res[0]; isp->isp_cell[0].platform_data = isp->isp_pdata; + isp->isp_cell[0].swnode = isp4_node; isp->isp_cell[0].pdata_size = sizeof(struct isp_platform_data); /* initialize isp i2c platform data */ isp->isp_i2c_res = kcalloc(1, sizeof(struct resource), GFP_KERNEL); if (!isp->isp_i2c_res) { r = -ENOMEM; - drm_err(&adev->ddev, - "%s: isp mfd res alloc failed\n", __func__); + drm_err(&adev->ddev, "isp mfd res alloc failed (%d)\n", r); goto failure; } @@ -162,8 +301,7 @@ static int isp_v4_1_1_hw_init(struct amdgpu_isp *isp) isp->isp_gpio_res = kcalloc(1, sizeof(struct resource), GFP_KERNEL); if (!isp->isp_gpio_res) { r = -ENOMEM; - drm_err(&adev->ddev, - "%s: isp gpio res alloc failed\n", __func__); + drm_err(&adev->ddev, "isp gpio resource alloc failed (%d)\n", r); goto failure; } @@ -179,10 +317,23 @@ static int isp_v4_1_1_hw_init(struct amdgpu_isp *isp) isp->isp_cell[2].platform_data = isp->isp_pdata; isp->isp_cell[2].pdata_size = sizeof(struct isp_platform_data); - r = mfd_add_hotplug_devices(isp->parent, isp->isp_cell, 3); + /* add only amd_isp_capture and amd_isp_i2c_designware to genpd */ + r = mfd_add_hotplug_devices(isp->parent, isp->isp_cell, 2); if (r) { - drm_err(&adev->ddev, - "%s: add mfd hotplug device failed\n", __func__); + drm_err(&adev->ddev, "add mfd hotplug device failed (%d)\n", r); + goto failure; + } + + r = device_for_each_child(isp->parent, &isp->ispgpd, + isp_genpd_add_device); + if (r) { + drm_err(&adev->ddev, "failed to add devices to genpd (%d)\n", r); + goto failure; + } + + r = mfd_add_hotplug_devices(isp->parent, &isp->isp_cell[2], 1); + if (r) { + drm_err(&adev->ddev, "add pinctl hotplug device failed 
(%d)\n", r); goto failure; } @@ -201,6 +352,9 @@ failure: static int isp_v4_1_1_hw_fini(struct amdgpu_isp *isp) { + device_for_each_child(isp->parent, NULL, + isp_genpd_remove_device); + mfd_remove_devices(isp->parent); kfree(isp->isp_res); diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v1_0.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v1_0.c index 9e428e669ada..b5bb7f4d607c 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v1_0.c @@ -557,7 +557,7 @@ static const struct amdgpu_ring_funcs jpeg_v1_0_decode_ring_vm_funcs = { .nop = PACKET0(0x81ff, 0), .support_64bit_ptrs = false, .no_user_fence = true, - .extra_dw = 64, + .extra_bytes = 256, .get_rptr = jpeg_v1_0_decode_ring_get_rptr, .get_wptr = jpeg_v1_0_decode_ring_get_wptr, .set_wptr = jpeg_v1_0_decode_ring_set_wptr, diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c index 4cde8a8bcc83..27c76bd424cf 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c @@ -23,7 +23,6 @@ #include "amdgpu.h" #include "amdgpu_jpeg.h" -#include "amdgpu_cs.h" #include "amdgpu_pm.h" #include "soc15.h" #include "soc15d.h" @@ -118,7 +117,10 @@ static int jpeg_v2_0_sw_init(struct amdgpu_ip_block *ip_block) if (r) return r; - adev->jpeg.supported_reset = AMDGPU_RESET_TYPE_PER_QUEUE; + adev->jpeg.supported_reset = + amdgpu_get_soft_full_reset_mask(adev->jpeg.inst[0].ring_dec); + if (!amdgpu_sriov_vf(adev)) + adev->jpeg.supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE; r = amdgpu_jpeg_sysfs_reset_mask_init(adev); return r; @@ -764,11 +766,20 @@ static int jpeg_v2_0_process_interrupt(struct amdgpu_device *adev, return 0; } -static int jpeg_v2_0_ring_reset(struct amdgpu_ring *ring, unsigned int vmid) +static int jpeg_v2_0_ring_reset(struct amdgpu_ring *ring, + unsigned int vmid, + struct amdgpu_fence *timedout_fence) { - jpeg_v2_0_stop(ring->adev); - jpeg_v2_0_start(ring->adev); - return amdgpu_ring_test_helper(ring); + int r; + + amdgpu_ring_reset_helper_begin(ring, timedout_fence); + r = jpeg_v2_0_stop(ring->adev); + if (r) + return r; + r = jpeg_v2_0_start(ring->adev); + if (r) + return r; + return amdgpu_ring_reset_helper_end(ring, timedout_fence); } static const struct amd_ip_funcs jpeg_v2_0_ip_funcs = { @@ -794,7 +805,7 @@ static const struct amdgpu_ring_funcs jpeg_v2_0_dec_ring_vm_funcs = { .get_rptr = jpeg_v2_0_dec_ring_get_rptr, .get_wptr = jpeg_v2_0_dec_ring_get_wptr, .set_wptr = jpeg_v2_0_dec_ring_set_wptr, - .parse_cs = jpeg_v2_dec_ring_parse_cs, + .parse_cs = amdgpu_jpeg_dec_parse_cs, .emit_frame_size = SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 + SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 8 + @@ -842,58 +853,3 @@ const struct amdgpu_ip_block_version jpeg_v2_0_ip_block = { .rev = 0, .funcs = &jpeg_v2_0_ip_funcs, }; - -/** - * jpeg_v2_dec_ring_parse_cs - command submission parser - * - * @parser: Command submission parser context - * @job: the job to parse - * @ib: the IB to parse - * - * Parse the command stream, return -EINVAL for invalid packet, - * 0 otherwise - */ -int jpeg_v2_dec_ring_parse_cs(struct amdgpu_cs_parser *parser, - struct amdgpu_job *job, - struct amdgpu_ib *ib) -{ - u32 i, reg, res, cond, type; - struct amdgpu_device *adev = parser->adev; - - for (i = 0; i < ib->length_dw ; i += 2) { - reg = CP_PACKETJ_GET_REG(ib->ptr[i]); - res = CP_PACKETJ_GET_RES(ib->ptr[i]); - cond = CP_PACKETJ_GET_COND(ib->ptr[i]); - type = CP_PACKETJ_GET_TYPE(ib->ptr[i]); - - if (res) /* only support 0 at the moment */ - return -EINVAL; - - switch (type) { - case 
PACKETJ_TYPE0: - if (cond != PACKETJ_CONDITION_CHECK0 || reg < JPEG_REG_RANGE_START || - reg > JPEG_REG_RANGE_END) { - dev_err(adev->dev, "Invalid packet [0x%08x]!\n", ib->ptr[i]); - return -EINVAL; - } - break; - case PACKETJ_TYPE3: - if (cond != PACKETJ_CONDITION_CHECK3 || reg < JPEG_REG_RANGE_START || - reg > JPEG_REG_RANGE_END) { - dev_err(adev->dev, "Invalid packet [0x%08x]!\n", ib->ptr[i]); - return -EINVAL; - } - break; - case PACKETJ_TYPE6: - if (ib->ptr[i] == CP_PACKETJ_NOP) - continue; - dev_err(adev->dev, "Invalid packet [0x%08x]!\n", ib->ptr[i]); - return -EINVAL; - default: - dev_err(adev->dev, "Unknown packet type %d !\n", type); - return -EINVAL; - } - } - - return 0; -} diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.h b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.h index 63fadda7a673..654e43e83e2c 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.h +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.h @@ -45,9 +45,6 @@ #define JRBC_DEC_EXTERNAL_REG_WRITE_ADDR 0x18000 -#define JPEG_REG_RANGE_START 0x4000 -#define JPEG_REG_RANGE_END 0x41c2 - void jpeg_v2_0_dec_ring_insert_start(struct amdgpu_ring *ring); void jpeg_v2_0_dec_ring_insert_end(struct amdgpu_ring *ring); void jpeg_v2_0_dec_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq, @@ -60,9 +57,6 @@ void jpeg_v2_0_dec_ring_emit_vm_flush(struct amdgpu_ring *ring, unsigned vmid, uint64_t pd_addr); void jpeg_v2_0_dec_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg, uint32_t val); void jpeg_v2_0_dec_ring_nop(struct amdgpu_ring *ring, uint32_t count); -int jpeg_v2_dec_ring_parse_cs(struct amdgpu_cs_parser *parser, - struct amdgpu_job *job, - struct amdgpu_ib *ib); extern const struct amdgpu_ip_block_version jpeg_v2_0_ip_block; diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c index 8b39e114f3be..20983f126b49 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c @@ -167,7 +167,10 @@ static int jpeg_v2_5_sw_init(struct amdgpu_ip_block *ip_block) if (r) return r; - adev->jpeg.supported_reset = AMDGPU_RESET_TYPE_PER_QUEUE; + adev->jpeg.supported_reset = + amdgpu_get_soft_full_reset_mask(adev->jpeg.inst[0].ring_dec); + if (!amdgpu_sriov_vf(adev)) + adev->jpeg.supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE; r = amdgpu_jpeg_sysfs_reset_mask_init(adev); return r; @@ -643,11 +646,14 @@ static int jpeg_v2_5_process_interrupt(struct amdgpu_device *adev, return 0; } -static int jpeg_v2_5_ring_reset(struct amdgpu_ring *ring, unsigned int vmid) +static int jpeg_v2_5_ring_reset(struct amdgpu_ring *ring, + unsigned int vmid, + struct amdgpu_fence *timedout_fence) { + amdgpu_ring_reset_helper_begin(ring, timedout_fence); jpeg_v2_5_stop_inst(ring->adev, ring->me); jpeg_v2_5_start_inst(ring->adev, ring->me); - return amdgpu_ring_test_helper(ring); + return amdgpu_ring_reset_helper_end(ring, timedout_fence); } static const struct amd_ip_funcs jpeg_v2_5_ip_funcs = { @@ -690,7 +696,7 @@ static const struct amdgpu_ring_funcs jpeg_v2_5_dec_ring_vm_funcs = { .get_rptr = jpeg_v2_5_dec_ring_get_rptr, .get_wptr = jpeg_v2_5_dec_ring_get_wptr, .set_wptr = jpeg_v2_5_dec_ring_set_wptr, - .parse_cs = jpeg_v2_dec_ring_parse_cs, + .parse_cs = amdgpu_jpeg_dec_parse_cs, .emit_frame_size = SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 + SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 8 + @@ -721,7 +727,7 @@ static const struct amdgpu_ring_funcs jpeg_v2_6_dec_ring_vm_funcs = { .get_rptr = jpeg_v2_5_dec_ring_get_rptr, .get_wptr = jpeg_v2_5_dec_ring_get_wptr, .set_wptr = jpeg_v2_5_dec_ring_set_wptr, - 
.parse_cs = jpeg_v2_dec_ring_parse_cs, + .parse_cs = amdgpu_jpeg_dec_parse_cs, .emit_frame_size = SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 + SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 8 + diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.c index 2f8510c2986b..d1a011c40ba2 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.c @@ -132,7 +132,10 @@ static int jpeg_v3_0_sw_init(struct amdgpu_ip_block *ip_block) if (r) return r; - adev->jpeg.supported_reset = AMDGPU_RESET_TYPE_PER_QUEUE; + adev->jpeg.supported_reset = + amdgpu_get_soft_full_reset_mask(adev->jpeg.inst[0].ring_dec); + if (!amdgpu_sriov_vf(adev)) + adev->jpeg.supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE; r = amdgpu_jpeg_sysfs_reset_mask_init(adev); return r; @@ -555,11 +558,20 @@ static int jpeg_v3_0_process_interrupt(struct amdgpu_device *adev, return 0; } -static int jpeg_v3_0_ring_reset(struct amdgpu_ring *ring, unsigned int vmid) +static int jpeg_v3_0_ring_reset(struct amdgpu_ring *ring, + unsigned int vmid, + struct amdgpu_fence *timedout_fence) { - jpeg_v3_0_stop(ring->adev); - jpeg_v3_0_start(ring->adev); - return amdgpu_ring_test_helper(ring); + int r; + + amdgpu_ring_reset_helper_begin(ring, timedout_fence); + r = jpeg_v3_0_stop(ring->adev); + if (r) + return r; + r = jpeg_v3_0_start(ring->adev); + if (r) + return r; + return amdgpu_ring_reset_helper_end(ring, timedout_fence); } static const struct amd_ip_funcs jpeg_v3_0_ip_funcs = { @@ -585,7 +597,7 @@ static const struct amdgpu_ring_funcs jpeg_v3_0_dec_ring_vm_funcs = { .get_rptr = jpeg_v3_0_dec_ring_get_rptr, .get_wptr = jpeg_v3_0_dec_ring_get_wptr, .set_wptr = jpeg_v3_0_dec_ring_set_wptr, - .parse_cs = jpeg_v2_dec_ring_parse_cs, + .parse_cs = amdgpu_jpeg_dec_parse_cs, .emit_frame_size = SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 + SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 8 + diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.c index f17ec5414fd6..33db2c1ae6cc 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.c @@ -143,7 +143,10 @@ static int jpeg_v4_0_sw_init(struct amdgpu_ip_block *ip_block) if (r) return r; - adev->jpeg.supported_reset = AMDGPU_RESET_TYPE_PER_QUEUE; + adev->jpeg.supported_reset = + amdgpu_get_soft_full_reset_mask(adev->jpeg.inst[0].ring_dec); + if (!amdgpu_sriov_vf(adev)) + adev->jpeg.supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE; r = amdgpu_jpeg_sysfs_reset_mask_init(adev); return r; @@ -720,14 +723,20 @@ static int jpeg_v4_0_process_interrupt(struct amdgpu_device *adev, return 0; } -static int jpeg_v4_0_ring_reset(struct amdgpu_ring *ring, unsigned int vmid) +static int jpeg_v4_0_ring_reset(struct amdgpu_ring *ring, + unsigned int vmid, + struct amdgpu_fence *timedout_fence) { - if (amdgpu_sriov_vf(ring->adev)) - return -EINVAL; + int r; - jpeg_v4_0_stop(ring->adev); - jpeg_v4_0_start(ring->adev); - return amdgpu_ring_test_helper(ring); + amdgpu_ring_reset_helper_begin(ring, timedout_fence); + r = jpeg_v4_0_stop(ring->adev); + if (r) + return r; + r = jpeg_v4_0_start(ring->adev); + if (r) + return r; + return amdgpu_ring_reset_helper_end(ring, timedout_fence); } static const struct amd_ip_funcs jpeg_v4_0_ip_funcs = { @@ -753,7 +762,7 @@ static const struct amdgpu_ring_funcs jpeg_v4_0_dec_ring_vm_funcs = { .get_rptr = jpeg_v4_0_dec_ring_get_rptr, .get_wptr = jpeg_v4_0_dec_ring_get_wptr, .set_wptr = jpeg_v4_0_dec_ring_set_wptr, - .parse_cs = jpeg_v2_dec_ring_parse_cs, + .parse_cs = amdgpu_jpeg_dec_parse_cs, 
.emit_frame_size = SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 + SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 8 + diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c index 79e342d5ab28..aae7328973d1 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c @@ -216,12 +216,11 @@ static int jpeg_v4_0_3_sw_init(struct amdgpu_ip_block *ip_block) if (r) return r; - if (!amdgpu_sriov_vf(adev)) { - adev->jpeg.supported_reset = AMDGPU_RESET_TYPE_PER_QUEUE; - r = amdgpu_jpeg_sysfs_reset_mask_init(adev); - if (r) - return r; - } + adev->jpeg.supported_reset = + amdgpu_get_soft_full_reset_mask(adev->jpeg.inst[0].ring_dec); + if (!amdgpu_sriov_vf(adev)) + adev->jpeg.supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE; + r = amdgpu_jpeg_sysfs_reset_mask_init(adev); return 0; } @@ -242,8 +241,7 @@ static int jpeg_v4_0_3_sw_fini(struct amdgpu_ip_block *ip_block) if (r) return r; - if (!amdgpu_sriov_vf(adev)) - amdgpu_jpeg_sysfs_reset_mask_fini(adev); + amdgpu_jpeg_sysfs_reset_mask_fini(adev); r = amdgpu_jpeg_sw_fini(adev); @@ -446,7 +444,7 @@ static int jpeg_v4_0_3_hw_fini(struct amdgpu_ip_block *ip_block) ret = jpeg_v4_0_3_set_powergating_state(ip_block, AMD_PG_STATE_GATE); } - if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__JPEG)) + if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__JPEG) && !amdgpu_sriov_vf(adev)) amdgpu_irq_put(adev, &adev->jpeg.inst->ras_poison_irq, 0); return ret; @@ -1143,14 +1141,17 @@ static void jpeg_v4_0_3_core_stall_reset(struct amdgpu_ring *ring) WREG32_SOC15(JPEG, jpeg_inst, regJPEG_CORE_RST_CTRL, 0x00); } -static int jpeg_v4_0_3_ring_reset(struct amdgpu_ring *ring, unsigned int vmid) +static int jpeg_v4_0_3_ring_reset(struct amdgpu_ring *ring, + unsigned int vmid, + struct amdgpu_fence *timedout_fence) { if (amdgpu_sriov_vf(ring->adev)) return -EOPNOTSUPP; + amdgpu_ring_reset_helper_begin(ring, timedout_fence); jpeg_v4_0_3_core_stall_reset(ring); jpeg_v4_0_3_start_jrbc(ring); - return amdgpu_ring_test_helper(ring); + return amdgpu_ring_reset_helper_end(ring, timedout_fence); } static const struct amd_ip_funcs jpeg_v4_0_3_ip_funcs = { @@ -1176,7 +1177,7 @@ static const struct amdgpu_ring_funcs jpeg_v4_0_3_dec_ring_vm_funcs = { .get_rptr = jpeg_v4_0_3_dec_ring_get_rptr, .get_wptr = jpeg_v4_0_3_dec_ring_get_wptr, .set_wptr = jpeg_v4_0_3_dec_ring_set_wptr, - .parse_cs = jpeg_v2_dec_ring_parse_cs, + .parse_cs = amdgpu_jpeg_dec_parse_cs, .emit_frame_size = SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 + SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 8 + diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_5.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_5.c index 974030a5c03c..54fd9c800c40 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_5.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_5.c @@ -174,9 +174,10 @@ static int jpeg_v4_0_5_sw_init(struct amdgpu_ip_block *ip_block) if (r) return r; - /* TODO: Add queue reset mask when FW fully supports it */ adev->jpeg.supported_reset = amdgpu_get_soft_full_reset_mask(&adev->jpeg.inst[0].ring_dec[0]); + if (!amdgpu_sriov_vf(adev)) + adev->jpeg.supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE; r = amdgpu_jpeg_sysfs_reset_mask_init(adev); if (r) return r; @@ -685,7 +686,7 @@ static int jpeg_v4_0_5_set_clockgating_state(struct amdgpu_ip_block *ip_block, enum amd_clockgating_state state) { struct amdgpu_device *adev = ip_block->adev; - bool enable = (state == AMD_CG_STATE_GATE) ? 
true : false; + bool enable = state == AMD_CG_STATE_GATE; int i; for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) { @@ -767,6 +768,22 @@ static int jpeg_v4_0_5_process_interrupt(struct amdgpu_device *adev, return 0; } +static int jpeg_v4_0_5_ring_reset(struct amdgpu_ring *ring, + unsigned int vmid, + struct amdgpu_fence *timedout_fence) +{ + int r; + + amdgpu_ring_reset_helper_begin(ring, timedout_fence); + r = jpeg_v4_0_5_stop(ring->adev); + if (r) + return r; + r = jpeg_v4_0_5_start(ring->adev); + if (r) + return r; + return amdgpu_ring_reset_helper_end(ring, timedout_fence); +} + static const struct amd_ip_funcs jpeg_v4_0_5_ip_funcs = { .name = "jpeg_v4_0_5", .early_init = jpeg_v4_0_5_early_init, @@ -790,7 +807,7 @@ static const struct amdgpu_ring_funcs jpeg_v4_0_5_dec_ring_vm_funcs = { .get_rptr = jpeg_v4_0_5_dec_ring_get_rptr, .get_wptr = jpeg_v4_0_5_dec_ring_get_wptr, .set_wptr = jpeg_v4_0_5_dec_ring_set_wptr, - .parse_cs = jpeg_v2_dec_ring_parse_cs, + .parse_cs = amdgpu_jpeg_dec_parse_cs, .emit_frame_size = SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 + SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 8 + @@ -812,6 +829,7 @@ static const struct amdgpu_ring_funcs jpeg_v4_0_5_dec_ring_vm_funcs = { .emit_wreg = jpeg_v2_0_dec_ring_emit_wreg, .emit_reg_wait = jpeg_v2_0_dec_ring_emit_reg_wait, .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper, + .reset = jpeg_v4_0_5_ring_reset, }; static void jpeg_v4_0_5_set_dec_ring_funcs(struct amdgpu_device *adev) diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_0.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_0.c index 31d213ccbe0a..46bf15dce2bd 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_0.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_0.c @@ -120,13 +120,13 @@ static int jpeg_v5_0_0_sw_init(struct amdgpu_ip_block *ip_block) if (r) return r; - /* TODO: Add queue reset mask when FW fully supports it */ adev->jpeg.supported_reset = amdgpu_get_soft_full_reset_mask(&adev->jpeg.inst[0].ring_dec[0]); + if (!amdgpu_sriov_vf(adev)) + adev->jpeg.supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE; r = amdgpu_jpeg_sysfs_reset_mask_init(adev); - if (r) - return r; - return 0; + + return r; } /** @@ -584,7 +584,7 @@ static int jpeg_v5_0_0_set_clockgating_state(struct amdgpu_ip_block *ip_block, enum amd_clockgating_state state) { struct amdgpu_device *adev = ip_block->adev; - bool enable = (state == AMD_CG_STATE_GATE) ? 
true : false; + bool enable = state == AMD_CG_STATE_GATE; if (enable) { if (!jpeg_v5_0_0_is_idle(ip_block)) @@ -644,6 +644,22 @@ static int jpeg_v5_0_0_process_interrupt(struct amdgpu_device *adev, return 0; } +static int jpeg_v5_0_0_ring_reset(struct amdgpu_ring *ring, + unsigned int vmid, + struct amdgpu_fence *timedout_fence) +{ + int r; + + amdgpu_ring_reset_helper_begin(ring, timedout_fence); + r = jpeg_v5_0_0_stop(ring->adev); + if (r) + return r; + r = jpeg_v5_0_0_start(ring->adev); + if (r) + return r; + return amdgpu_ring_reset_helper_end(ring, timedout_fence); +} + static const struct amd_ip_funcs jpeg_v5_0_0_ip_funcs = { .name = "jpeg_v5_0_0", .early_init = jpeg_v5_0_0_early_init, @@ -667,7 +683,7 @@ static const struct amdgpu_ring_funcs jpeg_v5_0_0_dec_ring_vm_funcs = { .get_rptr = jpeg_v5_0_0_dec_ring_get_rptr, .get_wptr = jpeg_v5_0_0_dec_ring_get_wptr, .set_wptr = jpeg_v5_0_0_dec_ring_set_wptr, - .parse_cs = jpeg_v2_dec_ring_parse_cs, + .parse_cs = amdgpu_jpeg_dec_parse_cs, .emit_frame_size = SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 + SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 8 + @@ -689,6 +705,7 @@ static const struct amdgpu_ring_funcs jpeg_v5_0_0_dec_ring_vm_funcs = { .emit_wreg = jpeg_v4_0_3_dec_ring_emit_wreg, .emit_reg_wait = jpeg_v4_0_3_dec_ring_emit_reg_wait, .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper, + .reset = jpeg_v5_0_0_ring_reset, }; static void jpeg_v5_0_0_set_dec_ring_funcs(struct amdgpu_device *adev) diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_1.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_1.c index 3b6f65a25646..baf097d2e1ac 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_1.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_1.c @@ -196,18 +196,25 @@ static int jpeg_v5_0_1_sw_init(struct amdgpu_ip_block *ip_block) } } + if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__JPEG)) { + r = amdgpu_jpeg_ras_sw_init(adev); + if (r) { + dev_err(adev->dev, "Failed to initialize jpeg ras block!\n"); + return r; + } + } + r = amdgpu_jpeg_reg_dump_init(adev, jpeg_reg_list_5_0_1, ARRAY_SIZE(jpeg_reg_list_5_0_1)); if (r) return r; - if (!amdgpu_sriov_vf(adev)) { - adev->jpeg.supported_reset = AMDGPU_RESET_TYPE_PER_QUEUE; - r = amdgpu_jpeg_sysfs_reset_mask_init(adev); - if (r) - return r; - } + adev->jpeg.supported_reset = + amdgpu_get_soft_full_reset_mask(&adev->jpeg.inst[0].ring_dec[0]); + if (!amdgpu_sriov_vf(adev)) + adev->jpeg.supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE; + r = amdgpu_jpeg_sysfs_reset_mask_init(adev); - return 0; + return r; } /** @@ -226,8 +233,7 @@ static int jpeg_v5_0_1_sw_fini(struct amdgpu_ip_block *ip_block) if (r) return r; - if (!amdgpu_sriov_vf(adev)) - amdgpu_jpeg_sysfs_reset_mask_fini(adev); + amdgpu_jpeg_sysfs_reset_mask_fini(adev); r = amdgpu_jpeg_sw_fini(adev); @@ -309,7 +315,7 @@ static int jpeg_v5_0_1_hw_fini(struct amdgpu_ip_block *ip_block) ret = jpeg_v5_0_1_set_powergating_state(ip_block, AMD_PG_STATE_GATE); } - if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__JPEG)) + if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__JPEG) && !amdgpu_sriov_vf(adev)) amdgpu_irq_put(adev, &adev->jpeg.inst->ras_poison_irq, 0); return ret; @@ -691,7 +697,7 @@ static int jpeg_v5_0_1_set_clockgating_state(struct amdgpu_ip_block *ip_block, enum amd_clockgating_state state) { struct amdgpu_device *adev = ip_block->adev; - bool enable = (state == AMD_CG_STATE_GATE) ? 
true : false; + bool enable = state == AMD_CG_STATE_GATE; int i; @@ -834,14 +840,14 @@ static void jpeg_v5_0_1_core_stall_reset(struct amdgpu_ring *ring) WREG32_SOC15(JPEG, jpeg_inst, regJPEG_CORE_RST_CTRL, 0x00); } -static int jpeg_v5_0_1_ring_reset(struct amdgpu_ring *ring, unsigned int vmid) +static int jpeg_v5_0_1_ring_reset(struct amdgpu_ring *ring, + unsigned int vmid, + struct amdgpu_fence *timedout_fence) { - if (amdgpu_sriov_vf(ring->adev)) - return -EOPNOTSUPP; - + amdgpu_ring_reset_helper_begin(ring, timedout_fence); jpeg_v5_0_1_core_stall_reset(ring); jpeg_v5_0_1_init_jrbc(ring); - return amdgpu_ring_test_helper(ring); + return amdgpu_ring_reset_helper_end(ring, timedout_fence); } static const struct amd_ip_funcs jpeg_v5_0_1_ip_funcs = { @@ -1018,8 +1024,9 @@ static int jpeg_v5_0_1_aca_bank_parser(struct aca_handle *handle, struct aca_ban /* reference to smu driver if header file */ static int jpeg_v5_0_1_err_codes[] = { - 16, 17, 18, 19, 20, 21, 22, 23, /* JPEG[0-7][S|D] */ - 24, 25, 26, 27, 28, 29, 30, 31 + 16, 17, 18, 19, 20, 21, 22, 23, /* JPEG[0-9][S|D] */ + 24, 25, 26, 27, 28, 29, 30, 31, + 48, 49, 50, 51, }; static bool jpeg_v5_0_1_aca_bank_is_valid(struct aca_handle *handle, struct aca_bank *bank, @@ -1060,6 +1067,11 @@ static int jpeg_v5_0_1_ras_late_init(struct amdgpu_device *adev, struct ras_comm if (r) return r; + r = amdgpu_ras_bind_aca(adev, AMDGPU_RAS_BLOCK__JPEG, + &jpeg_v5_0_1_aca_info, NULL); + if (r) + goto late_fini; + if (amdgpu_ras_is_supported(adev, ras_block->block) && adev->jpeg.inst->ras_poison_irq.funcs) { r = amdgpu_irq_get(adev, &adev->jpeg.inst->ras_poison_irq, 0); @@ -1067,11 +1079,6 @@ static int jpeg_v5_0_1_ras_late_init(struct amdgpu_device *adev, struct ras_comm goto late_fini; } - r = amdgpu_ras_bind_aca(adev, AMDGPU_RAS_BLOCK__JPEG, - &jpeg_v5_0_1_aca_info, NULL); - if (r) - goto late_fini; - return 0; late_fini: diff --git a/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c b/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c index d6f50b13e2ba..1cd9eaeef38f 100644 --- a/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c +++ b/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c @@ -21,6 +21,7 @@ * OTHER DEALINGS IN THE SOFTWARE. 
* */ +#include <drm/drm_drv.h> #include "amdgpu.h" #include "amdgpu_gfx.h" #include "mes_userqueue.h" @@ -198,6 +199,53 @@ static int mes_userq_create_ctx_space(struct amdgpu_userq_mgr *uq_mgr, return 0; } +static int mes_userq_detect_and_reset(struct amdgpu_device *adev, + int queue_type) +{ + int db_array_size = amdgpu_mes_get_hung_queue_db_array_size(adev); + struct mes_detect_and_reset_queue_input input; + struct amdgpu_usermode_queue *queue; + struct amdgpu_userq_mgr *uqm, *tmp; + unsigned int hung_db_num = 0; + int queue_id, r, i; + u32 db_array[8]; + + if (db_array_size > 8) { + dev_err(adev->dev, "DB array size (%d vs 8) too small\n", + db_array_size); + return -EINVAL; + } + + memset(&input, 0x0, sizeof(struct mes_detect_and_reset_queue_input)); + + input.queue_type = queue_type; + + amdgpu_mes_lock(&adev->mes); + r = amdgpu_mes_detect_and_reset_hung_queues(adev, queue_type, false, + &hung_db_num, db_array); + amdgpu_mes_unlock(&adev->mes); + if (r) { + dev_err(adev->dev, "Failed to detect and reset queues, err (%d)\n", r); + } else if (hung_db_num) { + list_for_each_entry_safe(uqm, tmp, &adev->userq_mgr_list, list) { + idr_for_each_entry(&uqm->userq_idr, queue, queue_id) { + if (queue->queue_type == queue_type) { + for (i = 0; i < hung_db_num; i++) { + if (queue->doorbell_index == db_array[i]) { + queue->state = AMDGPU_USERQ_STATE_HUNG; + atomic_inc(&adev->gpu_reset_counter); + amdgpu_userq_fence_driver_force_completion(queue); + drm_dev_wedged_event(adev_to_drm(adev), DRM_WEDGE_RECOVERY_NONE, NULL); + } + } + } + } + } + } + + return r; +} + static int mes_userq_mqd_create(struct amdgpu_userq_mgr *uq_mgr, struct drm_amdgpu_userq_in *args_in, struct amdgpu_usermode_queue *queue) @@ -206,6 +254,7 @@ static int mes_userq_mqd_create(struct amdgpu_userq_mgr *uq_mgr, struct amdgpu_mqd *mqd_hw_default = &adev->mqds[queue->queue_type]; struct drm_amdgpu_userq_in *mqd_user = args_in; struct amdgpu_mqd_prop *userq_props; + struct amdgpu_gfx_shadow_info shadow_info; int r; /* Structure to initialize MQD for userqueue using generic MQD init function */ @@ -215,13 +264,6 @@ static int mes_userq_mqd_create(struct amdgpu_userq_mgr *uq_mgr, return -ENOMEM; } - if (!mqd_user->wptr_va || !mqd_user->rptr_va || - !mqd_user->queue_va || mqd_user->queue_size == 0) { - DRM_ERROR("Invalid MQD parameters for userqueue\n"); - r = -EINVAL; - goto free_props; - } - r = amdgpu_userq_create_object(uq_mgr, &queue->mqd, mqd_hw_default->mqd_size); if (r) { DRM_ERROR("Failed to create MQD object for userqueue\n"); @@ -238,6 +280,8 @@ static int mes_userq_mqd_create(struct amdgpu_userq_mgr *uq_mgr, userq_props->doorbell_index = queue->doorbell_index; userq_props->fence_address = queue->fence_drv->gpu_addr; + if (adev->gfx.funcs->get_gfx_shadow_info) + adev->gfx.funcs->get_gfx_shadow_info(adev, &shadow_info, true); if (queue->queue_type == AMDGPU_HW_IP_COMPUTE) { struct drm_amdgpu_userq_mqd_compute_gfx11 *compute_mqd; @@ -254,6 +298,10 @@ static int mes_userq_mqd_create(struct amdgpu_userq_mgr *uq_mgr, goto free_mqd; } + if (amdgpu_userq_input_va_validate(queue->vm, compute_mqd->eop_va, + max_t(u32, PAGE_SIZE, AMDGPU_GPU_PAGE_SIZE))) + goto free_mqd; + userq_props->eop_gpu_addr = compute_mqd->eop_va; userq_props->hqd_pipe_priority = AMDGPU_GFX_PIPE_PRIO_NORMAL; userq_props->hqd_queue_priority = AMDGPU_GFX_QUEUE_PRIORITY_MINIMUM; @@ -281,6 +329,11 @@ static int mes_userq_mqd_create(struct amdgpu_userq_mgr *uq_mgr, userq_props->csa_addr = mqd_gfx_v11->csa_va; userq_props->tmz_queue = mqd_user->flags & 
AMDGPU_USERQ_CREATE_FLAGS_QUEUE_SECURE; + + if (amdgpu_userq_input_va_validate(queue->vm, mqd_gfx_v11->shadow_va, + shadow_info.shadow_size)) + goto free_mqd; + kfree(mqd_gfx_v11); } else if (queue->queue_type == AMDGPU_HW_IP_DMA) { struct drm_amdgpu_userq_mqd_sdma_gfx11 *mqd_sdma_v11; @@ -298,6 +351,10 @@ static int mes_userq_mqd_create(struct amdgpu_userq_mgr *uq_mgr, goto free_mqd; } + if (amdgpu_userq_input_va_validate(queue->vm, mqd_sdma_v11->csa_va, + shadow_info.csa_size)) + goto free_mqd; + userq_props->csa_addr = mqd_sdma_v11->csa_va; kfree(mqd_sdma_v11); } @@ -347,9 +404,82 @@ mes_userq_mqd_destroy(struct amdgpu_userq_mgr *uq_mgr, amdgpu_userq_destroy_object(uq_mgr, &queue->mqd); } +static int mes_userq_preempt(struct amdgpu_userq_mgr *uq_mgr, + struct amdgpu_usermode_queue *queue) +{ + struct amdgpu_device *adev = uq_mgr->adev; + struct mes_suspend_gang_input queue_input; + struct amdgpu_userq_obj *ctx = &queue->fw_obj; + signed long timeout = 2100000; /* 2100 ms */ + u64 fence_gpu_addr; + u32 fence_offset; + u64 *fence_ptr; + int i, r; + + if (queue->state != AMDGPU_USERQ_STATE_MAPPED) + return 0; + r = amdgpu_device_wb_get(adev, &fence_offset); + if (r) + return r; + + fence_gpu_addr = adev->wb.gpu_addr + (fence_offset * 4); + fence_ptr = (u64 *)&adev->wb.wb[fence_offset]; + *fence_ptr = 0; + + memset(&queue_input, 0x0, sizeof(struct mes_suspend_gang_input)); + queue_input.gang_context_addr = ctx->gpu_addr + AMDGPU_USERQ_PROC_CTX_SZ; + queue_input.suspend_fence_addr = fence_gpu_addr; + queue_input.suspend_fence_value = 1; + amdgpu_mes_lock(&adev->mes); + r = adev->mes.funcs->suspend_gang(&adev->mes, &queue_input); + amdgpu_mes_unlock(&adev->mes); + if (r) { + DRM_ERROR("Failed to suspend gang: %d\n", r); + goto out; + } + + for (i = 0; i < timeout; i++) { + if (*fence_ptr == 1) + goto out; + udelay(1); + } + r = -ETIMEDOUT; + +out: + amdgpu_device_wb_free(adev, fence_offset); + return r; +} + +static int mes_userq_restore(struct amdgpu_userq_mgr *uq_mgr, + struct amdgpu_usermode_queue *queue) +{ + struct amdgpu_device *adev = uq_mgr->adev; + struct mes_resume_gang_input queue_input; + struct amdgpu_userq_obj *ctx = &queue->fw_obj; + int r; + + if (queue->state == AMDGPU_USERQ_STATE_HUNG) + return -EINVAL; + if (queue->state != AMDGPU_USERQ_STATE_PREEMPTED) + return 0; + + memset(&queue_input, 0x0, sizeof(struct mes_resume_gang_input)); + queue_input.gang_context_addr = ctx->gpu_addr + AMDGPU_USERQ_PROC_CTX_SZ; + + amdgpu_mes_lock(&adev->mes); + r = adev->mes.funcs->resume_gang(&adev->mes, &queue_input); + amdgpu_mes_unlock(&adev->mes); + if (r) + dev_err(adev->dev, "Failed to resume queue, err (%d)\n", r); + return r; +} + const struct amdgpu_userq_funcs userq_mes_funcs = { .mqd_create = mes_userq_mqd_create, .mqd_destroy = mes_userq_mqd_destroy, .unmap = mes_userq_unmap, .map = mes_userq_map, + .detect_and_reset = mes_userq_detect_and_reset, + .preempt = mes_userq_preempt, + .restore = mes_userq_restore, }; diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c index c9eba537de09..da575bb1377f 100644 --- a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c @@ -66,6 +66,9 @@ static int mes_v11_0_kiq_hw_fini(struct amdgpu_device *adev); #define GFX_MES_DRAM_SIZE 0x80000 #define MES11_HW_RESOURCE_1_SIZE (128 * AMDGPU_GPU_PAGE_SIZE) +#define MES11_HUNG_DB_OFFSET_ARRAY_SIZE 8 /* [0:3] = db offset, [4:7] = hqd info */ +#define MES11_HUNG_HQD_INFO_OFFSET 4 + static void mes_v11_0_ring_set_wptr(struct amdgpu_ring 
*ring) { struct amdgpu_device *adev = ring->adev; @@ -641,8 +644,9 @@ static int mes_v11_0_misc_op(struct amdgpu_mes *mes, break; case MES_MISC_OP_CHANGE_CONFIG: if ((mes->adev->mes.sched_version & AMDGPU_MES_VERSION_MASK) < 0x63) { - dev_err(mes->adev->dev, "MES FW version must be larger than 0x63 to support limit single process feature.\n"); - return -EINVAL; + dev_warn_once(mes->adev->dev, + "MES FW version must be larger than 0x63 to support limit single process feature.\n"); + return 0; } misc_pkt.opcode = MESAPI_MISC__CHANGE_CONFIG; misc_pkt.change_config.opcode = @@ -710,6 +714,12 @@ static int mes_v11_0_set_hw_resources(struct amdgpu_mes *mes) mes_set_hw_res_pkt.enable_reg_active_poll = 1; mes_set_hw_res_pkt.enable_level_process_quantum_check = 1; mes_set_hw_res_pkt.oversubscription_timer = 50; + if ((mes->adev->mes.sched_version & AMDGPU_MES_VERSION_MASK) >= 0x7f) + mes_set_hw_res_pkt.enable_lr_compute_wa = 1; + else + dev_info_once(mes->adev->dev, + "MES FW version must be >= 0x7f to enable LR compute workaround.\n"); + if (amdgpu_mes_log_enable) { mes_set_hw_res_pkt.enable_mes_event_int_logging = 1; mes_set_hw_res_pkt.event_intr_history_gpu_mc_ptr = @@ -783,6 +793,32 @@ static int mes_v11_0_reset_hw_queue(struct amdgpu_mes *mes, offsetof(union MESAPI__RESET, api_status)); } +static int mes_v11_0_detect_and_reset_hung_queues(struct amdgpu_mes *mes, + struct mes_detect_and_reset_queue_input *input) +{ + union MESAPI__RESET mes_reset_queue_pkt; + + memset(&mes_reset_queue_pkt, 0, sizeof(mes_reset_queue_pkt)); + + mes_reset_queue_pkt.header.type = MES_API_TYPE_SCHEDULER; + mes_reset_queue_pkt.header.opcode = MES_SCH_API_RESET; + mes_reset_queue_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS; + + mes_reset_queue_pkt.queue_type = + convert_to_mes_queue_type(input->queue_type); + mes_reset_queue_pkt.doorbell_offset_addr = + mes->hung_queue_db_array_gpu_addr; + + if (input->detect_only) + mes_reset_queue_pkt.hang_detect_only = 1; + else + mes_reset_queue_pkt.hang_detect_then_reset = 1; + + return mes_v11_0_submit_pkt_and_poll_completion(mes, + &mes_reset_queue_pkt, sizeof(mes_reset_queue_pkt), + offsetof(union MESAPI__RESET, api_status)); +} + static const struct amdgpu_mes_funcs mes_v11_0_funcs = { .add_hw_queue = mes_v11_0_add_hw_queue, .remove_hw_queue = mes_v11_0_remove_hw_queue, @@ -792,6 +828,7 @@ static const struct amdgpu_mes_funcs mes_v11_0_funcs = { .resume_gang = mes_v11_0_resume_gang, .misc_op = mes_v11_0_misc_op, .reset_hw_queue = mes_v11_0_reset_hw_queue, + .detect_and_reset_hung_queues = mes_v11_0_detect_and_reset_hung_queues, }; static int mes_v11_0_allocate_ucode_buffer(struct amdgpu_device *adev, @@ -1630,10 +1667,12 @@ static int mes_v11_0_hw_init(struct amdgpu_ip_block *ip_block) if (r) goto failure; - r = mes_v11_0_set_hw_resources_1(&adev->mes); - if (r) { - DRM_ERROR("failed mes_v11_0_set_hw_resources_1, r=%d\n", r); - goto failure; + if ((adev->mes.sched_version & AMDGPU_MES_VERSION_MASK) >= 0x50) { + r = mes_v11_0_set_hw_resources_1(&adev->mes); + if (r) { + DRM_ERROR("failed mes_v11_0_set_hw_resources_1, r=%d\n", r); + goto failure; + } } r = mes_v11_0_query_sched_status(&adev->mes); @@ -1682,6 +1721,9 @@ static int mes_v11_0_early_init(struct amdgpu_ip_block *ip_block) struct amdgpu_device *adev = ip_block->adev; int pipe, r; + adev->mes.hung_queue_db_array_size = MES11_HUNG_DB_OFFSET_ARRAY_SIZE; + adev->mes.hung_queue_hqd_info_offset = MES11_HUNG_HQD_INFO_OFFSET; + for (pipe = 0; pipe < AMDGPU_MAX_MES_PIPES; pipe++) { if (!adev->enable_mes_kiq && pipe == 
AMDGPU_MES_KIQ_PIPE) continue; diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c index b4f17332d466..7f3512d9de07 100644 --- a/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c @@ -47,6 +47,9 @@ static int mes_v12_0_kiq_hw_fini(struct amdgpu_device *adev); #define MES_EOP_SIZE 2048 +#define MES12_HUNG_DB_OFFSET_ARRAY_SIZE 8 /* [0:3] = db offset [4:7] hqd info */ +#define MES12_HUNG_HQD_INFO_OFFSET 4 + static void mes_v12_0_ring_set_wptr(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; @@ -108,6 +111,7 @@ static const char *mes_v12_0_opcodes[] = { "SET_SE_MODE", "SET_GANG_SUBMIT", "SET_HW_RSRC_1", + "INVALIDATE_TLBS", }; static const char *mes_v12_0_misc_opcodes[] = { @@ -225,7 +229,12 @@ static int mes_v12_0_submit_pkt_and_poll_completion(struct amdgpu_mes *mes, pipe, x_pkt->header.opcode); r = amdgpu_fence_wait_polling(ring, seq, timeout); - if (r < 1 || !*status_ptr) { + + /* + * status_ptr[31:0] == 0 (fail) or status_ptr[63:0] == 1 (success). + * If status_ptr[31:0] == 0 then status_ptr[63:32] will have debug error information. + */ + if (r < 1 || !(lower_32_bits(*status_ptr))) { if (misc_op_str) dev_err(adev->dev, "MES(%d) failed to respond to msg=%s (%s)\n", @@ -567,13 +576,41 @@ static int mes_v12_0_unmap_legacy_queue(struct amdgpu_mes *mes, static int mes_v12_0_suspend_gang(struct amdgpu_mes *mes, struct mes_suspend_gang_input *input) { - return 0; + union MESAPI__SUSPEND mes_suspend_gang_pkt; + + memset(&mes_suspend_gang_pkt, 0, sizeof(mes_suspend_gang_pkt)); + + mes_suspend_gang_pkt.header.type = MES_API_TYPE_SCHEDULER; + mes_suspend_gang_pkt.header.opcode = MES_SCH_API_SUSPEND; + mes_suspend_gang_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS; + + mes_suspend_gang_pkt.suspend_all_gangs = input->suspend_all_gangs; + mes_suspend_gang_pkt.gang_context_addr = input->gang_context_addr; + mes_suspend_gang_pkt.suspend_fence_addr = input->suspend_fence_addr; + mes_suspend_gang_pkt.suspend_fence_value = input->suspend_fence_value; + + return mes_v12_0_submit_pkt_and_poll_completion(mes, AMDGPU_MES_SCHED_PIPE, + &mes_suspend_gang_pkt, sizeof(mes_suspend_gang_pkt), + offsetof(union MESAPI__SUSPEND, api_status)); } static int mes_v12_0_resume_gang(struct amdgpu_mes *mes, struct mes_resume_gang_input *input) { - return 0; + union MESAPI__RESUME mes_resume_gang_pkt; + + memset(&mes_resume_gang_pkt, 0, sizeof(mes_resume_gang_pkt)); + + mes_resume_gang_pkt.header.type = MES_API_TYPE_SCHEDULER; + mes_resume_gang_pkt.header.opcode = MES_SCH_API_RESUME; + mes_resume_gang_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS; + + mes_resume_gang_pkt.resume_all_gangs = input->resume_all_gangs; + mes_resume_gang_pkt.gang_context_addr = input->gang_context_addr; + + return mes_v12_0_submit_pkt_and_poll_completion(mes, AMDGPU_MES_SCHED_PIPE, + &mes_resume_gang_pkt, sizeof(mes_resume_gang_pkt), + offsetof(union MESAPI__RESUME, api_status)); } static int mes_v12_0_query_sched_status(struct amdgpu_mes *mes, int pipe) @@ -738,6 +775,11 @@ static int mes_v12_0_set_hw_resources(struct amdgpu_mes *mes, int pipe) mes_set_hw_res_pkt.use_different_vmid_compute = 1; mes_set_hw_res_pkt.enable_reg_active_poll = 1; mes_set_hw_res_pkt.enable_level_process_quantum_check = 1; + if ((mes->adev->mes.sched_version & AMDGPU_MES_VERSION_MASK) >= 0x82) + mes_set_hw_res_pkt.enable_lr_compute_wa = 1; + else + dev_info_once(adev->dev, + "MES FW version must be >= 0x82 to enable LR compute workaround.\n"); /* * Keep oversubscribe timer for sdma 
. When we have unmapped doorbell @@ -879,6 +921,74 @@ static int mes_v12_0_reset_hw_queue(struct amdgpu_mes *mes, offsetof(union MESAPI__RESET, api_status)); } +static int mes_v12_0_detect_and_reset_hung_queues(struct amdgpu_mes *mes, + struct mes_detect_and_reset_queue_input *input) +{ + union MESAPI__RESET mes_reset_queue_pkt; + + memset(&mes_reset_queue_pkt, 0, sizeof(mes_reset_queue_pkt)); + + mes_reset_queue_pkt.header.type = MES_API_TYPE_SCHEDULER; + mes_reset_queue_pkt.header.opcode = MES_SCH_API_RESET; + mes_reset_queue_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS; + + mes_reset_queue_pkt.queue_type = + convert_to_mes_queue_type(input->queue_type); + mes_reset_queue_pkt.doorbell_offset_addr = + mes->hung_queue_db_array_gpu_addr; + + if (input->detect_only) + mes_reset_queue_pkt.hang_detect_only = 1; + else + mes_reset_queue_pkt.hang_detect_then_reset = 1; + + return mes_v12_0_submit_pkt_and_poll_completion(mes, AMDGPU_MES_SCHED_PIPE, + &mes_reset_queue_pkt, sizeof(mes_reset_queue_pkt), + offsetof(union MESAPI__RESET, api_status)); +} + +static int mes_v12_inv_tlb_convert_hub_id(uint8_t id) +{ + /* + * MES doesn't support invalidate gc_hub on slave xcc individually + * master xcc will invalidate all gc_hub for the partition + */ + if (AMDGPU_IS_GFXHUB(id)) + return 0; + else if (AMDGPU_IS_MMHUB0(id)) + return 1; + else + return -EINVAL; + +} + +static int mes_v12_0_inv_tlbs_pasid(struct amdgpu_mes *mes, + struct mes_inv_tlbs_pasid_input *input) +{ + union MESAPI__INV_TLBS mes_inv_tlbs; + int ret; + + memset(&mes_inv_tlbs, 0, sizeof(mes_inv_tlbs)); + + mes_inv_tlbs.header.type = MES_API_TYPE_SCHEDULER; + mes_inv_tlbs.header.opcode = MES_SCH_API_INV_TLBS; + mes_inv_tlbs.header.dwsize = API_FRAME_SIZE_IN_DWORDS; + + mes_inv_tlbs.invalidate_tlbs.inv_sel = 0; + mes_inv_tlbs.invalidate_tlbs.flush_type = input->flush_type; + mes_inv_tlbs.invalidate_tlbs.inv_sel_id = input->pasid; + + /*convert amdgpu_mes_hub_id to mes expected hub_id */ + ret = mes_v12_inv_tlb_convert_hub_id(input->hub_id); + if (ret < 0) + return -EINVAL; + mes_inv_tlbs.invalidate_tlbs.hub_id = ret; + return mes_v12_0_submit_pkt_and_poll_completion(mes, AMDGPU_MES_KIQ_PIPE, + &mes_inv_tlbs, sizeof(mes_inv_tlbs), + offsetof(union MESAPI__INV_TLBS, api_status)); + +} + static const struct amdgpu_mes_funcs mes_v12_0_funcs = { .add_hw_queue = mes_v12_0_add_hw_queue, .remove_hw_queue = mes_v12_0_remove_hw_queue, @@ -888,6 +998,8 @@ static const struct amdgpu_mes_funcs mes_v12_0_funcs = { .resume_gang = mes_v12_0_resume_gang, .misc_op = mes_v12_0_misc_op, .reset_hw_queue = mes_v12_0_reset_hw_queue, + .invalidate_tlbs_pasid = mes_v12_0_inv_tlbs_pasid, + .detect_and_reset_hung_queues = mes_v12_0_detect_and_reset_hung_queues, }; static int mes_v12_0_allocate_ucode_buffer(struct amdgpu_device *adev, @@ -1742,7 +1854,8 @@ static int mes_v12_0_hw_init(struct amdgpu_ip_block *ip_block) if (r) goto failure; - mes_v12_0_set_hw_resources_1(&adev->mes, AMDGPU_MES_SCHED_PIPE); + if ((adev->mes.sched_version & AMDGPU_MES_VERSION_MASK) >= 0x4b) + mes_v12_0_set_hw_resources_1(&adev->mes, AMDGPU_MES_SCHED_PIPE); mes_v12_0_init_aggregated_doorbell(&adev->mes); @@ -1792,6 +1905,9 @@ static int mes_v12_0_early_init(struct amdgpu_ip_block *ip_block) struct amdgpu_device *adev = ip_block->adev; int pipe, r; + adev->mes.hung_queue_db_array_size = MES12_HUNG_DB_OFFSET_ARRAY_SIZE; + adev->mes.hung_queue_hqd_info_offset = MES12_HUNG_HQD_INFO_OFFSET; + for (pipe = 0; pipe < AMDGPU_MAX_MES_PIPES; pipe++) { r = amdgpu_mes_init_microcode(adev, pipe); if 
(r) diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c index 76167fadb292..cc688ae79e84 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c @@ -76,6 +76,8 @@ static void mmhub_v1_8_setup_vm_pt_regs(struct amdgpu_device *adev, uint32_t vmi static void mmhub_v1_8_init_gart_aperture_regs(struct amdgpu_device *adev) { + uint64_t gart_start = amdgpu_virt_xgmi_migrate_enabled(adev) ? + adev->gmc.vram_start : adev->gmc.fb_start; uint64_t pt_base; u32 inst_mask; int i; @@ -95,10 +97,10 @@ static void mmhub_v1_8_init_gart_aperture_regs(struct amdgpu_device *adev) if (adev->gmc.pdb0_bo) { WREG32_SOC15(MMHUB, i, regVM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32, - (u32)(adev->gmc.fb_start >> 12)); + (u32)(gart_start >> 12)); WREG32_SOC15(MMHUB, i, regVM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32, - (u32)(adev->gmc.fb_start >> 44)); + (u32)(gart_start >> 44)); WREG32_SOC15(MMHUB, i, regVM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32, diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0_1.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0_1.c index 134c4ec10887..910337dc28d1 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0_1.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0_1.c @@ -36,40 +36,47 @@ static const char *mmhub_client_ids_v3_0_1[][2] = { [0][0] = "VMC", + [1][0] = "ISPXT", + [2][0] = "ISPIXT", [4][0] = "DCEDMC", [5][0] = "DCEVGA", [6][0] = "MP0", [7][0] = "MP1", - [8][0] = "MPIO", - [16][0] = "HDP", - [17][0] = "LSDMA", - [18][0] = "JPEG", - [19][0] = "VCNU0", - [21][0] = "VSCH", - [22][0] = "VCNU1", - [23][0] = "VCN1", - [32+20][0] = "VCN0", - [2][1] = "DBGUNBIO", + [8][0] = "MPM", + [12][0] = "ISPTNR", + [14][0] = "ISPCRD0", + [15][0] = "ISPCRD1", + [16][0] = "ISPCRD2", + [22][0] = "HDP", + [23][0] = "LSDMA", + [24][0] = "JPEG", + [27][0] = "VSCH", + [28][0] = "VCNU", + [29][0] = "VCN", + [1][1] = "ISPXT", + [2][1] = "ISPIXT", [3][1] = "DCEDWB", [4][1] = "DCEDMC", [5][1] = "DCEVGA", [6][1] = "MP0", [7][1] = "MP1", - [8][1] = "MPIO", - [10][1] = "DBGU0", - [11][1] = "DBGU1", - [12][1] = "DBGU2", - [13][1] = "DBGU3", - [14][1] = "XDP", - [15][1] = "OSSSYS", - [16][1] = "HDP", - [17][1] = "LSDMA", - [18][1] = "JPEG", - [19][1] = "VCNU0", - [20][1] = "VCN0", - [21][1] = "VSCH", - [22][1] = "VCNU1", - [23][1] = "VCN1", + [8][1] = "MPM", + [10][1] = "ISPMWR0", + [11][1] = "ISPMWR1", + [12][1] = "ISPTNR", + [13][1] = "ISPSWR", + [14][1] = "ISPCWR0", + [15][1] = "ISPCWR1", + [16][1] = "ISPCWR2", + [17][1] = "ISPCWR3", + [18][1] = "XDP", + [21][1] = "OSSSYS", + [22][1] = "HDP", + [23][1] = "LSDMA", + [24][1] = "JPEG", + [27][1] = "VSCH", + [28][1] = "VCNU", + [29][1] = "VCN", }; static uint32_t mmhub_v3_0_1_get_invalidate_req(unsigned int vmid, diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v3_3.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v3_3.c index bc3d6c2fc87a..f6fc9778bc30 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v3_3.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v3_3.c @@ -40,30 +40,129 @@ static const char *mmhub_client_ids_v3_3[][2] = { [0][0] = "VMC", + [1][0] = "ISPXT", + [2][0] = "ISPIXT", [4][0] = "DCEDMC", [6][0] = "MP0", [7][0] = "MP1", [8][0] = "MPM", + [9][0] = "ISPPDPRD", + [10][0] = "ISPCSTATRD", + [11][0] = "ISPBYRPRD", + [12][0] = "ISPRGBPRD", + [13][0] = "ISPMCFPRD", + [14][0] = "ISPMCFPRD1", + [15][0] = "ISPYUVPRD", + [16][0] = "ISPMCSCRD", + [17][0] = "ISPGDCRD", + [18][0] = "ISPLMERD", + [22][0] = "ISPXT1", + [23][0] = "ISPIXT1", [24][0] = "HDP", [25][0] = "LSDMA", [26][0] = "JPEG", [27][0] = "VPE", + [28][0] = "VSCH", [29][0] = 
"VCNU", [30][0] = "VCN", + [1][1] = "ISPXT", + [2][1] = "ISPIXT", [3][1] = "DCEDWB", [4][1] = "DCEDMC", + [5][1] = "ISPCSISWR", [6][1] = "MP0", [7][1] = "MP1", [8][1] = "MPM", + [9][1] = "ISPPDPWR", + [10][1] = "ISPCSTATWR", + [11][1] = "ISPBYRPWR", + [12][1] = "ISPRGBPWR", + [13][1] = "ISPMCFPWR", + [14][1] = "ISPMWR0", + [15][1] = "ISPYUVPWR", + [16][1] = "ISPMCSCWR", + [17][1] = "ISPGDCWR", + [18][1] = "ISPLMEWR", + [20][1] = "ISPMWR2", [21][1] = "OSSSYS", + [22][1] = "ISPXT1", + [23][1] = "ISPIXT1", [24][1] = "HDP", [25][1] = "LSDMA", [26][1] = "JPEG", [27][1] = "VPE", + [28][1] = "VSCH", [29][1] = "VCNU", [30][1] = "VCN", }; +static const char *mmhub_client_ids_v3_3_1[][2] = { + [0][0] = "VMC", + [4][0] = "DCEDMC", + [6][0] = "MP0", + [7][0] = "MP1", + [8][0] = "MPM", + [24][0] = "HDP", + [25][0] = "LSDMA", + [26][0] = "JPEG0", + [27][0] = "VPE0", + [28][0] = "VSCH", + [29][0] = "VCNU0", + [30][0] = "VCN0", + [32+1][0] = "ISPXT", + [32+2][0] = "ISPIXT", + [32+9][0] = "ISPPDPRD", + [32+10][0] = "ISPCSTATRD", + [32+11][0] = "ISPBYRPRD", + [32+12][0] = "ISPRGBPRD", + [32+13][0] = "ISPMCFPRD", + [32+14][0] = "ISPMCFPRD1", + [32+15][0] = "ISPYUVPRD", + [32+16][0] = "ISPMCSCRD", + [32+17][0] = "ISPGDCRD", + [32+18][0] = "ISPLMERD", + [32+22][0] = "ISPXT1", + [32+23][0] = "ISPIXT1", + [32+26][0] = "JPEG1", + [32+27][0] = "VPE1", + [32+29][0] = "VCNU1", + [32+30][0] = "VCN1", + [3][1] = "DCEDWB", + [4][1] = "DCEDMC", + [6][1] = "MP0", + [7][1] = "MP1", + [8][1] = "MPM", + [21][1] = "OSSSYS", + [24][1] = "HDP", + [25][1] = "LSDMA", + [26][1] = "JPEG0", + [27][1] = "VPE0", + [28][1] = "VSCH", + [29][1] = "VCNU0", + [30][1] = "VCN0", + [32+1][1] = "ISPXT", + [32+2][1] = "ISPIXT", + [32+5][1] = "ISPCSISWR", + [32+9][1] = "ISPPDPWR", + [32+10][1] = "ISPCSTATWR", + [32+11][1] = "ISPBYRPWR", + [32+12][1] = "ISPRGBPWR", + [32+13][1] = "ISPMCFPWR", + [32+14][1] = "ISPMWR0", + [32+15][1] = "ISPYUVPWR", + [32+16][1] = "ISPMCSCWR", + [32+17][1] = "ISPGDCWR", + [32+18][1] = "ISPLMEWR", + [32+19][1] = "ISPMWR1", + [32+20][1] = "ISPMWR2", + [32+22][1] = "ISPXT1", + [32+23][1] = "ISPIXT1", + [32+26][1] = "JPEG1", + [32+27][1] = "VPE1", + [32+29][1] = "VCNU1", + [32+30][1] = "VCN1", +}; + static uint32_t mmhub_v3_3_get_invalidate_req(unsigned int vmid, uint32_t flush_type) { @@ -102,12 +201,16 @@ mmhub_v3_3_print_l2_protection_fault_status(struct amdgpu_device *adev, switch (amdgpu_ip_version(adev, MMHUB_HWIP, 0)) { case IP_VERSION(3, 3, 0): - case IP_VERSION(3, 3, 1): case IP_VERSION(3, 3, 2): mmhub_cid = cid < ARRAY_SIZE(mmhub_client_ids_v3_3) ? mmhub_client_ids_v3_3[cid][rw] : cid == 0x140 ? "UMSCH" : NULL; break; + case IP_VERSION(3, 3, 1): + mmhub_cid = cid < ARRAY_SIZE(mmhub_client_ids_v3_3_1) ? + mmhub_client_ids_v3_3_1[cid][rw] : + cid == 0x140 ? 
"UMSCH" : NULL; + break; default: mmhub_cid = NULL; break; diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v4_1_0.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v4_1_0.c index f2ab5001b492..951998454b25 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v4_1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v4_1_0.c @@ -37,39 +37,31 @@ static const char *mmhub_client_ids_v4_1_0[][2] = { [0][0] = "VMC", [4][0] = "DCEDMC", - [5][0] = "DCEVGA", [6][0] = "MP0", [7][0] = "MP1", [8][0] = "MPIO", - [16][0] = "HDP", - [17][0] = "LSDMA", - [18][0] = "JPEG", - [19][0] = "VCNU0", - [21][0] = "VSCH", - [22][0] = "VCNU1", - [23][0] = "VCN1", - [32+20][0] = "VCN0", - [2][1] = "DBGUNBIO", + [16][0] = "LSDMA", + [17][0] = "JPEG", + [19][0] = "VCNU", + [22][0] = "VSCH", + [23][0] = "HDP", + [32+23][0] = "VCNRD", [3][1] = "DCEDWB", [4][1] = "DCEDMC", - [5][1] = "DCEVGA", [6][1] = "MP0", [7][1] = "MP1", [8][1] = "MPIO", [10][1] = "DBGU0", [11][1] = "DBGU1", - [12][1] = "DBGU2", - [13][1] = "DBGU3", + [12][1] = "DBGUNBIO", [14][1] = "XDP", [15][1] = "OSSSYS", - [16][1] = "HDP", - [17][1] = "LSDMA", - [18][1] = "JPEG", - [19][1] = "VCNU0", - [20][1] = "VCN0", - [21][1] = "VSCH", - [22][1] = "VCNU1", - [23][1] = "VCN1", + [16][1] = "LSDMA", + [17][1] = "JPEG", + [18][1] = "VCNWR", + [19][1] = "VCNU", + [22][1] = "VSCH", + [23][1] = "HDP", }; static uint32_t mmhub_v4_1_0_get_invalidate_req(unsigned int vmid, diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c index 48101a34e049..9a40107a0869 100644 --- a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c +++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c @@ -292,14 +292,32 @@ static void xgpu_ai_mailbox_flr_work(struct work_struct *work) } } -static void xgpu_ai_mailbox_bad_pages_work(struct work_struct *work) +static void xgpu_ai_mailbox_req_bad_pages_work(struct work_struct *work) { - struct amdgpu_virt *virt = container_of(work, struct amdgpu_virt, bad_pages_work); + struct amdgpu_virt *virt = container_of(work, struct amdgpu_virt, req_bad_pages_work); struct amdgpu_device *adev = container_of(virt, struct amdgpu_device, virt); if (down_read_trylock(&adev->reset_domain->sem)) { amdgpu_virt_fini_data_exchange(adev); amdgpu_virt_request_bad_pages(adev); + up_read(&adev->reset_domain->sem); + } +} + +/** + * xgpu_ai_mailbox_handle_bad_pages_work - Reinitialize the data exchange region to get fresh bad page information + * @work: pointer to the work_struct + * + * This work handler is triggered when bad pages are ready, and it reinitializes + * the data exchange region to retrieve updated bad page information from the host. 
+ */ +static void xgpu_ai_mailbox_handle_bad_pages_work(struct work_struct *work) +{ + struct amdgpu_virt *virt = container_of(work, struct amdgpu_virt, handle_bad_pages_work); + struct amdgpu_device *adev = container_of(virt, struct amdgpu_device, virt); + + if (down_read_trylock(&adev->reset_domain->sem)) { + amdgpu_virt_fini_data_exchange(adev); amdgpu_virt_init_data_exchange(adev); up_read(&adev->reset_domain->sem); } @@ -327,10 +345,15 @@ static int xgpu_ai_mailbox_rcv_irq(struct amdgpu_device *adev, struct amdgpu_ras *ras = amdgpu_ras_get_context(adev); switch (event) { + case IDH_RAS_BAD_PAGES_READY: + xgpu_ai_mailbox_send_ack(adev); + if (amdgpu_sriov_runtime(adev)) + schedule_work(&adev->virt.handle_bad_pages_work); + break; case IDH_RAS_BAD_PAGES_NOTIFICATION: xgpu_ai_mailbox_send_ack(adev); if (amdgpu_sriov_runtime(adev)) - schedule_work(&adev->virt.bad_pages_work); + schedule_work(&adev->virt.req_bad_pages_work); break; case IDH_UNRECOV_ERR_NOTIFICATION: xgpu_ai_mailbox_send_ack(adev); @@ -415,7 +438,8 @@ int xgpu_ai_mailbox_get_irq(struct amdgpu_device *adev) } INIT_WORK(&adev->virt.flr_work, xgpu_ai_mailbox_flr_work); - INIT_WORK(&adev->virt.bad_pages_work, xgpu_ai_mailbox_bad_pages_work); + INIT_WORK(&adev->virt.req_bad_pages_work, xgpu_ai_mailbox_req_bad_pages_work); + INIT_WORK(&adev->virt.handle_bad_pages_work, xgpu_ai_mailbox_handle_bad_pages_work); return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c b/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c index f6d8597452ed..e5282a5d05d9 100644 --- a/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c +++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c @@ -202,8 +202,8 @@ send_request: case IDH_REQ_RAS_CPER_DUMP: event = IDH_RAS_CPER_DUMP_READY; break; - case IDH_REQ_RAS_BAD_PAGES: - event = IDH_RAS_BAD_PAGES_READY; + case IDH_REQ_RAS_CHK_CRITI: + event = IDH_REQ_RAS_CHK_CRITI_READY; break; default: break; @@ -359,14 +359,32 @@ static void xgpu_nv_mailbox_flr_work(struct work_struct *work) } } -static void xgpu_nv_mailbox_bad_pages_work(struct work_struct *work) +static void xgpu_nv_mailbox_req_bad_pages_work(struct work_struct *work) { - struct amdgpu_virt *virt = container_of(work, struct amdgpu_virt, bad_pages_work); + struct amdgpu_virt *virt = container_of(work, struct amdgpu_virt, req_bad_pages_work); struct amdgpu_device *adev = container_of(virt, struct amdgpu_device, virt); if (down_read_trylock(&adev->reset_domain->sem)) { amdgpu_virt_fini_data_exchange(adev); amdgpu_virt_request_bad_pages(adev); + up_read(&adev->reset_domain->sem); + } +} + +/** + * xgpu_nv_mailbox_handle_bad_pages_work - Reinitialize the data exchange region to get fresh bad page information + * @work: pointer to the work_struct + * + * This work handler is triggered when bad pages are ready, and it reinitializes + * the data exchange region to retrieve updated bad page information from the host. 
+ */ +static void xgpu_nv_mailbox_handle_bad_pages_work(struct work_struct *work) +{ + struct amdgpu_virt *virt = container_of(work, struct amdgpu_virt, handle_bad_pages_work); + struct amdgpu_device *adev = container_of(virt, struct amdgpu_device, virt); + + if (down_read_trylock(&adev->reset_domain->sem)) { + amdgpu_virt_fini_data_exchange(adev); amdgpu_virt_init_data_exchange(adev); up_read(&adev->reset_domain->sem); } @@ -397,10 +415,15 @@ static int xgpu_nv_mailbox_rcv_irq(struct amdgpu_device *adev, struct amdgpu_ras *ras = amdgpu_ras_get_context(adev); switch (event) { + case IDH_RAS_BAD_PAGES_READY: + xgpu_nv_mailbox_send_ack(adev); + if (amdgpu_sriov_runtime(adev)) + schedule_work(&adev->virt.handle_bad_pages_work); + break; case IDH_RAS_BAD_PAGES_NOTIFICATION: xgpu_nv_mailbox_send_ack(adev); if (amdgpu_sriov_runtime(adev)) - schedule_work(&adev->virt.bad_pages_work); + schedule_work(&adev->virt.req_bad_pages_work); break; case IDH_UNRECOV_ERR_NOTIFICATION: xgpu_nv_mailbox_send_ack(adev); @@ -485,7 +508,8 @@ int xgpu_nv_mailbox_get_irq(struct amdgpu_device *adev) } INIT_WORK(&adev->virt.flr_work, xgpu_nv_mailbox_flr_work); - INIT_WORK(&adev->virt.bad_pages_work, xgpu_nv_mailbox_bad_pages_work); + INIT_WORK(&adev->virt.req_bad_pages_work, xgpu_nv_mailbox_req_bad_pages_work); + INIT_WORK(&adev->virt.handle_bad_pages_work, xgpu_nv_mailbox_handle_bad_pages_work); return 0; } @@ -535,6 +559,16 @@ static int xgpu_nv_req_ras_bad_pages(struct amdgpu_device *adev) return xgpu_nv_send_access_requests(adev, IDH_REQ_RAS_BAD_PAGES); } +static int xgpu_nv_check_vf_critical_region(struct amdgpu_device *adev, u64 addr) +{ + uint32_t addr_hi, addr_lo; + + addr_hi = (uint32_t)(addr >> 32); + addr_lo = (uint32_t)(addr & 0xFFFFFFFF); + return xgpu_nv_send_access_requests_with_param( + adev, IDH_REQ_RAS_CHK_CRITI, addr_hi, addr_lo, 0); +} + const struct amdgpu_virt_ops xgpu_nv_virt_ops = { .req_full_gpu = xgpu_nv_request_full_gpu_access, .rel_full_gpu = xgpu_nv_release_full_gpu_access, @@ -548,4 +582,5 @@ const struct amdgpu_virt_ops xgpu_nv_virt_ops = { .req_ras_err_count = xgpu_nv_req_ras_err_count, .req_ras_cper_dump = xgpu_nv_req_ras_cper_dump, .req_bad_pages = xgpu_nv_req_ras_bad_pages, + .req_ras_chk_criti = xgpu_nv_check_vf_critical_region }; diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.h b/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.h index 5808689562cc..c1083e5e41e0 100644 --- a/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.h +++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.h @@ -43,6 +43,7 @@ enum idh_request { IDH_REQ_RAS_ERROR_COUNT = 203, IDH_REQ_RAS_CPER_DUMP = 204, IDH_REQ_RAS_BAD_PAGES = 205, + IDH_REQ_RAS_CHK_CRITI = 206 }; enum idh_event { @@ -62,6 +63,7 @@ enum idh_event { IDH_RAS_BAD_PAGES_READY = 15, IDH_RAS_BAD_PAGES_NOTIFICATION = 16, IDH_UNRECOV_ERR_NOTIFICATION = 17, + IDH_REQ_RAS_CHK_CRITI_READY = 18, IDH_TEXT_MESSAGE = 255, }; diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c b/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c index d5002ff931d8..860bc5cb03c8 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c @@ -151,9 +151,9 @@ static void nbio_v7_4_sdma_doorbell_range(struct amdgpu_device *adev, int instan * BIF_SDMA0_DOORBELL_RANGE: 0x3bc0 * BIF_SDMA1_DOORBELL_RANGE: 0x3bc4 * BIF_SDMA2_DOORBELL_RANGE: 0x3bd8 -+ * BIF_SDMA4_DOORBELL_RANGE: -+ * ARCTURUS: 0x3be0 -+ * ALDEBARAN: 0x3be4 + * BIF_SDMA4_DOORBELL_RANGE: + * ARCTURUS: 0x3be0 + * ALDEBARAN: 0x3be4 */ if (adev->asic_type == CHIP_ALDEBARAN && instance == 4) reg = instance + 0x4 + 0x1 + diff --git 
a/drivers/gpu/drm/amd/amdgpu/nbio_v7_9.c b/drivers/gpu/drm/amd/amdgpu/nbio_v7_9.c index a376f072700d..1c22bc11c1f8 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_9.c +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_9.c @@ -31,9 +31,6 @@ #define NPS_MODE_MASK 0x000000FFL -/* Core 0 Port 0 counter */ -#define smnPCIEP_NAK_COUNTER 0x1A340218 - static void nbio_v7_9_remap_hdp_registers(struct amdgpu_device *adev) { WREG32_SOC15(NBIO, 0, regBIF_BX0_REMAP_HDP_MEM_FLUSH_CNTL, @@ -467,22 +464,6 @@ static void nbio_v7_9_init_registers(struct amdgpu_device *adev) } } -static u64 nbio_v7_9_get_pcie_replay_count(struct amdgpu_device *adev) -{ - u32 val, nak_r, nak_g; - - if (adev->flags & AMD_IS_APU) - return 0; - - /* Get the number of NAKs received and generated */ - val = RREG32_PCIE(smnPCIEP_NAK_COUNTER); - nak_r = val & 0xFFFF; - nak_g = val >> 16; - - /* Add the total number of NAKs, i.e the number of replays */ - return (nak_r + nak_g); -} - #define MMIO_REG_HOLE_OFFSET 0x1A000 static void nbio_v7_9_set_reg_remap(struct amdgpu_device *adev) @@ -524,7 +505,6 @@ const struct amdgpu_nbio_funcs nbio_v7_9_funcs = { .get_memory_partition_mode = nbio_v7_9_get_memory_partition_mode, .is_nps_switch_requested = nbio_v7_9_is_nps_switch_requested, .init_registers = nbio_v7_9_init_registers, - .get_pcie_replay_count = nbio_v7_9_get_pcie_replay_count, .set_reg_remap = nbio_v7_9_set_reg_remap, }; diff --git a/drivers/gpu/drm/amd/amdgpu/nv.h b/drivers/gpu/drm/amd/amdgpu/nv.h index 83e9782aef39..8f4817404f10 100644 --- a/drivers/gpu/drm/amd/amdgpu/nv.h +++ b/drivers/gpu/drm/amd/amdgpu/nv.h @@ -31,5 +31,6 @@ extern const struct amdgpu_ip_block_version nv_common_ip_block; void nv_grbm_select(struct amdgpu_device *adev, u32 me, u32 pipe, u32 queue, u32 vmid); void nv_set_virt_ops(struct amdgpu_device *adev); +int cyan_skillfish_reg_base_init(struct amdgpu_device *adev); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h b/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h index f4a91b126c73..73f87131a7e9 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h +++ b/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h @@ -106,7 +106,9 @@ enum psp_gfx_cmd_id /*IDs of performance monitoring/profiling*/ GFX_CMD_ID_CONFIG_SQ_PERFMON = 0x00000046, /* Config CGTT_SQ_CLK_CTRL */ /* Dynamic memory partitioninig (NPS mode change)*/ - GFX_CMD_ID_FB_NPS_MODE = 0x00000048, /* Configure memory partitioning mode */ + GFX_CMD_ID_FB_NPS_MODE = 0x00000048, /* Configure memory partitioning mode */ + GFX_CMD_ID_FB_FW_RESERV_ADDR = 0x00000050, /* Query FW reservation addr */ + GFX_CMD_ID_FB_FW_RESERV_EXT_ADDR = 0x00000051, /* Query FW reservation extended addr */ }; /* PSP boot config sub-commands */ @@ -404,11 +406,19 @@ struct psp_gfx_uresp_bootcfg { uint32_t boot_cfg; /* boot config data */ }; +/* Command-specific response for fw reserve info */ +struct psp_gfx_uresp_fw_reserve_info { + uint32_t reserve_base_address_hi; + uint32_t reserve_base_address_lo; + uint32_t reserve_size; +}; + /* Union of command-specific responses for GPCOM ring. */ union psp_gfx_uresp { struct psp_gfx_uresp_reserved reserved; struct psp_gfx_uresp_bootcfg boot_cfg; struct psp_gfx_uresp_fwar_db_info fwar_db_info; + struct psp_gfx_uresp_fw_reserve_info fw_reserve_info; }; /* Structure of GFX Response buffer. 
diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v10_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v10_0.c index 145186a1e48f..3584b8c18fd9 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/psp_v10_0.c @@ -94,7 +94,7 @@ static int psp_v10_0_ring_create(struct psp_context *psp, /* Wait for response flag (bit 31) in C2PMSG_64 */ ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64), - 0x80000000, 0x8000FFFF, false); + MBOX_TOS_RESP_FLAG, MBOX_TOS_RESP_MASK, 0); return ret; } @@ -115,7 +115,7 @@ static int psp_v10_0_ring_stop(struct psp_context *psp, /* Wait for response flag (bit 31) in C2PMSG_64 */ ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64), - 0x80000000, 0x80000000, false); + MBOX_TOS_RESP_FLAG, MBOX_TOS_RESP_MASK, 0); return ret; } diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c index 215543575f47..64b240b51f1a 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c @@ -149,14 +149,12 @@ static int psp_v11_0_wait_for_bootloader(struct psp_context *psp) int ret; int retry_loop; - for (retry_loop = 0; retry_loop < 10; retry_loop++) { + for (retry_loop = 0; retry_loop < 20; retry_loop++) { /* Wait for bootloader to signify that is ready having bit 31 of C2PMSG_35 set to 1 */ - ret = psp_wait_for(psp, - SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_35), - 0x80000000, - 0x80000000, - false); + ret = psp_wait_for( + psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_35), + 0x80000000, 0x8000FFFF, PSP_WAITREG_NOVERBOSE); if (ret == 0) return 0; @@ -252,8 +250,8 @@ static int psp_v11_0_bootloader_load_sos(struct psp_context *psp) /* there might be handshake issue with hardware which needs delay */ mdelay(20); ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_81), - RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_81), - 0, true); + RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_81), 0, + PSP_WAITREG_CHANGED); return ret; } @@ -277,11 +275,13 @@ static int psp_v11_0_ring_stop(struct psp_context *psp, /* Wait for response flag (bit 31) */ if (amdgpu_sriov_vf(adev)) - ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_101), - 0x80000000, 0x80000000, false); + ret = psp_wait_for( + psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_101), + MBOX_TOS_RESP_FLAG, MBOX_TOS_RESP_MASK, 0); else - ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64), - 0x80000000, 0x80000000, false); + ret = psp_wait_for( + psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64), + MBOX_TOS_RESP_FLAG, MBOX_TOS_RESP_MASK, 0); return ret; } @@ -317,13 +317,15 @@ static int psp_v11_0_ring_create(struct psp_context *psp, mdelay(20); /* Wait for response flag (bit 31) in C2PMSG_101 */ - ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_101), - 0x80000000, 0x8000FFFF, false); + ret = psp_wait_for( + psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_101), + MBOX_TOS_RESP_FLAG, MBOX_TOS_RESP_MASK, 0); } else { /* Wait for sOS ready for ring creation */ - ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64), - 0x80000000, 0x80000000, false); + ret = psp_wait_for( + psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64), + MBOX_TOS_READY_FLAG, MBOX_TOS_READY_MASK, 0); if (ret) { DRM_ERROR("Failed to wait for sOS ready for ring creation\n"); return ret; @@ -347,8 +349,9 @@ static int psp_v11_0_ring_create(struct psp_context *psp, mdelay(20); /* Wait for response flag (bit 31) in C2PMSG_64 */ - ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, 
mmMP0_SMN_C2PMSG_64), - 0x80000000, 0x8000FFFF, false); + ret = psp_wait_for( + psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64), + MBOX_TOS_RESP_FLAG, MBOX_TOS_RESP_MASK, 0); } return ret; @@ -381,7 +384,8 @@ static int psp_v11_0_mode1_reset(struct psp_context *psp) offset = SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64); - ret = psp_wait_for(psp, offset, 0x80000000, 0x8000FFFF, false); + ret = psp_wait_for(psp, offset, MBOX_TOS_READY_FLAG, + MBOX_TOS_READY_MASK, 0); if (ret) { DRM_INFO("psp is not working correctly before mode1 reset!\n"); @@ -393,17 +397,6 @@ static int psp_v11_0_mode1_reset(struct psp_context *psp) msleep(500); - offset = SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_33); - - ret = psp_wait_for(psp, offset, 0x80000000, 0x80000000, false); - - if (ret) { - DRM_INFO("psp mode 1 reset failed!\n"); - return -EINVAL; - } - - DRM_INFO("psp mode1 reset succeed \n"); - return 0; } @@ -421,8 +414,9 @@ static int psp_v11_0_memory_training_send_msg(struct psp_context *psp, int msg) max_wait = MEM_TRAIN_SEND_MSG_TIMEOUT_US / adev->usec_timeout; for (i = 0; i < max_wait; i++) { - ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_35), - 0x80000000, 0x80000000, false); + ret = psp_wait_for( + psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_35), + 0x80000000, 0x80000000, PSP_WAITREG_NOVERBOSE); if (ret == 0) break; } @@ -601,7 +595,7 @@ static int psp_v11_0_load_usbc_pd_fw(struct psp_context *psp, uint64_t fw_pri_mc WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_36, (fw_pri_mc_addr >> 20)); ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_35), - 0x80000000, 0x80000000, false); + 0x80000000, 0x80000000, 0); if (ret) return ret; @@ -638,7 +632,7 @@ static int psp_v11_0_read_usbc_pd_fw(struct psp_context *psp, uint32_t *fw_ver) WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_35, C2PMSG_CMD_GFX_USB_PD_FW_VER); ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_35), - 0x80000000, 0x80000000, false); + 0x80000000, 0x80000000, 0); if (!ret) *fw_ver = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_36); @@ -659,7 +653,8 @@ static const struct psp_funcs psp_v11_0_funcs = { .ring_get_wptr = psp_v11_0_ring_get_wptr, .ring_set_wptr = psp_v11_0_ring_set_wptr, .load_usbc_pd_fw = psp_v11_0_load_usbc_pd_fw, - .read_usbc_pd_fw = psp_v11_0_read_usbc_pd_fw + .read_usbc_pd_fw = psp_v11_0_read_usbc_pd_fw, + .wait_for_bootloader = psp_v11_0_wait_for_bootloader }; void psp_v11_0_set_psp_funcs(struct psp_context *psp) diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v11_0_8.c b/drivers/gpu/drm/amd/amdgpu/psp_v11_0_8.c index 5697760a819b..93787a90d598 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_v11_0_8.c +++ b/drivers/gpu/drm/amd/amdgpu/psp_v11_0_8.c @@ -41,8 +41,9 @@ static int psp_v11_0_8_ring_stop(struct psp_context *psp, /* there might be handshake issue with hardware which needs delay */ mdelay(20); /* Wait for response flag (bit 31) */ - ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_101), - 0x80000000, 0x80000000, false); + ret = psp_wait_for( + psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_101), + MBOX_TOS_RESP_FLAG, MBOX_TOS_RESP_MASK, 0); } else { /* Write the ring destroy command*/ WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_64, @@ -50,8 +51,9 @@ static int psp_v11_0_8_ring_stop(struct psp_context *psp, /* there might be handshake issue with hardware which needs delay */ mdelay(20); /* Wait for response flag (bit 31) */ - ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64), - 0x80000000, 0x80000000, false); + ret = psp_wait_for( + psp, SOC15_REG_OFFSET(MP0, 
0, mmMP0_SMN_C2PMSG_64), + MBOX_TOS_RESP_FLAG, MBOX_TOS_RESP_MASK, 0); } return ret; @@ -87,13 +89,15 @@ static int psp_v11_0_8_ring_create(struct psp_context *psp, mdelay(20); /* Wait for response flag (bit 31) in C2PMSG_101 */ - ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_101), - 0x80000000, 0x8000FFFF, false); + ret = psp_wait_for( + psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_101), + MBOX_TOS_RESP_FLAG, MBOX_TOS_RESP_MASK, 0); } else { /* Wait for sOS ready for ring creation */ - ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64), - 0x80000000, 0x80000000, false); + ret = psp_wait_for( + psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64), + MBOX_TOS_READY_FLAG, MBOX_TOS_READY_MASK, 0); if (ret) { DRM_ERROR("Failed to wait for trust OS ready for ring creation\n"); return ret; @@ -117,8 +121,9 @@ static int psp_v11_0_8_ring_create(struct psp_context *psp, mdelay(20); /* Wait for response flag (bit 31) in C2PMSG_64 */ - ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64), - 0x80000000, 0x8000FFFF, false); + ret = psp_wait_for( + psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64), + MBOX_TOS_RESP_FLAG, MBOX_TOS_RESP_MASK, 0); } return ret; diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v12_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v12_0.c index 80153f837470..4c6450d62299 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/psp_v12_0.c @@ -82,7 +82,7 @@ static int psp_v12_0_bootloader_load_sysdrv(struct psp_context *psp) /* Wait for bootloader to signify that is ready having bit 31 of C2PMSG_35 set to 1 */ ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_35), - 0x80000000, 0x80000000, false); + 0x80000000, 0x80000000, 0); if (ret) return ret; @@ -97,7 +97,7 @@ static int psp_v12_0_bootloader_load_sysdrv(struct psp_context *psp) psp_gfxdrv_command_reg); ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_35), - 0x80000000, 0x80000000, false); + 0x80000000, 0x80000000, 0); return ret; } @@ -118,7 +118,7 @@ static int psp_v12_0_bootloader_load_sos(struct psp_context *psp) /* Wait for bootloader to signify that is ready having bit 31 of C2PMSG_35 set to 1 */ ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_35), - 0x80000000, 0x80000000, false); + 0x80000000, 0x80000000, 0); if (ret) return ret; @@ -133,8 +133,8 @@ static int psp_v12_0_bootloader_load_sos(struct psp_context *psp) psp_gfxdrv_command_reg); ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_81), - RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_81), - 0, true); + RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_81), 0, + PSP_WAITREG_CHANGED); return ret; } @@ -163,7 +163,7 @@ static int psp_v12_0_ring_create(struct psp_context *psp, /* Wait for response flag (bit 31) in C2PMSG_64 */ ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64), - 0x80000000, 0x8000FFFF, false); + MBOX_TOS_RESP_FLAG, MBOX_TOS_RESP_MASK, 0); return ret; } @@ -184,11 +184,13 @@ static int psp_v12_0_ring_stop(struct psp_context *psp, /* Wait for response flag (bit 31) */ if (amdgpu_sriov_vf(adev)) - ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_101), - 0x80000000, 0x80000000, false); + ret = psp_wait_for( + psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_101), + MBOX_TOS_RESP_FLAG, MBOX_TOS_RESP_MASK, 0); else - ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64), - 0x80000000, 0x80000000, false); + ret = psp_wait_for( + psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64), + 
MBOX_TOS_RESP_FLAG, MBOX_TOS_RESP_MASK, 0); return ret; } @@ -219,7 +221,8 @@ static int psp_v12_0_mode1_reset(struct psp_context *psp) offset = SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64); - ret = psp_wait_for(psp, offset, 0x80000000, 0x8000FFFF, false); + ret = psp_wait_for(psp, offset, MBOX_TOS_READY_FLAG, + MBOX_TOS_READY_MASK, 0); if (ret) { DRM_INFO("psp is not working correctly before mode1 reset!\n"); @@ -233,7 +236,8 @@ static int psp_v12_0_mode1_reset(struct psp_context *psp) offset = SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_33); - ret = psp_wait_for(psp, offset, 0x80000000, 0x80000000, false); + ret = psp_wait_for(psp, offset, MBOX_TOS_RESP_FLAG, MBOX_TOS_RESP_MASK, + 0); if (ret) { DRM_INFO("psp mode 1 reset failed!\n"); diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c index df612fd9cc50..af4a7d7c4abd 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c +++ b/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c @@ -42,7 +42,9 @@ MODULE_FIRMWARE("amdgpu/psp_13_0_5_ta.bin"); MODULE_FIRMWARE("amdgpu/psp_13_0_8_toc.bin"); MODULE_FIRMWARE("amdgpu/psp_13_0_8_ta.bin"); MODULE_FIRMWARE("amdgpu/psp_13_0_0_sos.bin"); +MODULE_FIRMWARE("amdgpu/psp_13_0_0_sos_kicker.bin"); MODULE_FIRMWARE("amdgpu/psp_13_0_0_ta.bin"); +MODULE_FIRMWARE("amdgpu/psp_13_0_0_ta_kicker.bin"); MODULE_FIRMWARE("amdgpu/psp_13_0_7_sos.bin"); MODULE_FIRMWARE("amdgpu/psp_13_0_7_ta.bin"); MODULE_FIRMWARE("amdgpu/psp_13_0_10_sos.bin"); @@ -180,7 +182,7 @@ static int psp_v13_0_wait_for_vmbx_ready(struct psp_context *psp) ready having bit 31 of C2PMSG_33 set to 1 */ ret = psp_wait_for( psp, SOC15_REG_OFFSET(MP0, 0, regMP0_SMN_C2PMSG_33), - 0x80000000, 0xffffffff, false); + 0x80000000, 0xffffffff, PSP_WAITREG_NOVERBOSE); if (ret == 0) break; @@ -211,7 +213,7 @@ static int psp_v13_0_wait_for_bootloader(struct psp_context *psp) for (retry_loop = 0; retry_loop < retry_cnt; retry_loop++) { ret = psp_wait_for( psp, SOC15_REG_OFFSET(MP0, 0, regMP0_SMN_C2PMSG_35), - 0x80000000, 0xffffffff, false); + 0x80000000, 0xffffffff, PSP_WAITREG_NOVERBOSE); if (ret == 0) return 0; @@ -360,8 +362,8 @@ static int psp_v13_0_bootloader_load_sos(struct psp_context *psp) /* there might be handshake issue with hardware which needs delay */ mdelay(20); ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, regMP0_SMN_C2PMSG_81), - RREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_81), - 0, true); + RREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_81), 0, + PSP_WAITREG_CHANGED); if (!ret) psp_v13_0_init_sos_version(psp); @@ -382,8 +384,9 @@ static int psp_v13_0_ring_stop(struct psp_context *psp, /* there might be handshake issue with hardware which needs delay */ mdelay(20); /* Wait for response flag (bit 31) */ - ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, regMP0_SMN_C2PMSG_101), - 0x80000000, 0x80000000, false); + ret = psp_wait_for( + psp, SOC15_REG_OFFSET(MP0, 0, regMP0_SMN_C2PMSG_101), + MBOX_TOS_RESP_FLAG, MBOX_TOS_RESP_MASK, 0); } else { /* Write the ring destroy command*/ WREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_64, @@ -391,8 +394,9 @@ static int psp_v13_0_ring_stop(struct psp_context *psp, /* there might be handshake issue with hardware which needs delay */ mdelay(20); /* Wait for response flag (bit 31) */ - ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, regMP0_SMN_C2PMSG_64), - 0x80000000, 0x80000000, false); + ret = psp_wait_for( + psp, SOC15_REG_OFFSET(MP0, 0, regMP0_SMN_C2PMSG_64), + MBOX_TOS_RESP_FLAG, MBOX_TOS_RESP_MASK, 0); } return ret; @@ -428,13 +432,15 @@ static int psp_v13_0_ring_create(struct psp_context *psp, 
mdelay(20); /* Wait for response flag (bit 31) in C2PMSG_101 */ - ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, regMP0_SMN_C2PMSG_101), - 0x80000000, 0x8000FFFF, false); + ret = psp_wait_for( + psp, SOC15_REG_OFFSET(MP0, 0, regMP0_SMN_C2PMSG_101), + MBOX_TOS_RESP_FLAG, MBOX_TOS_RESP_MASK, 0); } else { /* Wait for sOS ready for ring creation */ - ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, regMP0_SMN_C2PMSG_64), - 0x80000000, 0x80000000, false); + ret = psp_wait_for( + psp, SOC15_REG_OFFSET(MP0, 0, regMP0_SMN_C2PMSG_64), + MBOX_TOS_READY_FLAG, MBOX_TOS_READY_MASK, 0); if (ret) { DRM_ERROR("Failed to wait for trust OS ready for ring creation\n"); return ret; @@ -458,8 +464,9 @@ static int psp_v13_0_ring_create(struct psp_context *psp, mdelay(20); /* Wait for response flag (bit 31) in C2PMSG_64 */ - ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, regMP0_SMN_C2PMSG_64), - 0x80000000, 0x8000FFFF, false); + ret = psp_wait_for( + psp, SOC15_REG_OFFSET(MP0, 0, regMP0_SMN_C2PMSG_64), + MBOX_TOS_RESP_FLAG, MBOX_TOS_RESP_MASK, 0); } return ret; @@ -522,8 +529,9 @@ static int psp_v13_0_memory_training_send_msg(struct psp_context *psp, int msg) max_wait = MEM_TRAIN_SEND_MSG_TIMEOUT_US / adev->usec_timeout; for (i = 0; i < max_wait; i++) { - ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, regMP0_SMN_C2PMSG_35), - 0x80000000, 0x80000000, false); + ret = psp_wait_for( + psp, SOC15_REG_OFFSET(MP0, 0, regMP0_SMN_C2PMSG_35), + 0x80000000, 0x80000000, PSP_WAITREG_NOVERBOSE); if (ret == 0) break; } @@ -675,7 +683,7 @@ static int psp_v13_0_load_usbc_pd_fw(struct psp_context *psp, uint64_t fw_pri_mc WREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_36, (fw_pri_mc_addr >> 20)); ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, regMP0_SMN_C2PMSG_35), - 0x80000000, 0x80000000, false); + 0x80000000, 0x80000000, 0); if (ret) return ret; @@ -712,7 +720,7 @@ static int psp_v13_0_read_usbc_pd_fw(struct psp_context *psp, uint32_t *fw_ver) WREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_35, C2PMSG_CMD_GFX_USB_PD_FW_VER); ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, regMP0_SMN_C2PMSG_35), - 0x80000000, 0x80000000, false); + 0x80000000, 0x80000000, 0); if (!ret) *fw_ver = RREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_36); @@ -737,8 +745,9 @@ static int psp_v13_0_exec_spi_cmd(struct psp_context *psp, int cmd) ret = psp_wait_for_spirom_update(psp, SOC15_REG_OFFSET(MP0, 0, regMP0_SMN_C2PMSG_115), MBOX_READY_FLAG, MBOX_READY_MASK, PSP_SPIROM_UPDATE_TIMEOUT); else - ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, regMP0_SMN_C2PMSG_115), - MBOX_READY_FLAG, MBOX_READY_MASK, false); + ret = psp_wait_for( + psp, SOC15_REG_OFFSET(MP0, 0, regMP0_SMN_C2PMSG_115), + MBOX_READY_FLAG, MBOX_READY_MASK, 0); if (ret) { dev_err(adev->dev, "SPI cmd %x timed out, ret = %d", cmd, ret); return ret; @@ -762,7 +771,7 @@ static int psp_v13_0_update_spirom(struct psp_context *psp, /* Confirm PSP is ready to start */ ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, regMP0_SMN_C2PMSG_115), - MBOX_READY_FLAG, MBOX_READY_MASK, false); + MBOX_READY_FLAG, MBOX_READY_MASK, 0); if (ret) { dev_err(adev->dev, "PSP Not ready to start processing, ret = %d", ret); return ret; @@ -797,7 +806,7 @@ static int psp_v13_0_dump_spirom(struct psp_context *psp, /* Confirm PSP is ready to start */ ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, regMP0_SMN_C2PMSG_115), - MBOX_READY_FLAG, MBOX_READY_MASK, false); + MBOX_READY_FLAG, MBOX_READY_MASK, 0); if (ret) { dev_err(adev->dev, "PSP Not ready to start processing, ret = %d", ret); return ret; @@ -924,8 +933,9 @@ static int 
psp_v13_0_reg_program_no_ring(struct psp_context *psp, uint32_t val, WREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_102, id); WREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_103, val); - ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, regMP0_SMN_C2PMSG_101), - 0x80000000, 0x80000000, false); + ret = psp_wait_for( + psp, SOC15_REG_OFFSET(MP0, 0, regMP0_SMN_C2PMSG_101), + 0x80000000, 0x80000000, 0); } return ret; diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v13_0_4.c b/drivers/gpu/drm/amd/amdgpu/psp_v13_0_4.c index eaa5512a21da..5f39a2edcc95 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_v13_0_4.c +++ b/drivers/gpu/drm/amd/amdgpu/psp_v13_0_4.c @@ -76,11 +76,9 @@ static int psp_v13_0_4_wait_for_bootloader(struct psp_context *psp) for (retry_loop = 0; retry_loop < 10; retry_loop++) { /* Wait for bootloader to signify that is ready having bit 31 of C2PMSG_35 set to 1 */ - ret = psp_wait_for(psp, - SOC15_REG_OFFSET(MP0, 0, regMP0_SMN_C2PMSG_35), - 0x80000000, - 0x80000000, - false); + ret = psp_wait_for( + psp, SOC15_REG_OFFSET(MP0, 0, regMP0_SMN_C2PMSG_35), + 0x80000000, 0x80000000, PSP_WAITREG_NOVERBOSE); if (ret == 0) return 0; @@ -185,8 +183,8 @@ static int psp_v13_0_4_bootloader_load_sos(struct psp_context *psp) /* there might be handshake issue with hardware which needs delay */ mdelay(20); ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, regMP0_SMN_C2PMSG_81), - RREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_81), - 0, true); + RREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_81), 0, + PSP_WAITREG_CHANGED); return ret; } @@ -204,8 +202,9 @@ static int psp_v13_0_4_ring_stop(struct psp_context *psp, /* there might be handshake issue with hardware which needs delay */ mdelay(20); /* Wait for response flag (bit 31) */ - ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, regMP0_SMN_C2PMSG_101), - 0x80000000, 0x80000000, false); + ret = psp_wait_for( + psp, SOC15_REG_OFFSET(MP0, 0, regMP0_SMN_C2PMSG_101), + MBOX_TOS_RESP_FLAG, MBOX_TOS_RESP_MASK, 0); } else { /* Write the ring destroy command*/ WREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_64, @@ -213,8 +212,9 @@ static int psp_v13_0_4_ring_stop(struct psp_context *psp, /* there might be handshake issue with hardware which needs delay */ mdelay(20); /* Wait for response flag (bit 31) */ - ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, regMP0_SMN_C2PMSG_64), - 0x80000000, 0x80000000, false); + ret = psp_wait_for( + psp, SOC15_REG_OFFSET(MP0, 0, regMP0_SMN_C2PMSG_64), + MBOX_TOS_RESP_FLAG, MBOX_TOS_RESP_MASK, 0); } return ret; @@ -250,13 +250,15 @@ static int psp_v13_0_4_ring_create(struct psp_context *psp, mdelay(20); /* Wait for response flag (bit 31) in C2PMSG_101 */ - ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, regMP0_SMN_C2PMSG_101), - 0x80000000, 0x8000FFFF, false); + ret = psp_wait_for( + psp, SOC15_REG_OFFSET(MP0, 0, regMP0_SMN_C2PMSG_101), + MBOX_TOS_RESP_FLAG, MBOX_TOS_RESP_MASK, 0); } else { /* Wait for sOS ready for ring creation */ - ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, regMP0_SMN_C2PMSG_64), - 0x80000000, 0x80000000, false); + ret = psp_wait_for( + psp, SOC15_REG_OFFSET(MP0, 0, regMP0_SMN_C2PMSG_64), + MBOX_TOS_READY_FLAG, MBOX_TOS_READY_MASK, 0); if (ret) { DRM_ERROR("Failed to wait for trust OS ready for ring creation\n"); return ret; @@ -280,8 +282,9 @@ static int psp_v13_0_4_ring_create(struct psp_context *psp, mdelay(20); /* Wait for response flag (bit 31) in C2PMSG_64 */ - ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, regMP0_SMN_C2PMSG_64), - 0x80000000, 0x8000FFFF, false); + ret = psp_wait_for( + psp, SOC15_REG_OFFSET(MP0, 0, regMP0_SMN_C2PMSG_64), + 
MBOX_TOS_RESP_FLAG, MBOX_TOS_RESP_MASK, 0); } return ret; diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v14_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v14_0.c index 256288c6cd78..38dfc5c19f2a 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_v14_0.c +++ b/drivers/gpu/drm/amd/amdgpu/psp_v14_0.c @@ -34,7 +34,9 @@ MODULE_FIRMWARE("amdgpu/psp_14_0_2_sos.bin"); MODULE_FIRMWARE("amdgpu/psp_14_0_2_ta.bin"); MODULE_FIRMWARE("amdgpu/psp_14_0_3_sos.bin"); +MODULE_FIRMWARE("amdgpu/psp_14_0_3_sos_kicker.bin"); MODULE_FIRMWARE("amdgpu/psp_14_0_3_ta.bin"); +MODULE_FIRMWARE("amdgpu/psp_14_0_3_ta_kicker.bin"); MODULE_FIRMWARE("amdgpu/psp_14_0_5_toc.bin"); MODULE_FIRMWARE("amdgpu/psp_14_0_5_ta.bin"); @@ -109,11 +111,9 @@ static int psp_v14_0_wait_for_bootloader(struct psp_context *psp) for (retry_loop = 0; retry_loop < 10; retry_loop++) { /* Wait for bootloader to signify that is ready having bit 31 of C2PMSG_35 set to 1 */ - ret = psp_wait_for(psp, - SOC15_REG_OFFSET(MP0, 0, regMPASP_SMN_C2PMSG_35), - 0x80000000, - 0x80000000, - false); + ret = psp_wait_for( + psp, SOC15_REG_OFFSET(MP0, 0, regMPASP_SMN_C2PMSG_35), + 0x80000000, 0x80000000, PSP_WAITREG_NOVERBOSE); if (ret == 0) return 0; @@ -228,9 +228,10 @@ static int psp_v14_0_bootloader_load_sos(struct psp_context *psp) /* there might be handshake issue with hardware which needs delay */ mdelay(20); - ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, regMPASP_SMN_C2PMSG_81), - RREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_81), - 0, true); + ret = psp_wait_for(psp, + SOC15_REG_OFFSET(MP0, 0, regMPASP_SMN_C2PMSG_81), + RREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_81), 0, + PSP_WAITREG_CHANGED); return ret; } @@ -248,8 +249,9 @@ static int psp_v14_0_ring_stop(struct psp_context *psp, /* there might be handshake issue with hardware which needs delay */ mdelay(20); /* Wait for response flag (bit 31) */ - ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, regMPASP_SMN_C2PMSG_101), - 0x80000000, 0x80000000, false); + ret = psp_wait_for( + psp, SOC15_REG_OFFSET(MP0, 0, regMPASP_SMN_C2PMSG_101), + MBOX_TOS_RESP_FLAG, MBOX_TOS_RESP_MASK, 0); } else { /* Write the ring destroy command*/ WREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_64, @@ -257,8 +259,9 @@ static int psp_v14_0_ring_stop(struct psp_context *psp, /* there might be handshake issue with hardware which needs delay */ mdelay(20); /* Wait for response flag (bit 31) */ - ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, regMPASP_SMN_C2PMSG_64), - 0x80000000, 0x80000000, false); + ret = psp_wait_for( + psp, SOC15_REG_OFFSET(MP0, 0, regMPASP_SMN_C2PMSG_64), + MBOX_TOS_RESP_FLAG, MBOX_TOS_RESP_MASK, 0); } return ret; @@ -294,13 +297,15 @@ static int psp_v14_0_ring_create(struct psp_context *psp, mdelay(20); /* Wait for response flag (bit 31) in C2PMSG_101 */ - ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, regMPASP_SMN_C2PMSG_101), - 0x80000000, 0x8000FFFF, false); + ret = psp_wait_for( + psp, SOC15_REG_OFFSET(MP0, 0, regMPASP_SMN_C2PMSG_101), + MBOX_TOS_RESP_FLAG, MBOX_TOS_RESP_MASK, 0); } else { /* Wait for sOS ready for ring creation */ - ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, regMPASP_SMN_C2PMSG_64), - 0x80000000, 0x80000000, false); + ret = psp_wait_for( + psp, SOC15_REG_OFFSET(MP0, 0, regMPASP_SMN_C2PMSG_64), + MBOX_TOS_READY_FLAG, MBOX_TOS_READY_MASK, 0); if (ret) { DRM_ERROR("Failed to wait for trust OS ready for ring creation\n"); return ret; @@ -324,8 +329,9 @@ static int psp_v14_0_ring_create(struct psp_context *psp, mdelay(20); /* Wait for response flag (bit 31) in C2PMSG_64 */ - ret = psp_wait_for(psp, 
SOC15_REG_OFFSET(MP0, 0, regMPASP_SMN_C2PMSG_64), - 0x80000000, 0x8000FFFF, false); + ret = psp_wait_for( + psp, SOC15_REG_OFFSET(MP0, 0, regMPASP_SMN_C2PMSG_64), + MBOX_TOS_RESP_FLAG, MBOX_TOS_RESP_MASK, 0); } return ret; @@ -388,8 +394,9 @@ static int psp_v14_0_memory_training_send_msg(struct psp_context *psp, int msg) max_wait = MEM_TRAIN_SEND_MSG_TIMEOUT_US / adev->usec_timeout; for (i = 0; i < max_wait; i++) { - ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, regMPASP_SMN_C2PMSG_35), - 0x80000000, 0x80000000, false); + ret = psp_wait_for( + psp, SOC15_REG_OFFSET(MP0, 0, regMPASP_SMN_C2PMSG_35), + 0x80000000, 0x80000000, PSP_WAITREG_NOVERBOSE); if (ret == 0) break; } @@ -540,8 +547,9 @@ static int psp_v14_0_load_usbc_pd_fw(struct psp_context *psp, uint64_t fw_pri_mc */ WREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_36, (fw_pri_mc_addr >> 20)); - ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, regMPASP_SMN_C2PMSG_35), - 0x80000000, 0x80000000, false); + ret = psp_wait_for(psp, + SOC15_REG_OFFSET(MP0, 0, regMPASP_SMN_C2PMSG_35), + 0x80000000, 0x80000000, 0); if (ret) return ret; @@ -577,8 +585,9 @@ static int psp_v14_0_read_usbc_pd_fw(struct psp_context *psp, uint32_t *fw_ver) WREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_35, C2PMSG_CMD_GFX_USB_PD_FW_VER); - ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, regMPASP_SMN_C2PMSG_35), - 0x80000000, 0x80000000, false); + ret = psp_wait_for(psp, + SOC15_REG_OFFSET(MP0, 0, regMPASP_SMN_C2PMSG_35), + 0x80000000, 0x80000000, 0); if (!ret) *fw_ver = RREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_36); @@ -602,11 +611,13 @@ static int psp_v14_0_exec_spi_cmd(struct psp_context *psp, int cmd) ret = psp_wait_for_spirom_update(psp, SOC15_REG_OFFSET(MP0, 0, regMPASP_SMN_C2PMSG_115), MBOX_READY_FLAG, MBOX_READY_MASK, PSP_SPIROM_UPDATE_TIMEOUT); else - ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, regMPASP_SMN_C2PMSG_115), - MBOX_READY_FLAG, MBOX_READY_MASK, false); + ret = psp_wait_for( + psp, SOC15_REG_OFFSET(MP0, 0, regMPASP_SMN_C2PMSG_115), + MBOX_READY_FLAG, MBOX_READY_MASK, 0); - ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, regMPASP_SMN_C2PMSG_115), - MBOX_READY_FLAG, MBOX_READY_MASK, false); + ret = psp_wait_for(psp, + SOC15_REG_OFFSET(MP0, 0, regMPASP_SMN_C2PMSG_115), + MBOX_READY_FLAG, MBOX_READY_MASK, 0); if (ret) { dev_err(adev->dev, "SPI cmd %x timed out, ret = %d", cmd, ret); return ret; @@ -629,8 +640,9 @@ static int psp_v14_0_update_spirom(struct psp_context *psp, int ret; /* Confirm PSP is ready to start */ - ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, regMPASP_SMN_C2PMSG_115), - MBOX_READY_FLAG, MBOX_READY_MASK, false); + ret = psp_wait_for(psp, + SOC15_REG_OFFSET(MP0, 0, regMPASP_SMN_C2PMSG_115), + MBOX_READY_FLAG, MBOX_READY_MASK, 0); if (ret) { dev_err(adev->dev, "PSP Not ready to start processing, ret = %d", ret); return ret; diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c b/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c index f6b75e3e47ff..833830bc3e2e 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c +++ b/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c @@ -91,7 +91,7 @@ static int psp_v3_1_bootloader_load_sysdrv(struct psp_context *psp) /* Wait for bootloader to signify that is ready having bit 31 of C2PMSG_35 set to 1 */ ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_35), - 0x80000000, 0x80000000, false); + 0x80000000, 0x80000000, 0); if (ret) return ret; @@ -109,7 +109,7 @@ static int psp_v3_1_bootloader_load_sysdrv(struct psp_context *psp) mdelay(20); ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_35), - 
0x80000000, 0x80000000, false); + 0x80000000, 0x80000000, 0); return ret; } @@ -130,7 +130,7 @@ static int psp_v3_1_bootloader_load_sos(struct psp_context *psp) /* Wait for bootloader to signify that is ready having bit 31 of C2PMSG_35 set to 1 */ ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_35), - 0x80000000, 0x80000000, false); + 0x80000000, 0x80000000, 0); if (ret) return ret; @@ -147,8 +147,8 @@ static int psp_v3_1_bootloader_load_sos(struct psp_context *psp) /* there might be handshake issue with hardware which needs delay */ mdelay(20); ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_81), - RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_81), - 0, true); + RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_81), 0, + PSP_WAITREG_CHANGED); return ret; } @@ -168,7 +168,7 @@ static void psp_v3_1_reroute_ih(struct psp_context *psp) mdelay(20); psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64), - 0x80000000, 0x8000FFFF, false); + 0x80000000, 0x8000FFFF, 0); /* Change IH ring for UMC */ tmp = REG_SET_FIELD(0, IH_CLIENT_CFG_DATA, CREDIT_RETURN_ADDR, 0x1216b); @@ -180,7 +180,7 @@ static void psp_v3_1_reroute_ih(struct psp_context *psp) mdelay(20); psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64), - 0x80000000, 0x8000FFFF, false); + 0x80000000, 0x8000FFFF, 0); } static int psp_v3_1_ring_create(struct psp_context *psp, @@ -217,9 +217,9 @@ static int psp_v3_1_ring_create(struct psp_context *psp, mdelay(20); /* Wait for response flag (bit 31) in C2PMSG_101 */ - ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, - mmMP0_SMN_C2PMSG_101), 0x80000000, - 0x8000FFFF, false); + ret = psp_wait_for( + psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_101), + 0x80000000, 0x8000FFFF, 0); } else { /* Write low address of the ring to C2PMSG_69 */ @@ -240,10 +240,9 @@ static int psp_v3_1_ring_create(struct psp_context *psp, mdelay(20); /* Wait for response flag (bit 31) in C2PMSG_64 */ - ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, - mmMP0_SMN_C2PMSG_64), 0x80000000, - 0x8000FFFF, false); - + ret = psp_wait_for( + psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64), + 0x80000000, 0x8000FFFF, 0); } return ret; } @@ -267,11 +266,13 @@ static int psp_v3_1_ring_stop(struct psp_context *psp, /* Wait for response flag (bit 31) */ if (amdgpu_sriov_vf(adev)) - ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_101), - 0x80000000, 0x80000000, false); + ret = psp_wait_for( + psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_101), + 0x80000000, 0x80000000, 0); else - ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64), - 0x80000000, 0x80000000, false); + ret = psp_wait_for( + psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64), + 0x80000000, 0x80000000, 0); return ret; } @@ -311,7 +312,7 @@ static int psp_v3_1_mode1_reset(struct psp_context *psp) offset = SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64); - ret = psp_wait_for(psp, offset, 0x80000000, 0x8000FFFF, false); + ret = psp_wait_for(psp, offset, 0x80000000, 0x8000FFFF, 0); if (ret) { DRM_INFO("psp is not working correctly before mode1 reset!\n"); @@ -325,7 +326,7 @@ static int psp_v3_1_mode1_reset(struct psp_context *psp) offset = SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_33); - ret = psp_wait_for(psp, offset, 0x80000000, 0x80000000, false); + ret = psp_wait_for(psp, offset, 0x80000000, 0x80000000, 0); if (ret) { DRM_INFO("psp mode 1 reset failed!\n"); diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c index bcde34e4e0a1..36b1ca73c2ed 100644 --- 
a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c @@ -45,6 +45,7 @@ #include "amdgpu_ras.h" MODULE_FIRMWARE("amdgpu/sdma_4_4_2.bin"); +MODULE_FIRMWARE("amdgpu/sdma_4_4_4.bin"); MODULE_FIRMWARE("amdgpu/sdma_4_4_5.bin"); static const struct amdgpu_hwip_reg_entry sdma_reg_list_4_4_2[] = { @@ -109,6 +110,8 @@ static void sdma_v4_4_2_set_ras_funcs(struct amdgpu_device *adev); static void sdma_v4_4_2_update_reset_mask(struct amdgpu_device *adev); static int sdma_v4_4_2_stop_queue(struct amdgpu_ring *ring); static int sdma_v4_4_2_restore_queue(struct amdgpu_ring *ring); +static int sdma_v4_4_2_soft_reset_engine(struct amdgpu_device *adev, + u32 instance_id); static u32 sdma_v4_4_2_get_reg_offset(struct amdgpu_device *adev, u32 instance, u32 offset) @@ -490,7 +493,7 @@ static void sdma_v4_4_2_inst_gfx_stop(struct amdgpu_device *adev, { struct amdgpu_ring *sdma[AMDGPU_MAX_SDMA_INSTANCES]; u32 doorbell_offset, doorbell; - u32 rb_cntl, ib_cntl; + u32 rb_cntl, ib_cntl, sdma_cntl; int i; for_each_inst(i, inst_mask) { @@ -502,6 +505,9 @@ static void sdma_v4_4_2_inst_gfx_stop(struct amdgpu_device *adev, ib_cntl = RREG32_SDMA(i, regSDMA_GFX_IB_CNTL); ib_cntl = REG_SET_FIELD(ib_cntl, SDMA_GFX_IB_CNTL, IB_ENABLE, 0); WREG32_SDMA(i, regSDMA_GFX_IB_CNTL, ib_cntl); + sdma_cntl = RREG32_SDMA(i, regSDMA_CNTL); + sdma_cntl = REG_SET_FIELD(sdma_cntl, SDMA_CNTL, UTC_L1_ENABLE, 0); + WREG32_SDMA(i, regSDMA_CNTL, sdma_cntl); if (sdma[i]->use_doorbell) { doorbell = RREG32_SDMA(i, regSDMA_GFX_DOORBELL); @@ -995,6 +1001,7 @@ static int sdma_v4_4_2_inst_start(struct amdgpu_device *adev, /* set utc l1 enable flag always to 1 */ temp = RREG32_SDMA(i, regSDMA_CNTL); temp = REG_SET_FIELD(temp, SDMA_CNTL, UTC_L1_ENABLE, 1); + WREG32_SDMA(i, regSDMA_CNTL, temp); if (amdgpu_ip_version(adev, SDMA0_HWIP, 0) < IP_VERSION(4, 4, 5)) { /* enable context empty interrupt during initialization */ @@ -1337,6 +1344,7 @@ static bool sdma_v4_4_2_fw_support_paging_queue(struct amdgpu_device *adev) static const struct amdgpu_sdma_funcs sdma_v4_4_2_sdma_funcs = { .stop_kernel_queue = &sdma_v4_4_2_stop_queue, .start_kernel_queue = &sdma_v4_4_2_restore_queue, + .soft_reset_kernel_queue = &sdma_v4_4_2_soft_reset_engine, }; static int sdma_v4_4_2_early_init(struct amdgpu_ip_block *ip_block) @@ -1648,45 +1656,24 @@ static bool sdma_v4_4_2_is_queue_selected(struct amdgpu_device *adev, uint32_t i return (context_status & SDMA_GFX_CONTEXT_STATUS__SELECTED_MASK) != 0; } -static bool sdma_v4_4_2_ring_is_guilty(struct amdgpu_ring *ring) -{ - struct amdgpu_device *adev = ring->adev; - uint32_t instance_id = ring->me; - - return sdma_v4_4_2_is_queue_selected(adev, instance_id, false); -} - -static bool sdma_v4_4_2_page_ring_is_guilty(struct amdgpu_ring *ring) -{ - struct amdgpu_device *adev = ring->adev; - uint32_t instance_id = ring->me; - - if (!adev->sdma.has_page_queue) - return false; - - return sdma_v4_4_2_is_queue_selected(adev, instance_id, true); -} - -static int sdma_v4_4_2_reset_queue(struct amdgpu_ring *ring, unsigned int vmid) +static int sdma_v4_4_2_reset_queue(struct amdgpu_ring *ring, + unsigned int vmid, + struct amdgpu_fence *timedout_fence) { struct amdgpu_device *adev = ring->adev; - u32 id = GET_INST(SDMA0, ring->me); + u32 id = ring->me; int r; - if (!(adev->sdma.supported_reset & AMDGPU_RESET_TYPE_PER_QUEUE)) - return -EOPNOTSUPP; - - amdgpu_amdkfd_suspend(adev, false); - r = amdgpu_sdma_reset_engine(adev, id); - amdgpu_amdkfd_resume(adev, false); - + amdgpu_amdkfd_suspend(adev, true); + r = 
amdgpu_sdma_reset_engine(adev, id, false); + amdgpu_amdkfd_resume(adev, true); return r; } static int sdma_v4_4_2_stop_queue(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; - u32 instance_id = GET_INST(SDMA0, ring->me); + u32 instance_id = ring->me; u32 inst_mask; uint64_t rptr; @@ -1725,7 +1712,7 @@ static int sdma_v4_4_2_restore_queue(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; u32 inst_mask; - int i; + int i, r; inst_mask = 1 << ring->me; udelay(50); @@ -1742,7 +1729,18 @@ static int sdma_v4_4_2_restore_queue(struct amdgpu_ring *ring) return -ETIMEDOUT; } - return sdma_v4_4_2_inst_start(adev, inst_mask, true); + r = sdma_v4_4_2_inst_start(adev, inst_mask, true); + + return r; +} + +static int sdma_v4_4_2_soft_reset_engine(struct amdgpu_device *adev, + u32 instance_id) +{ + /* For SDMA 4.x, use the existing DPM interface for backward compatibility + * we need to convert the logical instance ID to physical instance ID before reset. + */ + return amdgpu_dpm_reset_sdma(adev, 1 << GET_INST(SDMA0, instance_id)); } static int sdma_v4_4_2_set_trap_irq_state(struct amdgpu_device *adev, @@ -2139,7 +2137,6 @@ static const struct amdgpu_ring_funcs sdma_v4_4_2_ring_funcs = { .emit_reg_wait = sdma_v4_4_2_ring_emit_reg_wait, .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper, .reset = sdma_v4_4_2_reset_queue, - .is_guilty = sdma_v4_4_2_ring_is_guilty, }; static const struct amdgpu_ring_funcs sdma_v4_4_2_page_ring_funcs = { @@ -2172,7 +2169,6 @@ static const struct amdgpu_ring_funcs sdma_v4_4_2_page_ring_funcs = { .emit_reg_wait = sdma_v4_4_2_ring_emit_reg_wait, .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper, .reset = sdma_v4_4_2_reset_queue, - .is_guilty = sdma_v4_4_2_page_ring_is_guilty, }; static void sdma_v4_4_2_set_ring_funcs(struct amdgpu_device *adev) diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c index 9505ae96fbec..7dc67a22a7a0 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c @@ -1399,6 +1399,7 @@ static int sdma_v5_0_sw_init(struct amdgpu_ip_block *ip_block) return r; for (i = 0; i < adev->sdma.num_instances; i++) { + mutex_init(&adev->sdma.instance[i].engine_reset_mutex); adev->sdma.instance[i].funcs = &sdma_v5_0_sdma_funcs; ring = &adev->sdma.instance[i].ring; ring->ring_obj = NULL; @@ -1427,7 +1428,8 @@ static int sdma_v5_0_sw_init(struct amdgpu_ip_block *ip_block) case IP_VERSION(5, 0, 0): case IP_VERSION(5, 0, 2): case IP_VERSION(5, 0, 5): - if (adev->sdma.instance[0].fw_version >= 35) + if ((adev->sdma.instance[0].fw_version >= 35) && + !amdgpu_sriov_vf(adev)) adev->sdma.supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE; break; default: @@ -1538,12 +1540,27 @@ static int sdma_v5_0_soft_reset(struct amdgpu_ip_block *ip_block) return 0; } -static int sdma_v5_0_reset_queue(struct amdgpu_ring *ring, unsigned int vmid) +static int sdma_v5_0_reset_queue(struct amdgpu_ring *ring, + unsigned int vmid, + struct amdgpu_fence *timedout_fence) { struct amdgpu_device *adev = ring->adev; - u32 inst_id = ring->me; + int r; + + if (ring->me >= adev->sdma.num_instances) { + dev_err(adev->dev, "sdma instance not found\n"); + return -EINVAL; + } - return amdgpu_sdma_reset_engine(adev, inst_id); + amdgpu_ring_reset_helper_begin(ring, timedout_fence); + + amdgpu_amdkfd_suspend(adev, true); + r = amdgpu_sdma_reset_engine(adev, ring->me, true); + amdgpu_amdkfd_resume(adev, true); + if (r) + return r; + + return 
amdgpu_ring_reset_helper_end(ring, timedout_fence); } static int sdma_v5_0_stop_queue(struct amdgpu_ring *ring) @@ -1610,6 +1627,7 @@ static int sdma_v5_0_restore_queue(struct amdgpu_ring *ring) r = sdma_v5_0_gfx_resume_instance(adev, inst_id, true); amdgpu_gfx_rlc_exit_safe_mode(adev, 0); + return r; } diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c index a6e612b4a892..3bd44c24f692 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c @@ -1318,6 +1318,7 @@ static int sdma_v5_2_sw_init(struct amdgpu_ip_block *ip_block) } for (i = 0; i < adev->sdma.num_instances; i++) { + mutex_init(&adev->sdma.instance[i].engine_reset_mutex); adev->sdma.instance[i].funcs = &sdma_v5_2_sdma_funcs; ring = &adev->sdma.instance[i].ring; ring->ring_obj = NULL; @@ -1346,11 +1347,13 @@ static int sdma_v5_2_sw_init(struct amdgpu_ip_block *ip_block) case IP_VERSION(5, 2, 2): case IP_VERSION(5, 2, 3): case IP_VERSION(5, 2, 4): - if (adev->sdma.instance[0].fw_version >= 76) + if ((adev->sdma.instance[0].fw_version >= 76) && + !amdgpu_sriov_vf(adev)) adev->sdma.supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE; break; case IP_VERSION(5, 2, 5): - if (adev->sdma.instance[0].fw_version >= 34) + if ((adev->sdma.instance[0].fw_version >= 34) && + !amdgpu_sriov_vf(adev)) adev->sdma.supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE; break; default: @@ -1451,12 +1454,27 @@ static int sdma_v5_2_wait_for_idle(struct amdgpu_ip_block *ip_block) return -ETIMEDOUT; } -static int sdma_v5_2_reset_queue(struct amdgpu_ring *ring, unsigned int vmid) +static int sdma_v5_2_reset_queue(struct amdgpu_ring *ring, + unsigned int vmid, + struct amdgpu_fence *timedout_fence) { struct amdgpu_device *adev = ring->adev; - u32 inst_id = ring->me; + int r; + + if (ring->me >= adev->sdma.num_instances) { + dev_err(adev->dev, "sdma instance not found\n"); + return -EINVAL; + } - return amdgpu_sdma_reset_engine(adev, inst_id); + amdgpu_ring_reset_helper_begin(ring, timedout_fence); + + amdgpu_amdkfd_suspend(adev, true); + r = amdgpu_sdma_reset_engine(adev, ring->me, true); + amdgpu_amdkfd_resume(adev, true); + if (r) + return r; + + return amdgpu_ring_reset_helper_end(ring, timedout_fence); } static int sdma_v5_2_stop_queue(struct amdgpu_ring *ring) @@ -1526,6 +1544,7 @@ static int sdma_v5_2_restore_queue(struct amdgpu_ring *ring) r = sdma_v5_2_gfx_resume_instance(adev, inst_id, true); amdgpu_gfx_rlc_exit_safe_mode(adev, 0); + return r; } diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c index 5a70ae17be04..db6e41967f12 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c @@ -1355,7 +1355,8 @@ static int sdma_v6_0_sw_init(struct amdgpu_ip_block *ip_block) case IP_VERSION(6, 0, 0): case IP_VERSION(6, 0, 2): case IP_VERSION(6, 0, 3): - if (adev->sdma.instance[0].fw_version >= 21) + if ((adev->sdma.instance[0].fw_version >= 21) && + !amdgpu_sriov_vf(adev)) adev->sdma.supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE; break; default: @@ -1374,9 +1375,42 @@ static int sdma_v6_0_sw_init(struct amdgpu_ip_block *ip_block) else DRM_ERROR("Failed to allocated memory for SDMA IP Dump\n"); - /* add firmware version checks here */ - if (0 && !adev->sdma.disable_uq) - adev->userq_funcs[AMDGPU_HW_IP_DMA] = &userq_mes_funcs; + switch (amdgpu_ip_version(adev, SDMA0_HWIP, 0)) { + case IP_VERSION(6, 0, 0): + if ((adev->sdma.instance[0].fw_version >= 27) && !adev->sdma.disable_uq) + 
adev->userq_funcs[AMDGPU_HW_IP_DMA] = &userq_mes_funcs; + break; + case IP_VERSION(6, 0, 1): + if ((adev->sdma.instance[0].fw_version >= 18) && !adev->sdma.disable_uq) + adev->userq_funcs[AMDGPU_HW_IP_DMA] = &userq_mes_funcs; + break; + case IP_VERSION(6, 0, 2): + if ((adev->sdma.instance[0].fw_version >= 23) && !adev->sdma.disable_uq) + adev->userq_funcs[AMDGPU_HW_IP_DMA] = &userq_mes_funcs; + break; + case IP_VERSION(6, 0, 3): + if ((adev->sdma.instance[0].fw_version >= 27) && !adev->sdma.disable_uq) + adev->userq_funcs[AMDGPU_HW_IP_DMA] = &userq_mes_funcs; + break; + case IP_VERSION(6, 1, 0): + if ((adev->sdma.instance[0].fw_version >= 14) && !adev->sdma.disable_uq) + adev->userq_funcs[AMDGPU_HW_IP_DMA] = &userq_mes_funcs; + break; + case IP_VERSION(6, 1, 1): + if ((adev->sdma.instance[0].fw_version >= 17) && !adev->sdma.disable_uq) + adev->userq_funcs[AMDGPU_HW_IP_DMA] = &userq_mes_funcs; + break; + case IP_VERSION(6, 1, 2): + if ((adev->sdma.instance[0].fw_version >= 15) && !adev->sdma.disable_uq) + adev->userq_funcs[AMDGPU_HW_IP_DMA] = &userq_mes_funcs; + break; + case IP_VERSION(6, 1, 3): + if ((adev->sdma.instance[0].fw_version >= 10) && !adev->sdma.disable_uq) + adev->userq_funcs[AMDGPU_HW_IP_DMA] = &userq_mes_funcs; + break; + default: + break; + } r = amdgpu_sdma_sysfs_reset_mask_init(adev); if (r) @@ -1537,29 +1571,29 @@ static int sdma_v6_0_ring_preempt_ib(struct amdgpu_ring *ring) return r; } -static int sdma_v6_0_reset_queue(struct amdgpu_ring *ring, unsigned int vmid) +static int sdma_v6_0_reset_queue(struct amdgpu_ring *ring, + unsigned int vmid, + struct amdgpu_fence *timedout_fence) { struct amdgpu_device *adev = ring->adev; - int i, r; + int r; - if (amdgpu_sriov_vf(adev)) + if (ring->me >= adev->sdma.num_instances) { + dev_err(adev->dev, "sdma instance not found\n"); return -EINVAL; - - for (i = 0; i < adev->sdma.num_instances; i++) { - if (ring == &adev->sdma.instance[i].ring) - break; } - if (i == adev->sdma.num_instances) { - DRM_ERROR("sdma instance not found\n"); - return -EINVAL; - } + amdgpu_ring_reset_helper_begin(ring, timedout_fence); r = amdgpu_mes_reset_legacy_queue(adev, ring, vmid, true); if (r) return r; - return sdma_v6_0_gfx_resume_instance(adev, i, true); + r = sdma_v6_0_gfx_resume_instance(adev, ring->me, true); + if (r) + return r; + + return amdgpu_ring_reset_helper_end(ring, timedout_fence); } static int sdma_v6_0_set_trap_irq_state(struct amdgpu_device *adev, diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c index ad47d0bdf777..326ecc8d37d2 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c @@ -802,29 +802,29 @@ static bool sdma_v7_0_check_soft_reset(struct amdgpu_ip_block *ip_block) return false; } -static int sdma_v7_0_reset_queue(struct amdgpu_ring *ring, unsigned int vmid) +static int sdma_v7_0_reset_queue(struct amdgpu_ring *ring, + unsigned int vmid, + struct amdgpu_fence *timedout_fence) { struct amdgpu_device *adev = ring->adev; - int i, r; + int r; - if (amdgpu_sriov_vf(adev)) + if (ring->me >= adev->sdma.num_instances) { + dev_err(adev->dev, "sdma instance not found\n"); return -EINVAL; - - for (i = 0; i < adev->sdma.num_instances; i++) { - if (ring == &adev->sdma.instance[i].ring) - break; } - if (i == adev->sdma.num_instances) { - DRM_ERROR("sdma instance not found\n"); - return -EINVAL; - } + amdgpu_ring_reset_helper_begin(ring, timedout_fence); r = amdgpu_mes_reset_legacy_queue(adev, ring, vmid, true); if (r) return r; - return 
sdma_v7_0_gfx_resume_instance(adev, i, true); + r = sdma_v7_0_gfx_resume_instance(adev, ring->me, true); + if (r) + return r; + + return amdgpu_ring_reset_helper_end(ring, timedout_fence); } /** @@ -1337,7 +1337,8 @@ static int sdma_v7_0_sw_init(struct amdgpu_ip_block *ip_block) adev->sdma.supported_reset = amdgpu_get_soft_full_reset_mask(&adev->sdma.instance[0].ring); - adev->sdma.supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE; + if (!amdgpu_sriov_vf(adev)) + adev->sdma.supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE; r = amdgpu_sdma_sysfs_reset_mask_init(adev); if (r) @@ -1349,9 +1350,15 @@ static int sdma_v7_0_sw_init(struct amdgpu_ip_block *ip_block) else DRM_ERROR("Failed to allocated memory for SDMA IP Dump\n"); - /* add firmware version checks here */ - if (0 && !adev->sdma.disable_uq) - adev->userq_funcs[AMDGPU_HW_IP_DMA] = &userq_mes_funcs; + switch (amdgpu_ip_version(adev, SDMA0_HWIP, 0)) { + case IP_VERSION(7, 0, 0): + case IP_VERSION(7, 0, 1): + if ((adev->sdma.instance[0].fw_version >= 7966358) && !adev->sdma.disable_uq) + adev->userq_funcs[AMDGPU_HW_IP_DMA] = &userq_mes_funcs; + break; + default: + break; + } return r; } diff --git a/drivers/gpu/drm/amd/amdgpu/smu_v11_0_i2c.c b/drivers/gpu/drm/amd/amdgpu/smu_v11_0_i2c.c index dd2d66090d23..68aef47254a9 100644 --- a/drivers/gpu/drm/amd/amdgpu/smu_v11_0_i2c.c +++ b/drivers/gpu/drm/amd/amdgpu/smu_v11_0_i2c.c @@ -743,7 +743,7 @@ int smu_v11_0_i2c_control_init(struct amdgpu_device *adev) adev->pm.ras_eeprom_i2c_bus = &adev->pm.smu_i2c[0].adapter; adev->pm.fru_eeprom_i2c_bus = &adev->pm.smu_i2c[0].adapter; - res = i2c_add_adapter(control); + res = devm_i2c_add_adapter(adev->dev, control); if (res) DRM_ERROR("Failed to register hw i2c, err: %d\n", res); @@ -752,9 +752,6 @@ int smu_v11_0_i2c_control_init(struct amdgpu_device *adev) void smu_v11_0_i2c_control_fini(struct amdgpu_device *adev) { - struct i2c_adapter *control = adev->pm.ras_eeprom_i2c_bus; - - i2c_del_adapter(control); adev->pm.ras_eeprom_i2c_bus = NULL; adev->pm.fru_eeprom_i2c_bus = NULL; } diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c index c457be3a3c56..9785fada4fa7 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15.c +++ b/drivers/gpu/drm/amd/amdgpu/soc15.c @@ -741,7 +741,6 @@ static void soc15_reg_base_init(struct amdgpu_device *adev) void soc15_set_virt_ops(struct amdgpu_device *adev) { adev->virt.ops = &xgpu_ai_virt_ops; - /* init soc15 reg base early enough so we can * request request full access for sriov before * set_ip_blocks. */ @@ -1218,6 +1217,8 @@ static int soc15_common_early_init(struct amdgpu_ip_block *ip_block) AMD_PG_SUPPORT_JPEG; /*TODO: need a new external_rev_id for GC 9.4.4? 
*/ adev->external_rev_id = adev->rev_id + 0x46; + if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 5, 0)) + adev->external_rev_id = adev->rev_id + 0x50; break; default: /* FIXME: not supported yet */ diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.h b/drivers/gpu/drm/amd/amdgpu/soc15.h index ef7c603b50ae..c8ac11a9cdef 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15.h +++ b/drivers/gpu/drm/amd/amdgpu/soc15.h @@ -118,7 +118,6 @@ int vega10_reg_base_init(struct amdgpu_device *adev); int vega20_reg_base_init(struct amdgpu_device *adev); int arct_reg_base_init(struct amdgpu_device *adev); int aldebaran_reg_base_init(struct amdgpu_device *adev); -void aqua_vanjaram_ip_map_init(struct amdgpu_device *adev); u64 aqua_vanjaram_encode_ext_smn_addressing(int ext_id); int aqua_vanjaram_init_soc_config(struct amdgpu_device *adev); ssize_t aqua_vanjaram_get_reg_state(struct amdgpu_device *adev, diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c b/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c index e590cbdd8de9..8dc32787d625 100644 --- a/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c @@ -536,8 +536,11 @@ static int umc_v12_0_update_ecc_status(struct amdgpu_device *adev, hwid = REG_GET_FIELD(ipid, MCMP1_IPIDT0, HardwareID); mcatype = REG_GET_FIELD(ipid, MCMP1_IPIDT0, McaType); - if ((hwid != MCA_UMC_HWID_V12_0) || (mcatype != MCA_UMC_MCATYPE_V12_0)) + /* The IP block decode of consumption is SMU */ + if (hwid != MCA_UMC_HWID_V12_0 || mcatype != MCA_UMC_MCATYPE_V12_0) { + con->umc_ecc_log.consumption_q_count++; return 0; + } if (!status) return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v3_1.c b/drivers/gpu/drm/amd/amdgpu/uvd_v3_1.c index 5dbaebb592b3..2e79a3afc774 100644 --- a/drivers/gpu/drm/amd/amdgpu/uvd_v3_1.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v3_1.c @@ -623,7 +623,22 @@ static void uvd_v3_1_enable_mgcg(struct amdgpu_device *adev, * * @ip_block: Pointer to the amdgpu_ip_block for this hw instance. * - * Initialize the hardware, boot up the VCPU and do some testing + * Initialize the hardware, boot up the VCPU and do some testing. + * + * On SI, the UVD is meant to be used in a specific power state, + * or alternatively the driver can manually enable its clock. + * In amdgpu we use the dedicated UVD power state when DPM is enabled. + * Calling amdgpu_dpm_enable_uvd makes DPM select the UVD power state + * for the SMU and afterwards enables the UVD clock. + * This is automatically done by amdgpu_uvd_ring_begin_use when work + * is submitted to the UVD ring. Here, we have to call it manually + * in order to power up UVD before firmware validation. + * + * Note that we must not disable the UVD clock here, as that would + * cause the ring test to fail. However, UVD is powered off + * automatically after the ring test: amdgpu_uvd_ring_end_use calls + * the UVD idle work handler which will disable the UVD clock when + * all fences are signalled. */ static int uvd_v3_1_hw_init(struct amdgpu_ip_block *ip_block) { @@ -633,6 +648,15 @@ static int uvd_v3_1_hw_init(struct amdgpu_ip_block *ip_block) int r; uvd_v3_1_mc_resume(adev); + uvd_v3_1_enable_mgcg(adev, true); + + /* Make sure UVD is powered during FW validation. + * It's going to be automatically powered off after the ring test. 
+ */ + if (adev->pm.dpm_enabled) + amdgpu_dpm_enable_uvd(adev, true); + else + amdgpu_asic_set_uvd_clocks(adev, 53300, 40000); r = uvd_v3_1_fw_validate(adev); if (r) { @@ -640,9 +664,6 @@ static int uvd_v3_1_hw_init(struct amdgpu_ip_block *ip_block) return r; } - uvd_v3_1_enable_mgcg(adev, true); - amdgpu_asic_set_uvd_clocks(adev, 53300, 40000); - uvd_v3_1_start(adev); r = amdgpu_ring_test_helper(ring); diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c index 1c07b701d0e4..ceb94bbb03a4 100644 --- a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c @@ -217,7 +217,8 @@ static int uvd_v6_0_enc_get_create_msg(struct amdgpu_ring *ring, uint32_t handle int i, r; r = amdgpu_job_alloc_with_ib(ring->adev, NULL, NULL, ib_size_dw * 4, - AMDGPU_IB_POOL_DIRECT, &job); + AMDGPU_IB_POOL_DIRECT, &job, + AMDGPU_KERNEL_JOB_ID_VCN_RING_TEST); if (r) return r; @@ -281,7 +282,8 @@ static int uvd_v6_0_enc_get_destroy_msg(struct amdgpu_ring *ring, int i, r; r = amdgpu_job_alloc_with_ib(ring->adev, NULL, NULL, ib_size_dw * 4, - AMDGPU_IB_POOL_DIRECT, &job); + AMDGPU_IB_POOL_DIRECT, &job, + AMDGPU_KERNEL_JOB_ID_VCN_RING_TEST); if (r) return r; diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c index 9d237b5937fb..1f8866f3f63c 100644 --- a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c @@ -225,7 +225,8 @@ static int uvd_v7_0_enc_get_create_msg(struct amdgpu_ring *ring, u32 handle, int i, r; r = amdgpu_job_alloc_with_ib(ring->adev, NULL, NULL, ib_size_dw * 4, - AMDGPU_IB_POOL_DIRECT, &job); + AMDGPU_IB_POOL_DIRECT, &job, + AMDGPU_KERNEL_JOB_ID_VCN_RING_TEST); if (r) return r; @@ -288,7 +289,8 @@ static int uvd_v7_0_enc_get_destroy_msg(struct amdgpu_ring *ring, u32 handle, int i, r; r = amdgpu_job_alloc_with_ib(ring->adev, NULL, NULL, ib_size_dw * 4, - AMDGPU_IB_POOL_DIRECT, &job); + AMDGPU_IB_POOL_DIRECT, &job, + AMDGPU_KERNEL_JOB_ID_VCN_RING_TEST); if (r) return r; diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c index c74947705d77..a316797875a8 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c @@ -193,7 +193,7 @@ static int vcn_v1_0_sw_init(struct amdgpu_ip_block *ip_block) adev->vcn.inst[0].pause_dpg_mode = vcn_v1_0_pause_dpg_mode; if (amdgpu_vcnfw_log) { - volatile struct amdgpu_fw_shared *fw_shared = adev->vcn.inst->fw_shared.cpu_addr; + struct amdgpu_fw_shared *fw_shared = adev->vcn.inst->fw_shared.cpu_addr; fw_shared->present_flag_0 = 0; amdgpu_vcn_fwlog_init(adev->vcn.inst); @@ -230,11 +230,11 @@ static int vcn_v1_0_sw_fini(struct amdgpu_ip_block *ip_block) jpeg_v1_0_sw_fini(ip_block); - r = amdgpu_vcn_sw_fini(adev, 0); + amdgpu_vcn_sw_fini(adev, 0); kfree(adev->vcn.ip_dump); - return r; + return 0; } /** @@ -1338,7 +1338,6 @@ static int vcn_v1_0_pause_dpg_mode(struct amdgpu_vcn_inst *vinst, WREG32_SOC15(UVD, 0, mmUVD_RB_RPTR2, lower_32_bits(ring->wptr)); WREG32_SOC15(UVD, 0, mmUVD_RB_WPTR2, lower_32_bits(ring->wptr)); - ring = &adev->vcn.inst->ring_dec; WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_WPTR, RREG32_SOC15(UVD, 0, mmUVD_SCRATCH2) & 0x7FFFFFFF); SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_POWER_STATUS, @@ -1399,7 +1398,6 @@ static int vcn_v1_0_pause_dpg_mode(struct amdgpu_vcn_inst *vinst, WREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_CNTL, UVD_JRBC_RB_CNTL__RB_RPTR_WR_EN_MASK); - ring = &adev->vcn.inst->ring_dec; WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_WPTR, RREG32_SOC15(UVD, 0, mmUVD_SCRATCH2) & 
0x7FFFFFFF); SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_POWER_STATUS, diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c index 148b651be7ca..8897dcc9c1a0 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c @@ -98,6 +98,8 @@ static int vcn_v2_0_set_pg_state(struct amdgpu_vcn_inst *vinst, static int vcn_v2_0_pause_dpg_mode(struct amdgpu_vcn_inst *vinst, struct dpg_pause_state *new_state); static int vcn_v2_0_start_sriov(struct amdgpu_device *adev); +static int vcn_v2_0_reset(struct amdgpu_vcn_inst *vinst); + /** * vcn_v2_0_early_init - set function pointers and load microcode * @@ -134,10 +136,8 @@ static int vcn_v2_0_sw_init(struct amdgpu_ip_block *ip_block) { struct amdgpu_ring *ring; int i, r; - uint32_t reg_count = ARRAY_SIZE(vcn_reg_list_2_0); - uint32_t *ptr; struct amdgpu_device *adev = ip_block->adev; - volatile struct amdgpu_fw_shared *fw_shared; + struct amdgpu_fw_shared *fw_shared; /* VCN DEC TRAP */ r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN, @@ -213,6 +213,12 @@ static int vcn_v2_0_sw_init(struct amdgpu_ip_block *ip_block) } adev->vcn.inst[0].pause_dpg_mode = vcn_v2_0_pause_dpg_mode; + adev->vcn.inst[0].reset = vcn_v2_0_reset; + + adev->vcn.supported_reset = + amdgpu_get_soft_full_reset_mask(&adev->vcn.inst[0].ring_enc[0]); + if (!amdgpu_sriov_vf(adev)) + adev->vcn.supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE; r = amdgpu_virt_alloc_mm_table(adev); if (r) @@ -224,14 +230,13 @@ static int vcn_v2_0_sw_init(struct amdgpu_ip_block *ip_block) if (amdgpu_vcnfw_log) amdgpu_vcn_fwlog_init(adev->vcn.inst); - /* Allocate memory for VCN IP Dump buffer */ - ptr = kcalloc(adev->vcn.num_vcn_inst * reg_count, sizeof(uint32_t), GFP_KERNEL); - if (!ptr) { - DRM_ERROR("Failed to allocate memory for VCN IP Dump\n"); - adev->vcn.ip_dump = NULL; - } else { - adev->vcn.ip_dump = ptr; - } + r = amdgpu_vcn_reg_dump_init(adev, vcn_reg_list_2_0, ARRAY_SIZE(vcn_reg_list_2_0)); + if (r) + return r; + + r = amdgpu_vcn_sysfs_reset_mask_init(adev); + if (r) + return r; return 0; } @@ -247,7 +252,7 @@ static int vcn_v2_0_sw_fini(struct amdgpu_ip_block *ip_block) { int r, idx; struct amdgpu_device *adev = ip_block->adev; - volatile struct amdgpu_fw_shared *fw_shared = adev->vcn.inst->fw_shared.cpu_addr; + struct amdgpu_fw_shared *fw_shared = adev->vcn.inst->fw_shared.cpu_addr; if (drm_dev_enter(adev_to_drm(adev), &idx)) { fw_shared->present_flag_0 = 0; @@ -260,11 +265,11 @@ static int vcn_v2_0_sw_fini(struct amdgpu_ip_block *ip_block) if (r) return r; - r = amdgpu_vcn_sw_fini(adev, 0); + amdgpu_vcn_sysfs_reset_mask_fini(adev); - kfree(adev->vcn.ip_dump); + amdgpu_vcn_sw_fini(adev, 0); - return r; + return 0; } /** @@ -848,9 +853,10 @@ static void vcn_v2_0_enable_static_power_gating(struct amdgpu_vcn_inst *vinst) static int vcn_v2_0_start_dpg_mode(struct amdgpu_vcn_inst *vinst, bool indirect) { struct amdgpu_device *adev = vinst->adev; - volatile struct amdgpu_fw_shared *fw_shared = adev->vcn.inst->fw_shared.cpu_addr; + struct amdgpu_fw_shared *fw_shared = adev->vcn.inst->fw_shared.cpu_addr; struct amdgpu_ring *ring = &adev->vcn.inst->ring_dec; uint32_t rb_bufsz, tmp; + int ret; vcn_v2_0_enable_static_power_gating(vinst); @@ -934,8 +940,13 @@ static int vcn_v2_0_start_dpg_mode(struct amdgpu_vcn_inst *vinst, bool indirect) UVD, 0, mmUVD_MASTINT_EN), UVD_MASTINT_EN__VCPU_EN_MASK, 0, indirect); - if (indirect) - amdgpu_vcn_psp_update_sram(adev, 0, 0); + if (indirect) { + ret = amdgpu_vcn_psp_update_sram(adev, 0, 0); + if (ret) { + 
dev_err(adev->dev, "vcn sram load failed %d\n", ret); + return ret; + } + } /* force RBC into idle state */ rb_bufsz = order_base_2(ring->ring_size); @@ -990,7 +1001,7 @@ static int vcn_v2_0_start_dpg_mode(struct amdgpu_vcn_inst *vinst, bool indirect) static int vcn_v2_0_start(struct amdgpu_vcn_inst *vinst) { struct amdgpu_device *adev = vinst->adev; - volatile struct amdgpu_fw_shared *fw_shared = adev->vcn.inst->fw_shared.cpu_addr; + struct amdgpu_fw_shared *fw_shared = adev->vcn.inst->fw_shared.cpu_addr; struct amdgpu_ring *ring = &adev->vcn.inst->ring_dec; uint32_t rb_bufsz, tmp; uint32_t lmi_swap_cntl; @@ -1297,7 +1308,7 @@ static int vcn_v2_0_pause_dpg_mode(struct amdgpu_vcn_inst *vinst, UVD_POWER_STATUS__UVD_POWER_STATUS_MASK); if (!ret_code) { - volatile struct amdgpu_fw_shared *fw_shared = adev->vcn.inst->fw_shared.cpu_addr; + struct amdgpu_fw_shared *fw_shared = adev->vcn.inst->fw_shared.cpu_addr; /* pause DPG */ reg_data |= UVD_DPG_PAUSE__NJ_PAUSE_DPG_REQ_MASK; WREG32_SOC15(UVD, 0, mmUVD_DPG_PAUSE, reg_data); @@ -1355,6 +1366,16 @@ static int vcn_v2_0_pause_dpg_mode(struct amdgpu_vcn_inst *vinst, return 0; } +static int vcn_v2_0_reset(struct amdgpu_vcn_inst *vinst) +{ + int r; + + r = vcn_v2_0_stop(vinst); + if (r) + return r; + return vcn_v2_0_start(vinst); +} + static bool vcn_v2_0_is_idle(struct amdgpu_ip_block *ip_block) { struct amdgpu_device *adev = ip_block->adev; @@ -2071,66 +2092,6 @@ static int vcn_v2_0_start_sriov(struct amdgpu_device *adev) return vcn_v2_0_start_mmsch(adev, &adev->virt.mm_table); } -static void vcn_v2_0_print_ip_state(struct amdgpu_ip_block *ip_block, struct drm_printer *p) -{ - struct amdgpu_device *adev = ip_block->adev; - int i, j; - uint32_t reg_count = ARRAY_SIZE(vcn_reg_list_2_0); - uint32_t inst_off, is_powered; - - if (!adev->vcn.ip_dump) - return; - - drm_printf(p, "num_instances:%d\n", adev->vcn.num_vcn_inst); - for (i = 0; i < adev->vcn.num_vcn_inst; i++) { - if (adev->vcn.harvest_config & (1 << i)) { - drm_printf(p, "\nHarvested Instance:VCN%d Skipping dump\n", i); - continue; - } - - inst_off = i * reg_count; - is_powered = (adev->vcn.ip_dump[inst_off] & - UVD_POWER_STATUS__UVD_POWER_STATUS_MASK) != 1; - - if (is_powered) { - drm_printf(p, "\nActive Instance:VCN%d\n", i); - for (j = 0; j < reg_count; j++) - drm_printf(p, "%-50s \t 0x%08x\n", vcn_reg_list_2_0[j].reg_name, - adev->vcn.ip_dump[inst_off + j]); - } else { - drm_printf(p, "\nInactive Instance:VCN%d\n", i); - } - } -} - -static void vcn_v2_0_dump_ip_state(struct amdgpu_ip_block *ip_block) -{ - struct amdgpu_device *adev = ip_block->adev; - int i, j; - bool is_powered; - uint32_t inst_off; - uint32_t reg_count = ARRAY_SIZE(vcn_reg_list_2_0); - - if (!adev->vcn.ip_dump) - return; - - for (i = 0; i < adev->vcn.num_vcn_inst; i++) { - if (adev->vcn.harvest_config & (1 << i)) - continue; - - inst_off = i * reg_count; - /* mmUVD_POWER_STATUS is always readable and is first element of the array */ - adev->vcn.ip_dump[inst_off] = RREG32_SOC15(VCN, i, mmUVD_POWER_STATUS); - is_powered = (adev->vcn.ip_dump[inst_off] & - UVD_POWER_STATUS__UVD_POWER_STATUS_MASK) != 1; - - if (is_powered) - for (j = 1; j < reg_count; j++) - adev->vcn.ip_dump[inst_off + j] = - RREG32(SOC15_REG_ENTRY_OFFSET_INST(vcn_reg_list_2_0[j], i)); - } -} - static const struct amd_ip_funcs vcn_v2_0_ip_funcs = { .name = "vcn_v2_0", .early_init = vcn_v2_0_early_init, @@ -2144,8 +2105,8 @@ static const struct amd_ip_funcs vcn_v2_0_ip_funcs = { .wait_for_idle = vcn_v2_0_wait_for_idle, .set_clockgating_state = 
vcn_v2_0_set_clockgating_state, .set_powergating_state = vcn_set_powergating_state, - .dump_ip_state = vcn_v2_0_dump_ip_state, - .print_ip_state = vcn_v2_0_print_ip_state, + .dump_ip_state = amdgpu_vcn_dump_ip_state, + .print_ip_state = amdgpu_vcn_print_ip_state, }; static const struct amdgpu_ring_funcs vcn_v2_0_dec_ring_vm_funcs = { @@ -2176,6 +2137,7 @@ static const struct amdgpu_ring_funcs vcn_v2_0_dec_ring_vm_funcs = { .emit_wreg = vcn_v2_0_dec_ring_emit_wreg, .emit_reg_wait = vcn_v2_0_dec_ring_emit_reg_wait, .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper, + .reset = amdgpu_vcn_ring_reset, }; static const struct amdgpu_ring_funcs vcn_v2_0_enc_ring_vm_funcs = { @@ -2205,6 +2167,7 @@ static const struct amdgpu_ring_funcs vcn_v2_0_enc_ring_vm_funcs = { .emit_wreg = vcn_v2_0_enc_ring_emit_wreg, .emit_reg_wait = vcn_v2_0_enc_ring_emit_reg_wait, .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper, + .reset = amdgpu_vcn_ring_reset, }; static void vcn_v2_0_set_dec_ring_funcs(struct amdgpu_device *adev) diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c b/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c index 58b527a6b795..cebee453871c 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c @@ -102,6 +102,7 @@ static int vcn_v2_5_pause_dpg_mode(struct amdgpu_vcn_inst *vinst, struct dpg_pause_state *new_state); static int vcn_v2_5_sriov_start(struct amdgpu_device *adev); static void vcn_v2_5_set_ras_funcs(struct amdgpu_device *adev); +static int vcn_v2_5_reset(struct amdgpu_vcn_inst *vinst); static int amdgpu_ih_clientid_vcns[] = { SOC15_IH_CLIENTID_VCN, @@ -115,7 +116,6 @@ static void vcn_v2_5_idle_work_handler(struct work_struct *work) struct amdgpu_device *adev = vcn_inst->adev; unsigned int fences = 0, fence[AMDGPU_MAX_VCN_INSTANCES] = {0}; unsigned int i, j; - int r = 0; for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { struct amdgpu_vcn_inst *v = &adev->vcn.inst[i]; @@ -148,15 +148,7 @@ static void vcn_v2_5_idle_work_handler(struct work_struct *work) if (!fences && !atomic_read(&adev->vcn.inst[0].total_submission_cnt)) { amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCN, AMD_PG_STATE_GATE); - mutex_lock(&adev->vcn.workload_profile_mutex); - if (adev->vcn.workload_profile_active) { - r = amdgpu_dpm_switch_power_profile(adev, PP_SMC_POWER_PROFILE_VIDEO, - false); - if (r) - dev_warn(adev->dev, "(%d) failed to disable video power profile mode\n", r); - adev->vcn.workload_profile_active = false; - } - mutex_unlock(&adev->vcn.workload_profile_mutex); + amdgpu_vcn_put_profile(adev); } else { schedule_delayed_work(&adev->vcn.inst[0].idle_work, VCN_IDLE_TIMEOUT); } @@ -166,7 +158,6 @@ static void vcn_v2_5_ring_begin_use(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; struct amdgpu_vcn_inst *v = &adev->vcn.inst[ring->me]; - int r = 0; atomic_inc(&adev->vcn.inst[0].total_submission_cnt); @@ -176,20 +167,6 @@ static void vcn_v2_5_ring_begin_use(struct amdgpu_ring *ring) * the delayed work so there is no one else to set it to false * and we don't care if someone else sets it to true. 
*/ - if (adev->vcn.workload_profile_active) - goto pg_lock; - - mutex_lock(&adev->vcn.workload_profile_mutex); - if (!adev->vcn.workload_profile_active) { - r = amdgpu_dpm_switch_power_profile(adev, PP_SMC_POWER_PROFILE_VIDEO, - true); - if (r) - dev_warn(adev->dev, "(%d) failed to switch to video power profile mode\n", r); - adev->vcn.workload_profile_active = true; - } - mutex_unlock(&adev->vcn.workload_profile_mutex); - -pg_lock: mutex_lock(&adev->vcn.inst[0].vcn_pg_lock); amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCN, AMD_PG_STATE_UNGATE); @@ -217,6 +194,7 @@ pg_lock: v->pause_dpg_mode(v, &new_state); } mutex_unlock(&adev->vcn.inst[0].vcn_pg_lock); + amdgpu_vcn_get_profile(adev); } static void vcn_v2_5_ring_end_use(struct amdgpu_ring *ring) @@ -296,12 +274,10 @@ static int vcn_v2_5_sw_init(struct amdgpu_ip_block *ip_block) { struct amdgpu_ring *ring; int i, j, r; - uint32_t reg_count = ARRAY_SIZE(vcn_reg_list_2_5); - uint32_t *ptr; struct amdgpu_device *adev = ip_block->adev; for (j = 0; j < adev->vcn.num_vcn_inst; j++) { - volatile struct amdgpu_fw_shared *fw_shared; + struct amdgpu_fw_shared *fw_shared; if (adev->vcn.harvest_config & (1 << j)) continue; @@ -404,8 +380,14 @@ static int vcn_v2_5_sw_init(struct amdgpu_ip_block *ip_block) if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) adev->vcn.inst[j].pause_dpg_mode = vcn_v2_5_pause_dpg_mode; + adev->vcn.inst[j].reset = vcn_v2_5_reset; } + adev->vcn.supported_reset = + amdgpu_get_soft_full_reset_mask(&adev->vcn.inst[0].ring_enc[0]); + if (!amdgpu_sriov_vf(adev)) + adev->vcn.supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE; + if (amdgpu_sriov_vf(adev)) { r = amdgpu_virt_alloc_mm_table(adev); if (r) @@ -416,14 +398,13 @@ static int vcn_v2_5_sw_init(struct amdgpu_ip_block *ip_block) if (r) return r; - /* Allocate memory for VCN IP Dump buffer */ - ptr = kcalloc(adev->vcn.num_vcn_inst * reg_count, sizeof(uint32_t), GFP_KERNEL); - if (!ptr) { - DRM_ERROR("Failed to allocate memory for VCN IP Dump\n"); - adev->vcn.ip_dump = NULL; - } else { - adev->vcn.ip_dump = ptr; - } + r = amdgpu_vcn_reg_dump_init(adev, vcn_reg_list_2_5, ARRAY_SIZE(vcn_reg_list_2_5)); + if (r) + return r; + + r = amdgpu_vcn_sysfs_reset_mask_init(adev); + if (r) + return r; return 0; } @@ -439,7 +420,7 @@ static int vcn_v2_5_sw_fini(struct amdgpu_ip_block *ip_block) { int i, r, idx; struct amdgpu_device *adev = ip_block->adev; - volatile struct amdgpu_fw_shared *fw_shared; + struct amdgpu_fw_shared *fw_shared; if (drm_dev_enter(adev_to_drm(adev), &idx)) { for (i = 0; i < adev->vcn.num_vcn_inst; i++) { @@ -455,17 +436,15 @@ static int vcn_v2_5_sw_fini(struct amdgpu_ip_block *ip_block) if (amdgpu_sriov_vf(adev)) amdgpu_virt_free_mm_table(adev); + amdgpu_vcn_sysfs_reset_mask_fini(adev); + for (i = 0; i < adev->vcn.num_vcn_inst; i++) { r = amdgpu_vcn_suspend(adev, i); if (r) return r; - r = amdgpu_vcn_sw_fini(adev, i); - if (r) - return r; + amdgpu_vcn_sw_fini(adev, i); } - kfree(adev->vcn.ip_dump); - return 0; } @@ -1019,9 +998,10 @@ static int vcn_v2_5_start_dpg_mode(struct amdgpu_vcn_inst *vinst, bool indirect) { struct amdgpu_device *adev = vinst->adev; int inst_idx = vinst->inst; - volatile struct amdgpu_fw_shared *fw_shared = adev->vcn.inst[inst_idx].fw_shared.cpu_addr; + struct amdgpu_fw_shared *fw_shared = adev->vcn.inst[inst_idx].fw_shared.cpu_addr; struct amdgpu_ring *ring; uint32_t rb_bufsz, tmp; + int ret; /* disable register anti-hang mechanism */ WREG32_P(SOC15_REG_OFFSET(VCN, inst_idx, mmUVD_POWER_STATUS), 1, @@ -1112,8 +1092,13 @@ static int 
vcn_v2_5_start_dpg_mode(struct amdgpu_vcn_inst *vinst, bool indirect) VCN, 0, mmUVD_MASTINT_EN), UVD_MASTINT_EN__VCPU_EN_MASK, 0, indirect); - if (indirect) - amdgpu_vcn_psp_update_sram(adev, inst_idx, 0); + if (indirect) { + ret = amdgpu_vcn_psp_update_sram(adev, inst_idx, 0); + if (ret) { + dev_err(adev->dev, "vcn sram load failed %d\n", ret); + return ret; + } + } ring = &adev->vcn.inst[inst_idx].ring_dec; /* force RBC into idle state */ @@ -1170,7 +1155,7 @@ static int vcn_v2_5_start(struct amdgpu_vcn_inst *vinst) { struct amdgpu_device *adev = vinst->adev; int i = vinst->inst; - volatile struct amdgpu_fw_shared *fw_shared = + struct amdgpu_fw_shared *fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr; struct amdgpu_ring *ring; uint32_t rb_bufsz, tmp; @@ -1682,7 +1667,7 @@ static int vcn_v2_5_pause_dpg_mode(struct amdgpu_vcn_inst *vinst, UVD_POWER_STATUS__UVD_POWER_STATUS_MASK); if (!ret_code) { - volatile struct amdgpu_fw_shared *fw_shared = adev->vcn.inst[inst_idx].fw_shared.cpu_addr; + struct amdgpu_fw_shared *fw_shared = adev->vcn.inst[inst_idx].fw_shared.cpu_addr; /* pause DPG */ reg_data |= UVD_DPG_PAUSE__NJ_PAUSE_DPG_REQ_MASK; @@ -1816,6 +1801,7 @@ static const struct amdgpu_ring_funcs vcn_v2_5_dec_ring_vm_funcs = { .emit_wreg = vcn_v2_0_dec_ring_emit_wreg, .emit_reg_wait = vcn_v2_0_dec_ring_emit_reg_wait, .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper, + .reset = amdgpu_vcn_ring_reset, }; /** @@ -1914,6 +1900,7 @@ static const struct amdgpu_ring_funcs vcn_v2_5_enc_ring_vm_funcs = { .emit_wreg = vcn_v2_0_enc_ring_emit_wreg, .emit_reg_wait = vcn_v2_0_enc_ring_emit_reg_wait, .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper, + .reset = amdgpu_vcn_ring_reset, }; static void vcn_v2_5_set_dec_ring_funcs(struct amdgpu_device *adev) @@ -1942,6 +1929,16 @@ static void vcn_v2_5_set_enc_ring_funcs(struct amdgpu_device *adev) } } +static int vcn_v2_5_reset(struct amdgpu_vcn_inst *vinst) +{ + int r; + + r = vcn_v2_5_stop(vinst); + if (r) + return r; + return vcn_v2_5_start(vinst); +} + static bool vcn_v2_5_is_idle(struct amdgpu_ip_block *ip_block) { struct amdgpu_device *adev = ip_block->adev; @@ -2102,66 +2099,6 @@ static void vcn_v2_5_set_irq_funcs(struct amdgpu_device *adev) } } -static void vcn_v2_5_print_ip_state(struct amdgpu_ip_block *ip_block, struct drm_printer *p) -{ - struct amdgpu_device *adev = ip_block->adev; - int i, j; - uint32_t reg_count = ARRAY_SIZE(vcn_reg_list_2_5); - uint32_t inst_off, is_powered; - - if (!adev->vcn.ip_dump) - return; - - drm_printf(p, "num_instances:%d\n", adev->vcn.num_vcn_inst); - for (i = 0; i < adev->vcn.num_vcn_inst; i++) { - if (adev->vcn.harvest_config & (1 << i)) { - drm_printf(p, "\nHarvested Instance:VCN%d Skipping dump\n", i); - continue; - } - - inst_off = i * reg_count; - is_powered = (adev->vcn.ip_dump[inst_off] & - UVD_POWER_STATUS__UVD_POWER_STATUS_MASK) != 1; - - if (is_powered) { - drm_printf(p, "\nActive Instance:VCN%d\n", i); - for (j = 0; j < reg_count; j++) - drm_printf(p, "%-50s \t 0x%08x\n", vcn_reg_list_2_5[j].reg_name, - adev->vcn.ip_dump[inst_off + j]); - } else { - drm_printf(p, "\nInactive Instance:VCN%d\n", i); - } - } -} - -static void vcn_v2_5_dump_ip_state(struct amdgpu_ip_block *ip_block) -{ - struct amdgpu_device *adev = ip_block->adev; - int i, j; - bool is_powered; - uint32_t inst_off; - uint32_t reg_count = ARRAY_SIZE(vcn_reg_list_2_5); - - if (!adev->vcn.ip_dump) - return; - - for (i = 0; i < adev->vcn.num_vcn_inst; i++) { - if (adev->vcn.harvest_config & (1 << i)) - 
continue; - - inst_off = i * reg_count; - /* mmUVD_POWER_STATUS is always readable and is first element of the array */ - adev->vcn.ip_dump[inst_off] = RREG32_SOC15(VCN, i, mmUVD_POWER_STATUS); - is_powered = (adev->vcn.ip_dump[inst_off] & - UVD_POWER_STATUS__UVD_POWER_STATUS_MASK) != 1; - - if (is_powered) - for (j = 1; j < reg_count; j++) - adev->vcn.ip_dump[inst_off + j] = - RREG32(SOC15_REG_ENTRY_OFFSET_INST(vcn_reg_list_2_5[j], i)); - } -} - static const struct amd_ip_funcs vcn_v2_5_ip_funcs = { .name = "vcn_v2_5", .early_init = vcn_v2_5_early_init, @@ -2175,8 +2112,8 @@ static const struct amd_ip_funcs vcn_v2_5_ip_funcs = { .wait_for_idle = vcn_v2_5_wait_for_idle, .set_clockgating_state = vcn_v2_5_set_clockgating_state, .set_powergating_state = vcn_set_powergating_state, - .dump_ip_state = vcn_v2_5_dump_ip_state, - .print_ip_state = vcn_v2_5_print_ip_state, + .dump_ip_state = amdgpu_vcn_dump_ip_state, + .print_ip_state = amdgpu_vcn_print_ip_state, }; static const struct amd_ip_funcs vcn_v2_6_ip_funcs = { @@ -2192,8 +2129,8 @@ static const struct amd_ip_funcs vcn_v2_6_ip_funcs = { .wait_for_idle = vcn_v2_5_wait_for_idle, .set_clockgating_state = vcn_v2_5_set_clockgating_state, .set_powergating_state = vcn_set_powergating_state, - .dump_ip_state = vcn_v2_5_dump_ip_state, - .print_ip_state = vcn_v2_5_print_ip_state, + .dump_ip_state = amdgpu_vcn_dump_ip_state, + .print_ip_state = amdgpu_vcn_print_ip_state, }; const struct amdgpu_ip_block_version vcn_v2_5_ip_block = diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c index 9fb0d5380589..d9cf8f0feeb3 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c @@ -110,6 +110,7 @@ static int vcn_v3_0_set_pg_state(struct amdgpu_vcn_inst *vinst, enum amd_powergating_state state); static int vcn_v3_0_pause_dpg_mode(struct amdgpu_vcn_inst *vinst, struct dpg_pause_state *new_state); +static int vcn_v3_0_reset(struct amdgpu_vcn_inst *vinst); static void vcn_v3_0_dec_ring_set_wptr(struct amdgpu_ring *ring); static void vcn_v3_0_enc_ring_set_wptr(struct amdgpu_ring *ring); @@ -174,8 +175,6 @@ static int vcn_v3_0_sw_init(struct amdgpu_ip_block *ip_block) struct amdgpu_ring *ring; int i, j, r; int vcn_doorbell_index = 0; - uint32_t reg_count = ARRAY_SIZE(vcn_reg_list_3_0); - uint32_t *ptr; struct amdgpu_device *adev = ip_block->adev; /* @@ -192,7 +191,7 @@ static int vcn_v3_0_sw_init(struct amdgpu_ip_block *ip_block) } for (i = 0; i < adev->vcn.num_vcn_inst; i++) { - volatile struct amdgpu_fw_shared *fw_shared; + struct amdgpu_fw_shared *fw_shared; if (adev->vcn.harvest_config & (1 << i)) continue; @@ -289,22 +288,27 @@ static int vcn_v3_0_sw_init(struct amdgpu_ip_block *ip_block) if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) adev->vcn.inst[i].pause_dpg_mode = vcn_v3_0_pause_dpg_mode; + adev->vcn.inst[i].reset = vcn_v3_0_reset; } + adev->vcn.supported_reset = + amdgpu_get_soft_full_reset_mask(&adev->vcn.inst[0].ring_enc[0]); + if (!amdgpu_sriov_vf(adev)) + adev->vcn.supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE; + if (amdgpu_sriov_vf(adev)) { r = amdgpu_virt_alloc_mm_table(adev); if (r) return r; } - /* Allocate memory for VCN IP Dump buffer */ - ptr = kcalloc(adev->vcn.num_vcn_inst * reg_count, sizeof(uint32_t), GFP_KERNEL); - if (ptr == NULL) { - DRM_ERROR("Failed to allocate memory for VCN IP Dump\n"); - adev->vcn.ip_dump = NULL; - } else { - adev->vcn.ip_dump = ptr; - } + r = amdgpu_vcn_reg_dump_init(adev, vcn_reg_list_3_0, ARRAY_SIZE(vcn_reg_list_3_0)); + if (r) + return 
r; + + r = amdgpu_vcn_sysfs_reset_mask_init(adev); + if (r) + return r; return 0; } @@ -323,7 +327,7 @@ static int vcn_v3_0_sw_fini(struct amdgpu_ip_block *ip_block) if (drm_dev_enter(adev_to_drm(adev), &idx)) { for (i = 0; i < adev->vcn.num_vcn_inst; i++) { - volatile struct amdgpu_fw_shared *fw_shared; + struct amdgpu_fw_shared *fw_shared; if (adev->vcn.harvest_config & (1 << i)) continue; @@ -338,17 +342,16 @@ static int vcn_v3_0_sw_fini(struct amdgpu_ip_block *ip_block) if (amdgpu_sriov_vf(adev)) amdgpu_virt_free_mm_table(adev); + amdgpu_vcn_sysfs_reset_mask_fini(adev); + for (i = 0; i < adev->vcn.num_vcn_inst; i++) { r = amdgpu_vcn_suspend(adev, i); if (r) return r; - r = amdgpu_vcn_sw_fini(adev, i); - if (r) - return r; + amdgpu_vcn_sw_fini(adev, i); } - kfree(adev->vcn.ip_dump); return 0; } @@ -1026,9 +1029,10 @@ static int vcn_v3_0_start_dpg_mode(struct amdgpu_vcn_inst *vinst, bool indirect) { struct amdgpu_device *adev = vinst->adev; int inst_idx = vinst->inst; - volatile struct amdgpu_fw_shared *fw_shared = adev->vcn.inst[inst_idx].fw_shared.cpu_addr; + struct amdgpu_fw_shared *fw_shared = adev->vcn.inst[inst_idx].fw_shared.cpu_addr; struct amdgpu_ring *ring; uint32_t rb_bufsz, tmp; + int ret; /* disable register anti-hang mechanism */ WREG32_P(SOC15_REG_OFFSET(VCN, inst_idx, mmUVD_POWER_STATUS), 1, @@ -1121,8 +1125,13 @@ static int vcn_v3_0_start_dpg_mode(struct amdgpu_vcn_inst *vinst, bool indirect) WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET( VCN, inst_idx, mmUVD_VCPU_CNTL), tmp, 0, indirect); - if (indirect) - amdgpu_vcn_psp_update_sram(adev, inst_idx, 0); + if (indirect) { + ret = amdgpu_vcn_psp_update_sram(adev, inst_idx, 0); + if (ret) { + dev_err(adev->dev, "vcn sram load failed %d\n", ret); + return ret; + } + } ring = &adev->vcn.inst[inst_idx].ring_dec; /* force RBC into idle state */ @@ -1185,7 +1194,7 @@ static int vcn_v3_0_start(struct amdgpu_vcn_inst *vinst) { struct amdgpu_device *adev = vinst->adev; int i = vinst->inst; - volatile struct amdgpu_fw_shared *fw_shared; + struct amdgpu_fw_shared *fw_shared; struct amdgpu_ring *ring; uint32_t rb_bufsz, tmp; int j, k, r; @@ -1706,7 +1715,7 @@ static int vcn_v3_0_pause_dpg_mode(struct amdgpu_vcn_inst *vinst, { struct amdgpu_device *adev = vinst->adev; int inst_idx = vinst->inst; - volatile struct amdgpu_fw_shared *fw_shared; + struct amdgpu_fw_shared *fw_shared; struct amdgpu_ring *ring; uint32_t reg_data = 0; int ret_code; @@ -1825,7 +1834,7 @@ static uint64_t vcn_v3_0_dec_ring_get_wptr(struct amdgpu_ring *ring) static void vcn_v3_0_dec_ring_set_wptr(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; - volatile struct amdgpu_fw_shared *fw_shared; + struct amdgpu_fw_shared *fw_shared; if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) { /*whenever update RBC_RB_WPTR, we save the wptr in shared rb.wptr and scratch2 */ @@ -1875,15 +1884,19 @@ static int vcn_v3_0_limit_sched(struct amdgpu_cs_parser *p, struct amdgpu_job *job) { struct drm_gpu_scheduler **scheds; - - /* The create msg must be in the first IB submitted */ - if (atomic_read(&job->base.entity->fence_seq)) - return -EINVAL; + struct dma_fence *fence; /* if VCN0 is harvested, we can't support AV1 */ if (p->adev->vcn.harvest_config & AMDGPU_VCN_HARVEST_VCN0) return -EINVAL; + /* wait for all jobs to finish before switching to instance 0 */ + fence = amdgpu_ctx_get_fence(p->ctx, job->base.entity, ~0ull); + if (fence) { + dma_fence_wait(fence, false); + dma_fence_put(fence); + } + scheds = p->adev->gpu_sched[AMDGPU_HW_IP_VCN_DEC] 
[AMDGPU_RING_PRIO_DEFAULT].sched; drm_sched_entity_modify_sched(job->base.entity, scheds, 1); @@ -2033,6 +2046,7 @@ static const struct amdgpu_ring_funcs vcn_v3_0_dec_ring_vm_funcs = { .emit_wreg = vcn_v2_0_dec_ring_emit_wreg, .emit_reg_wait = vcn_v2_0_dec_ring_emit_reg_wait, .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper, + .reset = amdgpu_vcn_ring_reset, }; /** @@ -2131,6 +2145,7 @@ static const struct amdgpu_ring_funcs vcn_v3_0_enc_ring_vm_funcs = { .emit_wreg = vcn_v2_0_enc_ring_emit_wreg, .emit_reg_wait = vcn_v2_0_enc_ring_emit_reg_wait, .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper, + .reset = amdgpu_vcn_ring_reset, }; static void vcn_v3_0_set_dec_ring_funcs(struct amdgpu_device *adev) @@ -2164,6 +2179,18 @@ static void vcn_v3_0_set_enc_ring_funcs(struct amdgpu_device *adev) } } +static int vcn_v3_0_reset(struct amdgpu_vcn_inst *vinst) +{ + int r; + + r = vcn_v3_0_stop(vinst); + if (r) + return r; + vcn_v3_0_enable_clock_gating(vinst); + vcn_v3_0_enable_static_power_gating(vinst); + return vcn_v3_0_start(vinst); +} + static bool vcn_v3_0_is_idle(struct amdgpu_ip_block *ip_block) { struct amdgpu_device *adev = ip_block->adev; @@ -2315,67 +2342,6 @@ static void vcn_v3_0_set_irq_funcs(struct amdgpu_device *adev) } } -static void vcn_v3_0_print_ip_state(struct amdgpu_ip_block *ip_block, struct drm_printer *p) -{ - struct amdgpu_device *adev = ip_block->adev; - int i, j; - uint32_t reg_count = ARRAY_SIZE(vcn_reg_list_3_0); - uint32_t inst_off; - bool is_powered; - - if (!adev->vcn.ip_dump) - return; - - drm_printf(p, "num_instances:%d\n", adev->vcn.num_vcn_inst); - for (i = 0; i < adev->vcn.num_vcn_inst; i++) { - if (adev->vcn.harvest_config & (1 << i)) { - drm_printf(p, "\nHarvested Instance:VCN%d Skipping dump\n", i); - continue; - } - - inst_off = i * reg_count; - is_powered = (adev->vcn.ip_dump[inst_off] & - UVD_POWER_STATUS__UVD_POWER_STATUS_MASK) != 1; - - if (is_powered) { - drm_printf(p, "\nActive Instance:VCN%d\n", i); - for (j = 0; j < reg_count; j++) - drm_printf(p, "%-50s \t 0x%08x\n", vcn_reg_list_3_0[j].reg_name, - adev->vcn.ip_dump[inst_off + j]); - } else { - drm_printf(p, "\nInactive Instance:VCN%d\n", i); - } - } -} - -static void vcn_v3_0_dump_ip_state(struct amdgpu_ip_block *ip_block) -{ - struct amdgpu_device *adev = ip_block->adev; - int i, j; - bool is_powered; - uint32_t inst_off; - uint32_t reg_count = ARRAY_SIZE(vcn_reg_list_3_0); - - if (!adev->vcn.ip_dump) - return; - - for (i = 0; i < adev->vcn.num_vcn_inst; i++) { - if (adev->vcn.harvest_config & (1 << i)) - continue; - - inst_off = i * reg_count; - /* mmUVD_POWER_STATUS is always readable and is first element of the array */ - adev->vcn.ip_dump[inst_off] = RREG32_SOC15(VCN, i, mmUVD_POWER_STATUS); - is_powered = (adev->vcn.ip_dump[inst_off] & - UVD_POWER_STATUS__UVD_POWER_STATUS_MASK) != 1; - - if (is_powered) - for (j = 1; j < reg_count; j++) - adev->vcn.ip_dump[inst_off + j] = - RREG32(SOC15_REG_ENTRY_OFFSET_INST(vcn_reg_list_3_0[j], i)); - } -} - static const struct amd_ip_funcs vcn_v3_0_ip_funcs = { .name = "vcn_v3_0", .early_init = vcn_v3_0_early_init, @@ -2389,8 +2355,8 @@ static const struct amd_ip_funcs vcn_v3_0_ip_funcs = { .wait_for_idle = vcn_v3_0_wait_for_idle, .set_clockgating_state = vcn_v3_0_set_clockgating_state, .set_powergating_state = vcn_set_powergating_state, - .dump_ip_state = vcn_v3_0_dump_ip_state, - .print_ip_state = vcn_v3_0_print_ip_state, + .dump_ip_state = amdgpu_vcn_dump_ip_state, + .print_ip_state = amdgpu_vcn_print_ip_state, }; 
const struct amdgpu_ip_block_version vcn_v3_0_ip_block = { diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c index b5071f77f78d..3ae666522d57 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c @@ -148,7 +148,7 @@ static int vcn_v4_0_early_init(struct amdgpu_ip_block *ip_block) static int vcn_v4_0_fw_shared_init(struct amdgpu_device *adev, int inst_idx) { - volatile struct amdgpu_vcn4_fw_shared *fw_shared; + struct amdgpu_vcn4_fw_shared *fw_shared; fw_shared = adev->vcn.inst[inst_idx].fw_shared.cpu_addr; fw_shared->present_flag_0 = cpu_to_le32(AMDGPU_FW_SHARED_FLAG_0_UNIFIED_QUEUE); @@ -183,8 +183,6 @@ static int vcn_v4_0_sw_init(struct amdgpu_ip_block *ip_block) struct amdgpu_ring *ring; struct amdgpu_device *adev = ip_block->adev; int i, r; - uint32_t reg_count = ARRAY_SIZE(vcn_reg_list_4_0); - uint32_t *ptr; for (i = 0; i < adev->vcn.num_vcn_inst; i++) { if (adev->vcn.harvest_config & (1 << i)) @@ -241,7 +239,8 @@ static int vcn_v4_0_sw_init(struct amdgpu_ip_block *ip_block) adev->vcn.supported_reset = amdgpu_get_soft_full_reset_mask(&adev->vcn.inst[0].ring_enc[0]); - adev->vcn.supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE; + if (!amdgpu_sriov_vf(adev)) + adev->vcn.supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE; if (amdgpu_sriov_vf(adev)) { r = amdgpu_virt_alloc_mm_table(adev); @@ -254,14 +253,9 @@ static int vcn_v4_0_sw_init(struct amdgpu_ip_block *ip_block) if (r) return r; - /* Allocate memory for VCN IP Dump buffer */ - ptr = kcalloc(adev->vcn.num_vcn_inst * reg_count, sizeof(uint32_t), GFP_KERNEL); - if (!ptr) { - DRM_ERROR("Failed to allocate memory for VCN IP Dump\n"); - adev->vcn.ip_dump = NULL; - } else { - adev->vcn.ip_dump = ptr; - } + r = amdgpu_vcn_reg_dump_init(adev, vcn_reg_list_4_0, ARRAY_SIZE(vcn_reg_list_4_0)); + if (r) + return r; r = amdgpu_vcn_sysfs_reset_mask_init(adev); if (r) @@ -284,7 +278,7 @@ static int vcn_v4_0_sw_fini(struct amdgpu_ip_block *ip_block) if (drm_dev_enter(adev_to_drm(adev), &idx)) { for (i = 0; i < adev->vcn.num_vcn_inst; i++) { - volatile struct amdgpu_vcn4_fw_shared *fw_shared; + struct amdgpu_vcn4_fw_shared *fw_shared; if (adev->vcn.harvest_config & (1 << i)) continue; @@ -308,13 +302,8 @@ static int vcn_v4_0_sw_fini(struct amdgpu_ip_block *ip_block) amdgpu_vcn_sysfs_reset_mask_fini(adev); - for (i = 0; i < adev->vcn.num_vcn_inst; i++) { - r = amdgpu_vcn_sw_fini(adev, i); - if (r) - return r; - } - - kfree(adev->vcn.ip_dump); + for (i = 0; i < adev->vcn.num_vcn_inst; i++) + amdgpu_vcn_sw_fini(adev, i); return 0; } @@ -1008,9 +997,10 @@ static int vcn_v4_0_start_dpg_mode(struct amdgpu_vcn_inst *vinst, bool indirect) { struct amdgpu_device *adev = vinst->adev; int inst_idx = vinst->inst; - volatile struct amdgpu_vcn4_fw_shared *fw_shared = adev->vcn.inst[inst_idx].fw_shared.cpu_addr; + struct amdgpu_vcn4_fw_shared *fw_shared = adev->vcn.inst[inst_idx].fw_shared.cpu_addr; struct amdgpu_ring *ring; uint32_t tmp; + int ret; /* disable register anti-hang mechanism */ WREG32_P(SOC15_REG_OFFSET(VCN, inst_idx, regUVD_POWER_STATUS), 1, @@ -1093,8 +1083,13 @@ static int vcn_v4_0_start_dpg_mode(struct amdgpu_vcn_inst *vinst, bool indirect) UVD_MASTINT_EN__VCPU_EN_MASK, 0, indirect); - if (indirect) - amdgpu_vcn_psp_update_sram(adev, inst_idx, 0); + if (indirect) { + ret = amdgpu_vcn_psp_update_sram(adev, inst_idx, 0); + if (ret) { + dev_err(adev->dev, "vcn sram load failed %d\n", ret); + return ret; + } + } ring = &adev->vcn.inst[inst_idx].ring_enc[0]; @@ -1142,7 
+1137,7 @@ static int vcn_v4_0_start(struct amdgpu_vcn_inst *vinst) { struct amdgpu_device *adev = vinst->adev; int i = vinst->inst; - volatile struct amdgpu_vcn4_fw_shared *fw_shared; + struct amdgpu_vcn4_fw_shared *fw_shared; struct amdgpu_ring *ring; uint32_t tmp; int j, k, r; @@ -1359,8 +1354,8 @@ static int vcn_v4_0_start_sriov(struct amdgpu_device *adev) struct mmsch_v4_0_cmd_end end = { {0} }; struct mmsch_v4_0_init_header header; - volatile struct amdgpu_vcn4_fw_shared *fw_shared; - volatile struct amdgpu_fw_shared_rb_setup *rb_setup; + struct amdgpu_vcn4_fw_shared *fw_shared; + struct amdgpu_fw_shared_rb_setup *rb_setup; direct_wt.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_WRITE; @@ -1611,7 +1606,7 @@ static int vcn_v4_0_stop(struct amdgpu_vcn_inst *vinst) { struct amdgpu_device *adev = vinst->adev; int i = vinst->inst; - volatile struct amdgpu_vcn4_fw_shared *fw_shared; + struct amdgpu_vcn4_fw_shared *fw_shared; uint32_t tmp; int r = 0; @@ -1623,7 +1618,6 @@ static int vcn_v4_0_stop(struct amdgpu_vcn_inst *vinst) if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) { vcn_v4_0_stop_dpg_mode(vinst); - r = 0; goto done; } @@ -1807,15 +1801,19 @@ static int vcn_v4_0_limit_sched(struct amdgpu_cs_parser *p, struct amdgpu_job *job) { struct drm_gpu_scheduler **scheds; - - /* The create msg must be in the first IB submitted */ - if (atomic_read(&job->base.entity->fence_seq)) - return -EINVAL; + struct dma_fence *fence; /* if VCN0 is harvested, we can't support AV1 */ if (p->adev->vcn.harvest_config & AMDGPU_VCN_HARVEST_VCN0) return -EINVAL; + /* wait for all jobs to finish before switching to instance 0 */ + fence = amdgpu_ctx_get_fence(p->ctx, job->base.entity, ~0ull); + if (fence) { + dma_fence_wait(fence, false); + dma_fence_put(fence); + } + scheds = p->adev->gpu_sched[AMDGPU_HW_IP_VCN_ENC] [AMDGPU_RING_PRIO_0].sched; drm_sched_entity_modify_sched(job->base.entity, scheds, 1); @@ -1906,22 +1904,16 @@ out: #define RADEON_VCN_ENGINE_TYPE_ENCODE (0x00000002) #define RADEON_VCN_ENGINE_TYPE_DECODE (0x00000003) - #define RADEON_VCN_ENGINE_INFO (0x30000001) -#define RADEON_VCN_ENGINE_INFO_MAX_OFFSET 16 - #define RENCODE_ENCODE_STANDARD_AV1 2 #define RENCODE_IB_PARAM_SESSION_INIT 0x00000003 -#define RENCODE_IB_PARAM_SESSION_INIT_MAX_OFFSET 64 -/* return the offset in ib if id is found, -1 otherwise - * to speed up the searching we only search upto max_offset - */ -static int vcn_v4_0_enc_find_ib_param(struct amdgpu_ib *ib, uint32_t id, int max_offset) +/* return the offset in ib if id is found, -1 otherwise */ +static int vcn_v4_0_enc_find_ib_param(struct amdgpu_ib *ib, uint32_t id, int start) { int i; - for (i = 0; i < ib->length_dw && i < max_offset && ib->ptr[i] >= 8; i += ib->ptr[i]/4) { + for (i = start; i < ib->length_dw && ib->ptr[i] >= 8; i += ib->ptr[i] / 4) { if (ib->ptr[i + 1] == id) return i; } @@ -1936,56 +1928,56 @@ static int vcn_v4_0_ring_patch_cs_in_place(struct amdgpu_cs_parser *p, struct amdgpu_vcn_decode_buffer *decode_buffer; uint64_t addr; uint32_t val; - int idx; + int idx = 0, sidx; /* The first instance can decode anything */ if (!ring->me) return 0; - /* RADEON_VCN_ENGINE_INFO is at the top of ib block */ - idx = vcn_v4_0_enc_find_ib_param(ib, RADEON_VCN_ENGINE_INFO, - RADEON_VCN_ENGINE_INFO_MAX_OFFSET); - if (idx < 0) /* engine info is missing */ - return 0; - - val = amdgpu_ib_get_value(ib, idx + 2); /* RADEON_VCN_ENGINE_TYPE */ - if (val == RADEON_VCN_ENGINE_TYPE_DECODE) { - decode_buffer = (struct amdgpu_vcn_decode_buffer *)&ib->ptr[idx + 6]; - - if 
(!(decode_buffer->valid_buf_flag & 0x1)) - return 0; - - addr = ((u64)decode_buffer->msg_buffer_address_hi) << 32 | - decode_buffer->msg_buffer_address_lo; - return vcn_v4_0_dec_msg(p, job, addr); - } else if (val == RADEON_VCN_ENGINE_TYPE_ENCODE) { - idx = vcn_v4_0_enc_find_ib_param(ib, RENCODE_IB_PARAM_SESSION_INIT, - RENCODE_IB_PARAM_SESSION_INIT_MAX_OFFSET); - if (idx >= 0 && ib->ptr[idx + 2] == RENCODE_ENCODE_STANDARD_AV1) - return vcn_v4_0_limit_sched(p, job); + while ((idx = vcn_v4_0_enc_find_ib_param(ib, RADEON_VCN_ENGINE_INFO, idx)) >= 0) { + val = amdgpu_ib_get_value(ib, idx + 2); /* RADEON_VCN_ENGINE_TYPE */ + if (val == RADEON_VCN_ENGINE_TYPE_DECODE) { + decode_buffer = (struct amdgpu_vcn_decode_buffer *)&ib->ptr[idx + 6]; + + if (!(decode_buffer->valid_buf_flag & 0x1)) + return 0; + + addr = ((u64)decode_buffer->msg_buffer_address_hi) << 32 | + decode_buffer->msg_buffer_address_lo; + return vcn_v4_0_dec_msg(p, job, addr); + } else if (val == RADEON_VCN_ENGINE_TYPE_ENCODE) { + sidx = vcn_v4_0_enc_find_ib_param(ib, RENCODE_IB_PARAM_SESSION_INIT, idx); + if (sidx >= 0 && ib->ptr[sidx + 2] == RENCODE_ENCODE_STANDARD_AV1) + return vcn_v4_0_limit_sched(p, job); + } + idx += ib->ptr[idx] / 4; } return 0; } -static int vcn_v4_0_ring_reset(struct amdgpu_ring *ring, unsigned int vmid) +static int vcn_v4_0_ring_reset(struct amdgpu_ring *ring, + unsigned int vmid, + struct amdgpu_fence *timedout_fence) { struct amdgpu_device *adev = ring->adev; struct amdgpu_vcn_inst *vinst = &adev->vcn.inst[ring->me]; + int r; - if (!(adev->vcn.supported_reset & AMDGPU_RESET_TYPE_PER_QUEUE)) - return -EOPNOTSUPP; - - vcn_v4_0_stop(vinst); - vcn_v4_0_start(vinst); - - return amdgpu_ring_test_helper(ring); + amdgpu_ring_reset_helper_begin(ring, timedout_fence); + r = vcn_v4_0_stop(vinst); + if (r) + return r; + r = vcn_v4_0_start(vinst); + if (r) + return r; + return amdgpu_ring_reset_helper_end(ring, timedout_fence); } static struct amdgpu_ring_funcs vcn_v4_0_unified_ring_vm_funcs = { .type = AMDGPU_RING_TYPE_VCN_ENC, .align_mask = 0x3f, .nop = VCN_ENC_CMD_NO_OP, - .extra_dw = sizeof(struct amdgpu_vcn_rb_metadata), + .extra_bytes = sizeof(struct amdgpu_vcn_rb_metadata), .get_rptr = vcn_v4_0_unified_ring_get_rptr, .get_wptr = vcn_v4_0_unified_ring_get_wptr, .set_wptr = vcn_v4_0_unified_ring_set_wptr, @@ -2241,67 +2233,6 @@ static void vcn_v4_0_set_irq_funcs(struct amdgpu_device *adev) } } -static void vcn_v4_0_print_ip_state(struct amdgpu_ip_block *ip_block, struct drm_printer *p) -{ - struct amdgpu_device *adev = ip_block->adev; - int i, j; - uint32_t reg_count = ARRAY_SIZE(vcn_reg_list_4_0); - uint32_t inst_off, is_powered; - - if (!adev->vcn.ip_dump) - return; - - drm_printf(p, "num_instances:%d\n", adev->vcn.num_vcn_inst); - for (i = 0; i < adev->vcn.num_vcn_inst; i++) { - if (adev->vcn.harvest_config & (1 << i)) { - drm_printf(p, "\nHarvested Instance:VCN%d Skipping dump\n", i); - continue; - } - - inst_off = i * reg_count; - is_powered = (adev->vcn.ip_dump[inst_off] & - UVD_POWER_STATUS__UVD_POWER_STATUS_MASK) != 1; - - if (is_powered) { - drm_printf(p, "\nActive Instance:VCN%d\n", i); - for (j = 0; j < reg_count; j++) - drm_printf(p, "%-50s \t 0x%08x\n", vcn_reg_list_4_0[j].reg_name, - adev->vcn.ip_dump[inst_off + j]); - } else { - drm_printf(p, "\nInactive Instance:VCN%d\n", i); - } - } -} - -static void vcn_v4_0_dump_ip_state(struct amdgpu_ip_block *ip_block) -{ - struct amdgpu_device *adev = ip_block->adev; - int i, j; - bool is_powered; - uint32_t inst_off; - uint32_t reg_count = 
ARRAY_SIZE(vcn_reg_list_4_0); - - if (!adev->vcn.ip_dump) - return; - - for (i = 0; i < adev->vcn.num_vcn_inst; i++) { - if (adev->vcn.harvest_config & (1 << i)) - continue; - - inst_off = i * reg_count; - /* mmUVD_POWER_STATUS is always readable and is first element of the array */ - adev->vcn.ip_dump[inst_off] = RREG32_SOC15(VCN, i, regUVD_POWER_STATUS); - is_powered = (adev->vcn.ip_dump[inst_off] & - UVD_POWER_STATUS__UVD_POWER_STATUS_MASK) != 1; - - if (is_powered) - for (j = 1; j < reg_count; j++) - adev->vcn.ip_dump[inst_off + j] = - RREG32(SOC15_REG_ENTRY_OFFSET_INST(vcn_reg_list_4_0[j], - i)); - } -} - static const struct amd_ip_funcs vcn_v4_0_ip_funcs = { .name = "vcn_v4_0", .early_init = vcn_v4_0_early_init, @@ -2315,8 +2246,8 @@ static const struct amd_ip_funcs vcn_v4_0_ip_funcs = { .wait_for_idle = vcn_v4_0_wait_for_idle, .set_clockgating_state = vcn_v4_0_set_clockgating_state, .set_powergating_state = vcn_set_powergating_state, - .dump_ip_state = vcn_v4_0_dump_ip_state, - .print_ip_state = vcn_v4_0_print_ip_state, + .dump_ip_state = amdgpu_vcn_dump_ip_state, + .print_ip_state = amdgpu_vcn_print_ip_state, }; const struct amdgpu_ip_block_version vcn_v4_0_ip_block = { diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c index 5a33140f5723..eacf4e93ba2f 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c @@ -134,6 +134,19 @@ static int vcn_v4_0_3_early_init(struct amdgpu_ip_block *ip_block) return 0; } +static int vcn_v4_0_3_late_init(struct amdgpu_ip_block *ip_block) +{ + struct amdgpu_device *adev = ip_block->adev; + + adev->vcn.supported_reset = + amdgpu_get_soft_full_reset_mask(&adev->vcn.inst[0].ring_enc[0]); + + if (amdgpu_dpm_reset_vcn_is_supported(adev)) + adev->vcn.supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE; + + return 0; +} + static int vcn_v4_0_3_fw_shared_init(struct amdgpu_device *adev, int inst_idx) { struct amdgpu_vcn4_fw_shared *fw_shared; @@ -160,8 +173,6 @@ static int vcn_v4_0_3_sw_init(struct amdgpu_ip_block *ip_block) struct amdgpu_device *adev = ip_block->adev; struct amdgpu_ring *ring; int i, r, vcn_inst; - uint32_t reg_count = ARRAY_SIZE(vcn_reg_list_4_0_3); - uint32_t *ptr; /* VCN DEC TRAP */ r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN, @@ -201,7 +212,11 @@ static int vcn_v4_0_3_sw_init(struct amdgpu_ip_block *ip_block) ring->vm_hub = AMDGPU_MMHUB0(adev->vcn.inst[i].aid_id); sprintf(ring->name, "vcn_unified_%d", adev->vcn.inst[i].aid_id); - r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.inst->irq, 0, + + /* There are no per-instance irq source IDs on 4.0.3, the IH + * packets use a separate field to differentiate instances. 
+ */ + r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.inst[0].irq, 0, AMDGPU_RING_PRIO_DEFAULT, &adev->vcn.inst[i].sched_score); if (r) @@ -213,10 +228,6 @@ static int vcn_v4_0_3_sw_init(struct amdgpu_ip_block *ip_block) adev->vcn.inst[i].pause_dpg_mode = vcn_v4_0_3_pause_dpg_mode; } - /* TODO: Add queue reset mask when FW fully supports it */ - adev->vcn.supported_reset = - amdgpu_get_soft_full_reset_mask(&adev->vcn.inst[0].ring_enc[0]); - if (amdgpu_sriov_vf(adev)) { r = amdgpu_virt_alloc_mm_table(adev); if (r) @@ -231,20 +242,11 @@ static int vcn_v4_0_3_sw_init(struct amdgpu_ip_block *ip_block) } } - /* Allocate memory for VCN IP Dump buffer */ - ptr = kcalloc(adev->vcn.num_vcn_inst * reg_count, sizeof(uint32_t), GFP_KERNEL); - if (!ptr) { - DRM_ERROR("Failed to allocate memory for VCN IP Dump\n"); - adev->vcn.ip_dump = NULL; - } else { - adev->vcn.ip_dump = ptr; - } - - r = amdgpu_vcn_sysfs_reset_mask_init(adev); + r = amdgpu_vcn_reg_dump_init(adev, vcn_reg_list_4_0_3, ARRAY_SIZE(vcn_reg_list_4_0_3)); if (r) return r; - return 0; + return amdgpu_vcn_sysfs_reset_mask_init(adev); } /** @@ -261,7 +263,7 @@ static int vcn_v4_0_3_sw_fini(struct amdgpu_ip_block *ip_block) if (drm_dev_enter(&adev->ddev, &idx)) { for (i = 0; i < adev->vcn.num_vcn_inst; i++) { - volatile struct amdgpu_vcn4_fw_shared *fw_shared; + struct amdgpu_vcn4_fw_shared *fw_shared; fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr; fw_shared->present_flag_0 = 0; @@ -281,13 +283,8 @@ static int vcn_v4_0_3_sw_fini(struct amdgpu_ip_block *ip_block) amdgpu_vcn_sysfs_reset_mask_fini(adev); - for (i = 0; i < adev->vcn.num_vcn_inst; i++) { - r = amdgpu_vcn_sw_fini(adev, i); - if (r) - return r; - } - - kfree(adev->vcn.ip_dump); + for (i = 0; i < adev->vcn.num_vcn_inst; i++) + amdgpu_vcn_sw_fini(adev, i); return 0; } @@ -391,7 +388,7 @@ static int vcn_v4_0_3_hw_fini(struct amdgpu_ip_block *ip_block) vinst->set_pg_state(vinst, AMD_PG_STATE_GATE); } - if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__VCN)) + if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__VCN) && !amdgpu_sriov_vf(adev)) amdgpu_irq_put(adev, &adev->vcn.inst->ras_poison_irq, 0); return 0; @@ -848,10 +845,10 @@ static int vcn_v4_0_3_start_dpg_mode(struct amdgpu_vcn_inst *vinst, { struct amdgpu_device *adev = vinst->adev; int inst_idx = vinst->inst; - volatile struct amdgpu_vcn4_fw_shared *fw_shared = + struct amdgpu_vcn4_fw_shared *fw_shared = adev->vcn.inst[inst_idx].fw_shared.cpu_addr; struct amdgpu_ring *ring; - int vcn_inst; + int vcn_inst, ret; uint32_t tmp; vcn_inst = GET_INST(VCN, inst_idx); @@ -944,8 +941,13 @@ static int vcn_v4_0_3_start_dpg_mode(struct amdgpu_vcn_inst *vinst, VCN, 0, regUVD_MASTINT_EN), UVD_MASTINT_EN__VCPU_EN_MASK, 0, indirect); - if (indirect) - amdgpu_vcn_psp_update_sram(adev, inst_idx, AMDGPU_UCODE_ID_VCN0_RAM); + if (indirect) { + ret = amdgpu_vcn_psp_update_sram(adev, inst_idx, AMDGPU_UCODE_ID_VCN0_RAM); + if (ret) { + dev_err(adev->dev, "vcn sram load failed %d\n", ret); + return ret; + } + } ring = &adev->vcn.inst[inst_idx].ring_enc[0]; @@ -1010,8 +1012,8 @@ static int vcn_v4_0_3_start_sriov(struct amdgpu_device *adev) struct mmsch_v4_0_cmd_end end = { {0} }; struct mmsch_v4_0_3_init_header header; - volatile struct amdgpu_vcn4_fw_shared *fw_shared; - volatile struct amdgpu_fw_shared_rb_setup *rb_setup; + struct amdgpu_vcn4_fw_shared *fw_shared; + struct amdgpu_fw_shared_rb_setup *rb_setup; direct_wt.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_WRITE; @@ -1185,7 +1187,7 @@ static int vcn_v4_0_3_start(struct 
amdgpu_vcn_inst *vinst) { struct amdgpu_device *adev = vinst->adev; int i = vinst->inst; - volatile struct amdgpu_vcn4_fw_shared *fw_shared; + struct amdgpu_vcn4_fw_shared *fw_shared; struct amdgpu_ring *ring; int j, k, r, vcn_inst; uint32_t tmp; @@ -1395,7 +1397,7 @@ static int vcn_v4_0_3_stop(struct amdgpu_vcn_inst *vinst) { struct amdgpu_device *adev = vinst->adev; int i = vinst->inst; - volatile struct amdgpu_vcn4_fw_shared *fw_shared; + struct amdgpu_vcn4_fw_shared *fw_shared; int r = 0, vcn_inst; uint32_t tmp; @@ -1594,18 +1596,16 @@ static void vcn_v4_0_3_unified_ring_set_wptr(struct amdgpu_ring *ring) } } -static int vcn_v4_0_3_ring_reset(struct amdgpu_ring *ring, unsigned int vmid) +static int vcn_v4_0_3_ring_reset(struct amdgpu_ring *ring, + unsigned int vmid, + struct amdgpu_fence *timedout_fence) { int r = 0; int vcn_inst; struct amdgpu_device *adev = ring->adev; struct amdgpu_vcn_inst *vinst = &adev->vcn.inst[ring->me]; - if (amdgpu_sriov_vf(ring->adev)) - return -EOPNOTSUPP; - - if (!(adev->vcn.supported_reset & AMDGPU_RESET_TYPE_PER_QUEUE)) - return -EOPNOTSUPP; + amdgpu_ring_reset_helper_begin(ring, timedout_fence); vcn_inst = GET_INST(VCN, ring->me); r = amdgpu_dpm_reset_vcn(adev, 1 << vcn_inst); @@ -1620,9 +1620,8 @@ static int vcn_v4_0_3_ring_reset(struct amdgpu_ring *ring, unsigned int vmid) adev->vcn.caps |= AMDGPU_VCN_CAPS(RRMT_ENABLED); vcn_v4_0_3_hw_init_inst(vinst); vcn_v4_0_3_start_dpg_mode(vinst, adev->vcn.inst[ring->me].indirect_sram); - r = amdgpu_ring_test_helper(ring); - return r; + return amdgpu_ring_reset_helper_end(ring, timedout_fence); } static const struct amdgpu_ring_funcs vcn_v4_0_3_unified_ring_vm_funcs = { @@ -1875,71 +1874,10 @@ static void vcn_v4_0_3_set_irq_funcs(struct amdgpu_device *adev) adev->vcn.inst->ras_poison_irq.funcs = &vcn_v4_0_3_ras_irq_funcs; } -static void vcn_v4_0_3_print_ip_state(struct amdgpu_ip_block *ip_block, struct drm_printer *p) -{ - struct amdgpu_device *adev = ip_block->adev; - int i, j; - uint32_t reg_count = ARRAY_SIZE(vcn_reg_list_4_0_3); - uint32_t inst_off, is_powered; - - if (!adev->vcn.ip_dump) - return; - - drm_printf(p, "num_instances:%d\n", adev->vcn.num_vcn_inst); - for (i = 0; i < adev->vcn.num_vcn_inst; i++) { - if (adev->vcn.harvest_config & (1 << i)) { - drm_printf(p, "\nHarvested Instance:VCN%d Skipping dump\n", i); - continue; - } - - inst_off = i * reg_count; - is_powered = (adev->vcn.ip_dump[inst_off] & - UVD_POWER_STATUS__UVD_POWER_STATUS_MASK) != 1; - - if (is_powered) { - drm_printf(p, "\nActive Instance:VCN%d\n", i); - for (j = 0; j < reg_count; j++) - drm_printf(p, "%-50s \t 0x%08x\n", vcn_reg_list_4_0_3[j].reg_name, - adev->vcn.ip_dump[inst_off + j]); - } else { - drm_printf(p, "\nInactive Instance:VCN%d\n", i); - } - } -} - -static void vcn_v4_0_3_dump_ip_state(struct amdgpu_ip_block *ip_block) -{ - struct amdgpu_device *adev = ip_block->adev; - int i, j; - bool is_powered; - uint32_t inst_off, inst_id; - uint32_t reg_count = ARRAY_SIZE(vcn_reg_list_4_0_3); - - if (!adev->vcn.ip_dump) - return; - - for (i = 0; i < adev->vcn.num_vcn_inst; i++) { - if (adev->vcn.harvest_config & (1 << i)) - continue; - - inst_id = GET_INST(VCN, i); - inst_off = i * reg_count; - /* mmUVD_POWER_STATUS is always readable and is first element of the array */ - adev->vcn.ip_dump[inst_off] = RREG32_SOC15(VCN, inst_id, regUVD_POWER_STATUS); - is_powered = (adev->vcn.ip_dump[inst_off] & - UVD_POWER_STATUS__UVD_POWER_STATUS_MASK) != 1; - - if (is_powered) - for (j = 1; j < reg_count; j++) - adev->vcn.ip_dump[inst_off + j] = 
- RREG32(SOC15_REG_ENTRY_OFFSET_INST(vcn_reg_list_4_0_3[j], - inst_id)); - } -} - static const struct amd_ip_funcs vcn_v4_0_3_ip_funcs = { .name = "vcn_v4_0_3", .early_init = vcn_v4_0_3_early_init, + .late_init = vcn_v4_0_3_late_init, .sw_init = vcn_v4_0_3_sw_init, .sw_fini = vcn_v4_0_3_sw_fini, .hw_init = vcn_v4_0_3_hw_init, @@ -1950,8 +1888,8 @@ static const struct amd_ip_funcs vcn_v4_0_3_ip_funcs = { .wait_for_idle = vcn_v4_0_3_wait_for_idle, .set_clockgating_state = vcn_v4_0_3_set_clockgating_state, .set_powergating_state = vcn_set_powergating_state, - .dump_ip_state = vcn_v4_0_3_dump_ip_state, - .print_ip_state = vcn_v4_0_3_print_ip_state, + .dump_ip_state = amdgpu_vcn_dump_ip_state, + .print_ip_state = amdgpu_vcn_print_ip_state, }; const struct amdgpu_ip_block_version vcn_v4_0_3_ip_block = { diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c index 16ade84facc7..b107ee80e472 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c @@ -147,12 +147,9 @@ static int vcn_v4_0_5_sw_init(struct amdgpu_ip_block *ip_block) struct amdgpu_ring *ring; struct amdgpu_device *adev = ip_block->adev; int i, r; - uint32_t reg_count = ARRAY_SIZE(vcn_reg_list_4_0_5); - uint32_t *ptr; - for (i = 0; i < adev->vcn.num_vcn_inst; i++) { - volatile struct amdgpu_vcn4_fw_shared *fw_shared; + struct amdgpu_vcn4_fw_shared *fw_shared; if (adev->vcn.harvest_config & (1 << i)) continue; @@ -220,7 +217,8 @@ static int vcn_v4_0_5_sw_init(struct amdgpu_ip_block *ip_block) } adev->vcn.supported_reset = amdgpu_get_soft_full_reset_mask(&adev->vcn.inst[0].ring_enc[0]); - adev->vcn.supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE; + if (!amdgpu_sriov_vf(adev)) + adev->vcn.supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE; r = amdgpu_vcn_sysfs_reset_mask_init(adev); if (r) @@ -232,15 +230,9 @@ static int vcn_v4_0_5_sw_init(struct amdgpu_ip_block *ip_block) return r; } - /* Allocate memory for VCN IP Dump buffer */ - ptr = kcalloc(adev->vcn.num_vcn_inst * reg_count, sizeof(uint32_t), GFP_KERNEL); - if (!ptr) { - DRM_ERROR("Failed to allocate memory for VCN IP Dump\n"); - adev->vcn.ip_dump = NULL; - } else { - adev->vcn.ip_dump = ptr; - } - return 0; + r = amdgpu_vcn_reg_dump_init(adev, vcn_reg_list_4_0_5, ARRAY_SIZE(vcn_reg_list_4_0_5)); + + return r; } /** @@ -257,7 +249,7 @@ static int vcn_v4_0_5_sw_fini(struct amdgpu_ip_block *ip_block) if (drm_dev_enter(adev_to_drm(adev), &idx)) { for (i = 0; i < adev->vcn.num_vcn_inst; i++) { - volatile struct amdgpu_vcn4_fw_shared *fw_shared; + struct amdgpu_vcn4_fw_shared *fw_shared; if (adev->vcn.harvest_config & (1 << i)) continue; @@ -278,13 +270,9 @@ static int vcn_v4_0_5_sw_fini(struct amdgpu_ip_block *ip_block) if (r) return r; - r = amdgpu_vcn_sw_fini(adev, i); - if (r) - return r; + amdgpu_vcn_sw_fini(adev, i); } - kfree(adev->vcn.ip_dump); - return 0; } @@ -922,9 +910,10 @@ static int vcn_v4_0_5_start_dpg_mode(struct amdgpu_vcn_inst *vinst, { struct amdgpu_device *adev = vinst->adev; int inst_idx = vinst->inst; - volatile struct amdgpu_vcn4_fw_shared *fw_shared = adev->vcn.inst[inst_idx].fw_shared.cpu_addr; + struct amdgpu_vcn4_fw_shared *fw_shared = adev->vcn.inst[inst_idx].fw_shared.cpu_addr; struct amdgpu_ring *ring; uint32_t tmp; + int ret; /* disable register anti-hang mechanism */ WREG32_P(SOC15_REG_OFFSET(VCN, inst_idx, regUVD_POWER_STATUS), 1, @@ -1005,8 +994,13 @@ static int vcn_v4_0_5_start_dpg_mode(struct amdgpu_vcn_inst *vinst, VCN, inst_idx, regUVD_MASTINT_EN), 
UVD_MASTINT_EN__VCPU_EN_MASK, 0, indirect); - if (indirect) - amdgpu_vcn_psp_update_sram(adev, inst_idx, 0); + if (indirect) { + ret = amdgpu_vcn_psp_update_sram(adev, inst_idx, 0); + if (ret) { + dev_err(adev->dev, "vcn sram load failed %d\n", ret); + return ret; + } + } ring = &adev->vcn.inst[inst_idx].ring_enc[0]; @@ -1053,7 +1047,7 @@ static int vcn_v4_0_5_start(struct amdgpu_vcn_inst *vinst) { struct amdgpu_device *adev = vinst->adev; int i = vinst->inst; - volatile struct amdgpu_vcn4_fw_shared *fw_shared; + struct amdgpu_vcn4_fw_shared *fw_shared; struct amdgpu_ring *ring; uint32_t tmp; int j, k, r; @@ -1272,7 +1266,7 @@ static int vcn_v4_0_5_stop(struct amdgpu_vcn_inst *vinst) { struct amdgpu_device *adev = vinst->adev; int i = vinst->inst; - volatile struct amdgpu_vcn4_fw_shared *fw_shared; + struct amdgpu_vcn4_fw_shared *fw_shared; uint32_t tmp; int r = 0; @@ -1465,18 +1459,22 @@ static void vcn_v4_0_5_unified_ring_set_wptr(struct amdgpu_ring *ring) } } -static int vcn_v4_0_5_ring_reset(struct amdgpu_ring *ring, unsigned int vmid) +static int vcn_v4_0_5_ring_reset(struct amdgpu_ring *ring, + unsigned int vmid, + struct amdgpu_fence *timedout_fence) { struct amdgpu_device *adev = ring->adev; struct amdgpu_vcn_inst *vinst = &adev->vcn.inst[ring->me]; + int r; - if (!(adev->vcn.supported_reset & AMDGPU_RESET_TYPE_PER_QUEUE)) - return -EOPNOTSUPP; - - vcn_v4_0_5_stop(vinst); - vcn_v4_0_5_start(vinst); - - return amdgpu_ring_test_helper(ring); + amdgpu_ring_reset_helper_begin(ring, timedout_fence); + r = vcn_v4_0_5_stop(vinst); + if (r) + return r; + r = vcn_v4_0_5_start(vinst); + if (r) + return r; + return amdgpu_ring_reset_helper_end(ring, timedout_fence); } static struct amdgpu_ring_funcs vcn_v4_0_5_unified_ring_vm_funcs = { @@ -1591,7 +1589,7 @@ static int vcn_v4_0_5_set_clockgating_state(struct amdgpu_ip_block *ip_block, enum amd_clockgating_state state) { struct amdgpu_device *adev = ip_block->adev; - bool enable = (state == AMD_CG_STATE_GATE) ? 
true : false; + bool enable = state == AMD_CG_STATE_GATE; int i; for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { @@ -1699,67 +1697,6 @@ static void vcn_v4_0_5_set_irq_funcs(struct amdgpu_device *adev) } } -static void vcn_v4_0_5_print_ip_state(struct amdgpu_ip_block *ip_block, struct drm_printer *p) -{ - struct amdgpu_device *adev = ip_block->adev; - int i, j; - uint32_t reg_count = ARRAY_SIZE(vcn_reg_list_4_0_5); - uint32_t inst_off, is_powered; - - if (!adev->vcn.ip_dump) - return; - - drm_printf(p, "num_instances:%d\n", adev->vcn.num_vcn_inst); - for (i = 0; i < adev->vcn.num_vcn_inst; i++) { - if (adev->vcn.harvest_config & (1 << i)) { - drm_printf(p, "\nHarvested Instance:VCN%d Skipping dump\n", i); - continue; - } - - inst_off = i * reg_count; - is_powered = (adev->vcn.ip_dump[inst_off] & - UVD_POWER_STATUS__UVD_POWER_STATUS_MASK) != 1; - - if (is_powered) { - drm_printf(p, "\nActive Instance:VCN%d\n", i); - for (j = 0; j < reg_count; j++) - drm_printf(p, "%-50s \t 0x%08x\n", vcn_reg_list_4_0_5[j].reg_name, - adev->vcn.ip_dump[inst_off + j]); - } else { - drm_printf(p, "\nInactive Instance:VCN%d\n", i); - } - } -} - -static void vcn_v4_0_5_dump_ip_state(struct amdgpu_ip_block *ip_block) -{ - struct amdgpu_device *adev = ip_block->adev; - int i, j; - bool is_powered; - uint32_t inst_off; - uint32_t reg_count = ARRAY_SIZE(vcn_reg_list_4_0_5); - - if (!adev->vcn.ip_dump) - return; - - for (i = 0; i < adev->vcn.num_vcn_inst; i++) { - if (adev->vcn.harvest_config & (1 << i)) - continue; - - inst_off = i * reg_count; - /* mmUVD_POWER_STATUS is always readable and is first element of the array */ - adev->vcn.ip_dump[inst_off] = RREG32_SOC15(VCN, i, regUVD_POWER_STATUS); - is_powered = (adev->vcn.ip_dump[inst_off] & - UVD_POWER_STATUS__UVD_POWER_STATUS_MASK) != 1; - - if (is_powered) - for (j = 1; j < reg_count; j++) - adev->vcn.ip_dump[inst_off + j] = - RREG32(SOC15_REG_ENTRY_OFFSET_INST(vcn_reg_list_4_0_5[j], - i)); - } -} - static const struct amd_ip_funcs vcn_v4_0_5_ip_funcs = { .name = "vcn_v4_0_5", .early_init = vcn_v4_0_5_early_init, @@ -1773,8 +1710,8 @@ static const struct amd_ip_funcs vcn_v4_0_5_ip_funcs = { .wait_for_idle = vcn_v4_0_5_wait_for_idle, .set_clockgating_state = vcn_v4_0_5_set_clockgating_state, .set_powergating_state = vcn_set_powergating_state, - .dump_ip_state = vcn_v4_0_5_dump_ip_state, - .print_ip_state = vcn_v4_0_5_print_ip_state, + .dump_ip_state = amdgpu_vcn_dump_ip_state, + .print_ip_state = amdgpu_vcn_print_ip_state, }; const struct amdgpu_ip_block_version vcn_v4_0_5_ip_block = { diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c index f8e3f0b882da..0202df5db1e1 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c @@ -115,21 +115,6 @@ static int vcn_v5_0_0_early_init(struct amdgpu_ip_block *ip_block) return 0; } -void vcn_v5_0_0_alloc_ip_dump(struct amdgpu_device *adev) -{ - uint32_t reg_count = ARRAY_SIZE(vcn_reg_list_5_0); - uint32_t *ptr; - - /* Allocate memory for VCN IP Dump buffer */ - ptr = kcalloc(adev->vcn.num_vcn_inst * reg_count, sizeof(uint32_t), GFP_KERNEL); - if (!ptr) { - DRM_ERROR("Failed to allocate memory for VCN IP Dump\n"); - adev->vcn.ip_dump = NULL; - } else { - adev->vcn.ip_dump = ptr; - } -} - /** * vcn_v5_0_0_sw_init - sw init for VCN block * @@ -144,7 +129,7 @@ static int vcn_v5_0_0_sw_init(struct amdgpu_ip_block *ip_block) int i, r; for (i = 0; i < adev->vcn.num_vcn_inst; i++) { - volatile struct amdgpu_vcn5_fw_shared *fw_shared; + struct 
amdgpu_vcn5_fw_shared *fw_shared; if (adev->vcn.harvest_config & (1 << i)) continue; @@ -198,9 +183,12 @@ static int vcn_v5_0_0_sw_init(struct amdgpu_ip_block *ip_block) adev->vcn.supported_reset = amdgpu_get_soft_full_reset_mask(&adev->vcn.inst[0].ring_enc[0]); - adev->vcn.supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE; + if (!amdgpu_sriov_vf(adev)) + adev->vcn.supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE; - vcn_v5_0_0_alloc_ip_dump(adev); + r = amdgpu_vcn_reg_dump_init(adev, vcn_reg_list_5_0, ARRAY_SIZE(vcn_reg_list_5_0)); + if (r) + return r; r = amdgpu_vcn_sysfs_reset_mask_init(adev); if (r) @@ -223,7 +211,7 @@ static int vcn_v5_0_0_sw_fini(struct amdgpu_ip_block *ip_block) if (drm_dev_enter(adev_to_drm(adev), &idx)) { for (i = 0; i < adev->vcn.num_vcn_inst; i++) { - volatile struct amdgpu_vcn5_fw_shared *fw_shared; + struct amdgpu_vcn5_fw_shared *fw_shared; if (adev->vcn.harvest_config & (1 << i)) continue; @@ -244,13 +232,8 @@ static int vcn_v5_0_0_sw_fini(struct amdgpu_ip_block *ip_block) amdgpu_vcn_sysfs_reset_mask_fini(adev); - for (i = 0; i < adev->vcn.num_vcn_inst; i++) { - r = amdgpu_vcn_sw_fini(adev, i); - if (r) - return r; - } - - kfree(adev->vcn.ip_dump); + for (i = 0; i < adev->vcn.num_vcn_inst; i++) + amdgpu_vcn_sw_fini(adev, i); return 0; } @@ -709,9 +692,10 @@ static int vcn_v5_0_0_start_dpg_mode(struct amdgpu_vcn_inst *vinst, { struct amdgpu_device *adev = vinst->adev; int inst_idx = vinst->inst; - volatile struct amdgpu_vcn5_fw_shared *fw_shared = adev->vcn.inst[inst_idx].fw_shared.cpu_addr; + struct amdgpu_vcn5_fw_shared *fw_shared = adev->vcn.inst[inst_idx].fw_shared.cpu_addr; struct amdgpu_ring *ring; uint32_t tmp; + int ret; /* disable register anti-hang mechanism */ WREG32_P(SOC15_REG_OFFSET(VCN, inst_idx, regUVD_POWER_STATUS), 1, @@ -765,8 +749,13 @@ static int vcn_v5_0_0_start_dpg_mode(struct amdgpu_vcn_inst *vinst, VCN, inst_idx, regUVD_MASTINT_EN), UVD_MASTINT_EN__VCPU_EN_MASK, 0, indirect); - if (indirect) - amdgpu_vcn_psp_update_sram(adev, inst_idx, 0); + if (indirect) { + ret = amdgpu_vcn_psp_update_sram(adev, inst_idx, 0); + if (ret) { + dev_err(adev->dev, "%s: vcn sram load failed %d\n", __func__, ret); + return ret; + } + } ring = &adev->vcn.inst[inst_idx].ring_enc[0]; @@ -813,7 +802,7 @@ static int vcn_v5_0_0_start(struct amdgpu_vcn_inst *vinst) { struct amdgpu_device *adev = vinst->adev; int i = vinst->inst; - volatile struct amdgpu_vcn5_fw_shared *fw_shared; + struct amdgpu_vcn5_fw_shared *fw_shared; struct amdgpu_ring *ring; uint32_t tmp; int j, k, r; @@ -1006,7 +995,7 @@ static int vcn_v5_0_0_stop(struct amdgpu_vcn_inst *vinst) { struct amdgpu_device *adev = vinst->adev; int i = vinst->inst; - volatile struct amdgpu_vcn5_fw_shared *fw_shared; + struct amdgpu_vcn5_fw_shared *fw_shared; uint32_t tmp; int r = 0; @@ -1192,18 +1181,22 @@ static void vcn_v5_0_0_unified_ring_set_wptr(struct amdgpu_ring *ring) } } -static int vcn_v5_0_0_ring_reset(struct amdgpu_ring *ring, unsigned int vmid) +static int vcn_v5_0_0_ring_reset(struct amdgpu_ring *ring, + unsigned int vmid, + struct amdgpu_fence *timedout_fence) { struct amdgpu_device *adev = ring->adev; struct amdgpu_vcn_inst *vinst = &adev->vcn.inst[ring->me]; + int r; - if (!(adev->vcn.supported_reset & AMDGPU_RESET_TYPE_PER_QUEUE)) - return -EOPNOTSUPP; - - vcn_v5_0_0_stop(vinst); - vcn_v5_0_0_start(vinst); - - return amdgpu_ring_test_helper(ring); + amdgpu_ring_reset_helper_begin(ring, timedout_fence); + r = vcn_v5_0_0_stop(vinst); + if (r) + return r; + r = vcn_v5_0_0_start(vinst); + if (r) + return 
r; + return amdgpu_ring_reset_helper_end(ring, timedout_fence); } static const struct amdgpu_ring_funcs vcn_v5_0_0_unified_ring_vm_funcs = { @@ -1315,7 +1308,7 @@ static int vcn_v5_0_0_set_clockgating_state(struct amdgpu_ip_block *ip_block, enum amd_clockgating_state state) { struct amdgpu_device *adev = ip_block->adev; - bool enable = (state == AMD_CG_STATE_GATE) ? true : false; + bool enable = state == AMD_CG_STATE_GATE; int i; for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { @@ -1423,67 +1416,6 @@ static void vcn_v5_0_0_set_irq_funcs(struct amdgpu_device *adev) } } -void vcn_v5_0_0_print_ip_state(struct amdgpu_ip_block *ip_block, - struct drm_printer *p) -{ - struct amdgpu_device *adev = ip_block->adev; - int i, j; - uint32_t reg_count = ARRAY_SIZE(vcn_reg_list_5_0); - uint32_t inst_off, is_powered; - - if (!adev->vcn.ip_dump) - return; - - drm_printf(p, "num_instances:%d\n", adev->vcn.num_vcn_inst); - for (i = 0; i < adev->vcn.num_vcn_inst; i++) { - if (adev->vcn.harvest_config & (1 << i)) { - drm_printf(p, "\nHarvested Instance:VCN%d Skipping dump\n", i); - continue; - } - - inst_off = i * reg_count; - is_powered = (adev->vcn.ip_dump[inst_off] & - UVD_POWER_STATUS__UVD_POWER_STATUS_MASK) != 1; - - if (is_powered) { - drm_printf(p, "\nActive Instance:VCN%d\n", i); - for (j = 0; j < reg_count; j++) - drm_printf(p, "%-50s \t 0x%08x\n", vcn_reg_list_5_0[j].reg_name, - adev->vcn.ip_dump[inst_off + j]); - } else { - drm_printf(p, "\nInactive Instance:VCN%d\n", i); - } - } -} - -void vcn_v5_0_0_dump_ip_state(struct amdgpu_ip_block *ip_block) -{ - struct amdgpu_device *adev = ip_block->adev; - int i, j; - bool is_powered; - uint32_t inst_off; - uint32_t reg_count = ARRAY_SIZE(vcn_reg_list_5_0); - - if (!adev->vcn.ip_dump) - return; - - for (i = 0; i < adev->vcn.num_vcn_inst; i++) { - if (adev->vcn.harvest_config & (1 << i)) - continue; - - inst_off = i * reg_count; - /* mmUVD_POWER_STATUS is always readable and is first element of the array */ - adev->vcn.ip_dump[inst_off] = RREG32_SOC15(VCN, i, regUVD_POWER_STATUS); - is_powered = (adev->vcn.ip_dump[inst_off] & - UVD_POWER_STATUS__UVD_POWER_STATUS_MASK) != 1; - - if (is_powered) - for (j = 1; j < reg_count; j++) - adev->vcn.ip_dump[inst_off + j] = - RREG32(SOC15_REG_ENTRY_OFFSET_INST(vcn_reg_list_5_0[j], i)); - } -} - static const struct amd_ip_funcs vcn_v5_0_0_ip_funcs = { .name = "vcn_v5_0_0", .early_init = vcn_v5_0_0_early_init, @@ -1497,8 +1429,8 @@ static const struct amd_ip_funcs vcn_v5_0_0_ip_funcs = { .wait_for_idle = vcn_v5_0_0_wait_for_idle, .set_clockgating_state = vcn_v5_0_0_set_clockgating_state, .set_powergating_state = vcn_set_powergating_state, - .dump_ip_state = vcn_v5_0_0_dump_ip_state, - .print_ip_state = vcn_v5_0_0_print_ip_state, + .dump_ip_state = amdgpu_vcn_dump_ip_state, + .print_ip_state = amdgpu_vcn_print_ip_state, }; const struct amdgpu_ip_block_version vcn_v5_0_0_ip_block = { diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.h b/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.h index b8927652bc50..51bbccd4360f 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.h +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.h @@ -32,11 +32,6 @@ #define VCN_VID_IP_ADDRESS 0x0 #define VCN_AON_IP_ADDRESS 0x30000 -void vcn_v5_0_0_alloc_ip_dump(struct amdgpu_device *adev); -void vcn_v5_0_0_print_ip_state(struct amdgpu_ip_block *ip_block, - struct drm_printer *p); -void vcn_v5_0_0_dump_ip_state(struct amdgpu_ip_block *ip_block); - extern const struct amdgpu_ip_block_version vcn_v5_0_0_ip_block; #endif /* __VCN_V5_0_0_H__ */ diff --git 
a/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_1.c b/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_1.c index 338cf43c45fe..714350cabf2f 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_1.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_1.c @@ -40,6 +40,40 @@ #include <drm/drm_drv.h> +static const struct amdgpu_hwip_reg_entry vcn_reg_list_5_0_1[] = { + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_POWER_STATUS), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_STATUS), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_CONTEXT_ID), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_CONTEXT_ID2), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_GPCOM_VCPU_DATA0), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_GPCOM_VCPU_DATA1), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_GPCOM_VCPU_CMD), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_BASE_HI), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_BASE_LO), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_BASE_HI2), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_BASE_LO2), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_BASE_HI3), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_BASE_LO3), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_BASE_HI4), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_BASE_LO4), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_RPTR), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_WPTR), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_RPTR2), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_WPTR2), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_RPTR3), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_WPTR3), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_RPTR4), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_WPTR4), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_SIZE), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_SIZE2), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_SIZE3), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_SIZE4), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_DPG_LMA_CTL), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_DPG_LMA_DATA), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_DPG_LMA_MASK), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_DPG_PAUSE) +}; + static int vcn_v5_0_1_start_sriov(struct amdgpu_device *adev); static void vcn_v5_0_1_set_unified_ring_funcs(struct amdgpu_device *adev); static void vcn_v5_0_1_set_irq_funcs(struct amdgpu_device *adev); @@ -79,6 +113,25 @@ static int vcn_v5_0_1_early_init(struct amdgpu_ip_block *ip_block) return 0; } +static int vcn_v5_0_1_late_init(struct amdgpu_ip_block *ip_block) +{ + struct amdgpu_device *adev = ip_block->adev; + + adev->vcn.supported_reset = + amdgpu_get_soft_full_reset_mask(&adev->vcn.inst[0].ring_enc[0]); + + switch (amdgpu_ip_version(adev, MP0_HWIP, 0)) { + case IP_VERSION(13, 0, 12): + if ((adev->psp.sos.fw_version >= 0x00450025) && amdgpu_dpm_reset_vcn_is_supported(adev)) + adev->vcn.supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE; + break; + default: + break; + } + + return 0; +} + static void vcn_v5_0_1_fw_shared_init(struct amdgpu_device *adev, int inst_idx) { struct amdgpu_vcn5_fw_shared *fw_shared; @@ -153,17 +206,23 @@ static int vcn_v5_0_1_sw_init(struct amdgpu_ip_block *ip_block) vcn_v5_0_1_fw_shared_init(adev, i); } - /* TODO: Add queue reset mask when FW fully supports it */ - adev->vcn.supported_reset = - amdgpu_get_soft_full_reset_mask(&adev->vcn.inst[0].ring_enc[0]); - if (amdgpu_sriov_vf(adev)) { r = amdgpu_virt_alloc_mm_table(adev); if (r) return r; } - vcn_v5_0_0_alloc_ip_dump(adev); + if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__VCN)) { + r = amdgpu_vcn_ras_sw_init(adev); + if (r) { + dev_err(adev->dev, "Failed to initialize vcn ras block!\n"); + return r; + } + } + + r = amdgpu_vcn_reg_dump_init(adev, vcn_reg_list_5_0_1, ARRAY_SIZE(vcn_reg_list_5_0_1)); + if (r) + return r; return 
amdgpu_vcn_sysfs_reset_mask_init(adev); } @@ -182,7 +241,7 @@ static int vcn_v5_0_1_sw_fini(struct amdgpu_ip_block *ip_block) if (drm_dev_enter(adev_to_drm(adev), &idx)) { for (i = 0; i < adev->vcn.num_vcn_inst; i++) { - volatile struct amdgpu_vcn5_fw_shared *fw_shared; + struct amdgpu_vcn5_fw_shared *fw_shared; fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr; fw_shared->present_flag_0 = 0; @@ -201,15 +260,27 @@ static int vcn_v5_0_1_sw_fini(struct amdgpu_ip_block *ip_block) return r; } - for (i = 0; i < adev->vcn.num_vcn_inst; i++) { - r = amdgpu_vcn_sw_fini(adev, i); - if (r) - return r; - } - amdgpu_vcn_sysfs_reset_mask_fini(adev); - kfree(adev->vcn.ip_dump); + for (i = 0; i < adev->vcn.num_vcn_inst; i++) + amdgpu_vcn_sw_fini(adev, i); + + return 0; +} + +static int vcn_v5_0_1_hw_init_inst(struct amdgpu_device *adev, int i) +{ + struct amdgpu_ring *ring; + int vcn_inst; + + vcn_inst = GET_INST(VCN, i); + ring = &adev->vcn.inst[i].ring_enc[0]; + + if (ring->use_doorbell) + adev->nbio.funcs->vcn_doorbell_range(adev, ring->use_doorbell, + ((adev->doorbell_index.vcn.vcn_ring0_1 << 1) + + 11 * vcn_inst), + adev->vcn.inst[i].aid_id); return 0; } @@ -225,7 +296,7 @@ static int vcn_v5_0_1_hw_init(struct amdgpu_ip_block *ip_block) { struct amdgpu_device *adev = ip_block->adev; struct amdgpu_ring *ring; - int i, r, vcn_inst; + int i, r; if (amdgpu_sriov_vf(adev)) { r = vcn_v5_0_1_start_sriov(adev); @@ -243,14 +314,8 @@ static int vcn_v5_0_1_hw_init(struct amdgpu_ip_block *ip_block) if (RREG32_SOC15(VCN, GET_INST(VCN, 0), regVCN_RRMT_CNTL) & 0x100) adev->vcn.caps |= AMDGPU_VCN_CAPS(RRMT_ENABLED); for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { - vcn_inst = GET_INST(VCN, i); ring = &adev->vcn.inst[i].ring_enc[0]; - - if (ring->use_doorbell) - adev->nbio.funcs->vcn_doorbell_range(adev, ring->use_doorbell, - ((adev->doorbell_index.vcn.vcn_ring0_1 << 1) + - 11 * vcn_inst), - adev->vcn.inst[i].aid_id); + vcn_v5_0_1_hw_init_inst(adev, i); /* Re-init fw_shared, if required */ vcn_v5_0_1_fw_shared_init(adev, i); @@ -284,7 +349,7 @@ static int vcn_v5_0_1_hw_fini(struct amdgpu_ip_block *ip_block) vinst->set_pg_state(vinst, AMD_PG_STATE_GATE); } - if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__VCN)) + if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__VCN) && !amdgpu_sriov_vf(adev)) amdgpu_irq_put(adev, &adev->vcn.inst->ras_poison_irq, 0); return 0; @@ -601,11 +666,11 @@ static int vcn_v5_0_1_start_dpg_mode(struct amdgpu_vcn_inst *vinst, { struct amdgpu_device *adev = vinst->adev; int inst_idx = vinst->inst; - volatile struct amdgpu_vcn5_fw_shared *fw_shared = + struct amdgpu_vcn5_fw_shared *fw_shared = adev->vcn.inst[inst_idx].fw_shared.cpu_addr; struct amdgpu_ring *ring; struct dpg_pause_state state = {.fw_based = VCN_DPG_STATE__PAUSE}; - int vcn_inst; + int vcn_inst, ret; uint32_t tmp; vcn_inst = GET_INST(VCN, inst_idx); @@ -666,8 +731,16 @@ static int vcn_v5_0_1_start_dpg_mode(struct amdgpu_vcn_inst *vinst, VCN, 0, regUVD_MASTINT_EN), UVD_MASTINT_EN__VCPU_EN_MASK, 0, indirect); - if (indirect) - amdgpu_vcn_psp_update_sram(adev, inst_idx, AMDGPU_UCODE_ID_VCN0_RAM); + if (indirect) { + ret = amdgpu_vcn_psp_update_sram(adev, inst_idx, AMDGPU_UCODE_ID_VCN0_RAM); + if (ret) { + dev_err(adev->dev, "vcn sram load failed %d\n", ret); + return ret; + } + } + + /* resetting ring, fw should not check RB ring */ + fw_shared->sq.queue_mode |= FW_QUEUE_RING_RESET; /* Pause dpg */ vcn_v5_0_1_pause_dpg_mode(vinst, &state); @@ -681,7 +754,7 @@ static int vcn_v5_0_1_start_dpg_mode(struct amdgpu_vcn_inst *vinst, tmp 
= RREG32_SOC15(VCN, vcn_inst, regVCN_RB_ENABLE); tmp &= ~(VCN_RB_ENABLE__RB1_EN_MASK); WREG32_SOC15(VCN, vcn_inst, regVCN_RB_ENABLE, tmp); - fw_shared->sq.queue_mode |= FW_QUEUE_RING_RESET; + WREG32_SOC15(VCN, vcn_inst, regUVD_RB_RPTR, 0); WREG32_SOC15(VCN, vcn_inst, regUVD_RB_WPTR, 0); @@ -692,6 +765,7 @@ static int vcn_v5_0_1_start_dpg_mode(struct amdgpu_vcn_inst *vinst, tmp = RREG32_SOC15(VCN, vcn_inst, regVCN_RB_ENABLE); tmp |= VCN_RB_ENABLE__RB1_EN_MASK; WREG32_SOC15(VCN, vcn_inst, regVCN_RB_ENABLE, tmp); + /* resetting done, fw can check RB ring */ fw_shared->sq.queue_mode &= ~(FW_QUEUE_RING_RESET | FW_QUEUE_DPG_HOLD_OFF); WREG32_SOC15(VCN, vcn_inst, regVCN_RB1_DB_CTRL, @@ -728,8 +802,8 @@ static int vcn_v5_0_1_start_sriov(struct amdgpu_device *adev) struct mmsch_v5_0_cmd_end end = { {0} }; struct mmsch_v5_0_init_header header; - volatile struct amdgpu_vcn5_fw_shared *fw_shared; - volatile struct amdgpu_fw_shared_rb_setup *rb_setup; + struct amdgpu_vcn5_fw_shared *fw_shared; + struct amdgpu_fw_shared_rb_setup *rb_setup; direct_wt.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_WRITE; @@ -903,7 +977,7 @@ static int vcn_v5_0_1_start(struct amdgpu_vcn_inst *vinst) { struct amdgpu_device *adev = vinst->adev; int i = vinst->inst; - volatile struct amdgpu_vcn5_fw_shared *fw_shared; + struct amdgpu_vcn5_fw_shared *fw_shared; struct amdgpu_ring *ring; uint32_t tmp; int j, k, r, vcn_inst; @@ -1095,7 +1169,7 @@ static int vcn_v5_0_1_stop(struct amdgpu_vcn_inst *vinst) { struct amdgpu_device *adev = vinst->adev; int i = vinst->inst; - volatile struct amdgpu_vcn5_fw_shared *fw_shared; + struct amdgpu_vcn5_fw_shared *fw_shared; uint32_t tmp; int r = 0, vcn_inst; @@ -1225,6 +1299,31 @@ static void vcn_v5_0_1_unified_ring_set_wptr(struct amdgpu_ring *ring) } } +static int vcn_v5_0_1_ring_reset(struct amdgpu_ring *ring, + unsigned int vmid, + struct amdgpu_fence *timedout_fence) +{ + int r = 0; + int vcn_inst; + struct amdgpu_device *adev = ring->adev; + struct amdgpu_vcn_inst *vinst = &adev->vcn.inst[ring->me]; + + amdgpu_ring_reset_helper_begin(ring, timedout_fence); + + vcn_inst = GET_INST(VCN, ring->me); + r = amdgpu_dpm_reset_vcn(adev, 1 << vcn_inst); + + if (r) { + DRM_DEV_ERROR(adev->dev, "VCN reset fail : %d\n", r); + return r; + } + + vcn_v5_0_1_hw_init_inst(adev, ring->me); + vcn_v5_0_1_start_dpg_mode(vinst, vinst->indirect_sram); + + return amdgpu_ring_reset_helper_end(ring, timedout_fence); +} + static const struct amdgpu_ring_funcs vcn_v5_0_1_unified_ring_vm_funcs = { .type = AMDGPU_RING_TYPE_VCN_ENC, .align_mask = 0x3f, @@ -1253,6 +1352,7 @@ static const struct amdgpu_ring_funcs vcn_v5_0_1_unified_ring_vm_funcs = { .emit_wreg = vcn_v4_0_3_enc_ring_emit_wreg, .emit_reg_wait = vcn_v4_0_3_enc_ring_emit_reg_wait, .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper, + .reset = vcn_v5_0_1_ring_reset, }; /** @@ -1456,7 +1556,7 @@ static void vcn_v5_0_1_set_irq_funcs(struct amdgpu_device *adev) static const struct amd_ip_funcs vcn_v5_0_1_ip_funcs = { .name = "vcn_v5_0_1", .early_init = vcn_v5_0_1_early_init, - .late_init = NULL, + .late_init = vcn_v5_0_1_late_init, .sw_init = vcn_v5_0_1_sw_init, .sw_fini = vcn_v5_0_1_sw_fini, .hw_init = vcn_v5_0_1_hw_init, @@ -1471,8 +1571,8 @@ static const struct amd_ip_funcs vcn_v5_0_1_ip_funcs = { .post_soft_reset = NULL, .set_clockgating_state = vcn_v5_0_1_set_clockgating_state, .set_powergating_state = vcn_set_powergating_state, - .dump_ip_state = vcn_v5_0_0_dump_ip_state, - .print_ip_state = vcn_v5_0_0_print_ip_state, + 
.dump_ip_state = amdgpu_vcn_dump_ip_state, + .print_ip_state = amdgpu_vcn_print_ip_state, }; const struct amdgpu_ip_block_version vcn_v5_0_1_ip_block = { @@ -1553,7 +1653,7 @@ static int vcn_v5_0_1_aca_bank_parser(struct aca_handle *handle, struct aca_bank /* reference to smu driver if header file */ static int vcn_v5_0_1_err_codes[] = { - 14, 15, /* VCN */ + 14, 15, 47, /* VCN [D|V|S] */ }; static bool vcn_v5_0_1_aca_bank_is_valid(struct aca_handle *handle, struct aca_bank *bank, @@ -1599,6 +1699,13 @@ static int vcn_v5_0_1_ras_late_init(struct amdgpu_device *adev, struct ras_commo if (r) goto late_fini; + if (amdgpu_ras_is_supported(adev, ras_block->block) && + adev->vcn.inst->ras_poison_irq.funcs) { + r = amdgpu_irq_get(adev, &adev->vcn.inst->ras_poison_irq, 0); + if (r) + goto late_fini; + } + return 0; late_fini: diff --git a/drivers/gpu/drm/amd/amdgpu/vi.c b/drivers/gpu/drm/amd/amdgpu/vi.c index 9b3510e53112..a611a7345125 100644 --- a/drivers/gpu/drm/amd/amdgpu/vi.c +++ b/drivers/gpu/drm/amd/amdgpu/vi.c @@ -67,7 +67,6 @@ #include "sdma_v2_4.h" #include "sdma_v3_0.h" #include "dce_v10_0.h" -#include "dce_v11_0.h" #include "iceland_ih.h" #include "tonga_ih.h" #include "cz_ih.h" @@ -2124,8 +2123,6 @@ int vi_set_ip_blocks(struct amdgpu_device *adev) else if (amdgpu_device_has_dc_support(adev)) amdgpu_device_ip_block_add(adev, &dm_ip_block); #endif - else - amdgpu_device_ip_block_add(adev, &dce_v11_2_ip_block); amdgpu_device_ip_block_add(adev, &uvd_v6_3_ip_block); amdgpu_device_ip_block_add(adev, &vce_v3_4_ip_block); break; @@ -2142,8 +2139,6 @@ int vi_set_ip_blocks(struct amdgpu_device *adev) else if (amdgpu_device_has_dc_support(adev)) amdgpu_device_ip_block_add(adev, &dm_ip_block); #endif - else - amdgpu_device_ip_block_add(adev, &dce_v11_0_ip_block); amdgpu_device_ip_block_add(adev, &uvd_v6_0_ip_block); amdgpu_device_ip_block_add(adev, &vce_v3_1_ip_block); #if defined(CONFIG_DRM_AMD_ACP) @@ -2163,8 +2158,6 @@ int vi_set_ip_blocks(struct amdgpu_device *adev) else if (amdgpu_device_has_dc_support(adev)) amdgpu_device_ip_block_add(adev, &dm_ip_block); #endif - else - amdgpu_device_ip_block_add(adev, &dce_v11_0_ip_block); amdgpu_device_ip_block_add(adev, &uvd_v6_2_ip_block); amdgpu_device_ip_block_add(adev, &vce_v3_4_ip_block); #if defined(CONFIG_DRM_AMD_ACP) diff --git a/drivers/gpu/drm/amd/amdkfd/Kconfig b/drivers/gpu/drm/amd/amdkfd/Kconfig index 62e88e5362e9..16e12c9913f9 100644 --- a/drivers/gpu/drm/amd/amdkfd/Kconfig +++ b/drivers/gpu/drm/amd/amdkfd/Kconfig @@ -5,7 +5,7 @@ config HSA_AMD bool "HSA kernel driver for AMD GPU devices" - depends on DRM_AMDGPU && (X86_64 || ARM64 || PPC64 || (RISCV && 64BIT)) + depends on DRM_AMDGPU && (X86_64 || ARM64 || PPC64 || (RISCV && 64BIT) || (LOONGARCH && 64BIT)) select HMM_MIRROR select MMU_NOTIFIER select DRM_AMDGPU_USERPTR diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c index a2149afa5803..0f0719528bcc 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c @@ -22,7 +22,6 @@ */ #include <linux/device.h> -#include <linux/export.h> #include <linux/err.h> #include <linux/fs.h> #include <linux/file.h> @@ -522,15 +521,10 @@ static int kfd_ioctl_set_cu_mask(struct file *filp, struct kfd_process *p, cu_mask_size = sizeof(uint32_t) * (max_num_cus/32); } - minfo.cu_mask.ptr = kzalloc(cu_mask_size, GFP_KERNEL); - if (!minfo.cu_mask.ptr) - return -ENOMEM; - - retval = copy_from_user(minfo.cu_mask.ptr, cu_mask_ptr, cu_mask_size); - if (retval) { + 
minfo.cu_mask.ptr = memdup_user(cu_mask_ptr, cu_mask_size); + if (IS_ERR(minfo.cu_mask.ptr)) { pr_debug("Could not copy CU mask from userspace"); - retval = -EFAULT; - goto out; + return PTR_ERR(minfo.cu_mask.ptr); } mutex_lock(&p->mutex); @@ -539,7 +533,6 @@ static int kfd_ioctl_set_cu_mask(struct file *filp, struct kfd_process *p, mutex_unlock(&p->mutex); -out: kfree(minfo.cu_mask.ptr); return retval; } @@ -1071,7 +1064,12 @@ static int kfd_ioctl_alloc_memory_of_gpu(struct file *filep, svm_range_list_lock_and_flush_work(&p->svms, current->mm); mutex_lock(&p->svms.lock); mmap_write_unlock(current->mm); - if (interval_tree_iter_first(&p->svms.objects, + + /* Skip a special case that allocates VRAM without VA, + * VA will be invalid of 0. + */ + if (!(!args->va_addr && (flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM)) && + interval_tree_iter_first(&p->svms.objects, args->va_addr >> PAGE_SHIFT, (args->va_addr + args->size - 1) >> PAGE_SHIFT)) { pr_err("Address: 0x%llx already allocated by SVM\n", @@ -2567,8 +2565,8 @@ static int criu_restore(struct file *filep, pr_debug("CRIU restore (num_devices:%u num_bos:%u num_objects:%u priv_data_size:%llu)\n", args->num_devices, args->num_bos, args->num_objects, args->priv_data_size); - if (!args->bos || !args->devices || !args->priv_data || !args->priv_data_size || - !args->num_devices || !args->num_bos) + if ((args->num_bos > 0 && !args->bos) || !args->devices || !args->priv_data || + !args->priv_data_size || !args->num_devices) return -EINVAL; mutex_lock(&p->mutex); @@ -3253,8 +3251,10 @@ static long kfd_ioctl(struct file *filep, unsigned int cmd, unsigned long arg) int retcode = -EINVAL; bool ptrace_attached = false; - if (nr >= AMDKFD_CORE_IOCTL_COUNT) + if (nr >= AMDKFD_CORE_IOCTL_COUNT) { + retcode = -ENOTTY; goto err_i1; + } if ((nr >= AMDKFD_COMMAND_START) && (nr < AMDKFD_COMMAND_END)) { u32 amdkfd_size; @@ -3267,8 +3267,10 @@ static long kfd_ioctl(struct file *filep, unsigned int cmd, unsigned long arg) asize = amdkfd_size; cmd = ioctl->cmd; - } else + } else { + retcode = -ENOTTY; goto err_i1; + } dev_dbg(kfd_device, "ioctl cmd 0x%x (#0x%x), arg 0x%lx\n", cmd, nr, arg); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c index bf0854bd5555..e9cfb80bd436 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c @@ -495,6 +495,7 @@ struct kfd_dev *kgd2kfd_probe(struct amdgpu_device *adev, bool vf) mutex_init(&kfd->doorbell_mutex); ida_init(&kfd->doorbell_ida); + atomic_set(&kfd->kfd_processes_count, 0); return kfd; } @@ -971,7 +972,7 @@ int kgd2kfd_pre_reset(struct kfd_dev *kfd, kfd_smi_event_update_gpu_reset(node, false, reset_context); } - kgd2kfd_suspend(kfd, false); + kgd2kfd_suspend(kfd, true); for (i = 0; i < kfd->num_nodes; i++) kfd_signal_reset_event(kfd->nodes[i]); @@ -1013,13 +1014,33 @@ int kgd2kfd_post_reset(struct kfd_dev *kfd) return 0; } -bool kfd_is_locked(void) +bool kfd_is_locked(struct kfd_dev *kfd) { + uint8_t id = 0; + struct kfd_node *dev; + lockdep_assert_held(&kfd_processes_mutex); - return (kfd_locked > 0); + + /* check reset/suspend lock */ + if (kfd_locked > 0) + return true; + + if (kfd) + return kfd->kfd_dev_lock > 0; + + /* check lock on all cgroup accessible devices */ + while (kfd_topology_enum_kfd_devices(id++, &dev) == 0) { + if (!dev || kfd_devcgroup_check_permission(dev)) + continue; + + if (dev->kfd->kfd_dev_lock > 0) + return true; + } + + return false; } -void kgd2kfd_suspend(struct kfd_dev *kfd, bool run_pm) +void 
kgd2kfd_suspend(struct kfd_dev *kfd, bool suspend_proc) { struct kfd_node *node; int i; @@ -1027,14 +1048,8 @@ void kgd2kfd_suspend(struct kfd_dev *kfd, bool run_pm) if (!kfd->init_complete) return; - /* for runtime suspend, skip locking kfd */ - if (!run_pm) { - mutex_lock(&kfd_processes_mutex); - /* For first KFD device suspend all the KFD processes */ - if (++kfd_locked == 1) - kfd_suspend_all_processes(); - mutex_unlock(&kfd_processes_mutex); - } + if (suspend_proc) + kgd2kfd_suspend_process(kfd); for (i = 0; i < kfd->num_nodes; i++) { node = kfd->nodes[i]; @@ -1042,7 +1057,7 @@ void kgd2kfd_suspend(struct kfd_dev *kfd, bool run_pm) } } -int kgd2kfd_resume(struct kfd_dev *kfd, bool run_pm) +int kgd2kfd_resume(struct kfd_dev *kfd, bool resume_proc) { int ret, i; @@ -1055,14 +1070,36 @@ int kgd2kfd_resume(struct kfd_dev *kfd, bool run_pm) return ret; } - /* for runtime resume, skip unlocking kfd */ - if (!run_pm) { - mutex_lock(&kfd_processes_mutex); - if (--kfd_locked == 0) - ret = kfd_resume_all_processes(); - WARN_ONCE(kfd_locked < 0, "KFD suspend / resume ref. error"); - mutex_unlock(&kfd_processes_mutex); - } + if (resume_proc) + ret = kgd2kfd_resume_process(kfd); + + return ret; +} + +void kgd2kfd_suspend_process(struct kfd_dev *kfd) +{ + if (!kfd->init_complete) + return; + + mutex_lock(&kfd_processes_mutex); + /* For first KFD device suspend all the KFD processes */ + if (++kfd_locked == 1) + kfd_suspend_all_processes(); + mutex_unlock(&kfd_processes_mutex); +} + +int kgd2kfd_resume_process(struct kfd_dev *kfd) +{ + int ret = 0; + + if (!kfd->init_complete) + return 0; + + mutex_lock(&kfd_processes_mutex); + if (--kfd_locked == 0) + ret = kfd_resume_all_processes(); + WARN_ONCE(kfd_locked < 0, "KFD suspend / resume ref. error"); + mutex_unlock(&kfd_processes_mutex); return ret; } @@ -1097,7 +1134,15 @@ void kgd2kfd_interrupt(struct kfd_dev *kfd, const void *ih_ring_entry) } for (i = 0; i < kfd->num_nodes; i++) { - node = kfd->nodes[i]; + /* Race if another thread in b/w + * kfd_cleanup_nodes and kfree(kfd), + * when kfd->nodes[i] = NULL + */ + if (kfd->nodes[i]) + node = kfd->nodes[i]; + else + return; + spin_lock_irqsave(&node->interrupt_lock, flags); if (node->interrupts_active @@ -1442,24 +1487,62 @@ unsigned int kfd_get_num_xgmi_sdma_engines(struct kfd_node *node) kfd_get_num_sdma_engines(node); } -int kgd2kfd_check_and_lock_kfd(void) +int kgd2kfd_check_and_lock_kfd(struct kfd_dev *kfd) { + struct kfd_process *p; + int r = 0, temp, idx; + mutex_lock(&kfd_processes_mutex); - if (!hash_empty(kfd_processes_table) || kfd_is_locked()) { - mutex_unlock(&kfd_processes_mutex); - return -EBUSY; + + /* kfd_processes_count is per kfd_dev, return -EBUSY without + * further check + */ + if (!!atomic_read(&kfd->kfd_processes_count)) { + pr_debug("process_wq_release not finished\n"); + r = -EBUSY; + goto out; + } + + if (hash_empty(kfd_processes_table) && !kfd_is_locked(kfd)) + goto out; + + /* fail under system reset/resume or kfd device is partition switching. */ + if (kfd_is_locked(kfd)) { + r = -EBUSY; + goto out; } - ++kfd_locked; + /* + * ensure all running processes are cgroup excluded from device before mode switch. + * i.e. no pdd was created on the process socket. 
+ */ + idx = srcu_read_lock(&kfd_processes_srcu); + hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) { + int i; + + for (i = 0; i < p->n_pdds; i++) { + if (p->pdds[i]->dev->kfd != kfd) + continue; + + r = -EBUSY; + goto proc_check_unlock; + } + } + +proc_check_unlock: + srcu_read_unlock(&kfd_processes_srcu, idx); +out: + if (!r) + ++kfd->kfd_dev_lock; mutex_unlock(&kfd_processes_mutex); - return 0; + return r; } -void kgd2kfd_unlock_kfd(void) +void kgd2kfd_unlock_kfd(struct kfd_dev *kfd) { mutex_lock(&kfd_processes_mutex); - --kfd_locked; + --kfd->kfd_dev_lock; mutex_unlock(&kfd_processes_mutex); } @@ -1485,6 +1568,25 @@ int kgd2kfd_start_sched(struct kfd_dev *kfd, uint32_t node_id) return ret; } +int kgd2kfd_start_sched_all_nodes(struct kfd_dev *kfd) +{ + struct kfd_node *node; + int i, r; + + if (!kfd->init_complete) + return 0; + + for (i = 0; i < kfd->num_nodes; i++) { + node = kfd->nodes[i]; + r = node->dqm->ops.unhalt(node->dqm); + if (r) { + dev_err(kfd_device, "Error in starting scheduler\n"); + return r; + } + } + return 0; +} + int kgd2kfd_stop_sched(struct kfd_dev *kfd, uint32_t node_id) { struct kfd_node *node; @@ -1502,6 +1604,23 @@ int kgd2kfd_stop_sched(struct kfd_dev *kfd, uint32_t node_id) return node->dqm->ops.halt(node->dqm); } +int kgd2kfd_stop_sched_all_nodes(struct kfd_dev *kfd) +{ + struct kfd_node *node; + int i, r; + + if (!kfd->init_complete) + return 0; + + for (i = 0; i < kfd->num_nodes; i++) { + node = kfd->nodes[i]; + r = node->dqm->ops.halt(node->dqm); + if (r) + return r; + } + return 0; +} + bool kgd2kfd_compute_active(struct kfd_dev *kfd, uint32_t node_id) { struct kfd_node *node; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c index 76359c6a3f3a..6e7bc983fc0b 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c @@ -1209,6 +1209,15 @@ static int evict_process_queues_cpsch(struct device_queue_manager *dqm, pr_debug_ratelimited("Evicting process pid %d queues\n", pdd->process->lead_thread->pid); + if (dqm->dev->kfd->shared_resources.enable_mes) { + pdd->last_evict_timestamp = get_jiffies_64(); + retval = suspend_all_queues_mes(dqm); + if (retval) { + dev_err(dev, "Suspending all queues failed"); + goto out; + } + } + /* Mark all queues as evicted. Deactivate all active queues on * the qpd. */ @@ -1221,23 +1230,27 @@ static int evict_process_queues_cpsch(struct device_queue_manager *dqm, decrement_queue_count(dqm, qpd, q); if (dqm->dev->kfd->shared_resources.enable_mes) { - int err; - - err = remove_queue_mes(dqm, q, qpd); - if (err) { + retval = remove_queue_mes(dqm, q, qpd); + if (retval) { dev_err(dev, "Failed to evict queue %d\n", q->properties.queue_id); - retval = err; + goto out; } } } - pdd->last_evict_timestamp = get_jiffies_64(); - if (!dqm->dev->kfd->shared_resources.enable_mes) + + if (!dqm->dev->kfd->shared_resources.enable_mes) { + pdd->last_evict_timestamp = get_jiffies_64(); retval = execute_queues_cpsch(dqm, qpd->is_debug ? KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES : KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0, USE_DEFAULT_GRACE_PERIOD); + } else { + retval = resume_all_queues_mes(dqm); + if (retval) + dev_err(dev, "Resuming all queues failed"); + } out: dqm_unlock(dqm); @@ -2312,7 +2325,7 @@ static int reset_hung_queues_sdma(struct device_queue_manager *dqm) continue; /* Reset engine and check. 
*/ - if (amdgpu_sdma_reset_engine(dqm->dev->adev, i) || + if (amdgpu_sdma_reset_engine(dqm->dev->adev, i, false) || dqm->dev->kfd2kgd->hqd_sdma_get_doorbell(dqm->dev->adev, i, j) || !set_sdma_queue_as_reset(dqm, doorbell_off)) { r = -ENOTRECOVERABLE; @@ -2339,9 +2352,18 @@ reset_fail: static int reset_queues_on_hws_hang(struct device_queue_manager *dqm, bool is_sdma) { + struct amdgpu_device *adev = dqm->dev->adev; + while (halt_if_hws_hang) schedule(); + if (adev->debug_disable_gpu_ring_reset) { + dev_info_once(adev->dev, + "%s queue hung, but ring reset disabled", + is_sdma ? "sdma" : "compute"); + + return -EPERM; + } if (!amdgpu_gpu_recovery) return -ENOTRECOVERABLE; @@ -2716,7 +2738,7 @@ static void get_queue_checkpoint_info(struct device_queue_manager *dqm, dqm_lock(dqm); mqd_mgr = dqm->mqd_mgrs[mqd_type]; - *mqd_size = mqd_mgr->mqd_size; + *mqd_size = mqd_mgr->mqd_size * NUM_XCC(mqd_mgr->dev->xcc_mask); *ctl_stack_size = 0; if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE && mqd_mgr->get_checkpoint_info) @@ -3089,61 +3111,17 @@ out: return ret; } -static int kfd_dqm_evict_pasid_mes(struct device_queue_manager *dqm, - struct qcm_process_device *qpd) -{ - struct device *dev = dqm->dev->adev->dev; - int ret = 0; - - /* Check if process is already evicted */ - dqm_lock(dqm); - if (qpd->evicted) { - /* Increment the evicted count to make sure the - * process stays evicted before its terminated. - */ - qpd->evicted++; - dqm_unlock(dqm); - goto out; - } - dqm_unlock(dqm); - - ret = suspend_all_queues_mes(dqm); - if (ret) { - dev_err(dev, "Suspending all queues failed"); - goto out; - } - - ret = dqm->ops.evict_process_queues(dqm, qpd); - if (ret) { - dev_err(dev, "Evicting process queues failed"); - goto out; - } - - ret = resume_all_queues_mes(dqm); - if (ret) - dev_err(dev, "Resuming all queues failed"); - -out: - return ret; -} - int kfd_evict_process_device(struct kfd_process_device *pdd) { struct device_queue_manager *dqm; struct kfd_process *p; - int ret = 0; p = pdd->process; dqm = pdd->dev->dqm; WARN(debug_evictions, "Evicting pid %d", p->lead_thread->pid); - if (dqm->dev->kfd->shared_resources.enable_mes) - ret = kfd_dqm_evict_pasid_mes(dqm, &pdd->qpd); - else - ret = dqm->ops.evict_process_queues(dqm, &pdd->qpd); - - return ret; + return dqm->ops.evict_process_queues(dqm, &pdd->qpd); } int reserve_debug_trap_vmid(struct device_queue_manager *dqm, diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c b/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c index dbcb60eb54b2..1d170dc50df3 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c @@ -23,7 +23,6 @@ */ #include <linux/device.h> -#include <linux/export.h> #include <linux/err.h> #include <linux/fs.h> #include <linux/sched.h> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c index 2b0a830f5b29..fb3129883a4c 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c @@ -46,11 +46,7 @@ static bool kq_initialize(struct kernel_queue *kq, struct kfd_node *dev, int retval; union PM4_MES_TYPE_3_HEADER nop; - if (WARN_ON(type != KFD_QUEUE_TYPE_DIQ && type != KFD_QUEUE_TYPE_HIQ)) - return false; - - pr_debug("Initializing queue type %d size %d\n", KFD_QUEUE_TYPE_HIQ, - queue_size); + pr_debug("Initializing queue type %d size %d\n", type, queue_size); memset(&prop, 0, sizeof(prop)); memset(&nop, 0, sizeof(nop)); @@ -69,6 +65,7 @@ static bool kq_initialize(struct kernel_queue 
*kq, struct kfd_node *dev, kq->mqd_mgr = dev->dqm->mqd_mgrs[KFD_MQD_TYPE_HIQ]; break; default: + WARN(1, "Invalid queue type %d\n", type); dev_err(dev->adev->dev, "Invalid queue type %d\n", type); return false; } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c index 79251f22b702..59a5a3fea65d 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c @@ -39,22 +39,22 @@ #endif #define dev_fmt(fmt) "kfd_migrate: " fmt -static uint64_t -svm_migrate_direct_mapping_addr(struct amdgpu_device *adev, uint64_t addr) +static u64 +svm_migrate_direct_mapping_addr(struct amdgpu_device *adev, u64 addr) { return addr + amdgpu_ttm_domain_start(adev, TTM_PL_VRAM); } static int -svm_migrate_gart_map(struct amdgpu_ring *ring, uint64_t npages, - dma_addr_t *addr, uint64_t *gart_addr, uint64_t flags) +svm_migrate_gart_map(struct amdgpu_ring *ring, u64 npages, + dma_addr_t *addr, u64 *gart_addr, u64 flags) { struct amdgpu_device *adev = ring->adev; struct amdgpu_job *job; unsigned int num_dw, num_bytes; struct dma_fence *fence; - uint64_t src_addr, dst_addr; - uint64_t pte_flags; + u64 src_addr, dst_addr; + u64 pte_flags; void *cpu_addr; int r; @@ -68,7 +68,8 @@ svm_migrate_gart_map(struct amdgpu_ring *ring, uint64_t npages, AMDGPU_FENCE_OWNER_UNDEFINED, num_dw * 4 + num_bytes, AMDGPU_IB_POOL_DELAYED, - &job); + &job, + AMDGPU_KERNEL_JOB_ID_KFD_GART_MAP); if (r) return r; @@ -122,15 +123,15 @@ svm_migrate_gart_map(struct amdgpu_ring *ring, uint64_t npages, static int svm_migrate_copy_memory_gart(struct amdgpu_device *adev, dma_addr_t *sys, - uint64_t *vram, uint64_t npages, + u64 *vram, u64 npages, enum MIGRATION_COPY_DIR direction, struct dma_fence **mfence) { - const uint64_t GTT_MAX_PAGES = AMDGPU_GTT_MAX_TRANSFER_SIZE; + const u64 GTT_MAX_PAGES = AMDGPU_GTT_MAX_TRANSFER_SIZE; struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring; - uint64_t gart_s, gart_d; + u64 gart_s, gart_d; struct dma_fence *next; - uint64_t size; + u64 size; int r; mutex_lock(&adev->mman.gtt_window_lock); @@ -260,39 +261,39 @@ static void svm_migrate_put_sys_page(unsigned long addr) put_page(page); } -static unsigned long svm_migrate_unsuccessful_pages(struct migrate_vma *migrate) +static unsigned long svm_migrate_successful_pages(struct migrate_vma *migrate) { - unsigned long upages = 0; + unsigned long mpages = 0; unsigned long i; for (i = 0; i < migrate->npages; i++) { - if (migrate->src[i] & MIGRATE_PFN_VALID && - !(migrate->src[i] & MIGRATE_PFN_MIGRATE)) - upages++; + if (migrate->dst[i] & MIGRATE_PFN_VALID && + migrate->src[i] & MIGRATE_PFN_MIGRATE) + mpages++; } - return upages; + return mpages; } static int svm_migrate_copy_to_vram(struct kfd_node *node, struct svm_range *prange, struct migrate_vma *migrate, struct dma_fence **mfence, - dma_addr_t *scratch, uint64_t ttm_res_offset) + dma_addr_t *scratch, u64 ttm_res_offset) { - uint64_t npages = migrate->npages; + u64 npages = migrate->npages; struct amdgpu_device *adev = node->adev; struct device *dev = adev->dev; struct amdgpu_res_cursor cursor; - uint64_t mpages = 0; + u64 mpages = 0; dma_addr_t *src; - uint64_t *dst; - uint64_t i, j; + u64 *dst; + u64 i, j; int r; pr_debug("svms 0x%p [0x%lx 0x%lx 0x%llx]\n", prange->svms, prange->start, prange->last, ttm_res_offset); src = scratch; - dst = (uint64_t *)(scratch + npages); + dst = (u64 *)(scratch + npages); amdgpu_res_first(prange->ttm_res, ttm_res_offset, npages << PAGE_SHIFT, &cursor); @@ -385,11 +386,11 @@ out_free_vram_pages: 
static long svm_migrate_vma_to_vram(struct kfd_node *node, struct svm_range *prange, - struct vm_area_struct *vma, uint64_t start, - uint64_t end, uint32_t trigger, uint64_t ttm_res_offset) + struct vm_area_struct *vma, u64 start, + u64 end, uint32_t trigger, u64 ttm_res_offset) { struct kfd_process *p = container_of(prange->svms, struct kfd_process, svms); - uint64_t npages = (end - start) >> PAGE_SHIFT; + u64 npages = (end - start) >> PAGE_SHIFT; struct amdgpu_device *adev = node->adev; struct kfd_process_device *pdd; struct dma_fence *mfence = NULL; @@ -408,7 +409,7 @@ svm_migrate_vma_to_vram(struct kfd_node *node, struct svm_range *prange, migrate.pgmap_owner = SVM_ADEV_PGMAP_OWNER(adev); buf = kvcalloc(npages, - 2 * sizeof(*migrate.src) + sizeof(uint64_t) + sizeof(dma_addr_t), + 2 * sizeof(*migrate.src) + sizeof(u64) + sizeof(dma_addr_t), GFP_KERNEL); if (!buf) goto out; @@ -447,9 +448,9 @@ svm_migrate_vma_to_vram(struct kfd_node *node, struct svm_range *prange, svm_migrate_copy_done(adev, mfence); migrate_vma_finalize(&migrate); - mpages = cpages - svm_migrate_unsuccessful_pages(&migrate); - pr_debug("successful/cpages/npages 0x%lx/0x%lx/0x%lx\n", - mpages, cpages, migrate.npages); + mpages = svm_migrate_successful_pages(&migrate); + pr_debug("migrated/collected/requested 0x%lx/0x%lx/0x%lx\n", + mpages, cpages, migrate.npages); svm_range_dma_unmap_dev(adev->dev, scratch, 0, npages); @@ -490,7 +491,7 @@ svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t best_loc, { unsigned long addr, start, end; struct vm_area_struct *vma; - uint64_t ttm_res_offset; + u64 ttm_res_offset; struct kfd_node *node; unsigned long mpages = 0; long r = 0; @@ -580,14 +581,14 @@ static void svm_migrate_page_free(struct page *page) static int svm_migrate_copy_to_ram(struct amdgpu_device *adev, struct svm_range *prange, struct migrate_vma *migrate, struct dma_fence **mfence, - dma_addr_t *scratch, uint64_t npages) + dma_addr_t *scratch, u64 npages) { struct device *dev = adev->dev; - uint64_t *src; + u64 *src; dma_addr_t *dst; struct page *dpage; - uint64_t i = 0, j; - uint64_t addr; + u64 i = 0, j; + u64 addr; int r = 0; pr_debug("svms 0x%p [0x%lx 0x%lx]\n", prange->svms, prange->start, @@ -595,7 +596,7 @@ svm_migrate_copy_to_ram(struct amdgpu_device *adev, struct svm_range *prange, addr = migrate->start; - src = (uint64_t *)(scratch + npages); + src = (u64 *)(scratch + npages); dst = scratch; for (i = 0, j = 0; i < npages; i++, addr += PAGE_SIZE) { @@ -683,12 +684,11 @@ out_oom: */ static long svm_migrate_vma_to_ram(struct kfd_node *node, struct svm_range *prange, - struct vm_area_struct *vma, uint64_t start, uint64_t end, + struct vm_area_struct *vma, u64 start, u64 end, uint32_t trigger, struct page *fault_page) { struct kfd_process *p = container_of(prange->svms, struct kfd_process, svms); - uint64_t npages = (end - start) >> PAGE_SHIFT; - unsigned long upages = npages; + u64 npages = (end - start) >> PAGE_SHIFT; unsigned long cpages = 0; unsigned long mpages = 0; struct amdgpu_device *adev = node->adev; @@ -710,7 +710,7 @@ svm_migrate_vma_to_ram(struct kfd_node *node, struct svm_range *prange, migrate.flags = MIGRATE_VMA_SELECT_DEVICE_PRIVATE; buf = kvcalloc(npages, - 2 * sizeof(*migrate.src) + sizeof(uint64_t) + sizeof(dma_addr_t), + 2 * sizeof(*migrate.src) + sizeof(u64) + sizeof(dma_addr_t), GFP_KERNEL); if (!buf) goto out; @@ -736,7 +736,6 @@ svm_migrate_vma_to_ram(struct kfd_node *node, struct svm_range *prange, if (!cpages) { pr_debug("failed collect migrate device pages [0x%lx 0x%lx]\n", 
prange->start, prange->last); - upages = svm_migrate_unsuccessful_pages(&migrate); goto out_free; } if (cpages != npages) @@ -749,9 +748,9 @@ svm_migrate_vma_to_ram(struct kfd_node *node, struct svm_range *prange, scratch, npages); migrate_vma_pages(&migrate); - upages = svm_migrate_unsuccessful_pages(&migrate); - pr_debug("unsuccessful/cpages/npages 0x%lx/0x%lx/0x%lx\n", - upages, cpages, migrate.npages); + mpages = svm_migrate_successful_pages(&migrate); + pr_debug("migrated/collected/requested 0x%lx/0x%lx/0x%lx\n", + mpages, cpages, migrate.npages); svm_migrate_copy_done(adev, mfence); migrate_vma_finalize(&migrate); @@ -764,8 +763,7 @@ out_free: start >> PAGE_SHIFT, end >> PAGE_SHIFT, node->id, 0, trigger, r); out: - if (!r && cpages) { - mpages = cpages - upages; + if (!r && mpages) { pdd = svm_range_get_pdd_by_node(prange, node); if (pdd) WRITE_ONCE(pdd->page_out, pdd->page_out + mpages); @@ -848,6 +846,9 @@ int svm_migrate_vram_to_ram(struct svm_range *prange, struct mm_struct *mm, } if (r >= 0) { + WARN_ONCE(prange->vram_pages < mpages, + "Recorded vram pages(0x%llx) should not be less than migration pages(0x%lx).", + prange->vram_pages, mpages); prange->vram_pages -= mpages; /* prange does not have vram page set its actual_loc to system diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_module.c b/drivers/gpu/drm/amd/amdkfd/kfd_module.c index aee2212e52f6..33aa23450b3f 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_module.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_module.c @@ -78,8 +78,8 @@ err_ioctl: static void kfd_exit(void) { kfd_cleanup_processes(); - kfd_debugfs_fini(); kfd_process_destroy_wq(); + kfd_debugfs_fini(); kfd_procfs_shutdown(); kfd_topology_shutdown(); kfd_chardev_exit(); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c index 97933d2a3803..f2dee320fada 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c @@ -373,7 +373,7 @@ static void get_checkpoint_info(struct mqd_manager *mm, void *mqd, u32 *ctl_stac { struct v9_mqd *m = get_mqd(mqd); - *ctl_stack_size = m->cp_hqd_cntl_stack_size; + *ctl_stack_size = m->cp_hqd_cntl_stack_size * NUM_XCC(mm->dev->xcc_mask); } static void checkpoint_mqd(struct mqd_manager *mm, void *mqd, void *mqd_dst, void *ctl_stack_dst) @@ -388,6 +388,24 @@ static void checkpoint_mqd(struct mqd_manager *mm, void *mqd, void *mqd_dst, voi memcpy(ctl_stack_dst, ctl_stack, m->cp_hqd_cntl_stack_size); } +static void checkpoint_mqd_v9_4_3(struct mqd_manager *mm, + void *mqd, + void *mqd_dst, + void *ctl_stack_dst) +{ + struct v9_mqd *m; + int xcc; + uint64_t size = get_mqd(mqd)->cp_mqd_stride_size; + + for (xcc = 0; xcc < NUM_XCC(mm->dev->xcc_mask); xcc++) { + m = get_mqd(mqd + size * xcc); + + checkpoint_mqd(mm, m, + (uint8_t *)mqd_dst + sizeof(*m) * xcc, + (uint8_t *)ctl_stack_dst + m->cp_hqd_cntl_stack_size * xcc); + } +} + static void restore_mqd(struct mqd_manager *mm, void **mqd, struct kfd_mem_obj *mqd_mem_obj, uint64_t *gart_addr, struct queue_properties *qp, @@ -764,13 +782,35 @@ static void restore_mqd_v9_4_3(struct mqd_manager *mm, void **mqd, const void *mqd_src, const void *ctl_stack_src, u32 ctl_stack_size) { - restore_mqd(mm, mqd, mqd_mem_obj, gart_addr, qp, mqd_src, ctl_stack_src, ctl_stack_size); - if (amdgpu_sriov_multi_vf_mode(mm->dev->adev)) { - struct v9_mqd *m; + struct kfd_mem_obj xcc_mqd_mem_obj; + u32 mqd_ctl_stack_size; + struct v9_mqd *m; + u32 num_xcc; + int xcc; - m = (struct v9_mqd *) mqd_mem_obj->cpu_ptr; - 
m->cp_hqd_pq_doorbell_control |= 1 << - CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_MODE__SHIFT; + uint64_t offset = mm->mqd_stride(mm, qp); + + mm->dev->dqm->current_logical_xcc_start++; + + num_xcc = NUM_XCC(mm->dev->xcc_mask); + mqd_ctl_stack_size = ctl_stack_size / num_xcc; + + memset(&xcc_mqd_mem_obj, 0x0, sizeof(struct kfd_mem_obj)); + + /* Set the MQD pointer and gart address to XCC0 MQD */ + *mqd = mqd_mem_obj->cpu_ptr; + if (gart_addr) + *gart_addr = mqd_mem_obj->gpu_addr; + + for (xcc = 0; xcc < num_xcc; xcc++) { + get_xcc_mqd(mqd_mem_obj, &xcc_mqd_mem_obj, offset * xcc); + restore_mqd(mm, (void **)&m, + &xcc_mqd_mem_obj, + NULL, + qp, + (uint8_t *)mqd_src + xcc * sizeof(*m), + (uint8_t *)ctl_stack_src + xcc * mqd_ctl_stack_size, + mqd_ctl_stack_size); } } static int destroy_mqd_v9_4_3(struct mqd_manager *mm, void *mqd, @@ -906,7 +946,6 @@ struct mqd_manager *mqd_manager_init_v9(enum KFD_MQD_TYPE type, mqd->free_mqd = kfd_free_mqd_cp; mqd->is_occupied = kfd_is_occupied_cp; mqd->get_checkpoint_info = get_checkpoint_info; - mqd->checkpoint_mqd = checkpoint_mqd; mqd->mqd_size = sizeof(struct v9_mqd); mqd->mqd_stride = mqd_stride_v9; #if defined(CONFIG_DEBUG_FS) @@ -918,16 +957,18 @@ struct mqd_manager *mqd_manager_init_v9(enum KFD_MQD_TYPE type, mqd->init_mqd = init_mqd_v9_4_3; mqd->load_mqd = load_mqd_v9_4_3; mqd->update_mqd = update_mqd_v9_4_3; - mqd->restore_mqd = restore_mqd_v9_4_3; mqd->destroy_mqd = destroy_mqd_v9_4_3; mqd->get_wave_state = get_wave_state_v9_4_3; + mqd->checkpoint_mqd = checkpoint_mqd_v9_4_3; + mqd->restore_mqd = restore_mqd_v9_4_3; } else { mqd->init_mqd = init_mqd; mqd->load_mqd = load_mqd; mqd->update_mqd = update_mqd; - mqd->restore_mqd = restore_mqd; mqd->destroy_mqd = kfd_destroy_mqd_cp; mqd->get_wave_state = get_wave_state; + mqd->checkpoint_mqd = checkpoint_mqd; + mqd->restore_mqd = restore_mqd; } break; case KFD_MQD_TYPE_HIQ: diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c index 8fa6489b6f5d..505036968a77 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c @@ -240,7 +240,7 @@ static int pm_map_queues_v9(struct packet_manager *pm, uint32_t *buffer, packet->bitfields2.engine_sel = engine_sel__mes_map_queues__compute_vi; - packet->bitfields2.gws_control_queue = q->gws ? 1 : 0; + packet->bitfields2.gws_control_queue = q->properties.is_gws ? 1 : 0; packet->bitfields2.extended_engine_sel = extended_engine_sel__mes_map_queues__legacy_engine_sel; packet->bitfields2.queue_type = diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h index d221c58dccc3..70ef051511bb 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h @@ -111,7 +111,14 @@ #define KFD_KERNEL_QUEUE_SIZE 2048 -#define KFD_UNMAP_LATENCY_MS (4000) +/* KFD_UNMAP_LATENCY_MS is the timeout CP waiting for SDMA preemption. One XCC + * can be associated to 2 SDMA engines. queue_preemption_timeout_ms is the time + * driver waiting for CP returning the UNMAP_QUEUE fence. 
Thus the math is + * queue_preemption_timeout_ms = sdma_preemption_time * 2 + cp workload + * The format here makes CP workload 10% of total timeout + */ +#define KFD_UNMAP_LATENCY_MS \ + ((queue_preemption_timeout_ms - queue_preemption_timeout_ms / 10) >> 1) #define KFD_MAX_SDMA_QUEUES 128 @@ -372,6 +379,11 @@ struct kfd_dev { /* bitmap for dynamic doorbell allocation from doorbell object */ unsigned long *doorbell_bitmap; + + /* for dynamic partitioning */ + int kfd_dev_lock; + + atomic_t kfd_processes_count; }; enum kfd_mempool { @@ -1536,7 +1548,7 @@ static inline bool kfd_flush_tlb_after_unmap(struct kfd_dev *dev) int kfd_send_exception_to_runtime(struct kfd_process *p, unsigned int queue_id, uint64_t error_reason); -bool kfd_is_locked(void); +bool kfd_is_locked(struct kfd_dev *kfd); /* Compute profile */ void kfd_inc_compute_active(struct kfd_node *dev); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c index 722ac1662bdc..ddfe30c13e9d 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c @@ -854,7 +854,7 @@ struct kfd_process *kfd_create_process(struct task_struct *thread) */ mutex_lock(&kfd_processes_mutex); - if (kfd_is_locked()) { + if (kfd_is_locked(NULL)) { pr_debug("KFD is locked! Cannot create process"); process = ERR_PTR(-EINVAL); goto out; @@ -1088,6 +1088,8 @@ static void kfd_process_destroy_pdds(struct kfd_process *p) pdd->runtime_inuse = false; } + atomic_dec(&pdd->dev->kfd->kfd_processes_count); + kfree(pdd); p->pdds[i] = NULL; } @@ -1649,6 +1651,8 @@ struct kfd_process_device *kfd_create_process_device_data(struct kfd_node *dev, /* Init idr used for memory handle translation */ idr_init(&pdd->alloc_idr); + atomic_inc(&dev->kfd->kfd_processes_count); + return pdd; } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c index c643e0ccec52..7fbb5c274ccc 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c @@ -914,7 +914,10 @@ static int criu_checkpoint_queues_device(struct kfd_process_device *pdd, q_data = (struct kfd_criu_queue_priv_data *)q_private_data; - /* data stored in this order: priv_data, mqd, ctl_stack */ + /* + * data stored in this order: + * priv_data, mqd[xcc0], mqd[xcc1],..., ctl_stack[xcc0], ctl_stack[xcc1]... 
+ */ q_data->mqd_size = mqd_size; q_data->ctl_stack_size = ctl_stack_size; @@ -963,7 +966,7 @@ int kfd_criu_checkpoint_queues(struct kfd_process *p, } static void set_queue_properties_from_criu(struct queue_properties *qp, - struct kfd_criu_queue_priv_data *q_data) + struct kfd_criu_queue_priv_data *q_data, uint32_t num_xcc) { qp->is_interop = false; qp->queue_percent = q_data->q_percent; @@ -976,7 +979,11 @@ static void set_queue_properties_from_criu(struct queue_properties *qp, qp->eop_ring_buffer_size = q_data->eop_ring_buffer_size; qp->ctx_save_restore_area_address = q_data->ctx_save_restore_area_address; qp->ctx_save_restore_area_size = q_data->ctx_save_restore_area_size; - qp->ctl_stack_size = q_data->ctl_stack_size; + if (q_data->type == KFD_QUEUE_TYPE_COMPUTE) + qp->ctl_stack_size = q_data->ctl_stack_size / num_xcc; + else + qp->ctl_stack_size = q_data->ctl_stack_size; + qp->type = q_data->type; qp->format = q_data->format; } @@ -1036,12 +1043,15 @@ int kfd_criu_restore_queue(struct kfd_process *p, goto exit; } - /* data stored in this order: mqd, ctl_stack */ + /* + * data stored in this order: + * mqd[xcc0], mqd[xcc1],..., ctl_stack[xcc0], ctl_stack[xcc1]... + */ mqd = q_extra_data; ctl_stack = mqd + q_data->mqd_size; memset(&qp, 0, sizeof(qp)); - set_queue_properties_from_criu(&qp, q_data); + set_queue_properties_from_criu(&qp, q_data, NUM_XCC(pdd->dev->adev->gfx.xcc_mask)); print_queue_properties(&qp); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c index 865dca2547de..9d72411c3379 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c @@ -1171,13 +1171,12 @@ svm_range_split_head(struct svm_range *prange, uint64_t new_start, } static void -svm_range_add_child(struct svm_range *prange, struct mm_struct *mm, - struct svm_range *pchild, enum svm_work_list_ops op) +svm_range_add_child(struct svm_range *prange, struct svm_range *pchild, enum svm_work_list_ops op) { pr_debug("add child 0x%p [0x%lx 0x%lx] to prange 0x%p child list %d\n", pchild, pchild->start, pchild->last, prange, op); - pchild->work_item.mm = mm; + pchild->work_item.mm = NULL; pchild->work_item.op = op; list_add_tail(&pchild->child_list, &prange->child_list); } @@ -1190,7 +1189,7 @@ svm_nodes_in_same_hive(struct kfd_node *node_a, struct kfd_node *node_b) } static uint64_t -svm_range_get_pte_flags(struct kfd_node *node, +svm_range_get_pte_flags(struct kfd_node *node, struct amdgpu_vm *vm, struct svm_range *prange, int domain) { struct kfd_node *bo_node; @@ -1278,7 +1277,7 @@ svm_range_get_pte_flags(struct kfd_node *node, mapping_flags |= ext_coherent ? 
AMDGPU_VM_MTYPE_UC : AMDGPU_VM_MTYPE_NC; /* system memory accessed by the dGPU */ } else { - if (gc_ip_version < IP_VERSION(9, 5, 0)) + if (gc_ip_version < IP_VERSION(9, 5, 0) || ext_coherent) mapping_flags |= AMDGPU_VM_MTYPE_UC; else mapping_flags |= AMDGPU_VM_MTYPE_NC; @@ -1293,10 +1292,6 @@ svm_range_get_pte_flags(struct kfd_node *node, AMDGPU_VM_MTYPE_UC : AMDGPU_VM_MTYPE_NC; } - mapping_flags |= AMDGPU_VM_PAGE_READABLE | AMDGPU_VM_PAGE_WRITEABLE; - - if (flags & KFD_IOCTL_SVM_FLAG_GPU_RO) - mapping_flags &= ~AMDGPU_VM_PAGE_WRITEABLE; if (flags & KFD_IOCTL_SVM_FLAG_GPU_EXEC) mapping_flags |= AMDGPU_VM_PAGE_EXECUTABLE; @@ -1306,7 +1301,10 @@ svm_range_get_pte_flags(struct kfd_node *node, if (gc_ip_version >= IP_VERSION(12, 0, 0)) pte_flags |= AMDGPU_PTE_IS_PTE; - pte_flags |= amdgpu_gem_va_map_flags(node->adev, mapping_flags); + amdgpu_gmc_get_vm_pte(node->adev, vm, NULL, mapping_flags, &pte_flags); + pte_flags |= AMDGPU_PTE_READABLE; + if (!(flags & KFD_IOCTL_SVM_FLAG_GPU_RO)) + pte_flags |= AMDGPU_PTE_WRITEABLE; return pte_flags; } @@ -1413,7 +1411,7 @@ svm_range_map_to_gpu(struct kfd_process_device *pdd, struct svm_range *prange, pr_debug("Mapping range [0x%lx 0x%llx] on domain: %s\n", last_start, prange->start + i, last_domain ? "GPU" : "CPU"); - pte_flags = svm_range_get_pte_flags(pdd->dev, prange, last_domain); + pte_flags = svm_range_get_pte_flags(pdd->dev, vm, prange, last_domain); if (readonly) pte_flags &= ~AMDGPU_PTE_WRITEABLE; @@ -1715,9 +1713,32 @@ static int svm_range_validate_and_map(struct mm_struct *mm, next = min(vma->vm_end, end); npages = (next - addr) >> PAGE_SHIFT; + /* HMM requires at least READ permissions. If provided with PROT_NONE, + * unmap the memory. If it's not already mapped, this is a no-op + * If PROT_WRITE is provided without READ, warn first then unmap + */ + if (!(vma->vm_flags & VM_READ)) { + unsigned long e, s; + + svm_range_lock(prange); + if (vma->vm_flags & VM_WRITE) + pr_debug("VM_WRITE without VM_READ is not supported"); + s = max(start, prange->start); + e = min(end, prange->last); + if (e >= s) + r = svm_range_unmap_from_gpus(prange, s, e, + KFD_SVM_UNMAP_TRIGGER_UNMAP_FROM_CPU); + svm_range_unlock(prange); + /* If unmap returns non-zero, we'll bail on the next for loop + * iteration, so just leave r and continue + */ + addr = next; + continue; + } + WRITE_ONCE(p->svms.faulting_task, current); r = amdgpu_hmm_range_get_pages(&prange->notifier, addr, npages, - readonly, owner, NULL, + readonly, owner, &hmm_range); WRITE_ONCE(p->svms.faulting_task, NULL); if (r) @@ -2394,15 +2415,17 @@ svm_range_add_list_work(struct svm_range_list *svms, struct svm_range *prange, prange->work_item.op != SVM_OP_UNMAP_RANGE) prange->work_item.op = op; } else { - prange->work_item.op = op; - - /* Pairs with mmput in deferred_list_work */ - mmget(mm); - prange->work_item.mm = mm; - list_add_tail(&prange->deferred_list, - &prange->svms->deferred_range_list); - pr_debug("add prange 0x%p [0x%lx 0x%lx] to work list op %d\n", - prange, prange->start, prange->last, op); + /* Pairs with mmput in deferred_list_work. + * If process is exiting and mm is gone, don't update mmu notifier. 
+ */ + if (mmget_not_zero(mm)) { + prange->work_item.mm = mm; + prange->work_item.op = op; + list_add_tail(&prange->deferred_list, + &prange->svms->deferred_range_list); + pr_debug("add prange 0x%p [0x%lx 0x%lx] to work list op %d\n", + prange, prange->start, prange->last, op); + } } spin_unlock(&svms->deferred_list_lock); } @@ -2416,8 +2439,7 @@ void schedule_deferred_list_work(struct svm_range_list *svms) } static void -svm_range_unmap_split(struct mm_struct *mm, struct svm_range *parent, - struct svm_range *prange, unsigned long start, +svm_range_unmap_split(struct svm_range *parent, struct svm_range *prange, unsigned long start, unsigned long last) { struct svm_range *head; @@ -2438,12 +2460,12 @@ svm_range_unmap_split(struct mm_struct *mm, struct svm_range *parent, svm_range_split(tail, last + 1, tail->last, &head); if (head != prange && tail != prange) { - svm_range_add_child(parent, mm, head, SVM_OP_UNMAP_RANGE); - svm_range_add_child(parent, mm, tail, SVM_OP_ADD_RANGE); + svm_range_add_child(parent, head, SVM_OP_UNMAP_RANGE); + svm_range_add_child(parent, tail, SVM_OP_ADD_RANGE); } else if (tail != prange) { - svm_range_add_child(parent, mm, tail, SVM_OP_UNMAP_RANGE); + svm_range_add_child(parent, tail, SVM_OP_UNMAP_RANGE); } else if (head != prange) { - svm_range_add_child(parent, mm, head, SVM_OP_UNMAP_RANGE); + svm_range_add_child(parent, head, SVM_OP_UNMAP_RANGE); } else if (parent != prange) { prange->work_item.op = SVM_OP_UNMAP_RANGE; } @@ -2520,14 +2542,14 @@ svm_range_unmap_from_cpu(struct mm_struct *mm, struct svm_range *prange, l = min(last, pchild->last); if (l >= s) svm_range_unmap_from_gpus(pchild, s, l, trigger); - svm_range_unmap_split(mm, prange, pchild, start, last); + svm_range_unmap_split(prange, pchild, start, last); mutex_unlock(&pchild->lock); } s = max(start, prange->start); l = min(last, prange->last); if (l >= s) svm_range_unmap_from_gpus(prange, s, l, trigger); - svm_range_unmap_split(mm, prange, prange, start, last); + svm_range_unmap_split(prange, prange, start, last); if (unmap_parent) svm_range_add_list_work(svms, prange, mm, SVM_OP_UNMAP_RANGE); @@ -2570,8 +2592,6 @@ svm_range_cpu_invalidate_pagetables(struct mmu_interval_notifier *mni, if (range->event == MMU_NOTIFY_RELEASE) return true; - if (!mmget_not_zero(mni->mm)) - return true; start = mni->interval_tree.start; last = mni->interval_tree.last; @@ -2598,7 +2618,6 @@ svm_range_cpu_invalidate_pagetables(struct mmu_interval_notifier *mni, } svm_range_unlock(prange); - mmput(mni->mm); return true; } @@ -3026,6 +3045,8 @@ retry_write_locked: if (svms->checkpoint_ts[gpuidx] != 0) { if (amdgpu_ih_ts_after_or_equal(ts, svms->checkpoint_ts[gpuidx])) { pr_debug("draining retry fault, drop fault 0x%llx\n", addr); + if (write_locked) + mmap_write_downgrade(mm); r = -EAGAIN; goto out_unlock_svms; } else { @@ -4242,7 +4263,7 @@ svm_ioctl(struct kfd_process *p, enum kfd_ioctl_svm_op op, uint64_t start, r = svm_range_get_attr(p, mm, start, size, nattrs, attrs); break; default: - r = EINVAL; + r = -EINVAL; break; } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c index baa2374acdeb..5c98746eb72d 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c @@ -510,6 +510,10 @@ static ssize_t node_show(struct kobject *kobj, struct attribute *attr, dev->node_props.capability |= HSA_CAP_AQL_QUEUE_DOUBLE_MAP; + if (KFD_GC_VERSION(dev->gpu) < IP_VERSION(10, 0, 0) && + (dev->gpu->adev->sdma.supported_reset & 
AMDGPU_RESET_TYPE_PER_QUEUE)) + dev->node_props.capability2 |= HSA_CAP2_PER_SDMA_QUEUE_RESET_SUPPORTED; + sysfs_show_32bit_prop(buffer, offs, "max_engine_clk_fcompute", dev->node_props.max_engine_clk_fcompute); @@ -526,6 +530,8 @@ static ssize_t node_show(struct kobject *kobj, struct attribute *attr, sysfs_show_32bit_prop(buffer, offs, "sdma_fw_version", dev->gpu->kfd->sdma_fw_version); sysfs_show_64bit_prop(buffer, offs, "unique_id", + dev->gpu->xcp ? + dev->gpu->xcp->unique_id : dev->gpu->adev->unique_id); sysfs_show_32bit_prop(buffer, offs, "num_xcc", NUM_XCC(dev->gpu->xcc_mask)); @@ -1583,7 +1589,8 @@ static int kfd_dev_create_p2p_links(void) break; if (!dev->gpu || !dev->gpu->adev || (dev->gpu->kfd->hive_id && - dev->gpu->kfd->hive_id == new_dev->gpu->kfd->hive_id)) + dev->gpu->kfd->hive_id == new_dev->gpu->kfd->hive_id && + amdgpu_xgmi_get_is_sharing_enabled(dev->gpu->adev, new_dev->gpu->adev))) goto next; /* check if node(s) is/are peer accessible in one direction or bi-direction */ @@ -2008,8 +2015,6 @@ static void kfd_topology_set_capabilities(struct kfd_topology_device *dev) if (!amdgpu_sriov_vf(dev->gpu->adev)) dev->node_props.capability |= HSA_CAP_PER_QUEUE_RESET_SUPPORTED; - if (dev->gpu->adev->sdma.supported_reset & AMDGPU_RESET_TYPE_PER_QUEUE) - dev->node_props.capability2 |= HSA_CAP2_PER_SDMA_QUEUE_RESET_SUPPORTED; } else { dev->node_props.debug_prop |= HSA_DBG_WATCH_ADDR_MASK_LO_BIT_GFX10 | HSA_DBG_WATCH_ADDR_MASK_HI_BIT; diff --git a/drivers/gpu/drm/amd/amdxcp/amdgpu_xcp_drv.c b/drivers/gpu/drm/amd/amdxcp/amdgpu_xcp_drv.c index faed84172dd4..44009aa8216e 100644 --- a/drivers/gpu/drm/amd/amdxcp/amdgpu_xcp_drv.c +++ b/drivers/gpu/drm/amd/amdxcp/amdgpu_xcp_drv.c @@ -21,6 +21,7 @@ * */ +#include <linux/export.h> #include <linux/init.h> #include <linux/module.h> #include <linux/platform_device.h> @@ -45,18 +46,29 @@ static const struct drm_driver amdgpu_xcp_driver = { static int8_t pdev_num; static struct xcp_device *xcp_dev[MAX_XCP_PLATFORM_DEVICE]; +static DEFINE_MUTEX(xcp_mutex); int amdgpu_xcp_drm_dev_alloc(struct drm_device **ddev) { struct platform_device *pdev; struct xcp_device *pxcp_dev; char dev_name[20]; - int ret; + int ret, i; + + guard(mutex)(&xcp_mutex); if (pdev_num >= MAX_XCP_PLATFORM_DEVICE) return -ENODEV; - snprintf(dev_name, sizeof(dev_name), "amdgpu_xcp_%d", pdev_num); + for (i = 0; i < MAX_XCP_PLATFORM_DEVICE; i++) { + if (!xcp_dev[i]) + break; + } + + if (i >= MAX_XCP_PLATFORM_DEVICE) + return -ENODEV; + + snprintf(dev_name, sizeof(dev_name), "amdgpu_xcp_%d", i); pdev = platform_device_register_simple(dev_name, -1, NULL, 0); if (IS_ERR(pdev)) return PTR_ERR(pdev); @@ -72,8 +84,8 @@ int amdgpu_xcp_drm_dev_alloc(struct drm_device **ddev) goto out_devres; } - xcp_dev[pdev_num] = pxcp_dev; - xcp_dev[pdev_num]->pdev = pdev; + xcp_dev[i] = pxcp_dev; + xcp_dev[i]->pdev = pdev; *ddev = &pxcp_dev->drm; pdev_num++; @@ -88,16 +100,43 @@ out_unregister: } EXPORT_SYMBOL(amdgpu_xcp_drm_dev_alloc); -void amdgpu_xcp_drv_release(void) +static void free_xcp_dev(int8_t index) { - for (--pdev_num; pdev_num >= 0; --pdev_num) { - struct platform_device *pdev = xcp_dev[pdev_num]->pdev; + if ((index < MAX_XCP_PLATFORM_DEVICE) && (xcp_dev[index])) { + struct platform_device *pdev = xcp_dev[index]->pdev; devres_release_group(&pdev->dev, NULL); platform_device_unregister(pdev); - xcp_dev[pdev_num] = NULL; + + xcp_dev[index] = NULL; + pdev_num--; + } +} + +void amdgpu_xcp_drm_dev_free(struct drm_device *ddev) +{ + int8_t i; + + guard(mutex)(&xcp_mutex); + + for (i = 0; i < 
MAX_XCP_PLATFORM_DEVICE; i++) { + if ((xcp_dev[i]) && (&xcp_dev[i]->drm == ddev)) { + free_xcp_dev(i); + break; + } + } +} +EXPORT_SYMBOL(amdgpu_xcp_drm_dev_free); + +void amdgpu_xcp_drv_release(void) +{ + int8_t i; + + guard(mutex)(&xcp_mutex); + + for (i = 0; pdev_num && i < MAX_XCP_PLATFORM_DEVICE; i++) { + free_xcp_dev(i); } - pdev_num = 0; } EXPORT_SYMBOL(amdgpu_xcp_drv_release); diff --git a/drivers/gpu/drm/amd/amdxcp/amdgpu_xcp_drv.h b/drivers/gpu/drm/amd/amdxcp/amdgpu_xcp_drv.h index c1c4b679bf95..580a1602c8e3 100644 --- a/drivers/gpu/drm/amd/amdxcp/amdgpu_xcp_drv.h +++ b/drivers/gpu/drm/amd/amdxcp/amdgpu_xcp_drv.h @@ -25,5 +25,6 @@ #define _AMDGPU_XCP_DRV_H_ int amdgpu_xcp_drm_dev_alloc(struct drm_device **ddev); +void amdgpu_xcp_drm_dev_free(struct drm_device *ddev); void amdgpu_xcp_drv_release(void); #endif /* _AMDGPU_XCP_DRV_H_ */ diff --git a/drivers/gpu/drm/amd/display/Makefile b/drivers/gpu/drm/amd/display/Makefile index 89d605de0595..0084a8d55254 100644 --- a/drivers/gpu/drm/amd/display/Makefile +++ b/drivers/gpu/drm/amd/display/Makefile @@ -44,6 +44,7 @@ subdir-ccflags-y += -I$(FULL_AMD_DISPLAY_PATH)/dc/mmhubbub subdir-ccflags-y += -I$(FULL_AMD_DISPLAY_PATH)/dc/mpc subdir-ccflags-y += -I$(FULL_AMD_DISPLAY_PATH)/dc/opp subdir-ccflags-y += -I$(FULL_AMD_DISPLAY_PATH)/dc/pg +subdir-ccflags-y += -I$(FULL_AMD_DISPLAY_PATH)/dc/soc_and_ip_translator subdir-ccflags-y += -I$(FULL_AMD_DISPLAY_PATH)/modules/inc subdir-ccflags-y += -I$(FULL_AMD_DISPLAY_PATH)/modules/freesync subdir-ccflags-y += -I$(FULL_AMD_DISPLAY_PATH)/modules/color diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index d3100f641ac6..bfa3199591b6 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: MIT /* * Copyright 2015 Advanced Micro Devices, Inc. 
* @@ -39,13 +40,11 @@ #include "dc/dc_stat.h" #include "dc/dc_state.h" #include "amdgpu_dm_trace.h" -#include "dpcd_defs.h" #include "link/protocols/link_dpcd.h" #include "link_service_types.h" #include "link/protocols/link_dp_capability.h" #include "link/protocols/link_ddc.h" -#include "vid.h" #include "amdgpu.h" #include "amdgpu_display.h" #include "amdgpu_ucode.h" @@ -56,7 +55,6 @@ #include "amdgpu_dm_hdcp.h" #include <drm/display/drm_hdcp_helper.h> #include "amdgpu_dm_wb.h" -#include "amdgpu_pm.h" #include "amdgpu_atombios.h" #include "amd_shared.h" @@ -82,6 +80,7 @@ #include <linux/component.h> #include <linux/sort.h> +#include <drm/drm_privacy_screen_consumer.h> #include <drm/display/drm_dp_mst_helper.h> #include <drm/display/drm_hdmi_helper.h> #include <drm/drm_atomic.h> @@ -102,15 +101,6 @@ #include "ivsrcid/dcn/irqsrcs_dcn_1_0.h" -#include "dcn/dcn_1_0_offset.h" -#include "dcn/dcn_1_0_sh_mask.h" -#include "soc15_hw_ip.h" -#include "soc15_common.h" -#include "vega10_ip_offset.h" - -#include "gc/gc_11_0_0_offset.h" -#include "gc/gc_11_0_0_sh_mask.h" - #include "modules/inc/mod_freesync.h" #include "modules/power/power_helpers.h" @@ -243,6 +233,7 @@ static int amdgpu_dm_encoder_init(struct drm_device *dev, static int amdgpu_dm_connector_get_modes(struct drm_connector *connector); +static int amdgpu_dm_atomic_setup_commit(struct drm_atomic_state *state); static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state); static int amdgpu_dm_atomic_check(struct drm_device *dev, @@ -427,8 +418,7 @@ static inline bool update_planes_and_stream_adapter(struct dc *dc, /* * Previous frame finished and HW is ready for optimization. */ - if (update_type == UPDATE_TYPE_FAST) - dc_post_update_surfaces_to_stream(dc); + dc_post_update_surfaces_to_stream(dc); return dc_update_planes_and_stream(dc, array_of_surface_update, @@ -541,6 +531,50 @@ static void dm_pflip_high_irq(void *interrupt_params) amdgpu_crtc->crtc_id, amdgpu_crtc, vrr_active, (int)!e); } +static void dm_handle_vmin_vmax_update(struct work_struct *offload_work) +{ + struct vupdate_offload_work *work = container_of(offload_work, struct vupdate_offload_work, work); + struct amdgpu_device *adev = work->adev; + struct dc_stream_state *stream = work->stream; + struct dc_crtc_timing_adjust *adjust = work->adjust; + + mutex_lock(&adev->dm.dc_lock); + dc_stream_adjust_vmin_vmax(adev->dm.dc, stream, adjust); + mutex_unlock(&adev->dm.dc_lock); + + dc_stream_release(stream); + kfree(work->adjust); + kfree(work); +} + +static void schedule_dc_vmin_vmax(struct amdgpu_device *adev, + struct dc_stream_state *stream, + struct dc_crtc_timing_adjust *adjust) +{ + struct vupdate_offload_work *offload_work = kzalloc(sizeof(*offload_work), GFP_NOWAIT); + if (!offload_work) { + drm_dbg_driver(adev_to_drm(adev), "Failed to allocate vupdate_offload_work\n"); + return; + } + + struct dc_crtc_timing_adjust *adjust_copy = kzalloc(sizeof(*adjust_copy), GFP_NOWAIT); + if (!adjust_copy) { + drm_dbg_driver(adev_to_drm(adev), "Failed to allocate adjust_copy\n"); + kfree(offload_work); + return; + } + + dc_stream_retain(stream); + memcpy(adjust_copy, adjust, sizeof(*adjust_copy)); + + INIT_WORK(&offload_work->work, dm_handle_vmin_vmax_update); + offload_work->adev = adev; + offload_work->stream = stream; + offload_work->adjust = adjust_copy; + + queue_work(system_wq, &offload_work->work); +} + static void dm_vupdate_high_irq(void *interrupt_params) { struct common_irq_params *irq_params = interrupt_params; @@ -578,22 +612,27 @@ static void 
dm_vupdate_high_irq(void *interrupt_params) * page-flip completion events that have been queued to us * if a pageflip happened inside front-porch. */ - if (vrr_active) { + if (vrr_active && acrtc->dm_irq_params.stream) { + bool replay_en = acrtc->dm_irq_params.stream->link->replay_settings.replay_feature_enabled; + bool psr_en = acrtc->dm_irq_params.stream->link->psr_settings.psr_feature_enabled; + bool fs_active_var_en = acrtc->dm_irq_params.freesync_config.state + == VRR_STATE_ACTIVE_VARIABLE; + amdgpu_dm_crtc_handle_vblank(acrtc); /* BTR processing for pre-DCE12 ASICs */ - if (acrtc->dm_irq_params.stream && - adev->family < AMDGPU_FAMILY_AI) { + if (adev->family < AMDGPU_FAMILY_AI) { spin_lock_irqsave(&adev_to_drm(adev)->event_lock, flags); mod_freesync_handle_v_update( adev->dm.freesync_module, acrtc->dm_irq_params.stream, &acrtc->dm_irq_params.vrr_params); - dc_stream_adjust_vmin_vmax( - adev->dm.dc, - acrtc->dm_irq_params.stream, - &acrtc->dm_irq_params.vrr_params.adjust); + if (fs_active_var_en || (!fs_active_var_en && !replay_en && !psr_en)) { + schedule_dc_vmin_vmax(adev, + acrtc->dm_irq_params.stream, + &acrtc->dm_irq_params.vrr_params.adjust); + } spin_unlock_irqrestore(&adev_to_drm(adev)->event_lock, flags); } } @@ -676,15 +715,20 @@ static void dm_crtc_high_irq(void *interrupt_params) spin_lock_irqsave(&adev_to_drm(adev)->event_lock, flags); if (acrtc->dm_irq_params.stream && - acrtc->dm_irq_params.vrr_params.supported && - acrtc->dm_irq_params.freesync_config.state == - VRR_STATE_ACTIVE_VARIABLE) { + acrtc->dm_irq_params.vrr_params.supported) { + bool replay_en = acrtc->dm_irq_params.stream->link->replay_settings.replay_feature_enabled; + bool psr_en = acrtc->dm_irq_params.stream->link->psr_settings.psr_feature_enabled; + bool fs_active_var_en = acrtc->dm_irq_params.freesync_config.state == VRR_STATE_ACTIVE_VARIABLE; + mod_freesync_handle_v_update(adev->dm.freesync_module, acrtc->dm_irq_params.stream, &acrtc->dm_irq_params.vrr_params); - dc_stream_adjust_vmin_vmax(adev->dm.dc, acrtc->dm_irq_params.stream, - &acrtc->dm_irq_params.vrr_params.adjust); + /* update vmin_vmax only if freesync is enabled, or only if PSR and REPLAY are disabled */ + if (fs_active_var_en || (!fs_active_var_en && !replay_en && !psr_en)) { + schedule_dc_vmin_vmax(adev, acrtc->dm_irq_params.stream, + &acrtc->dm_irq_params.vrr_params.adjust); + } } /* @@ -1758,10 +1802,11 @@ dm_dmub_send_vbios_gpint_command(struct amdgpu_device *adev, return DMUB_STATUS_TIMEOUT; } -static struct dml2_soc_bb *dm_dmub_get_vbios_bounding_box(struct amdgpu_device *adev) +static void *dm_dmub_get_vbios_bounding_box(struct amdgpu_device *adev) { - struct dml2_soc_bb *bb; + void *bb; long long addr; + unsigned int bb_size; int i = 0; uint16_t chunk; enum dmub_gpint_command send_addrs[] = { @@ -1774,6 +1819,7 @@ static struct dml2_soc_bb *dm_dmub_get_vbios_bounding_box(struct amdgpu_device * switch (amdgpu_ip_version(adev, DCE_HWIP, 0)) { case IP_VERSION(4, 0, 1): + bb_size = sizeof(struct dml2_soc_bb); break; default: return NULL; @@ -1781,7 +1827,7 @@ static struct dml2_soc_bb *dm_dmub_get_vbios_bounding_box(struct amdgpu_device * bb = dm_allocate_gpu_mem(adev, DC_MEM_ALLOC_TYPE_GART, - sizeof(struct dml2_soc_bb), + bb_size, &addr); if (!bb) return NULL; @@ -1847,7 +1893,7 @@ static int amdgpu_dm_init(struct amdgpu_device *adev) mutex_init(&adev->dm.audio_lock); if (amdgpu_dm_irq_init(adev)) { - drm_err(adev_to_drm(adev), "amdgpu: failed to initialize DM IRQ support.\n"); + drm_err(adev_to_drm(adev), "failed to initialize DM 
IRQ support.\n"); goto error; } @@ -1954,6 +2000,10 @@ static int amdgpu_dm_init(struct amdgpu_device *adev) init_data.flags.disable_ips_in_vpb = 0; + /* DCN35 and above supports dynamic DTBCLK switch */ + if (amdgpu_ip_version(adev, DCE_HWIP, 0) >= IP_VERSION(3, 5, 0)) + init_data.flags.allow_0_dtb_clk = true; + /* Enable DWB for tested platforms only */ if (amdgpu_ip_version(adev, DCE_HWIP, 0) >= IP_VERSION(3, 0, 0)) init_data.num_virtual_links = 1; @@ -2037,7 +2087,7 @@ static int amdgpu_dm_init(struct amdgpu_device *adev) adev->dm.hpd_rx_offload_wq = hpd_rx_irq_create_workqueue(adev); if (!adev->dm.hpd_rx_offload_wq) { - drm_err(adev_to_drm(adev), "amdgpu: failed to create hpd rx offload workqueue.\n"); + drm_err(adev_to_drm(adev), "failed to create hpd rx offload workqueue.\n"); goto error; } @@ -2053,7 +2103,7 @@ static int amdgpu_dm_init(struct amdgpu_device *adev) adev->dm.freesync_module = mod_freesync_create(adev->dm.dc); if (!adev->dm.freesync_module) { drm_err(adev_to_drm(adev), - "amdgpu: failed to initialize freesync_module.\n"); + "failed to initialize freesync_module.\n"); } else drm_dbg_driver(adev_to_drm(adev), "amdgpu: freesync_module init done %p.\n", adev->dm.freesync_module); @@ -2064,7 +2114,7 @@ static int amdgpu_dm_init(struct amdgpu_device *adev) adev->dm.vblank_control_workqueue = create_singlethread_workqueue("dm_vblank_control_workqueue"); if (!adev->dm.vblank_control_workqueue) - drm_err(adev_to_drm(adev), "amdgpu: failed to initialize vblank_workqueue.\n"); + drm_err(adev_to_drm(adev), "failed to initialize vblank_workqueue.\n"); } if (adev->dm.dc->caps.ips_support && @@ -2075,7 +2125,7 @@ static int amdgpu_dm_init(struct amdgpu_device *adev) adev->dm.hdcp_workqueue = hdcp_create_workqueue(adev, &init_params.cp_psp, adev->dm.dc); if (!adev->dm.hdcp_workqueue) - drm_err(adev_to_drm(adev), "amdgpu: failed to initialize hdcp_workqueue.\n"); + drm_err(adev_to_drm(adev), "failed to initialize hdcp_workqueue.\n"); else drm_dbg_driver(adev_to_drm(adev), "amdgpu: hdcp_workqueue init done %p.\n", adev->dm.hdcp_workqueue); @@ -2085,20 +2135,20 @@ static int amdgpu_dm_init(struct amdgpu_device *adev) init_completion(&adev->dm.dmub_aux_transfer_done); adev->dm.dmub_notify = kzalloc(sizeof(struct dmub_notification), GFP_KERNEL); if (!adev->dm.dmub_notify) { - drm_info(adev_to_drm(adev), "amdgpu: fail to allocate adev->dm.dmub_notify"); + drm_info(adev_to_drm(adev), "fail to allocate adev->dm.dmub_notify"); goto error; } adev->dm.delayed_hpd_wq = create_singlethread_workqueue("amdgpu_dm_hpd_wq"); if (!adev->dm.delayed_hpd_wq) { - drm_err(adev_to_drm(adev), "amdgpu: failed to create hpd offload workqueue.\n"); + drm_err(adev_to_drm(adev), "failed to create hpd offload workqueue.\n"); goto error; } amdgpu_dm_outbox_init(adev); if (!register_dmub_notify_callback(adev, DMUB_NOTIFICATION_AUX_REPLY, dmub_aux_setconfig_callback, false)) { - drm_err(adev_to_drm(adev), "amdgpu: fail to register dmub aux callback"); + drm_err(adev_to_drm(adev), "fail to register dmub aux callback"); goto error; } @@ -2107,7 +2157,7 @@ static int amdgpu_dm_init(struct amdgpu_device *adev) if (!register_dmub_notify_callback(adev, DMUB_NOTIFICATION_FUSED_IO, dmub_aux_fused_io_callback, false)) { - drm_err(adev_to_drm(adev), "amdgpu: fail to register dmub fused io callback"); + drm_err(adev_to_drm(adev), "fail to register dmub fused io callback"); goto error; } /* Enable outbox notification only after IRQ handlers are registered and DMUB is alive. 
@@ -2125,7 +2175,7 @@ static int amdgpu_dm_init(struct amdgpu_device *adev) if (amdgpu_dm_initialize_drm_device(adev)) { drm_err(adev_to_drm(adev), - "amdgpu: failed to initialize sw for display support.\n"); + "failed to initialize sw for display support.\n"); goto error; } @@ -2140,14 +2190,14 @@ static int amdgpu_dm_init(struct amdgpu_device *adev) if (drm_vblank_init(adev_to_drm(adev), adev->dm.display_indexes_num)) { drm_err(adev_to_drm(adev), - "amdgpu: failed to initialize sw for display support.\n"); + "failed to initialize vblank for display support.\n"); goto error; } #if defined(CONFIG_DRM_AMD_SECURE_DISPLAY) amdgpu_dm_crtc_secure_display_create_contexts(adev); if (!adev->dm.secure_display_ctx.crtc_ctx) - drm_err(adev_to_drm(adev), "amdgpu: failed to initialize secure display contexts.\n"); + drm_err(adev_to_drm(adev), "failed to initialize secure display contexts.\n"); if (amdgpu_ip_version(adev, DCE_HWIP, 0) >= IP_VERSION(4, 0, 1)) adev->dm.secure_display_ctx.support_mul_roi = true; @@ -2404,6 +2454,7 @@ static int dm_dmub_sw_init(struct amdgpu_device *adev) DMUB_WINDOW_MEMORY_TYPE_FB, //DMUB_WINDOW_5_TRACEBUFF DMUB_WINDOW_MEMORY_TYPE_FB, //DMUB_WINDOW_6_FW_STATE DMUB_WINDOW_MEMORY_TYPE_FB, //DMUB_WINDOW_7_SCRATCH_MEM + DMUB_WINDOW_MEMORY_TYPE_FB, //DMUB_WINDOW_IB_MEM DMUB_WINDOW_MEMORY_TYPE_FB, //DMUB_WINDOW_SHARED_STATE }; int r; @@ -2570,7 +2621,7 @@ static int dm_sw_init(struct amdgpu_ip_block *ip_block) adev->dm.cgs_device = amdgpu_cgs_create_device(adev); if (!adev->dm.cgs_device) { - drm_err(adev_to_drm(adev), "amdgpu: failed to create cgs device.\n"); + drm_err(adev_to_drm(adev), "failed to create cgs device.\n"); return -EINVAL; } @@ -2898,7 +2949,7 @@ static int dm_oem_i2c_hw_init(struct amdgpu_device *adev) return -ENOMEM; } - r = i2c_add_adapter(&oem_i2c->base); + r = devm_i2c_add_adapter(adev->dev, &oem_i2c->base); if (r) { drm_info(adev_to_drm(adev), "Failed to register oem i2c\n"); kfree(oem_i2c); @@ -2960,8 +3011,6 @@ static int dm_hw_fini(struct amdgpu_ip_block *ip_block) { struct amdgpu_device *adev = ip_block->adev; - kfree(adev->dm.oem_i2c); - amdgpu_dm_hpd_fini(adev); amdgpu_dm_irq_fini(adev); @@ -2989,14 +3038,20 @@ static void dm_gpureset_toggle_interrupts(struct amdgpu_device *adev, drm_warn(adev_to_drm(adev), "Failed to %s pflip interrupts\n", enable ? "enable" : "disable"); - if (enable) { - if (amdgpu_dm_crtc_vrr_active(to_dm_crtc_state(acrtc->base.state))) - rc = amdgpu_dm_crtc_set_vupdate_irq(&acrtc->base, true); - } else - rc = amdgpu_dm_crtc_set_vupdate_irq(&acrtc->base, false); - - if (rc) - drm_warn(adev_to_drm(adev), "Failed to %sable vupdate interrupt\n", enable ? "en" : "dis"); + if (dc_supports_vrr(adev->dm.dc->ctx->dce_version)) { + if (enable) { + if (amdgpu_dm_crtc_vrr_active( + to_dm_crtc_state(acrtc->base.state))) + rc = amdgpu_dm_crtc_set_vupdate_irq( + &acrtc->base, true); + } else + rc = amdgpu_dm_crtc_set_vupdate_irq( + &acrtc->base, false); + + if (rc) + drm_warn(adev_to_drm(adev), "Failed to %sable vupdate interrupt\n", + enable ? 
"en" : "dis"); + } irq_source = IRQ_TYPE_VBLANK + acrtc->otg_inst; /* During gpu-reset we disable and then enable vblank irq, so @@ -3060,19 +3115,68 @@ static void hpd_rx_irq_work_suspend(struct amdgpu_display_manager *dm) } } -static int dm_prepare_suspend(struct amdgpu_ip_block *ip_block) +static int dm_cache_state(struct amdgpu_device *adev) { - struct amdgpu_device *adev = ip_block->adev; - - if (amdgpu_in_reset(adev)) - return 0; + int r; - WARN_ON(adev->dm.cached_state); adev->dm.cached_state = drm_atomic_helper_suspend(adev_to_drm(adev)); - if (IS_ERR(adev->dm.cached_state)) - return PTR_ERR(adev->dm.cached_state); + if (IS_ERR(adev->dm.cached_state)) { + r = PTR_ERR(adev->dm.cached_state); + adev->dm.cached_state = NULL; + } - return 0; + return adev->dm.cached_state ? 0 : r; +} + +static void dm_destroy_cached_state(struct amdgpu_device *adev) +{ + struct amdgpu_display_manager *dm = &adev->dm; + struct drm_device *ddev = adev_to_drm(adev); + struct dm_plane_state *dm_new_plane_state; + struct drm_plane_state *new_plane_state; + struct dm_crtc_state *dm_new_crtc_state; + struct drm_crtc_state *new_crtc_state; + struct drm_plane *plane; + struct drm_crtc *crtc; + int i; + + if (!dm->cached_state) + return; + + /* Force mode set in atomic commit */ + for_each_new_crtc_in_state(dm->cached_state, crtc, new_crtc_state, i) { + new_crtc_state->active_changed = true; + dm_new_crtc_state = to_dm_crtc_state(new_crtc_state); + reset_freesync_config_for_crtc(dm_new_crtc_state); + } + + /* + * atomic_check is expected to create the dc states. We need to release + * them here, since they were duplicated as part of the suspend + * procedure. + */ + for_each_new_crtc_in_state(dm->cached_state, crtc, new_crtc_state, i) { + dm_new_crtc_state = to_dm_crtc_state(new_crtc_state); + if (dm_new_crtc_state->stream) { + WARN_ON(kref_read(&dm_new_crtc_state->stream->refcount) > 1); + dc_stream_release(dm_new_crtc_state->stream); + dm_new_crtc_state->stream = NULL; + } + dm_new_crtc_state->base.color_mgmt_changed = true; + } + + for_each_new_plane_in_state(dm->cached_state, plane, new_plane_state, i) { + dm_new_plane_state = to_dm_plane_state(new_plane_state); + if (dm_new_plane_state->dc_state) { + WARN_ON(kref_read(&dm_new_plane_state->dc_state->refcount) > 1); + dc_plane_state_release(dm_new_plane_state->dc_state); + dm_new_plane_state->dc_state = NULL; + } + } + + drm_atomic_helper_resume(ddev, dm->cached_state); + + dm->cached_state = NULL; } static int dm_suspend(struct amdgpu_ip_block *ip_block) @@ -3106,9 +3210,10 @@ static int dm_suspend(struct amdgpu_ip_block *ip_block) } if (!adev->dm.cached_state) { - adev->dm.cached_state = drm_atomic_helper_suspend(adev_to_drm(adev)); - if (IS_ERR(adev->dm.cached_state)) - return PTR_ERR(adev->dm.cached_state); + int r = dm_cache_state(adev); + + if (r) + return r; } s3_handle_hdmi_cec(adev_to_drm(adev), true); @@ -3295,12 +3400,6 @@ static int dm_resume(struct amdgpu_ip_block *ip_block) struct amdgpu_dm_connector *aconnector; struct drm_connector *connector; struct drm_connector_list_iter iter; - struct drm_crtc *crtc; - struct drm_crtc_state *new_crtc_state; - struct dm_crtc_state *dm_new_crtc_state; - struct drm_plane *plane; - struct drm_plane_state *new_plane_state; - struct dm_plane_state *dm_new_plane_state; struct dm_atomic_state *dm_state = to_dm_atomic_state(dm->atomic_obj.state); enum dc_connection_type new_connection_type = dc_connection_none; struct dc_state *dc_state; @@ -3332,8 +3431,10 @@ static int dm_resume(struct amdgpu_ip_block *ip_block) 
link_enc_cfg_copy(adev->dm.dc->current_state, dc_state); r = dm_dmub_hw_init(adev); - if (r) + if (r) { drm_err(adev_to_drm(adev), "DMUB interface failed to initialize: status=%d\n", r); + return r; + } dc_dmub_srv_set_power_state(dm->dc->ctx->dmub_srv, DC_ACPI_CM_POWER_STATE_D0); dc_set_power_state(dm->dc, DC_ACPI_CM_POWER_STATE_D0); @@ -3457,40 +3558,7 @@ static int dm_resume(struct amdgpu_ip_block *ip_block) } drm_connector_list_iter_end(&iter); - /* Force mode set in atomic commit */ - for_each_new_crtc_in_state(dm->cached_state, crtc, new_crtc_state, i) { - new_crtc_state->active_changed = true; - dm_new_crtc_state = to_dm_crtc_state(new_crtc_state); - reset_freesync_config_for_crtc(dm_new_crtc_state); - } - - /* - * atomic_check is expected to create the dc states. We need to release - * them here, since they were duplicated as part of the suspend - * procedure. - */ - for_each_new_crtc_in_state(dm->cached_state, crtc, new_crtc_state, i) { - dm_new_crtc_state = to_dm_crtc_state(new_crtc_state); - if (dm_new_crtc_state->stream) { - WARN_ON(kref_read(&dm_new_crtc_state->stream->refcount) > 1); - dc_stream_release(dm_new_crtc_state->stream); - dm_new_crtc_state->stream = NULL; - } - dm_new_crtc_state->base.color_mgmt_changed = true; - } - - for_each_new_plane_in_state(dm->cached_state, plane, new_plane_state, i) { - dm_new_plane_state = to_dm_plane_state(new_plane_state); - if (dm_new_plane_state->dc_state) { - WARN_ON(kref_read(&dm_new_plane_state->dc_state->refcount) > 1); - dc_plane_state_release(dm_new_plane_state->dc_state); - dm_new_plane_state->dc_state = NULL; - } - } - - drm_atomic_helper_resume(ddev, dm->cached_state); - - dm->cached_state = NULL; + dm_destroy_cached_state(adev); /* Do mst topology probing after resuming cached state*/ drm_connector_list_iter_begin(ddev, &iter); @@ -3536,7 +3604,6 @@ static const struct amd_ip_funcs amdgpu_dm_funcs = { .early_fini = amdgpu_dm_early_fini, .hw_init = dm_hw_init, .hw_fini = dm_hw_fini, - .prepare_suspend = dm_prepare_suspend, .suspend = dm_suspend, .resume = dm_resume, .is_idle = dm_is_idle, @@ -3571,23 +3638,25 @@ static const struct drm_mode_config_funcs amdgpu_dm_mode_funcs = { static struct drm_mode_config_helper_funcs amdgpu_dm_mode_config_helperfuncs = { .atomic_commit_tail = amdgpu_dm_atomic_commit_tail, - .atomic_commit_setup = drm_dp_mst_atomic_setup_commit, + .atomic_commit_setup = amdgpu_dm_atomic_setup_commit, }; static void update_connector_ext_caps(struct amdgpu_dm_connector *aconnector) { + const struct drm_panel_backlight_quirk *panel_backlight_quirk; struct amdgpu_dm_backlight_caps *caps; struct drm_connector *conn_base; struct amdgpu_device *adev; struct drm_luminance_range_info *luminance_range; - int min_input_signal_override; + struct drm_device *drm; if (aconnector->bl_idx == -1 || aconnector->dc_link->connector_signal != SIGNAL_TYPE_EDP) return; conn_base = &aconnector->base; - adev = drm_to_adev(conn_base->dev); + drm = conn_base->dev; + adev = drm_to_adev(drm); caps = &adev->dm.backlight_caps[aconnector->bl_idx]; caps->ext_caps = &aconnector->dc_link->dpcd_sink_ext_caps; @@ -3610,17 +3679,34 @@ static void update_connector_ext_caps(struct amdgpu_dm_connector *aconnector) luminance_range = &conn_base->display_info.luminance_range; - if (luminance_range->max_luminance) { - caps->aux_min_input_signal = luminance_range->min_luminance; + if (luminance_range->max_luminance) caps->aux_max_input_signal = luminance_range->max_luminance; - } else { - caps->aux_min_input_signal = 0; + else caps->aux_max_input_signal 
= 512; - } - min_input_signal_override = drm_get_panel_min_brightness_quirk(aconnector->drm_edid); - if (min_input_signal_override >= 0) - caps->min_input_signal = min_input_signal_override; + if (luminance_range->min_luminance) + caps->aux_min_input_signal = luminance_range->min_luminance; + else + caps->aux_min_input_signal = 1; + + panel_backlight_quirk = + drm_get_panel_backlight_quirk(aconnector->drm_edid); + if (!IS_ERR_OR_NULL(panel_backlight_quirk)) { + if (panel_backlight_quirk->min_brightness) { + caps->min_input_signal = + panel_backlight_quirk->min_brightness - 1; + drm_info(drm, + "Applying panel backlight quirk, min_brightness: %d\n", + caps->min_input_signal); + } + if (panel_backlight_quirk->brightness_mask) { + drm_info(drm, + "Applying panel backlight quirk, brightness_mask: 0x%X\n", + panel_backlight_quirk->brightness_mask); + caps->brightness_mask = + panel_backlight_quirk->brightness_mask; + } + } } DEFINE_FREE(sink_release, struct dc_sink *, if (_T) dc_sink_release(_T)) @@ -4001,19 +4087,19 @@ static int register_hpd_handlers(struct amdgpu_device *adev) if (dc_is_dmub_outbox_supported(adev->dm.dc)) { if (!register_dmub_notify_callback(adev, DMUB_NOTIFICATION_HPD, dmub_hpd_callback, true)) { - drm_err(adev_to_drm(adev), "amdgpu: fail to register dmub hpd callback"); + drm_err(adev_to_drm(adev), "fail to register dmub hpd callback"); return -EINVAL; } if (!register_dmub_notify_callback(adev, DMUB_NOTIFICATION_HPD_IRQ, dmub_hpd_callback, true)) { - drm_err(adev_to_drm(adev), "amdgpu: fail to register dmub hpd callback"); + drm_err(adev_to_drm(adev), "fail to register dmub hpd callback"); return -EINVAL; } if (!register_dmub_notify_callback(adev, DMUB_NOTIFICATION_HPD_SENSE_NOTIFY, dmub_hpd_sense_callback, true)) { - drm_err(adev_to_drm(adev), "amdgpu: fail to register dmub hpd sense callback"); + drm_err(adev_to_drm(adev), "fail to register dmub hpd sense callback"); return -EINVAL; } } @@ -4718,11 +4804,25 @@ static int get_brightness_range(const struct amdgpu_dm_backlight_caps *caps, return 1; } +/* Rescale from [min..max] to [0..AMDGPU_MAX_BL_LEVEL] */ +static inline u32 scale_input_to_fw(int min, int max, u64 input) +{ + return DIV_ROUND_CLOSEST_ULL(input * AMDGPU_MAX_BL_LEVEL, max - min); +} + +/* Rescale from [0..AMDGPU_MAX_BL_LEVEL] to [min..max] */ +static inline u32 scale_fw_to_input(int min, int max, u64 input) +{ + return min + DIV_ROUND_CLOSEST_ULL(input * (max - min), AMDGPU_MAX_BL_LEVEL); +} + static void convert_custom_brightness(const struct amdgpu_dm_backlight_caps *caps, - uint32_t *brightness) + unsigned int min, unsigned int max, + uint32_t *user_brightness) { - u8 prev_signal = 0, prev_lum = 0; - int i = 0; + u32 brightness = scale_input_to_fw(min, max, *user_brightness); + u8 lower_signal, upper_signal, upper_lum, lower_lum, lum; + int left, right; if (amdgpu_dc_debug_mask & DC_DISABLE_CUSTOM_BRIGHTNESS_CURVE) return; @@ -4730,31 +4830,54 @@ static void convert_custom_brightness(const struct amdgpu_dm_backlight_caps *cap if (!caps->data_points) return; - /* choose start to run less interpolation steps */ - if (caps->luminance_data[caps->data_points/2].input_signal > *brightness) - i = caps->data_points/2; - do { - u8 signal = caps->luminance_data[i].input_signal; - u8 lum = caps->luminance_data[i].luminance; + /* + * Handle the case where brightness is below the first data point + * Interpolate between (0,0) and (first_signal, first_lum) + */ + if (brightness < caps->luminance_data[0].input_signal) { + lum = 
DIV_ROUND_CLOSEST(caps->luminance_data[0].luminance * brightness, + caps->luminance_data[0].input_signal); + goto scale; + } - /* - * brightness == signal: luminance is percent numerator - * brightness < signal: interpolate between previous and current luminance numerator - * brightness > signal: find next data point - */ - if (*brightness > signal) { - prev_signal = signal; - prev_lum = lum; - i++; - continue; + left = 0; + right = caps->data_points - 1; + while (left <= right) { + int mid = left + (right - left) / 2; + u8 signal = caps->luminance_data[mid].input_signal; + + /* Exact match found */ + if (signal == brightness) { + lum = caps->luminance_data[mid].luminance; + goto scale; } - if (*brightness < signal) - lum = prev_lum + DIV_ROUND_CLOSEST((lum - prev_lum) * - (*brightness - prev_signal), - signal - prev_signal); - *brightness = DIV_ROUND_CLOSEST(lum * *brightness, 101); - return; - } while (i < caps->data_points); + + if (signal < brightness) + left = mid + 1; + else + right = mid - 1; + } + + /* verify bound */ + if (left >= caps->data_points) + left = caps->data_points - 1; + + /* At this point, left > right */ + lower_signal = caps->luminance_data[right].input_signal; + upper_signal = caps->luminance_data[left].input_signal; + lower_lum = caps->luminance_data[right].luminance; + upper_lum = caps->luminance_data[left].luminance; + + /* interpolate */ + if (right == left || !lower_lum) + lum = upper_lum; + else + lum = lower_lum + DIV_ROUND_CLOSEST((upper_lum - lower_lum) * + (brightness - lower_signal), + upper_signal - lower_signal); +scale: + *user_brightness = scale_fw_to_input(min, max, + DIV_ROUND_CLOSEST(lum * brightness, 101)); } static u32 convert_brightness_from_user(const struct amdgpu_dm_backlight_caps *caps, @@ -4765,11 +4888,10 @@ static u32 convert_brightness_from_user(const struct amdgpu_dm_backlight_caps *c if (!get_brightness_range(caps, &min, &max)) return brightness; - convert_custom_brightness(caps, &brightness); + convert_custom_brightness(caps, min, max, &brightness); - // Rescale 0..255 to min..max - return min + DIV_ROUND_CLOSEST((max - min) * brightness, - AMDGPU_MAX_BL_LEVEL); + // Rescale 0..max to min..max + return min + DIV_ROUND_CLOSEST_ULL((u64)(max - min) * brightness, max); } static u32 convert_brightness_to_user(const struct amdgpu_dm_backlight_caps *caps, @@ -4782,8 +4904,8 @@ static u32 convert_brightness_to_user(const struct amdgpu_dm_backlight_caps *cap if (brightness < min) return 0; - // Rescale min..max to 0..255 - return DIV_ROUND_CLOSEST(AMDGPU_MAX_BL_LEVEL * (brightness - min), + // Rescale min..max to 0..max + return DIV_ROUND_CLOSEST_ULL((u64)max * (brightness - min), max - min); } @@ -4806,6 +4928,10 @@ static void amdgpu_dm_backlight_set_level(struct amdgpu_display_manager *dm, brightness = convert_brightness_from_user(caps, dm->brightness[bl_idx]); link = (struct dc_link *)dm->backlight_link[bl_idx]; + /* Apply brightness quirk */ + if (caps->brightness_mask) + brightness |= caps->brightness_mask; + /* Change brightness based on AUX property */ mutex_lock(&dm->dc_lock); if (dm->dc->caps.ips_support && dm->dc->ctx->dmub_srv->idle_allowed) { @@ -4813,6 +4939,14 @@ static void amdgpu_dm_backlight_set_level(struct amdgpu_display_manager *dm, reallow_idle = true; } + if (trace_amdgpu_dm_brightness_enabled()) { + trace_amdgpu_dm_brightness(__builtin_return_address(0), + user_brightness, + brightness, + caps->aux_support, + power_supply_is_system_supplied() > 0); + } + if (caps->aux_support) { rc = 
dc_link_set_backlight_level_nits(link, true, brightness, AUX_BL_DEFAULT_TRANSITION_TIME_MS); @@ -4866,10 +5000,8 @@ static u32 amdgpu_dm_backlight_get_level(struct amdgpu_display_manager *dm, if (caps.aux_support) { u32 avg, peak; - bool rc; - rc = dc_link_get_backlight_level_nits(link, &avg, &peak); - if (!rc) + if (!dc_link_get_backlight_level_nits(link, &avg, &peak)) return dm->brightness[bl_idx]; return convert_brightness_to_user(&caps, avg); } @@ -4908,7 +5040,7 @@ amdgpu_dm_register_backlight_device(struct amdgpu_dm_connector *aconnector) struct drm_device *drm = aconnector->base.dev; struct amdgpu_display_manager *dm = &drm_to_adev(drm)->dm; struct backlight_properties props = { 0 }; - struct amdgpu_dm_backlight_caps caps = { 0 }; + struct amdgpu_dm_backlight_caps *caps; char bl_name[16]; int min, max; @@ -4922,22 +5054,24 @@ amdgpu_dm_register_backlight_device(struct amdgpu_dm_connector *aconnector) return; } - amdgpu_acpi_get_backlight_caps(&caps); - if (caps.caps_valid && get_brightness_range(&caps, &min, &max)) { + caps = &dm->backlight_caps[aconnector->bl_idx]; + if (get_brightness_range(caps, &min, &max)) { if (power_supply_is_system_supplied() > 0) - props.brightness = (max - min) * DIV_ROUND_CLOSEST(caps.ac_level, 100); + props.brightness = DIV_ROUND_CLOSEST((max - min) * caps->ac_level, 100); else - props.brightness = (max - min) * DIV_ROUND_CLOSEST(caps.dc_level, 100); + props.brightness = DIV_ROUND_CLOSEST((max - min) * caps->dc_level, 100); /* min is zero, so max needs to be adjusted */ props.max_brightness = max - min; drm_dbg(drm, "Backlight caps: min: %d, max: %d, ac %d, dc %d\n", min, max, - caps.ac_level, caps.dc_level); + caps->ac_level, caps->dc_level); } else - props.brightness = AMDGPU_MAX_BL_LEVEL; + props.brightness = props.max_brightness = MAX_BACKLIGHT_LEVEL; - if (caps.data_points && !(amdgpu_dc_debug_mask & DC_DISABLE_CUSTOM_BRIGHTNESS_CURVE)) + if (caps->data_points && !(amdgpu_dc_debug_mask & DC_DISABLE_CUSTOM_BRIGHTNESS_CURVE)) { drm_info(drm, "Using custom brightness curve\n"); - props.max_brightness = AMDGPU_MAX_BL_LEVEL; + props.scale = BACKLIGHT_SCALE_NON_LINEAR; + } else + props.scale = BACKLIGHT_SCALE_LINEAR; props.type = BACKLIGHT_RAW; snprintf(bl_name, sizeof(bl_name), "amdgpu_bl%d", @@ -5353,7 +5487,8 @@ fail: static void amdgpu_dm_destroy_drm_device(struct amdgpu_display_manager *dm) { - drm_atomic_private_obj_fini(&dm->atomic_obj); + if (dm->atomic_obj.state) + drm_atomic_private_obj_fini(&dm->atomic_obj); } /****************************************************************************** @@ -6299,6 +6434,10 @@ static void fill_stream_properties_from_drm_display_mode( && aconnector && aconnector->force_yuv420_output) timing_out->pixel_encoding = PIXEL_ENCODING_YCBCR420; + else if ((connector->display_info.color_formats & DRM_COLOR_FORMAT_YCBCR422) + && aconnector + && aconnector->force_yuv422_output) + timing_out->pixel_encoding = PIXEL_ENCODING_YCBCR422; else if ((connector->display_info.color_formats & DRM_COLOR_FORMAT_YCBCR444) && stream->signal == SIGNAL_TYPE_HDMI_TYPE_A) timing_out->pixel_encoding = PIXEL_ENCODING_YCBCR444; @@ -6330,13 +6469,15 @@ static void fill_stream_properties_from_drm_display_mode( (struct drm_connector *)connector, mode_in); if (err < 0) - drm_warn_once(connector->dev, "Failed to setup avi infoframe on connector %s: %zd \n", connector->name, err); + drm_warn_once(connector->dev, "Failed to setup avi infoframe on connector %s: %zd\n", + connector->name, err); timing_out->vic = avi_frame.video_code; err = 
drm_hdmi_vendor_infoframe_from_display_mode(&hv_frame, (struct drm_connector *)connector, mode_in); if (err < 0) - drm_warn_once(connector->dev, "Failed to setup vendor infoframe on connector %s: %zd \n", connector->name, err); + drm_warn_once(connector->dev, "Failed to setup vendor infoframe on connector %s: %zd\n", + connector->name, err); timing_out->hdmi_vic = hv_frame.vic; } @@ -7254,10 +7395,6 @@ static void amdgpu_dm_connector_destroy(struct drm_connector *connector) drm_dp_cec_unregister_connector(&aconnector->dm_dp_aux.aux); drm_connector_unregister(connector); drm_connector_cleanup(connector); - if (aconnector->i2c) { - i2c_del_adapter(&aconnector->i2c->base); - kfree(aconnector->i2c); - } kfree(aconnector->dm_dp_aux.aux.name); kfree(connector); @@ -7519,7 +7656,7 @@ static enum dc_status dm_validate_stream_and_context(struct dc *dc, dc_result = DC_FAIL_ATTACH_SURFACES; if (dc_result == DC_OK) - dc_result = dc_validate_global_state(dc, dc_state, true); + dc_result = dc_validate_global_state(dc, dc_state, DC_VALIDATE_MODE_ONLY); cleanup: if (dc_state) @@ -7557,6 +7694,7 @@ create_validate_stream_for_sink(struct drm_connector *connector, bpc_limit = 8; do { + drm_dbg_kms(connector->dev, "Trying with %d bpc\n", requested_bpc); stream = create_stream_for_sink(connector, drm_mode, dm_state, old_stream, requested_bpc); @@ -7577,7 +7715,7 @@ create_validate_stream_for_sink(struct drm_connector *connector, dc_result = dm_validate_stream_and_context(adev->dm.dc, stream); if (dc_result != DC_OK) { - DRM_DEBUG_KMS("Mode %dx%d (clk %d) pixel_encoding:%s color_depth:%s failed validation -- %s\n", + DRM_DEBUG_KMS("Pruned mode %d x %d (clk %d) %s %s -- %s\n", drm_mode->hdisplay, drm_mode->vdisplay, drm_mode->clock, @@ -7592,16 +7730,41 @@ create_validate_stream_for_sink(struct drm_connector *connector, } while (stream == NULL && requested_bpc >= bpc_limit); - if ((dc_result == DC_FAIL_ENC_VALIDATE || - dc_result == DC_EXCEED_DONGLE_CAP) && - !aconnector->force_yuv420_output) { - DRM_DEBUG_KMS("%s:%d Retry forcing yuv420 encoding\n", - __func__, __LINE__); - - aconnector->force_yuv420_output = true; + switch (dc_result) { + /* + * If we failed to validate DP bandwidth stream with the requested RGB color depth, + * we try to fallback and configure in order: + * YUV422 (8bpc, 6bpc) + * YUV420 (8bpc, 6bpc) + */ + case DC_FAIL_ENC_VALIDATE: + case DC_EXCEED_DONGLE_CAP: + case DC_NO_DP_LINK_BANDWIDTH: + /* recursively entered twice and already tried both YUV422 and YUV420 */ + if (aconnector->force_yuv422_output && aconnector->force_yuv420_output) + break; + /* first failure; try YUV422 */ + if (!aconnector->force_yuv422_output) { + drm_dbg_kms(connector->dev, "%s:%d Validation failed with %d, retrying w/ YUV422\n", + __func__, __LINE__, dc_result); + aconnector->force_yuv422_output = true; + /* recursively entered and YUV422 failed, try YUV420 */ + } else if (!aconnector->force_yuv420_output) { + drm_dbg_kms(connector->dev, "%s:%d Validation failed with %d, retrying w/ YUV420\n", + __func__, __LINE__, dc_result); + aconnector->force_yuv420_output = true; + } stream = create_validate_stream_for_sink(connector, drm_mode, - dm_state, old_stream); + dm_state, old_stream); + aconnector->force_yuv422_output = false; aconnector->force_yuv420_output = false; + break; + case DC_OK: + break; + default: + drm_dbg_kms(connector->dev, "%s:%d Unhandled validation failure %d\n", + __func__, __LINE__, dc_result); + break; } return stream; @@ -7732,6 +7895,9 @@ amdgpu_dm_connector_atomic_check(struct drm_connector 
*conn, struct amdgpu_dm_connector *aconn = to_amdgpu_dm_connector(conn); int ret; + if (WARN_ON(unlikely(!old_con_state || !new_con_state))) + return -EINVAL; + trace_amdgpu_dm_connector_atomic_check(new_con_state); if (conn->connector_type == DRM_MODE_CONNECTOR_DisplayPort) { @@ -7743,6 +7909,14 @@ amdgpu_dm_connector_atomic_check(struct drm_connector *conn, if (!crtc) return 0; + if (new_con_state->privacy_screen_sw_state != old_con_state->privacy_screen_sw_state) { + new_crtc_state = drm_atomic_get_crtc_state(state, crtc); + if (IS_ERR(new_crtc_state)) + return PTR_ERR(new_crtc_state); + + new_crtc_state->mode_changed = true; + } + if (new_con_state->colorspace != old_con_state->colorspace) { new_crtc_state = drm_atomic_get_crtc_state(state, crtc); if (IS_ERR(new_crtc_state)) @@ -7844,6 +8018,23 @@ static int dm_encoder_helper_atomic_check(struct drm_encoder *encoder, int clock, bpp = 0; bool is_y420 = false; + if ((connector->connector_type == DRM_MODE_CONNECTOR_eDP) || + (connector->connector_type == DRM_MODE_CONNECTOR_LVDS)) { + struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder); + struct drm_display_mode *native_mode = &amdgpu_encoder->native_mode; + enum drm_mode_status result; + + result = drm_crtc_helper_mode_valid_fixed(encoder->crtc, adjusted_mode, native_mode); + if (result != MODE_OK && dm_new_connector_state->scaling == RMX_OFF) { + drm_dbg_driver(encoder->dev, + "mode %dx%d@%dHz is not native, enabling scaling\n", + adjusted_mode->hdisplay, adjusted_mode->vdisplay, + drm_mode_vrefresh(adjusted_mode)); + dm_new_connector_state->scaling = RMX_FULL; + } + return 0; + } + if (!aconnector->mst_output_port) return 0; @@ -7857,7 +8048,7 @@ static int dm_encoder_helper_atomic_check(struct drm_encoder *encoder, if (IS_ERR(mst_state)) return PTR_ERR(mst_state); - mst_state->pbn_div.full = dfixed_const(dm_mst_get_pbn_divider(aconnector->mst_root->dc_link)); + mst_state->pbn_div.full = dm_mst_get_pbn_divider(aconnector->mst_root->dc_link); if (!state->duplicated) { int max_bpc = conn_state->max_requested_bpc; @@ -8081,6 +8272,10 @@ static void amdgpu_dm_connector_add_common_modes(struct drm_encoder *encoder, {"1920x1200", 1920, 1200} }; + if ((connector->connector_type != DRM_MODE_CONNECTOR_eDP) && + (connector->connector_type != DRM_MODE_CONNECTOR_LVDS)) + return; + n = ARRAY_SIZE(common_modes); for (i = 0; i < n; i++) { @@ -8420,6 +8615,18 @@ void amdgpu_dm_connector_init_helper(struct amdgpu_display_manager *dm, if (adev->dm.hdcp_workqueue) drm_connector_attach_content_protection_property(&aconnector->base, true); } + + if (connector_type == DRM_MODE_CONNECTOR_eDP) { + struct drm_privacy_screen *privacy_screen; + + privacy_screen = drm_privacy_screen_get(adev_to_drm(adev)->dev, NULL); + if (!IS_ERR(privacy_screen)) { + drm_connector_attach_privacy_screen_provider(&aconnector->base, + privacy_screen); + } else if (PTR_ERR(privacy_screen) != -ENODEV) { + drm_warn(adev_to_drm(adev), "Error getting privacy-screen\n"); + } + } } static int amdgpu_dm_i2c_xfer(struct i2c_adapter *i2c_adap, @@ -8549,7 +8756,7 @@ static int amdgpu_dm_connector_init(struct amdgpu_display_manager *dm, } aconnector->i2c = i2c; - res = i2c_add_adapter(&i2c->base); + res = devm_i2c_add_adapter(dm->adev->dev, &i2c->base); if (res) { drm_err(adev_to_drm(dm->adev), "Failed to register hw i2c %d\n", link->link_index); @@ -8647,7 +8854,16 @@ static int amdgpu_dm_encoder_init(struct drm_device *dev, static void manage_dm_interrupts(struct amdgpu_device *adev, struct amdgpu_crtc *acrtc, struct 
dm_crtc_state *acrtc_state) -{ +{ /* + * We cannot be sure that the frontend index maps to the same + * backend index - some even map to more than one. + * So we have to go through the CRTC to find the right IRQ. + */ + int irq_type = amdgpu_display_crtc_idx_to_irq_type( + adev, + acrtc->crtc_id); + struct drm_device *dev = adev_to_drm(adev); + struct drm_vblank_crtc_config config = {0}; struct dc_crtc_timing *timing; int offdelay; @@ -8700,7 +8916,35 @@ static void manage_dm_interrupts(struct amdgpu_device *adev, drm_crtc_vblank_on_config(&acrtc->base, &config); + /* Allow RX6xxx, RX7700, RX7800 GPUs to call amdgpu_irq_get.*/ + switch (amdgpu_ip_version(adev, DCE_HWIP, 0)) { + case IP_VERSION(3, 0, 0): + case IP_VERSION(3, 0, 2): + case IP_VERSION(3, 0, 3): + case IP_VERSION(3, 2, 0): + if (amdgpu_irq_get(adev, &adev->pageflip_irq, irq_type)) + drm_err(dev, "DM_IRQ: Cannot get pageflip irq!\n"); +#if defined(CONFIG_DRM_AMD_SECURE_DISPLAY) + if (amdgpu_irq_get(adev, &adev->vline0_irq, irq_type)) + drm_err(dev, "DM_IRQ: Cannot get vline0 irq!\n"); +#endif + } + } else { + /* Allow RX6xxx, RX7700, RX7800 GPUs to call amdgpu_irq_put.*/ + switch (amdgpu_ip_version(adev, DCE_HWIP, 0)) { + case IP_VERSION(3, 0, 0): + case IP_VERSION(3, 0, 2): + case IP_VERSION(3, 0, 3): + case IP_VERSION(3, 2, 0): +#if defined(CONFIG_DRM_AMD_SECURE_DISPLAY) + if (amdgpu_irq_put(adev, &adev->vline0_irq, irq_type)) + drm_err(dev, "DM_IRQ: Cannot put vline0 irq!\n"); +#endif + if (amdgpu_irq_put(adev, &adev->pageflip_irq, irq_type)) + drm_err(dev, "DM_IRQ: Cannot put pageflip irq!\n"); + } + drm_crtc_vblank_off(&acrtc->base); } } @@ -10008,69 +10252,40 @@ static void dm_set_writeback(struct amdgpu_display_manager *dm, drm_writeback_queue_job(wb_conn, new_con_state); } -/** - * amdgpu_dm_atomic_commit_tail() - AMDgpu DM's commit tail implementation. - * @state: The atomic state to commit - * - * This will tell DC to commit the constructed DC state from atomic_check, - * programming the hardware. Any failures here implies a hardware failure, since - * atomic check should have filtered anything non-kosher. 
- */ -static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state) +static void amdgpu_dm_update_hdcp(struct drm_atomic_state *state) { + struct drm_connector_state *old_con_state, *new_con_state; struct drm_device *dev = state->dev; - struct amdgpu_device *adev = drm_to_adev(dev); - struct amdgpu_display_manager *dm = &adev->dm; - struct dm_atomic_state *dm_state; - struct dc_state *dc_state = NULL; - u32 i, j; - struct drm_crtc *crtc; - struct drm_crtc_state *old_crtc_state, *new_crtc_state; - unsigned long flags; - bool wait_for_vblank = true; struct drm_connector *connector; - struct drm_connector_state *old_con_state, *new_con_state; - struct dm_crtc_state *dm_old_crtc_state, *dm_new_crtc_state; - int crtc_disable_count = 0; - - trace_amdgpu_dm_atomic_commit_tail_begin(state); - - drm_atomic_helper_update_legacy_modeset_state(dev, state); - drm_dp_mst_atomic_wait_for_dependencies(state); + struct amdgpu_device *adev = drm_to_adev(dev); + int i; - dm_state = dm_atomic_get_new_state(state); - if (dm_state && dm_state->context) { - dc_state = dm_state->context; - amdgpu_dm_commit_streams(state, dc_state); - } + if (!adev->dm.hdcp_workqueue) + return; for_each_oldnew_connector_in_state(state, connector, old_con_state, new_con_state, i) { struct dm_connector_state *dm_new_con_state = to_dm_connector_state(new_con_state); struct amdgpu_crtc *acrtc = to_amdgpu_crtc(dm_new_con_state->base.crtc); + struct drm_crtc_state *old_crtc_state, *new_crtc_state; + struct dm_crtc_state *dm_new_crtc_state; struct amdgpu_dm_connector *aconnector; - if (connector->connector_type == DRM_MODE_CONNECTOR_WRITEBACK) + if (!connector || connector->connector_type == DRM_MODE_CONNECTOR_WRITEBACK) continue; aconnector = to_amdgpu_dm_connector(connector); - if (!adev->dm.hdcp_workqueue) - continue; - - pr_debug("[HDCP_DM] -------------- i : %x ----------\n", i); + drm_dbg(dev, "[HDCP_DM] -------------- i : %x ----------\n", i); - if (!connector) - continue; - - pr_debug("[HDCP_DM] connector->index: %x connect_status: %x dpms: %x\n", + drm_dbg(dev, "[HDCP_DM] connector->index: %x connect_status: %x dpms: %x\n", connector->index, connector->status, connector->dpms); - pr_debug("[HDCP_DM] state protection old: %x new: %x\n", + drm_dbg(dev, "[HDCP_DM] state protection old: %x new: %x\n", old_con_state->content_protection, new_con_state->content_protection); if (aconnector->dc_sink) { if (aconnector->dc_sink->sink_signal != SIGNAL_TYPE_VIRTUAL && aconnector->dc_sink->sink_signal != SIGNAL_TYPE_NONE) { - pr_debug("[HDCP_DM] pipe_ctx dispname=%s\n", + drm_dbg(dev, "[HDCP_DM] pipe_ctx dispname=%s\n", aconnector->dc_sink->edid_caps.display_name); } } @@ -10084,7 +10299,7 @@ static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state) } if (old_crtc_state) - pr_debug("old crtc en: %x a: %x m: %x a-chg: %x c-chg: %x\n", + drm_dbg(dev, "old crtc en: %x a: %x m: %x a-chg: %x c-chg: %x\n", old_crtc_state->enable, old_crtc_state->active, old_crtc_state->mode_changed, @@ -10092,29 +10307,13 @@ static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state) old_crtc_state->connectors_changed); if (new_crtc_state) - pr_debug("NEW crtc en: %x a: %x m: %x a-chg: %x c-chg: %x\n", + drm_dbg(dev, "NEW crtc en: %x a: %x m: %x a-chg: %x c-chg: %x\n", new_crtc_state->enable, new_crtc_state->active, new_crtc_state->mode_changed, new_crtc_state->active_changed, new_crtc_state->connectors_changed); - } - - for_each_oldnew_connector_in_state(state, connector, old_con_state, new_con_state, i) { - struct 
dm_connector_state *dm_new_con_state = to_dm_connector_state(new_con_state); - struct amdgpu_crtc *acrtc = to_amdgpu_crtc(dm_new_con_state->base.crtc); - struct amdgpu_dm_connector *aconnector = to_amdgpu_dm_connector(connector); - - if (!adev->dm.hdcp_workqueue) - continue; - new_crtc_state = NULL; - old_crtc_state = NULL; - - if (acrtc) { - new_crtc_state = drm_atomic_get_new_crtc_state(state, &acrtc->base); - old_crtc_state = drm_atomic_get_old_crtc_state(state, &acrtc->base); - } dm_new_crtc_state = to_dm_crtc_state(new_crtc_state); @@ -10158,7 +10357,7 @@ static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state) new_con_state->content_protection >= DRM_MODE_CONTENT_PROTECTION_DESIRED) enable_encryption = true; - drm_info(adev_to_drm(adev), "[HDCP_DM] hdcp_update_display enable_encryption = %x\n", enable_encryption); + drm_info(dev, "[HDCP_DM] hdcp_update_display enable_encryption = %x\n", enable_encryption); if (aconnector->dc_link) hdcp_update_display( @@ -10166,6 +10365,78 @@ static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state) new_con_state->hdcp_content_type, enable_encryption); } } +} + +static int amdgpu_dm_atomic_setup_commit(struct drm_atomic_state *state) +{ + struct drm_crtc *crtc; + struct drm_crtc_state *old_crtc_state, *new_crtc_state; + struct dm_crtc_state *dm_old_crtc_state, *dm_new_crtc_state; + int i, ret; + + ret = drm_dp_mst_atomic_setup_commit(state); + if (ret) + return ret; + + for_each_oldnew_crtc_in_state(state, crtc, old_crtc_state, new_crtc_state, i) { + dm_old_crtc_state = to_dm_crtc_state(old_crtc_state); + dm_new_crtc_state = to_dm_crtc_state(new_crtc_state); + /* + * Color management settings. We also update color properties + * when a modeset is needed, to ensure it gets reprogrammed. + */ + if (dm_new_crtc_state->base.active && dm_new_crtc_state->stream && + (dm_new_crtc_state->base.color_mgmt_changed || + dm_old_crtc_state->regamma_tf != dm_new_crtc_state->regamma_tf || + drm_atomic_crtc_needs_modeset(new_crtc_state))) { + ret = amdgpu_dm_update_crtc_color_mgmt(dm_new_crtc_state); + if (ret) { + drm_dbg_atomic(state->dev, "Failed to update color state\n"); + return ret; + } + } + } + + return 0; +} + +/** + * amdgpu_dm_atomic_commit_tail() - AMDgpu DM's commit tail implementation. + * @state: The atomic state to commit + * + * This will tell DC to commit the constructed DC state from atomic_check, + * programming the hardware. Any failures here implies a hardware failure, since + * atomic check should have filtered anything non-kosher. 
+ */ +static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state) +{ + struct drm_device *dev = state->dev; + struct amdgpu_device *adev = drm_to_adev(dev); + struct amdgpu_display_manager *dm = &adev->dm; + struct dm_atomic_state *dm_state; + struct dc_state *dc_state = NULL; + u32 i, j; + struct drm_crtc *crtc; + struct drm_crtc_state *old_crtc_state, *new_crtc_state; + unsigned long flags; + bool wait_for_vblank = true; + struct drm_connector *connector; + struct drm_connector_state *old_con_state = NULL, *new_con_state = NULL; + struct dm_crtc_state *dm_old_crtc_state, *dm_new_crtc_state; + int crtc_disable_count = 0; + + trace_amdgpu_dm_atomic_commit_tail_begin(state); + + drm_atomic_helper_update_legacy_modeset_state(dev, state); + drm_dp_mst_atomic_wait_for_dependencies(state); + + dm_state = dm_atomic_get_new_state(state); + if (dm_state && dm_state->context) { + dc_state = dm_state->context; + amdgpu_dm_commit_streams(state, dc_state); + } + + amdgpu_dm_update_hdcp(state); /* Handle connector state changes */ for_each_oldnew_connector_in_state(state, connector, old_con_state, new_con_state, i) { @@ -10268,6 +10539,8 @@ static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state) &stream_update); mutex_unlock(&dm->dc_lock); kfree(dummy_updates); + + drm_connector_update_privacy_screen(new_con_state); } /** @@ -10319,6 +10592,7 @@ static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state) #if defined(CONFIG_DRM_AMD_SECURE_DISPLAY) if (amdgpu_dm_crc_window_is_activated(crtc)) { uint8_t cnt; + spin_lock_irqsave(&adev_to_drm(adev)->event_lock, flags); for (cnt = 0; cnt < MAX_CRC_WINDOW_NUM; cnt++) { if (acrtc->dm_irq_params.window_param[cnt].enable) { @@ -10621,6 +10895,8 @@ static void get_freesync_config_for_crtc( } else { config.state = VRR_STATE_INACTIVE; } + } else { + config.state = VRR_STATE_UNSUPPORTED; } out: new_crtc_state->freesync_config = config; @@ -10938,7 +11214,7 @@ skip_modeset: if (dm_new_crtc_state->base.color_mgmt_changed || dm_old_crtc_state->regamma_tf != dm_new_crtc_state->regamma_tf || drm_atomic_crtc_needs_modeset(new_crtc_state)) { - ret = amdgpu_dm_update_crtc_color_mgmt(dm_new_crtc_state); + ret = amdgpu_dm_check_crtc_color_mgmt(dm_new_crtc_state, true); if (ret) goto fail; } @@ -12141,7 +12417,7 @@ static int amdgpu_dm_atomic_check(struct drm_device *dev, drm_dbg_atomic(dev, "MST drm_dp_mst_atomic_check() failed\n"); goto fail; } - status = dc_validate_global_state(dc, dm_state->context, true); + status = dc_validate_global_state(dc, dm_state->context, DC_VALIDATE_MODE_ONLY); if (status != DC_OK) { drm_dbg_atomic(dev, "DC global validation failure: %s (%d)", dc_status_to_str(status), status); @@ -12522,7 +12798,7 @@ void amdgpu_dm_update_freesync_caps(struct drm_connector *connector, dm_con_state = to_dm_connector_state(connector->state); - if (!adev->dm.freesync_module) + if (!adev->dm.freesync_module || !dc_supports_vrr(sink->ctx->dce_version)) goto update; edid = drm_edid_raw(drm_edid); // FIXME: Get rid of drm_edid_raw() diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h index d7d92f9911e4..db75e991ac7b 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: MIT */ /* * Copyright (C) 2015-2020 Advanced Micro Devices, Inc. All rights reserved. 
* @@ -152,6 +153,20 @@ struct idle_workqueue { bool running; }; +/** + * struct vupdate_offload_work - Work data for offloading task from vupdate handler + * @work: Kernel work data for the work event + * @adev: amdgpu_device back pointer + * @stream: DC stream associated with the crtc + * @adjust: DC CRTC timing adjust to be applied to the crtc + */ +struct vupdate_offload_work { + struct work_struct work; + struct amdgpu_device *adev; + struct dc_stream_state *stream; + struct dc_crtc_timing_adjust *adjust; +}; + #define MAX_LUMINANCE_DATA_POINTS 99 /** @@ -201,6 +216,11 @@ struct amdgpu_dm_backlight_caps { */ bool aux_support; /** + * @brightness_mask: After deriving brightness, OR it with this mask. + * Workaround for panels with issues with certain brightness values. + */ + u32 brightness_mask; + /** * @ac_level: the default brightness if booted on AC */ u8 ac_level; @@ -636,8 +656,9 @@ struct amdgpu_display_manager { * @bb_from_dmub: * * Bounding box data read from dmub during early initialization for DCN4+ + * Data is stored as a byte array that should be casted to the appropriate bb struct */ - struct dml2_soc_bb *bb_from_dmub; + void *bb_from_dmub; /** * @oem_i2c: @@ -752,6 +773,9 @@ struct amdgpu_dm_connector { uint16_t vc_full_pbn; struct mutex handle_mst_msg_ready; + /* branch device specific data */ + uint32_t branch_ieee_oui; + /* TODO see if we can merge with ddc_bus or make a dm_connector */ struct amdgpu_i2c_adapter *i2c; @@ -775,6 +799,7 @@ struct amdgpu_dm_connector { bool fake_enable; bool force_yuv420_output; + bool force_yuv422_output; struct dsc_preferred_settings dsc_settings; union dp_downstream_port_present mst_downstream_port_present; /* Cached display modes */ @@ -1022,6 +1047,8 @@ void amdgpu_dm_init_color_mod(void); int amdgpu_dm_create_color_properties(struct amdgpu_device *adev); int amdgpu_dm_verify_lut_sizes(const struct drm_crtc_state *crtc_state); int amdgpu_dm_update_crtc_color_mgmt(struct dm_crtc_state *crtc); +int amdgpu_dm_check_crtc_color_mgmt(struct dm_crtc_state *crtc, + bool check_only); int amdgpu_dm_update_plane_color_mgmt(struct dm_crtc_state *crtc, struct drm_plane_state *plane_state, struct dc_plane_state *dc_plane_state); diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c index ebabfe3a512f..a4ac6d442278 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: MIT /* * Copyright 2018 Advanced Micro Devices, Inc. * @@ -27,7 +28,6 @@ #include "amdgpu_dm.h" #include "dc.h" #include "modules/color/color_gamma.h" -#include "basics/conversion.h" /** * DOC: overview @@ -566,12 +566,11 @@ static int __set_output_tf(struct dc_transfer_func *func, return res ? 
0 : -ENOMEM; } -static int amdgpu_dm_set_atomic_regamma(struct dc_stream_state *stream, +static int amdgpu_dm_set_atomic_regamma(struct dc_transfer_func *out_tf, const struct drm_color_lut *regamma_lut, uint32_t regamma_size, bool has_rom, enum dc_transfer_func_predefined tf) { - struct dc_transfer_func *out_tf = &stream->out_transfer_func; int ret = 0; if (regamma_size || tf != TRANSFER_FUNCTION_LINEAR) { @@ -821,7 +820,7 @@ int amdgpu_dm_verify_lut3d_size(struct amdgpu_device *adev, struct dm_plane_state *dm_plane_state = to_dm_plane_state(plane_state); const struct drm_color_lut *shaper = NULL, *lut3d = NULL; uint32_t exp_size, size, dim_size = MAX_COLOR_3DLUT_SIZE; - bool has_3dlut = adev->dm.dc->caps.color.dpp.hw_3d_lut; + bool has_3dlut = adev->dm.dc->caps.color.dpp.hw_3d_lut || adev->dm.dc->caps.color.mpc.preblend; /* shaper LUT is only available if 3D LUT color caps */ exp_size = has_3dlut ? MAX_COLOR_LUT_ENTRIES : 0; @@ -885,33 +884,33 @@ int amdgpu_dm_verify_lut_sizes(const struct drm_crtc_state *crtc_state) } /** - * amdgpu_dm_update_crtc_color_mgmt: Maps DRM color management to DC stream. + * amdgpu_dm_check_crtc_color_mgmt: Check if DRM color props are programmable by DC. * @crtc: amdgpu_dm crtc state + * @check_only: only check color state without update dc stream * - * With no plane level color management properties we're free to use any - * of the HW blocks as long as the CRTC CTM always comes before the - * CRTC RGM and after the CRTC DGM. - * - * - The CRTC RGM block will be placed in the RGM LUT block if it is non-linear. - * - The CRTC DGM block will be placed in the DGM LUT block if it is non-linear. - * - The CRTC CTM will be placed in the gamut remap block if it is non-linear. + * This function just verifies CRTC LUT sizes, if there is enough space for + * output transfer function and if its parameters can be calculated by AMD + * color module. It also adjusts some settings for programming CRTC degamma at + * plane stage, using plane DGM block. * * The RGM block is typically more fully featured and accurate across * all ASICs - DCE can't support a custom non-linear CRTC DGM. * * For supporting both plane level color management and CRTC level color - * management at once we have to either restrict the usage of CRTC properties - * or blend adjustments together. + * management at once we have to either restrict the usage of some CRTC + * properties or blend adjustments together. * * Returns: - * 0 on success. Error code if setup fails. + * 0 on success. Error code if validation fails. */ -int amdgpu_dm_update_crtc_color_mgmt(struct dm_crtc_state *crtc) + +int amdgpu_dm_check_crtc_color_mgmt(struct dm_crtc_state *crtc, + bool check_only) { struct dc_stream_state *stream = crtc->stream; struct amdgpu_device *adev = drm_to_adev(crtc->base.state->dev); bool has_rom = adev->asic_type <= CHIP_RAVEN; - struct drm_color_ctm *ctm = NULL; + struct dc_transfer_func *out_tf; const struct drm_color_lut *degamma_lut, *regamma_lut; uint32_t degamma_size, regamma_size; bool has_regamma, has_degamma; @@ -940,6 +939,14 @@ int amdgpu_dm_update_crtc_color_mgmt(struct dm_crtc_state *crtc) crtc->cm_has_degamma = false; crtc->cm_is_degamma_srgb = false; + if (check_only) { + out_tf = kvzalloc(sizeof(*out_tf), GFP_KERNEL); + if (!out_tf) + return -ENOMEM; + } else { + out_tf = &stream->out_transfer_func; + } + /* Setup regamma and degamma. */ if (is_legacy) { /* @@ -954,8 +961,8 @@ int amdgpu_dm_update_crtc_color_mgmt(struct dm_crtc_state *crtc) * inverse color ramp in legacy userspace. 
*/ crtc->cm_is_degamma_srgb = true; - stream->out_transfer_func.type = TF_TYPE_DISTRIBUTED_POINTS; - stream->out_transfer_func.tf = TRANSFER_FUNCTION_SRGB; + out_tf->type = TF_TYPE_DISTRIBUTED_POINTS; + out_tf->tf = TRANSFER_FUNCTION_SRGB; /* * Note: although we pass has_rom as parameter here, we never * actually use ROM because the color module only takes the ROM @@ -963,16 +970,12 @@ int amdgpu_dm_update_crtc_color_mgmt(struct dm_crtc_state *crtc) * * See more in mod_color_calculate_regamma_params() */ - r = __set_legacy_tf(&stream->out_transfer_func, regamma_lut, + r = __set_legacy_tf(out_tf, regamma_lut, regamma_size, has_rom); - if (r) - return r; } else { regamma_size = has_regamma ? regamma_size : 0; - r = amdgpu_dm_set_atomic_regamma(stream, regamma_lut, + r = amdgpu_dm_set_atomic_regamma(out_tf, regamma_lut, regamma_size, has_rom, tf); - if (r) - return r; } /* @@ -981,6 +984,43 @@ int amdgpu_dm_update_crtc_color_mgmt(struct dm_crtc_state *crtc) * have to place the CTM in the OCSC in that case. */ crtc->cm_has_degamma = has_degamma; + if (check_only) + kvfree(out_tf); + + return r; +} + +/** + * amdgpu_dm_update_crtc_color_mgmt: Maps DRM color management to DC stream. + * @crtc: amdgpu_dm crtc state + * + * With no plane level color management properties we're free to use any + * of the HW blocks as long as the CRTC CTM always comes before the + * CRTC RGM and after the CRTC DGM. + * + * - The CRTC RGM block will be placed in the RGM LUT block if it is non-linear. + * - The CRTC DGM block will be placed in the DGM LUT block if it is non-linear. + * - The CRTC CTM will be placed in the gamut remap block if it is non-linear. + * + * The RGM block is typically more fully featured and accurate across + * all ASICs - DCE can't support a custom non-linear CRTC DGM. + * + * For supporting both plane level color management and CRTC level color + * management at once we have to either restrict the usage of CRTC properties + * or blend adjustments together. + * + * Returns: + * 0 on success. Error code if setup fails. + */ +int amdgpu_dm_update_crtc_color_mgmt(struct dm_crtc_state *crtc) +{ + struct dc_stream_state *stream = crtc->stream; + struct drm_color_ctm *ctm = NULL; + int ret; + + ret = amdgpu_dm_check_crtc_color_mgmt(crtc, false); + if (ret) + return ret; /* Setup CRTC CTM. */ if (crtc->base.ctm) { diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crc.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crc.c index 033bd817d871..e20aa7438066 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crc.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crc.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: MIT /* * Copyright 2015 Advanced Micro Devices, Inc. * diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crc.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crc.h index 3da056c8d20b..95bdb8699d7f 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crc.h +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crc.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: MIT */ /* * Copyright 2019 Advanced Micro Devices, Inc. 
* diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c index 87058271b00c..38f9ea313dcb 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c @@ -218,8 +218,10 @@ static void amdgpu_dm_idle_worker(struct work_struct *work) break; } - if (idle_work->enable) + if (idle_work->enable) { + dc_post_update_surfaces_to_stream(idle_work->dm->dc); dc_allow_idle_optimizations(idle_work->dm->dc, true); + } mutex_unlock(&idle_work->dm->dc_lock); } idle_work->dm->idle_workqueue->running = false; @@ -246,6 +248,8 @@ static void amdgpu_dm_crtc_vblank_control_worker(struct work_struct *work) struct vblank_control_work *vblank_work = container_of(work, struct vblank_control_work, work); struct amdgpu_display_manager *dm = vblank_work->dm; + struct amdgpu_device *adev = drm_to_adev(dm->ddev); + int r; mutex_lock(&dm->dc_lock); @@ -273,9 +277,20 @@ static void amdgpu_dm_crtc_vblank_control_worker(struct work_struct *work) vblank_work->acrtc->dm_irq_params.allow_sr_entry); } - if (dm->active_vblank_irq_count == 0) + if (dm->active_vblank_irq_count == 0) { + dc_post_update_surfaces_to_stream(dm->dc); + + r = amdgpu_dpm_pause_power_profile(adev, true); + if (r) + dev_warn(adev->dev, "failed to set default power profile mode\n"); + dc_allow_idle_optimizations(dm->dc, true); + r = amdgpu_dpm_pause_power_profile(adev, false); + if (r) + dev_warn(adev->dev, "failed to restore the power profile mode\n"); + } + mutex_unlock(&dm->dc_lock); dc_stream_release(vblank_work->stream); @@ -293,18 +308,45 @@ static inline int amdgpu_dm_crtc_set_vblank(struct drm_crtc *crtc, bool enable) int irq_type; int rc = 0; - if (acrtc->otg_inst == -1) - goto skip; + if (enable && !acrtc->base.enabled) { + drm_dbg_vbl(crtc->dev, + "Reject vblank enable on unconfigured CRTC %d (enabled=%d)\n", + acrtc->crtc_id, acrtc->base.enabled); + return -EINVAL; + } irq_type = amdgpu_display_crtc_idx_to_irq_type(adev, acrtc->crtc_id); if (enable) { - /* vblank irq on -> Only need vupdate irq in vrr mode */ - if (amdgpu_dm_crtc_vrr_active(acrtc_state)) - rc = amdgpu_dm_crtc_set_vupdate_irq(crtc, true); - } else { - /* vblank irq off -> vupdate irq off */ - rc = amdgpu_dm_crtc_set_vupdate_irq(crtc, false); + struct dc *dc = adev->dm.dc; + struct drm_vblank_crtc *vblank = drm_crtc_vblank_crtc(crtc); + struct psr_settings *psr = &acrtc_state->stream->link->psr_settings; + struct replay_settings *pr = &acrtc_state->stream->link->replay_settings; + bool sr_supported = (psr->psr_version != DC_PSR_VERSION_UNSUPPORTED) || + pr->config.replay_supported; + + /* + * IPS & self-refresh feature can cause vblank counter resets between + * vblank disable and enable. + * It may cause system stuck due to waiting for the vblank counter. + * Call this function to estimate missed vblanks by using timestamps and + * update the vblank counter in DRM. 
+ */ + if (dc->caps.ips_support && + dc->config.disable_ips != DMUB_IPS_DISABLE_ALL && + sr_supported && vblank->config.disable_immediate) + drm_crtc_vblank_restore(crtc); + } + + if (dc_supports_vrr(dm->dc->ctx->dce_version)) { + if (enable) { + /* vblank irq on -> Only need vupdate irq in vrr mode */ + if (amdgpu_dm_crtc_vrr_active(acrtc_state)) + rc = amdgpu_dm_crtc_set_vupdate_irq(crtc, true); + } else { + /* vblank irq off -> vupdate irq off */ + rc = amdgpu_dm_crtc_set_vupdate_irq(crtc, false); + } } if (rc) @@ -356,7 +398,7 @@ static inline int amdgpu_dm_crtc_set_vblank(struct drm_crtc *crtc, bool enable) return rc; } #endif -skip: + if (amdgpu_in_reset(adev)) return 0; @@ -661,6 +703,15 @@ static int amdgpu_dm_crtc_helper_atomic_check(struct drm_crtc *crtc, return -EINVAL; } + if (!state->legacy_cursor_update && amdgpu_dm_crtc_vrr_active(dm_crtc_state)) { + struct drm_plane_state *primary_state; + + /* Pull in primary plane for correct VRR handling */ + primary_state = drm_atomic_get_plane_state(state, crtc->primary); + if (IS_ERR(primary_state)) + return PTR_ERR(primary_state); + } + /* In some use cases, like reset, no stream is attached */ if (!dm_crtc_state->stream) return 0; @@ -728,7 +779,16 @@ int amdgpu_dm_crtc_init(struct amdgpu_display_manager *dm, * support programmable degamma anywhere. */ is_dcn = dm->adev->dm.dc->caps.color.dpp.dcn_arch; - drm_crtc_enable_color_mgmt(&acrtc->base, is_dcn ? MAX_COLOR_LUT_ENTRIES : 0, + /* Dont't enable DRM CRTC degamma property for DCN401 since the + * pre-blending degamma LUT doesn't apply to cursor, and therefore + * can't work similar to a post-blending degamma LUT as in other hw + * versions. + * TODO: revisit it once KMS plane color API is merged. + */ + drm_crtc_enable_color_mgmt(&acrtc->base, + (is_dcn && + dm->adev->dm.dc->ctx->dce_version != DCN_VERSION_4_01) ? + MAX_COLOR_LUT_ENTRIES : 0, true, MAX_COLOR_LUT_ENTRIES); drm_mode_crtc_set_gamma_size(&acrtc->base, MAX_COLOR_LEGACY_LUT_ENTRIES); diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c index c7d13e743e6c..f263e1a4537e 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: MIT /* * Copyright 2018 Advanced Micro Devices, Inc. 
* @@ -3106,6 +3107,35 @@ static int replay_get_state(void *data, u64 *val) } /* + * Start / Stop capture Replay residency + */ +static int replay_set_residency(void *data, u64 val) +{ + struct amdgpu_dm_connector *connector = data; + struct dc_link *link = connector->dc_link; + bool is_start = (val != 0); + u32 residency = 0; + + link->dc->link_srv->edp_replay_residency(link, &residency, is_start, PR_RESIDENCY_MODE_PHY); + return 0; +} + +/* + * Read Replay residency + */ +static int replay_get_residency(void *data, u64 *val) +{ + struct amdgpu_dm_connector *connector = data; + struct dc_link *link = connector->dc_link; + u32 residency = 0; + + link->dc->link_srv->edp_replay_residency(link, &residency, false, PR_RESIDENCY_MODE_PHY); + *val = (u64)residency; + + return 0; +} + +/* * Read PSR state */ static int psr_get(void *data, u64 *val) @@ -3324,7 +3354,8 @@ DEFINE_DEBUGFS_ATTRIBUTE(dmcub_trace_event_state_fops, dmcub_trace_event_state_g dmcub_trace_event_state_set, "%llu\n"); DEFINE_DEBUGFS_ATTRIBUTE(replay_state_fops, replay_get_state, NULL, "%llu\n"); - +DEFINE_DEBUGFS_ATTRIBUTE(replay_residency_fops, replay_get_residency, replay_set_residency, + "%llu\n"); DEFINE_DEBUGFS_ATTRIBUTE(psr_fops, psr_get, NULL, "%llu\n"); DEFINE_DEBUGFS_ATTRIBUTE(psr_residency_fops, psr_read_residency, NULL, "%llu\n"); @@ -3502,6 +3533,8 @@ void connector_debugfs_init(struct amdgpu_dm_connector *connector) debugfs_create_file("replay_capability", 0444, dir, connector, &replay_capability_fops); debugfs_create_file("replay_state", 0444, dir, connector, &replay_state_fops); + debugfs_create_file_unsafe("replay_residency", 0444, dir, + connector, &replay_residency_fops); debugfs_create_file_unsafe("psr_capability", 0444, dir, connector, &psr_capability_fops); debugfs_create_file_unsafe("psr_state", 0444, dir, connector, &psr_fops); debugfs_create_file_unsafe("psr_residency", 0444, dir, @@ -3988,7 +4021,7 @@ static int capabilities_show(struct seq_file *m, void *unused) struct hubbub *hubbub = dc->res_pool->hubbub; - if (hubbub->funcs->get_mall_en) + if (hubbub && hubbub->funcs->get_mall_en) hubbub->funcs->get_mall_en(hubbub, &mall_in_use); if (dc->cap_funcs.get_subvp_en) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.h index 071200473c27..122cdc124b3b 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.h +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: MIT */ /* * Copyright 2018 Advanced Micro Devices, Inc. * diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c index c16962256514..19038f336155 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: MIT /* * Copyright 2019 Advanced Micro Devices, Inc. 
* @@ -222,6 +223,7 @@ void hdcp_update_display(struct hdcp_workqueue *hdcp_work, display_adjust.disable = MOD_HDCP_DISPLAY_NOT_DISABLE; link_adjust.auth_delay = 2; + link_adjust.retry_limit = MAX_NUM_OF_ATTEMPTS; if (content_type == DRM_MODE_HDCP_CONTENT_TYPE0) { link_adjust.hdcp2.force_type = MOD_HDCP_FORCE_TYPE_0; @@ -571,6 +573,7 @@ static void update_config(void *handle, struct cp_psp_stream_config *config) link->dp.usb4_enabled = config->usb4_enabled; display->adjust.disable = MOD_HDCP_DISPLAY_DISABLE_AUTHENTICATION; link->adjust.auth_delay = 2; + link->adjust.retry_limit = MAX_NUM_OF_ATTEMPTS; link->adjust.hdcp1.disable = 0; hdcp_w->encryption_status[display->index] = MOD_HDCP_ENCRYPTION_STATUS_HDCP_OFF; @@ -723,8 +726,8 @@ ret: static const struct bin_attribute data_attr = { .attr = {.name = "hdcp_srm", .mode = 0664}, .size = PSP_HDCP_SRM_FIRST_GEN_MAX_SIZE, /* Limit SRM size */ - .write_new = srm_data_write, - .read_new = srm_data_read, + .write = srm_data_write, + .read = srm_data_read, }; struct hdcp_workqueue *hdcp_create_workqueue(struct amdgpu_device *adev, @@ -765,14 +768,18 @@ struct hdcp_workqueue *hdcp_create_workqueue(struct amdgpu_device *adev, struct mod_hdcp_ddc_funcs *ddc_funcs = &config->ddc.funcs; config->psp.handle = &adev->psp; - if (dc->ctx->dce_version == DCN_VERSION_3_1 || + if (dc->ctx->dce_version == DCN_VERSION_3_1 || dc->ctx->dce_version == DCN_VERSION_3_14 || dc->ctx->dce_version == DCN_VERSION_3_15 || - dc->ctx->dce_version == DCN_VERSION_3_5 || + dc->ctx->dce_version == DCN_VERSION_3_16 || + dc->ctx->dce_version == DCN_VERSION_3_2 || + dc->ctx->dce_version == DCN_VERSION_3_21 || + dc->ctx->dce_version == DCN_VERSION_3_5 || dc->ctx->dce_version == DCN_VERSION_3_51 || - dc->ctx->dce_version == DCN_VERSION_3_6 || - dc->ctx->dce_version == DCN_VERSION_3_16) + dc->ctx->dce_version == DCN_VERSION_3_6 || + dc->ctx->dce_version == DCN_VERSION_4_01) config->psp.caps.dtm_v3_supported = 1; + config->ddc.handle = dc_get_link_at_index(dc, i); ddc_funcs->write_i2c = lp_write_i2c; diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.h index 69b445b011c8..4faa344f196e 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.h +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: MIT */ /* * Copyright 2019 Advanced Micro Devices, Inc. * diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c index d4395b92fb85..cc21337a182f 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: MIT /* * Copyright 2015 Advanced Micro Devices, Inc. 
* @@ -82,6 +83,7 @@ static void apply_edid_quirks(struct drm_device *dev, struct edid *edid, struct edid_caps->panel_patch.remove_sink_ext_caps = true; break; case drm_edid_encode_panel_id('S', 'D', 'C', 0x4154): + case drm_edid_encode_panel_id('S', 'D', 'C', 0x4171): drm_dbg_driver(dev, "Disabling VSC on monitor with panel id %X\n", panel_id); edid_caps->panel_patch.disable_colorimetry = true; break; @@ -1029,6 +1031,10 @@ enum dc_edid_status dm_helpers_read_local_edid( return EDID_NO_RESPONSE; edid = drm_edid_raw(drm_edid); // FIXME: Get rid of drm_edid_raw() + if (!edid || + edid->extensions >= sizeof(sink->dc_edid.raw_edid) / EDID_LENGTH) + return EDID_BAD_INPUT; + sink->dc_edid.length = EDID_LENGTH * (edid->extensions + 1); memmove(sink->dc_edid.raw_edid, (uint8_t *)edid, sink->dc_edid.length); diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_irq.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_irq.c index b61e210f6246..a1c722112c22 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_irq.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_irq.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: MIT /* * Copyright 2015 Advanced Micro Devices, Inc. * diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_irq.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_irq.h index ba17c23b2706..4f6b58f4f90d 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_irq.h +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_irq.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: MIT */ /* * Copyright 2015 Advanced Micro Devices, Inc. * diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_irq_params.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_irq_params.h index 6c9de834455b..3c9995275cbd 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_irq_params.h +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_irq_params.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: MIT */ /* * Copyright 2020 Advanced Micro Devices, Inc. * diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c index 25e8befbcc47..5e92eaa67aa3 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: MIT /* * Copyright 2012-15 Advanced Micro Devices, Inc. 
* @@ -107,7 +108,7 @@ static ssize_t dm_dp_aux_transfer(struct drm_dp_aux *aux, if (payload.write && result >= 0) { if (result) { /*one byte indicating partially written bytes*/ - drm_dbg_dp(adev_to_drm(adev), "amdgpu: AUX partially written\n"); + drm_dbg_dp(adev_to_drm(adev), "AUX partially written\n"); result = payload.data[0]; } else if (!payload.reply[0]) /*I2C_ACK|AUX_ACK*/ @@ -133,11 +134,11 @@ static ssize_t dm_dp_aux_transfer(struct drm_dp_aux *aux, break; } - drm_dbg_dp(adev_to_drm(adev), "amdgpu: DP AUX transfer fail:%d\n", operation_result); + drm_dbg_dp(adev_to_drm(adev), "DP AUX transfer fail:%d\n", operation_result); } if (payload.reply[0]) - drm_dbg_dp(adev_to_drm(adev), "amdgpu: AUX reply command not ACK: 0x%02x.", + drm_dbg_dp(adev_to_drm(adev), "AUX reply command not ACK: 0x%02x.", payload.reply[0]); return result; @@ -329,6 +330,34 @@ static bool retrieve_downstream_port_device(struct amdgpu_dm_connector *aconnect return true; } +static bool retrieve_branch_specific_data(struct amdgpu_dm_connector *aconnector) +{ + struct drm_connector *connector = &aconnector->base; + struct drm_dp_mst_port *port = aconnector->mst_output_port; + struct drm_dp_mst_port *port_parent; + struct drm_dp_aux *immediate_upstream_aux; + struct drm_dp_desc branch_desc; + + if (!port->parent) + return false; + + port_parent = port->parent->port_parent; + + immediate_upstream_aux = port_parent ? &port_parent->aux : port->mgr->aux; + + if (drm_dp_read_desc(immediate_upstream_aux, &branch_desc, true)) + return false; + + aconnector->branch_ieee_oui = (branch_desc.ident.oui[0] << 16) + + (branch_desc.ident.oui[1] << 8) + + (branch_desc.ident.oui[2]); + + drm_dbg_dp(port->aux.drm_dev, "MST branch oui 0x%x detected at %s\n", + aconnector->branch_ieee_oui, connector->name); + + return true; +} + static int dm_dp_mst_get_modes(struct drm_connector *connector) { struct amdgpu_dm_connector *aconnector = to_amdgpu_dm_connector(connector); @@ -668,6 +697,9 @@ dm_dp_add_mst_connector(struct drm_dp_mst_topology_mgr *mgr, drm_connector_set_path_property(connector, pathprop); + if (!retrieve_branch_specific_data(aconnector)) + aconnector->branch_ieee_oui = 0; + /* * Initialize connector state before adding the connectror to drm and * framebuffer lists @@ -809,6 +841,7 @@ void amdgpu_dm_initialize_dp_connector(struct amdgpu_display_manager *dm, drm_dp_aux_init(&aconnector->dm_dp_aux.aux); drm_dp_cec_register_connector(&aconnector->dm_dp_aux.aux, &aconnector->base); + drm_dp_dpcd_set_probe(&aconnector->dm_dp_aux.aux, false); if (aconnector->base.connector_type == DRM_MODE_CONNECTOR_eDP) return; @@ -821,13 +854,20 @@ void amdgpu_dm_initialize_dp_connector(struct amdgpu_display_manager *dm, drm_connector_attach_dp_subconnector_property(&aconnector->base); } -int dm_mst_get_pbn_divider(struct dc_link *link) +uint32_t dm_mst_get_pbn_divider(struct dc_link *link) { + uint32_t pbn_div_x100; + uint64_t dividend, divisor; + if (!link) return 0; - return dc_link_bandwidth_kbps(link, - dc_link_get_link_cap(link)) / (8 * 1000 * 54); + dividend = (uint64_t)dc_link_bandwidth_kbps(link, dc_link_get_link_cap(link)) * 100; + divisor = 8 * 1000 * 54; + + pbn_div_x100 = div64_u64(dividend, divisor); + + return dfixed_const(pbn_div_x100) / 100; } struct dsc_mst_fairness_params { @@ -1762,14 +1802,20 @@ static bool dp_get_link_current_set_bw(struct drm_dp_aux *aux, uint32_t *cur_lin union lane_count_set lane_count; u8 dp_link_encoding; u8 link_bw_set = 0; + u8 data[16] = {0}; *cur_link_bw = 0; - if (drm_dp_dpcd_read(aux, 
DP_MAIN_LINK_CHANNEL_CODING_SET, &dp_link_encoding, 1) != 1 || - drm_dp_dpcd_read(aux, DP_LANE_COUNT_SET, &lane_count.raw, 1) != 1 || - drm_dp_dpcd_read(aux, DP_LINK_BW_SET, &link_bw_set, 1) != 1) + if (drm_dp_dpcd_read(aux, DP_LINK_BW_SET, data, 16) != 16) return false; + dp_link_encoding = data[DP_MAIN_LINK_CHANNEL_CODING_SET - DP_LINK_BW_SET]; + link_bw_set = data[DP_LINK_BW_SET - DP_LINK_BW_SET]; + lane_count.raw = data[DP_LANE_COUNT_SET - DP_LINK_BW_SET]; + + drm_dbg_dp(aux->drm_dev, "MST_DSC downlink setting: %d, 0x%x x %d\n", + dp_link_encoding, link_bw_set, lane_count.bits.LANE_COUNT_SET); + switch (dp_link_encoding) { case DP_8b_10b_ENCODING: link_rate = link_bw_set; @@ -1866,8 +1912,10 @@ enum dc_status dm_dp_mst_is_port_support_mode( end_link_bw = aconnector->mst_local_bw; } - if (end_link_bw > 0 && stream_kbps > end_link_bw) { - DRM_DEBUG_DRIVER("MST_DSC dsc decode at last link." + if (end_link_bw > 0 && + stream_kbps > end_link_bw && + aconnector->branch_ieee_oui != DP_BRANCH_DEVICE_ID_90CC24) { + DRM_DEBUG_DRIVER("MST_DSC dsc decode at last link. " "Mode required bw can't fit into last link\n"); return DC_FAIL_BANDWIDTH_VALIDATE; } diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.h index 600d6e221011..6f7ea684b555 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.h +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: MIT */ /* * Copyright 2012-15 Advanced Micro Devices, Inc. * @@ -59,7 +60,7 @@ enum mst_msg_ready_type { struct amdgpu_display_manager; struct amdgpu_dm_connector; -int dm_mst_get_pbn_divider(struct dc_link *link); +uint32_t dm_mst_get_pbn_divider(struct dc_link *link); void amdgpu_dm_initialize_dp_connector(struct amdgpu_display_manager *dm, struct amdgpu_dm_connector *aconnector, diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c index b7c6e8d13435..e027798ece03 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c @@ -92,9 +92,9 @@ enum dm_micro_swizzle { MICRO_SWIZZLE_R = 3 }; -const struct drm_format_info *amdgpu_dm_plane_get_format_info(const struct drm_mode_fb_cmd2 *cmd) +const struct drm_format_info *amdgpu_dm_plane_get_format_info(u32 pixel_format, u64 modifier) { - return amdgpu_lookup_format_info(cmd->pixel_format, cmd->modifier[0]); + return amdgpu_lookup_format_info(pixel_format, modifier); } void amdgpu_dm_plane_fill_blending_from_plane_state(const struct drm_plane_state *plane_state, @@ -146,7 +146,7 @@ static void amdgpu_dm_plane_add_modifier(uint64_t **mods, uint64_t *size, uint64 if (*cap - *size < 1) { uint64_t new_cap = *cap * 2; - uint64_t *new_mods = kmalloc(new_cap * sizeof(uint64_t), GFP_KERNEL); + uint64_t *new_mods = kmalloc_array(new_cap, sizeof(uint64_t), GFP_KERNEL); if (!new_mods) { kfree(*mods); @@ -732,7 +732,7 @@ static int amdgpu_dm_plane_get_plane_modifiers(struct amdgpu_device *adev, unsig if (adev->family < AMDGPU_FAMILY_AI) return 0; - *mods = kmalloc(capacity * sizeof(uint64_t), GFP_KERNEL); + *mods = kmalloc_array(capacity, sizeof(uint64_t), GFP_KERNEL); if (plane_type == DRM_PLANE_TYPE_CURSOR) { amdgpu_dm_plane_add_modifier(mods, &size, &capacity, DRM_FORMAT_MOD_LINEAR); @@ -1633,7 +1633,7 @@ dm_atomic_plane_attach_color_mgmt_properties(struct amdgpu_display_manager *dm, drm_object_attach_property(&plane->base, 
dm->adev->mode_info.plane_ctm_property, 0); - if (dpp_color_caps.hw_3d_lut) { + if (dpp_color_caps.hw_3d_lut || dm->dc->caps.color.mpc.preblend) { drm_object_attach_property(&plane->base, mode_info.plane_shaper_lut_property, 0); drm_object_attach_property(&plane->base, diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.h index 615d2ab2b803..ea2619b507db 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.h +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.h @@ -58,7 +58,7 @@ int amdgpu_dm_plane_init(struct amdgpu_display_manager *dm, unsigned long possible_crtcs, const struct dc_plane_cap *plane_cap); -const struct drm_format_info *amdgpu_dm_plane_get_format_info(const struct drm_mode_fb_cmd2 *cmd); +const struct drm_format_info *amdgpu_dm_plane_get_format_info(u32 pixel_format, u64 modifier); void amdgpu_dm_plane_fill_blending_from_plane_state(const struct drm_plane_state *plane_state, bool *per_pixel_alpha, bool *pre_multiplied_alpha, diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_pp_smu.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_pp_smu.c index 848c5b4bb301..11b2ea6edf95 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_pp_smu.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_pp_smu.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: MIT /* * Copyright 2018 Advanced Micro Devices, Inc. * @@ -97,6 +98,7 @@ bool dm_pp_apply_display_requirements( const struct dm_pp_single_disp_config *dc_cfg = &pp_display_cfg->disp_configs[i]; adev->pm.pm_display_cfg.displays[i].controller_id = dc_cfg->pipe_idx + 1; + adev->pm.pm_display_cfg.displays[i].pixel_clock = dc_cfg->pixel_clock; } amdgpu_dpm_display_configuration_change(adev, &adev->pm.pm_display_cfg); diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_psr.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_psr.c index f984cb0cb889..fd491b7a3cd7 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_psr.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_psr.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: MIT /* * Copyright 2021 Advanced Micro Devices, Inc. * @@ -26,7 +27,6 @@ #include "amdgpu_dm_psr.h" #include "dc_dmub_srv.h" #include "dc.h" -#include "dm_helpers.h" #include "amdgpu_dm.h" #include "modules/power/power_helpers.h" @@ -119,8 +119,10 @@ bool amdgpu_dm_link_setup_psr(struct dc_stream_state *stream) psr_config.allow_multi_disp_optimizations = (amdgpu_dc_feature_mask & DC_PSR_ALLOW_MULTI_DISP_OPT); - if (!psr_su_set_dsc_slice_height(dc, link, stream, &psr_config)) - return false; + if (link->psr_settings.psr_version == DC_PSR_VERSION_SU_1) { + if (!psr_su_set_dsc_slice_height(dc, link, stream, &psr_config)) + return false; + } ret = dc_link_setup_psr(link, stream, &psr_config, &psr_context); diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_psr.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_psr.h index e2366321a3c1..4fb8626913cf 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_psr.h +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_psr.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: MIT */ /* * Copyright 2021 Advanced Micro Devices, Inc. 
* diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_replay.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_replay.c index 41f07f13a7b5..80704d709e44 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_replay.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_replay.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: MIT /* * Copyright 2023 Advanced Micro Devices, Inc. * @@ -30,7 +31,7 @@ #include "amdgpu_dm.h" #include "modules/power/power_helpers.h" #include "dmub/inc/dmub_cmd.h" -#include "dc/inc/link.h" +#include "dc/inc/link_service.h" /* * amdgpu_dm_link_supports_replay() - check if the link supports replay diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_replay.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_replay.h index 8126bdb1eb6b..73b6c67ae5e7 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_replay.h +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_replay.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: MIT */ /* * Copyright 2021 Advanced Micro Devices, Inc. * diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_services.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_services.c index 0005f5f8f34f..8550d5e8b753 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_services.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_services.c @@ -1,3 +1,4 @@ +//SPDX-License-Identifier: MIT /* * Copyright 2015 Advanced Micro Devices, Inc. * @@ -52,11 +53,11 @@ void dm_perf_trace_timestamp(const char *func_name, unsigned int line, struct dc func_name, line); } -void dm_trace_smu_msg(uint32_t msg_id, uint32_t param_in, struct dc_context *ctx) +void dm_trace_smu_enter(uint32_t msg_id, uint32_t param_in, unsigned int delay, struct dc_context *ctx) { } -void dm_trace_smu_delay(uint32_t delay, struct dc_context *ctx) +void dm_trace_smu_exit(bool success, uint32_t response, struct dc_context *ctx) { } diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_trace.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_trace.h index 4686d4b0cbad..aa56fd6d56c3 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_trace.h +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_trace.h @@ -1,3 +1,4 @@ +//SPDX-License-Identifier: MIT /* * Copyright 2018 Advanced Micro Devices, Inc. * @@ -726,6 +727,32 @@ TRACE_EVENT(dcn_optc_lock_unlock_state, ) ); +TRACE_EVENT(amdgpu_dm_brightness, + TP_PROTO(void *function, u32 user_brightness, u32 converted_brightness, bool aux, bool ac), + TP_ARGS(function, user_brightness, converted_brightness, aux, ac), + TP_STRUCT__entry( + __field(void *, function) + __field(u32, user_brightness) + __field(u32, converted_brightness) + __field(bool, aux) + __field(bool, ac) + ), + TP_fast_assign( + __entry->function = function; + __entry->user_brightness = user_brightness; + __entry->converted_brightness = converted_brightness; + __entry->aux = aux; + __entry->ac = ac; + ), + TP_printk("%ps: brightness requested=%u converted=%u aux=%s power=%s", + (void *)__entry->function, + (u32)__entry->user_brightness, + (u32)__entry->converted_brightness, + (__entry->aux) ? "true" : "false", + (__entry->ac) ? 
"AC" : "DC" + ) +); + #endif /* _AMDGPU_DM_TRACE_H_ */ #undef TRACE_INCLUDE_PATH diff --git a/drivers/gpu/drm/amd/display/dc/Makefile b/drivers/gpu/drm/amd/display/dc/Makefile index 3c9ecea7eebc..dc943abd6dba 100644 --- a/drivers/gpu/drm/amd/display/dc/Makefile +++ b/drivers/gpu/drm/amd/display/dc/Makefile @@ -37,6 +37,7 @@ DC_LIBS += dcn301 DC_LIBS += dcn31 DC_LIBS += dml DC_LIBS += dml2 +DC_LIBS += soc_and_ip_translator endif DC_LIBS += dce120 diff --git a/drivers/gpu/drm/amd/display/dc/basics/dce_calcs.c b/drivers/gpu/drm/amd/display/dc/basics/dce_calcs.c index d897f8a30ede..4da5adab799c 100644 --- a/drivers/gpu/drm/amd/display/dc/basics/dce_calcs.c +++ b/drivers/gpu/drm/amd/display/dc/basics/dce_calcs.c @@ -1136,7 +1136,7 @@ static void calculate_bandwidth( } } } - data->total_dmifmc_urgent_trips = bw_ceil2(bw_div(data->total_requests_for_adjusted_dmif_size, (bw_add(dceip->dmif_request_buffer_size, bw_int_to_fixed(vbios->number_of_request_slots_gmc_reserves_for_dmif_per_channel * data->number_of_dram_channels)))), bw_int_to_fixed(1)); + data->total_dmifmc_urgent_trips = bw_ceil2(bw_div(data->total_requests_for_adjusted_dmif_size, (bw_add(dceip->dmif_request_buffer_size, bw_int_to_fixed((uint64_t)vbios->number_of_request_slots_gmc_reserves_for_dmif_per_channel * data->number_of_dram_channels)))), bw_int_to_fixed(1)); data->total_dmifmc_urgent_latency = bw_mul(vbios->dmifmc_urgent_latency, data->total_dmifmc_urgent_trips); data->total_display_reads_required_data = bw_int_to_fixed(0); data->total_display_reads_required_dram_access_data = bw_int_to_fixed(0); diff --git a/drivers/gpu/drm/amd/display/dc/basics/fixpt31_32.c b/drivers/gpu/drm/amd/display/dc/basics/fixpt31_32.c index 452206b5095e..6073cadde76c 100644 --- a/drivers/gpu/drm/amd/display/dc/basics/fixpt31_32.c +++ b/drivers/gpu/drm/amd/display/dc/basics/fixpt31_32.c @@ -284,7 +284,7 @@ struct fixed31_32 dc_fixpt_cos(struct fixed31_32 arg) dc_fixpt_mul( square, res), - n * (n - 1))); + (long long)n * (n - 1))); n -= 2; } while (n != 0); diff --git a/drivers/gpu/drm/amd/display/dc/basics/vector.c b/drivers/gpu/drm/amd/display/dc/basics/vector.c index 6d2924114a3e..b413a672c2c0 100644 --- a/drivers/gpu/drm/amd/display/dc/basics/vector.c +++ b/drivers/gpu/drm/amd/display/dc/basics/vector.c @@ -170,7 +170,7 @@ bool dal_vector_remove_at_index( memmove( vector->container + (index * vector->struct_size), vector->container + ((index + 1) * vector->struct_size), - (vector->count - index - 1) * vector->struct_size); + (size_t)(vector->count - index - 1) * vector->struct_size); vector->count -= 1; return true; @@ -219,7 +219,7 @@ bool dal_vector_insert_at( memmove( insert_address + vector->struct_size, insert_address, - vector->struct_size * (vector->count - position)); + (size_t)vector->struct_size * (vector->count - position)); memmove( insert_address, @@ -271,7 +271,7 @@ struct vector *dal_vector_clone( /* copy vector's data */ memmove(vec_cloned->container, vector->container, - vec_cloned->struct_size * vec_cloned->capacity); + (size_t)vec_cloned->struct_size * vec_cloned->capacity); return vec_cloned; } diff --git a/drivers/gpu/drm/amd/display/dc/bios/bios_parser.c b/drivers/gpu/drm/amd/display/dc/bios/bios_parser.c index 67f08495b7e6..154fd2c18e88 100644 --- a/drivers/gpu/drm/amd/display/dc/bios/bios_parser.c +++ b/drivers/gpu/drm/amd/display/dc/bios/bios_parser.c @@ -174,11 +174,8 @@ static struct graphics_object_id bios_parser_get_connector_id( return object_id; } - if (tbl->ucNumberOfObjects <= i) { - dm_error("Can't find connector id 
%d in connector table of size %d.\n", - i, tbl->ucNumberOfObjects); + if (tbl->ucNumberOfObjects <= i) return object_id; - } id = le16_to_cpu(tbl->asObjects[i].usObjectID); object_id = object_id_from_bios_object_id(id); diff --git a/drivers/gpu/drm/amd/display/dc/bios/command_table.c b/drivers/gpu/drm/amd/display/dc/bios/command_table.c index 2bcae0643e61..58e88778da7f 100644 --- a/drivers/gpu/drm/amd/display/dc/bios/command_table.c +++ b/drivers/gpu/drm/amd/display/dc/bios/command_table.c @@ -993,7 +993,7 @@ static enum bp_result set_pixel_clock_v3( allocation.sPCLKInput.usFbDiv = cpu_to_le16((uint16_t)bp_params->feedback_divider); allocation.sPCLKInput.ucFracFbDiv = - (uint8_t)bp_params->fractional_feedback_divider; + (uint8_t)(bp_params->fractional_feedback_divider / 100000); allocation.sPCLKInput.ucPostDiv = (uint8_t)bp_params->pixel_clock_post_divider; diff --git a/drivers/gpu/drm/amd/display/dc/bios/command_table2.c b/drivers/gpu/drm/amd/display/dc/bios/command_table2.c index 2c645dffec18..f2b1720a6a66 100644 --- a/drivers/gpu/drm/amd/display/dc/bios/command_table2.c +++ b/drivers/gpu/drm/amd/display/dc/bios/command_table2.c @@ -396,6 +396,7 @@ static enum bp_result transmitter_control_v1_7( process_phy_transition_init_params.display_port_link_rate = link->cur_link_settings.link_rate; process_phy_transition_init_params.transition_bitmask = link->phy_transition_bitmask; } + dig_v1_7.skip_phy_ssc_reduction = link->wa_flags.skip_phy_ssc_reduction; } // Handle PRE_OFF_TO_ON: Process ACPI PHY Transition Interlock diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/Makefile b/drivers/gpu/drm/amd/display/dc/clk_mgr/Makefile index d9955c5d2e5e..60021671b386 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/Makefile +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/Makefile @@ -112,7 +112,7 @@ AMD_DISPLAY_FILES += $(AMD_DAL_CLK_MGR_DCN21) ############################################################################### # DCN30 ############################################################################### -CLK_MGR_DCN30 = dcn30_clk_mgr.o dcn30_clk_mgr_smu_msg.o +CLK_MGR_DCN30 = dcn30_clk_mgr.o dcn30_clk_mgr_smu_msg.o dcn30m_clk_mgr.o dcn30m_clk_mgr_smu_msg.o AMD_DAL_CLK_MGR_DCN30 = $(addprefix $(AMDDALPATH)/dc/clk_mgr/dcn30/,$(CLK_MGR_DCN30)) diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/clk_mgr.c index 4c3e58c730b1..15cf13ec5302 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/clk_mgr.c @@ -28,7 +28,7 @@ #include "dccg.h" #include "clk_mgr_internal.h" #include "dc_state_priv.h" -#include "link.h" +#include "link_service.h" #include "dce100/dce_clk_mgr.h" #include "dce110/dce110_clk_mgr.h" @@ -67,7 +67,7 @@ int clk_mgr_helper_get_active_display_cnt( if (dc_state_get_stream_subvp_type(context, stream) == SUBVP_PHANTOM) continue; - if (!stream->dpms_off || (stream_status && stream_status->plane_count)) + if (!stream->dpms_off || dc->is_switch_in_progress_dest || (stream_status && stream_status->plane_count)) display_count++; } @@ -158,7 +158,6 @@ struct clk_mgr *dc_clk_mgr_create(struct dc_context *ctx, struct pp_smu_funcs *p return NULL; } dce60_clk_mgr_construct(ctx, clk_mgr); - dce_clk_mgr_construct(ctx, clk_mgr); return &clk_mgr->base; } #endif diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dce100/dce_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dce100/dce_clk_mgr.c index 26feefbb8990..6131ede2db7a 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dce100/dce_clk_mgr.c 
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dce100/dce_clk_mgr.c @@ -72,9 +72,9 @@ static const struct state_dependent_clocks dce80_max_clks_by_state[] = { /* ClocksStateLow */ { .display_clk_khz = 352000, .pixel_clk_khz = 330000}, /* ClocksStateNominal */ -{ .display_clk_khz = 600000, .pixel_clk_khz = 400000 }, +{ .display_clk_khz = 625000, .pixel_clk_khz = 400000 }, /* ClocksStatePerformance */ -{ .display_clk_khz = 600000, .pixel_clk_khz = 400000 } }; +{ .display_clk_khz = 625000, .pixel_clk_khz = 400000 } }; int dentist_get_divider_from_did(int did) { @@ -245,6 +245,11 @@ int dce_set_clock( pxl_clk_params.target_pixel_clock_100hz = requested_clk_khz * 10; pxl_clk_params.pll_id = CLOCK_SOURCE_ID_DFS; + /* DCE 6.0, DCE 6.4: engine clock is the same as PLL0 */ + if (clk_mgr_base->ctx->dce_version == DCE_VERSION_6_0 || + clk_mgr_base->ctx->dce_version == DCE_VERSION_6_4) + pxl_clk_params.pll_id = CLOCK_SOURCE_ID_PLL0; + if (clk_mgr_dce->dfs_bypass_active) pxl_clk_params.flags.SET_DISPCLK_DFS_BYPASS = true; @@ -386,8 +391,6 @@ static void dce_pplib_apply_display_requirements( { struct dm_pp_display_configuration *pp_display_cfg = &context->pp_display_cfg; - pp_display_cfg->avail_mclk_switch_time_us = dce110_get_min_vblank_time_us(context); - dce110_fill_display_configs(context, pp_display_cfg); if (memcmp(&dc->current_state->pp_display_cfg, pp_display_cfg, sizeof(*pp_display_cfg)) != 0) @@ -400,11 +403,9 @@ static void dce_update_clocks(struct clk_mgr *clk_mgr_base, { struct clk_mgr_internal *clk_mgr_dce = TO_CLK_MGR_INTERNAL(clk_mgr_base); struct dm_pp_power_level_change_request level_change_req; - int patched_disp_clk = context->bw_ctx.bw.dce.dispclk_khz; - - /*TODO: W/A for dal3 linux, investigate why this works */ - if (!clk_mgr_dce->dfs_bypass_active) - patched_disp_clk = patched_disp_clk * 115 / 100; + const int max_disp_clk = + clk_mgr_dce->max_clks_by_state[DM_PP_CLOCKS_STATE_PERFORMANCE].display_clk_khz; + int patched_disp_clk = MIN(max_disp_clk, context->bw_ctx.bw.dce.dispclk_khz); level_change_req.power_level = dce_get_required_clocks_state(clk_mgr_base, context); /* get max clock state from PPLIB */ @@ -462,6 +463,9 @@ void dce_clk_mgr_construct( clk_mgr->max_clks_state = DM_PP_CLOCKS_STATE_NOMINAL; clk_mgr->cur_min_clks_state = DM_PP_CLOCKS_STATE_INVALID; + base->clks.max_supported_dispclk_khz = + clk_mgr->max_clks_by_state[DM_PP_CLOCKS_STATE_PERFORMANCE].display_clk_khz; + dce_clock_read_integrated_info(clk_mgr); dce_clock_read_ss_info(clk_mgr); } diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dce110/dce110_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dce110/dce110_clk_mgr.c index f8409453434c..d50b9440210e 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dce110/dce110_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dce110/dce110_clk_mgr.c @@ -120,9 +120,15 @@ void dce110_fill_display_configs( const struct dc_state *context, struct dm_pp_display_configuration *pp_display_cfg) { + struct dc *dc = context->clk_mgr->ctx->dc; int j; int num_cfgs = 0; + pp_display_cfg->avail_mclk_switch_time_us = dce110_get_min_vblank_time_us(context); + pp_display_cfg->disp_clk_khz = dc->clk_mgr->clks.dispclk_khz; + pp_display_cfg->avail_mclk_switch_time_in_disp_active_us = 0; + pp_display_cfg->crtc_index = dc->res_pool->res_cap->num_timing_generator; + for (j = 0; j < context->stream_count; j++) { int k; @@ -158,12 +164,29 @@ void dce110_fill_display_configs( stream->link->cur_link_settings.link_rate; cfg->link_settings.link_spread = 
stream->link->cur_link_settings.link_spread; - cfg->sym_clock = stream->phy_pix_clk; + cfg->pixel_clock = stream->phy_pix_clk; /* Round v_refresh*/ cfg->v_refresh = stream->timing.pix_clk_100hz * 100; cfg->v_refresh /= stream->timing.h_total; cfg->v_refresh = (cfg->v_refresh + stream->timing.v_total / 2) / stream->timing.v_total; + + /* Find first CRTC index and calculate its line time. + * This is necessary for DPM on SI GPUs. + */ + if (cfg->pipe_idx < pp_display_cfg->crtc_index) { + const struct dc_crtc_timing *timing = + &context->streams[0]->timing; + + pp_display_cfg->crtc_index = cfg->pipe_idx; + pp_display_cfg->line_time_in_us = + timing->h_total * 10000 / timing->pix_clk_100hz; + } + } + + if (!num_cfgs) { + pp_display_cfg->crtc_index = 0; + pp_display_cfg->line_time_in_us = 0; } pp_display_cfg->display_count = num_cfgs; @@ -223,25 +246,8 @@ void dce11_pplib_apply_display_requirements( pp_display_cfg->min_engine_clock_deep_sleep_khz = context->bw_ctx.bw.dce.sclk_deep_sleep_khz; - pp_display_cfg->avail_mclk_switch_time_us = - dce110_get_min_vblank_time_us(context); - /* TODO: dce11.2*/ - pp_display_cfg->avail_mclk_switch_time_in_disp_active_us = 0; - - pp_display_cfg->disp_clk_khz = dc->clk_mgr->clks.dispclk_khz; - dce110_fill_display_configs(context, pp_display_cfg); - /* TODO: is this still applicable?*/ - if (pp_display_cfg->display_count == 1) { - const struct dc_crtc_timing *timing = - &context->streams[0]->timing; - - pp_display_cfg->crtc_index = - pp_display_cfg->disp_configs[0].pipe_idx; - pp_display_cfg->line_time_in_us = timing->h_total * 10000 / timing->pix_clk_100hz; - } - if (memcmp(&dc->current_state->pp_display_cfg, pp_display_cfg, sizeof(*pp_display_cfg)) != 0) dm_pp_apply_display_requirements(dc->ctx, pp_display_cfg); } diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dce60/dce60_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dce60/dce60_clk_mgr.c index 0267644717b2..69dd80d9f738 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dce60/dce60_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dce60/dce60_clk_mgr.c @@ -83,22 +83,13 @@ static const struct state_dependent_clocks dce60_max_clks_by_state[] = { static int dce60_get_dp_ref_freq_khz(struct clk_mgr *clk_mgr_base) { struct clk_mgr_internal *clk_mgr = TO_CLK_MGR_INTERNAL(clk_mgr_base); - int dprefclk_wdivider; - int dp_ref_clk_khz; - int target_div; + struct dc_context *ctx = clk_mgr_base->ctx; + int dp_ref_clk_khz = 0; - /* DCE6 has no DPREFCLK_CNTL to read DP Reference Clock source */ - - /* Read the mmDENTIST_DISPCLK_CNTL to get the currently - * programmed DID DENTIST_DPREFCLK_WDIVIDER*/ - REG_GET(DENTIST_DISPCLK_CNTL, DENTIST_DPREFCLK_WDIVIDER, &dprefclk_wdivider); - - /* Convert DENTIST_DPREFCLK_WDIVIDERto actual divider*/ - target_div = dentist_get_divider_from_did(dprefclk_wdivider); - - /* Calculate the current DFS clock, in kHz.*/ - dp_ref_clk_khz = (DENTIST_DIVIDER_RANGE_SCALE_FACTOR - * clk_mgr->base.dentist_vco_freq_khz) / target_div; + if (ASIC_REV_IS_TAHITI_P(ctx->asic_id.hw_internal_rev)) + dp_ref_clk_khz = ctx->dc_bios->fw_info.default_display_engine_pll_frequency; + else + dp_ref_clk_khz = clk_mgr_base->clks.dispclk_khz; return dce_adjust_dp_ref_freq_for_ss(clk_mgr, dp_ref_clk_khz); } @@ -109,8 +100,6 @@ static void dce60_pplib_apply_display_requirements( { struct dm_pp_display_configuration *pp_display_cfg = &context->pp_display_cfg; - pp_display_cfg->avail_mclk_switch_time_us = dce110_get_min_vblank_time_us(context); - dce110_fill_display_configs(context, pp_display_cfg); if 
(memcmp(&dc->current_state->pp_display_cfg, pp_display_cfg, sizeof(*pp_display_cfg)) != 0) @@ -123,11 +112,9 @@ static void dce60_update_clocks(struct clk_mgr *clk_mgr_base, { struct clk_mgr_internal *clk_mgr_dce = TO_CLK_MGR_INTERNAL(clk_mgr_base); struct dm_pp_power_level_change_request level_change_req; - int patched_disp_clk = context->bw_ctx.bw.dce.dispclk_khz; - - /*TODO: W/A for dal3 linux, investigate why this works */ - if (!clk_mgr_dce->dfs_bypass_active) - patched_disp_clk = patched_disp_clk * 115 / 100; + const int max_disp_clk = + clk_mgr_dce->max_clks_by_state[DM_PP_CLOCKS_STATE_PERFORMANCE].display_clk_khz; + int patched_disp_clk = MIN(max_disp_clk, context->bw_ctx.bw.dce.dispclk_khz); level_change_req.power_level = dce_get_required_clocks_state(clk_mgr_base, context); /* get max clock state from PPLIB */ @@ -160,6 +147,8 @@ void dce60_clk_mgr_construct( struct dc_context *ctx, struct clk_mgr_internal *clk_mgr) { + struct clk_mgr *base = &clk_mgr->base; + dce_clk_mgr_construct(ctx, clk_mgr); memcpy(clk_mgr->max_clks_by_state, @@ -170,5 +159,8 @@ void dce60_clk_mgr_construct( clk_mgr->clk_mgr_shift = &disp_clk_shift; clk_mgr->clk_mgr_mask = &disp_clk_mask; clk_mgr->base.funcs = &dce60_funcs; + + base->clks.max_supported_dispclk_khz = + clk_mgr->max_clks_by_state[DM_PP_CLOCKS_STATE_PERFORMANCE].display_clk_khz; } diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dalsmc.h b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dalsmc.h index fa09c594fd36..06da34676965 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dalsmc.h +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dalsmc.h @@ -56,6 +56,7 @@ #define DALSMC_MSG_SetDisplayRefreshFromMall 0xF #define DALSMC_MSG_SetExternalClientDfCstateAllow 0x10 #define DALSMC_MSG_BacoAudioD3PME 0x11 -#define DALSMC_Message_Count 0x12 +#define DALSMC_MSG_SmartAccess 0x12 +#define DALSMC_Message_Count 0x13 #endif diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30_clk_mgr.c index 8083a553c60e..ef77fcd164ed 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30_clk_mgr.c @@ -30,6 +30,7 @@ #include "dce100/dce_clk_mgr.h" #include "dcn30/dcn30_clk_mgr.h" #include "dml/dcn30/dcn30_fpu.h" +#include "dcn30/dcn30m_clk_mgr.h" #include "reg_helper.h" #include "core_types.h" #include "dm_helpers.h" @@ -498,7 +499,8 @@ static struct clk_mgr_funcs dcn3_funcs = { .are_clock_states_equal = dcn3_are_clock_states_equal, .enable_pme_wa = dcn3_enable_pme_wa, .notify_link_rate_change = dcn30_notify_link_rate_change, - .is_smu_present = dcn3_is_smu_present + .is_smu_present = dcn3_is_smu_present, + .set_smartmux_switch = dcn30m_set_smartmux_switch }; static void dcn3_init_clocks_fpga(struct clk_mgr *clk_mgr) diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30_clk_mgr_smu_msg.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30_clk_mgr_smu_msg.c index 3253115a153d..827bc2431d5d 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30_clk_mgr_smu_msg.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30_clk_mgr_smu_msg.c @@ -69,7 +69,7 @@ static uint32_t dcn30_smu_wait_for_response(struct clk_mgr_internal *clk_mgr, un /* handle DALSMC_Result_CmdRejectedBusy? 
*/ - TRACE_SMU_DELAY(delay_us * (initial_max_retries - max_retries), clk_mgr->base.ctx); + TRACE_SMU_MSG_DELAY(0, 0, delay_us * (initial_max_retries - max_retries), clk_mgr->base.ctx); return reg; } diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30m_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30m_clk_mgr.c new file mode 100644 index 000000000000..8e8a11c7437e --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30m_clk_mgr.c @@ -0,0 +1,36 @@ +/* + * Copyright 2019 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: AMD + * + */ + +#include "clk_mgr_internal.h" +#include "dcn30/dcn30m_clk_mgr.h" +#include "dcn30m_clk_mgr_smu_msg.h" + + +uint32_t dcn30m_set_smartmux_switch(struct clk_mgr *clk_mgr_base, uint32_t pins_to_set) +{ + struct clk_mgr_internal *clk_mgr = TO_CLK_MGR_INTERNAL(clk_mgr_base); + + return dcn30m_smu_set_smart_mux_switch(clk_mgr, pins_to_set); +} diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30m_clk_mgr.h b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30m_clk_mgr.h new file mode 100644 index 000000000000..757985b2eadc --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30m_clk_mgr.h @@ -0,0 +1,31 @@ +/* + * Copyright 2019 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * + * Authors: AMD + * + */ + +#ifndef __DCN30M_CLK_MGR_H__ +#define __DCN30M_CLK_MGR_H__ + +uint32_t dcn30m_set_smartmux_switch(struct clk_mgr *clk_mgr_base, uint32_t pins_to_set); + +#endif //__DCN30M_CLK_MGR_H__ diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30m_clk_mgr_smu_msg.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30m_clk_mgr_smu_msg.c new file mode 100644 index 000000000000..0dd0583ff21e --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30m_clk_mgr_smu_msg.c @@ -0,0 +1,118 @@ +/* + * Copyright 2020 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: AMD + * + */ + +#include "dcn30m_clk_mgr_smu_msg.h" + +#include "clk_mgr_internal.h" +#include "reg_helper.h" +#include "dm_helpers.h" + +#include "dalsmc.h" + +#define mmDAL_MSG_REG 0x1628A +#define mmDAL_ARG_REG 0x16273 +#define mmDAL_RESP_REG 0x16274 + +#define REG(reg_name) \ + mm ## reg_name + +#include "logger_types.h" +#undef DC_LOGGER +#define DC_LOGGER \ + CTX->logger +#define smu_print(str, ...) {DC_LOG_SMU(str, ##__VA_ARGS__); } + + +/* + * Function to be used instead of REG_WAIT macro because the wait ends when + * the register is NOT EQUAL to zero, and because the translation in msg_if.h + * won't work with REG_WAIT. + */ +static uint32_t dcn30m_smu_wait_for_response(struct clk_mgr_internal *clk_mgr, + unsigned int delay_us, unsigned int max_retries) +{ + uint32_t reg = 0; + + do { + reg = REG_READ(DAL_RESP_REG); + if (reg) + break; + + if (delay_us >= 1000) + msleep(delay_us/1000); + else if (delay_us > 0) + udelay(delay_us); + } while (max_retries--); + + /* handle DALSMC_Result_CmdRejectedBusy? */ + + /* Log? 
*/ + + return reg; +} + +static bool dcn30m_smu_send_msg_with_param(struct clk_mgr_internal *clk_mgr, + uint32_t msg_id, uint32_t param_in, uint32_t *param_out) +{ + uint32_t result; + /* Wait for response register to be ready */ + dcn30m_smu_wait_for_response(clk_mgr, 10, 200000); + + /* Clear response register */ + REG_WRITE(DAL_RESP_REG, 0); + + /* Set the parameter register for the SMU message */ + REG_WRITE(DAL_ARG_REG, param_in); + + /* Trigger the message transaction by writing the message ID */ + REG_WRITE(DAL_MSG_REG, msg_id); + + result = dcn30m_smu_wait_for_response(clk_mgr, 10, 200000); + + if (IS_SMU_TIMEOUT(result)) + dm_helpers_smu_timeout(CTX, msg_id, param_in, 10 * 200000); + + /* Wait for response */ + if (result == DALSMC_Result_OK) { + if (param_out) + *param_out = REG_READ(DAL_ARG_REG); + + return true; + } + + return false; +} + +uint32_t dcn30m_smu_set_smart_mux_switch(struct clk_mgr_internal *clk_mgr, uint32_t pins_to_set) +{ + uint32_t response = 0; + + smu_print("SMU Set SmartMux Switch: switch_dgpu = %d\n", pins_to_set); + + dcn30m_smu_send_msg_with_param(clk_mgr, + DALSMC_MSG_SmartAccess, pins_to_set, &response); + + return response; +} diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30m_clk_mgr_smu_msg.h b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30m_clk_mgr_smu_msg.h new file mode 100644 index 000000000000..8a59a473fc5e --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30m_clk_mgr_smu_msg.h @@ -0,0 +1,34 @@ +/* + * Copyright 2020 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * + * Authors: AMD + * + */ + +#ifndef DAL_DC_DCN30M_CLK_MGR_SMU_MSG_H_ +#define DAL_DC_DCN30M_CLK_MGR_SMU_MSG_H_ + +#include "core_types.h" + +struct clk_mgr_internal; + +uint32_t dcn30m_smu_set_smart_mux_switch(struct clk_mgr_internal *clk_mgr, uint32_t pins_to_set); +#endif /* DAL_DC_DCN30M_CLK_MGR_SMU_MSG_H_ */ diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn301/vg_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn301/vg_clk_mgr.c index 9e2ef0e724fc..7aee02d56292 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn301/vg_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn301/vg_clk_mgr.c @@ -563,6 +563,7 @@ static void vg_clk_mgr_helper_populate_bw_params( { int i, j; struct clk_bw_params *bw_params = clk_mgr->base.bw_params; + uint32_t max_dispclk = 0, max_dppclk = 0; j = -1; @@ -584,6 +585,15 @@ static void vg_clk_mgr_helper_populate_bw_params( return; } + /* dispclk and dppclk can be max at any voltage, same number of levels for both */ + if (clock_table->NumDispClkLevelsEnabled <= VG_NUM_DISPCLK_DPM_LEVELS && + clock_table->NumDispClkLevelsEnabled <= VG_NUM_DPPCLK_DPM_LEVELS) { + max_dispclk = find_max_clk_value(clock_table->DispClocks, clock_table->NumDispClkLevelsEnabled); + max_dppclk = find_max_clk_value(clock_table->DppClocks, clock_table->NumDispClkLevelsEnabled); + } else { + ASSERT(0); + } + bw_params->clk_table.num_entries = j + 1; for (i = 0; i < bw_params->clk_table.num_entries - 1; i++, j--) { @@ -591,11 +601,17 @@ static void vg_clk_mgr_helper_populate_bw_params( bw_params->clk_table.entries[i].memclk_mhz = clock_table->DfPstateTable[j].memclk; bw_params->clk_table.entries[i].voltage = clock_table->DfPstateTable[j].voltage; bw_params->clk_table.entries[i].dcfclk_mhz = find_dcfclk_for_voltage(clock_table, clock_table->DfPstateTable[j].voltage); + + /* Now update clocks we do read */ + bw_params->clk_table.entries[i].dispclk_mhz = max_dispclk; + bw_params->clk_table.entries[i].dppclk_mhz = max_dppclk; } bw_params->clk_table.entries[i].fclk_mhz = clock_table->DfPstateTable[j].fclk; bw_params->clk_table.entries[i].memclk_mhz = clock_table->DfPstateTable[j].memclk; bw_params->clk_table.entries[i].voltage = clock_table->DfPstateTable[j].voltage; bw_params->clk_table.entries[i].dcfclk_mhz = find_max_clk_value(clock_table->DcfClocks, VG_NUM_DCFCLK_DPM_LEVELS); + bw_params->clk_table.entries[i].dispclk_mhz = find_max_clk_value(clock_table->DispClocks, VG_NUM_DISPCLK_DPM_LEVELS); + bw_params->clk_table.entries[i].dppclk_mhz = find_max_clk_value(clock_table->DppClocks, VG_NUM_DPPCLK_DPM_LEVELS); bw_params->vram_type = bios_info->memory_type; bw_params->num_channels = bios_info->ma_channel_number; diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_clk_mgr.c index bc123f1884da..051052bd10c9 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_clk_mgr.c @@ -47,7 +47,7 @@ #include "dcn30/dcn30_clk_mgr.h" #include "dc_dmub_srv.h" -#include "link.h" +#include "link_service.h" #include "logger_types.h" diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_clk_mgr.c index 91d872d6d392..9e63fa72101c 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_clk_mgr.c @@ -48,7 +48,7 @@ #include "dcn31/dcn31_clk_mgr.h" #include "dc_dmub_srv.h" -#include "link.h" 
+#include "link_service.h" #include "dcn314_smu.h" @@ -77,6 +77,7 @@ static const struct IP_BASE CLK_BASE = { { { { 0x00016C00, 0x02401800, 0, 0, 0, #undef DC_LOGGER #define DC_LOGGER \ clk_mgr->base.base.ctx->logger + #define regCLK1_CLK_PLL_REQ 0x0237 #define regCLK1_CLK_PLL_REQ_BASE_IDX 0 @@ -87,8 +88,70 @@ static const struct IP_BASE CLK_BASE = { { { { 0x00016C00, 0x02401800, 0, 0, 0, #define CLK1_CLK_PLL_REQ__PllSpineDiv_MASK 0x0000F000L #define CLK1_CLK_PLL_REQ__FbMult_frac_MASK 0xFFFF0000L +#define regCLK1_CLK0_DFS_CNTL 0x0269 +#define regCLK1_CLK0_DFS_CNTL_BASE_IDX 0 +#define regCLK1_CLK1_DFS_CNTL 0x026c +#define regCLK1_CLK1_DFS_CNTL_BASE_IDX 0 +#define regCLK1_CLK2_DFS_CNTL 0x026f +#define regCLK1_CLK2_DFS_CNTL_BASE_IDX 0 +#define regCLK1_CLK3_DFS_CNTL 0x0272 +#define regCLK1_CLK3_DFS_CNTL_BASE_IDX 0 +#define regCLK1_CLK4_DFS_CNTL 0x0275 +#define regCLK1_CLK4_DFS_CNTL_BASE_IDX 0 +#define regCLK1_CLK5_DFS_CNTL 0x0278 +#define regCLK1_CLK5_DFS_CNTL_BASE_IDX 0 + +#define regCLK1_CLK0_CURRENT_CNT 0x02fb +#define regCLK1_CLK0_CURRENT_CNT_BASE_IDX 0 +#define regCLK1_CLK1_CURRENT_CNT 0x02fc +#define regCLK1_CLK1_CURRENT_CNT_BASE_IDX 0 +#define regCLK1_CLK2_CURRENT_CNT 0x02fd +#define regCLK1_CLK2_CURRENT_CNT_BASE_IDX 0 +#define regCLK1_CLK3_CURRENT_CNT 0x02fe +#define regCLK1_CLK3_CURRENT_CNT_BASE_IDX 0 +#define regCLK1_CLK4_CURRENT_CNT 0x02ff +#define regCLK1_CLK4_CURRENT_CNT_BASE_IDX 0 +#define regCLK1_CLK5_CURRENT_CNT 0x0300 +#define regCLK1_CLK5_CURRENT_CNT_BASE_IDX 0 + +#define regCLK1_CLK0_BYPASS_CNTL 0x028a +#define regCLK1_CLK0_BYPASS_CNTL_BASE_IDX 0 +#define regCLK1_CLK1_BYPASS_CNTL 0x0293 +#define regCLK1_CLK1_BYPASS_CNTL_BASE_IDX 0 #define regCLK1_CLK2_BYPASS_CNTL 0x029c #define regCLK1_CLK2_BYPASS_CNTL_BASE_IDX 0 +#define regCLK1_CLK3_BYPASS_CNTL 0x02a5 +#define regCLK1_CLK3_BYPASS_CNTL_BASE_IDX 0 +#define regCLK1_CLK4_BYPASS_CNTL 0x02ae +#define regCLK1_CLK4_BYPASS_CNTL_BASE_IDX 0 +#define regCLK1_CLK5_BYPASS_CNTL 0x02b7 +#define regCLK1_CLK5_BYPASS_CNTL_BASE_IDX 0 + +#define regCLK1_CLK0_DS_CNTL 0x0283 +#define regCLK1_CLK0_DS_CNTL_BASE_IDX 0 +#define regCLK1_CLK1_DS_CNTL 0x028c +#define regCLK1_CLK1_DS_CNTL_BASE_IDX 0 +#define regCLK1_CLK2_DS_CNTL 0x0295 +#define regCLK1_CLK2_DS_CNTL_BASE_IDX 0 +#define regCLK1_CLK3_DS_CNTL 0x029e +#define regCLK1_CLK3_DS_CNTL_BASE_IDX 0 +#define regCLK1_CLK4_DS_CNTL 0x02a7 +#define regCLK1_CLK4_DS_CNTL_BASE_IDX 0 +#define regCLK1_CLK5_DS_CNTL 0x02b0 +#define regCLK1_CLK5_DS_CNTL_BASE_IDX 0 + +#define regCLK1_CLK0_ALLOW_DS 0x0284 +#define regCLK1_CLK0_ALLOW_DS_BASE_IDX 0 +#define regCLK1_CLK1_ALLOW_DS 0x028d +#define regCLK1_CLK1_ALLOW_DS_BASE_IDX 0 +#define regCLK1_CLK2_ALLOW_DS 0x0296 +#define regCLK1_CLK2_ALLOW_DS_BASE_IDX 0 +#define regCLK1_CLK3_ALLOW_DS 0x029f +#define regCLK1_CLK3_ALLOW_DS_BASE_IDX 0 +#define regCLK1_CLK4_ALLOW_DS 0x02a8 +#define regCLK1_CLK4_ALLOW_DS_BASE_IDX 0 +#define regCLK1_CLK5_ALLOW_DS 0x02b1 +#define regCLK1_CLK5_ALLOW_DS_BASE_IDX 0 #define CLK1_CLK2_BYPASS_CNTL__CLK2_BYPASS_SEL__SHIFT 0x0 #define CLK1_CLK2_BYPASS_CNTL__CLK2_BYPASS_DIV__SHIFT 0x10 @@ -185,6 +248,8 @@ void dcn314_init_clocks(struct clk_mgr *clk_mgr) { struct clk_mgr_internal *clk_mgr_int = TO_CLK_MGR_INTERNAL(clk_mgr); uint32_t ref_dtbclk = clk_mgr->clks.ref_dtbclk_khz; + struct clk_mgr_dcn314 *clk_mgr_dcn314 = TO_CLK_MGR_DCN314(clk_mgr_int); + struct clk_log_info log_info = {0}; memset(&(clk_mgr->clks), 0, sizeof(struct dc_clocks)); // Assumption is that boot state always supports pstate @@ -200,6 +265,9 @@ void dcn314_init_clocks(struct 
clk_mgr *clk_mgr) dce_adjust_dp_ref_freq_for_ss(clk_mgr_int, clk_mgr->dprefclk_khz); else clk_mgr->dp_dto_source_clock_in_khz = clk_mgr->dprefclk_khz; + + dcn314_dump_clk_registers(&clk_mgr->boot_snapshot, &clk_mgr_dcn314->base.base, &log_info); + clk_mgr->clks.dispclk_khz = clk_mgr->boot_snapshot.dispclk * 1000; } void dcn314_update_clocks(struct clk_mgr *clk_mgr_base, @@ -218,6 +286,8 @@ void dcn314_update_clocks(struct clk_mgr *clk_mgr_base, if (dc->work_arounds.skip_clock_update) return; + display_count = dcn314_get_active_display_cnt_wa(dc, context); + /* * if it is safe to lower, but we are already in the lower state, we don't have to do anything * also if safe to lower is false, we just go in the higher state @@ -236,7 +306,6 @@ void dcn314_update_clocks(struct clk_mgr *clk_mgr_base, } /* check that we're not already in lower */ if (clk_mgr_base->clks.pwr_state != DCN_PWR_STATE_LOW_POWER) { - display_count = dcn314_get_active_display_cnt_wa(dc, context); /* if we can go lower, go lower */ if (display_count == 0) { union display_idle_optimization_u idle_info = { 0 }; @@ -293,11 +362,19 @@ void dcn314_update_clocks(struct clk_mgr *clk_mgr_base, update_dppclk = true; } - if (should_set_clock(safe_to_lower, new_clocks->dispclk_khz, clk_mgr_base->clks.dispclk_khz)) { + if (should_set_clock(safe_to_lower, new_clocks->dispclk_khz, clk_mgr_base->clks.dispclk_khz) && + (new_clocks->dispclk_khz > 0 || (safe_to_lower && display_count == 0))) { + int requested_dispclk_khz = new_clocks->dispclk_khz; + dcn314_disable_otg_wa(clk_mgr_base, context, safe_to_lower, true); + /* Clamp the requested clock to PMFW based on their limit. */ + if (dc->debug.min_disp_clk_khz > 0 && requested_dispclk_khz < dc->debug.min_disp_clk_khz) + requested_dispclk_khz = dc->debug.min_disp_clk_khz; + + dcn314_smu_set_dispclk(clk_mgr, requested_dispclk_khz); clk_mgr_base->clks.dispclk_khz = new_clocks->dispclk_khz; - dcn314_smu_set_dispclk(clk_mgr, clk_mgr_base->clks.dispclk_khz); + dcn314_disable_otg_wa(clk_mgr_base, context, safe_to_lower, false); update_dispclk = true; @@ -385,10 +462,65 @@ bool dcn314_are_clock_states_equal(struct dc_clocks *a, return true; } -static void dcn314_dump_clk_registers(struct clk_state_registers_and_bypass *regs_and_bypass, + +static void dcn314_dump_clk_registers_internal(struct dcn35_clk_internal *internal, struct clk_mgr *clk_mgr_base) +{ + struct clk_mgr_internal *clk_mgr = TO_CLK_MGR_INTERNAL(clk_mgr_base); + + // read dtbclk + internal->CLK1_CLK4_CURRENT_CNT = REG_READ(CLK1_CLK4_CURRENT_CNT); + internal->CLK1_CLK4_BYPASS_CNTL = REG_READ(CLK1_CLK4_BYPASS_CNTL); + + // read dcfclk + internal->CLK1_CLK3_CURRENT_CNT = REG_READ(CLK1_CLK3_CURRENT_CNT); + internal->CLK1_CLK3_BYPASS_CNTL = REG_READ(CLK1_CLK3_BYPASS_CNTL); + + // read dcf deep sleep divider + internal->CLK1_CLK3_DS_CNTL = REG_READ(CLK1_CLK3_DS_CNTL); + internal->CLK1_CLK3_ALLOW_DS = REG_READ(CLK1_CLK3_ALLOW_DS); + + // read dppclk + internal->CLK1_CLK1_CURRENT_CNT = REG_READ(CLK1_CLK1_CURRENT_CNT); + internal->CLK1_CLK1_BYPASS_CNTL = REG_READ(CLK1_CLK1_BYPASS_CNTL); + + // read dprefclk + internal->CLK1_CLK2_CURRENT_CNT = REG_READ(CLK1_CLK2_CURRENT_CNT); + internal->CLK1_CLK2_BYPASS_CNTL = REG_READ(CLK1_CLK2_BYPASS_CNTL); + + // read dispclk + internal->CLK1_CLK0_CURRENT_CNT = REG_READ(CLK1_CLK0_CURRENT_CNT); + internal->CLK1_CLK0_BYPASS_CNTL = REG_READ(CLK1_CLK0_BYPASS_CNTL); +} + +void dcn314_dump_clk_registers(struct clk_state_registers_and_bypass *regs_and_bypass, struct clk_mgr *clk_mgr_base, struct clk_log_info 
*log_info) { - return; + + struct dcn35_clk_internal internal = {0}; + + dcn314_dump_clk_registers_internal(&internal, clk_mgr_base); + + regs_and_bypass->dcfclk = internal.CLK1_CLK3_CURRENT_CNT / 10; + regs_and_bypass->dcf_deep_sleep_divider = internal.CLK1_CLK3_DS_CNTL / 10; + regs_and_bypass->dcf_deep_sleep_allow = internal.CLK1_CLK3_ALLOW_DS; + regs_and_bypass->dprefclk = internal.CLK1_CLK2_CURRENT_CNT / 10; + regs_and_bypass->dispclk = internal.CLK1_CLK0_CURRENT_CNT / 10; + regs_and_bypass->dppclk = internal.CLK1_CLK1_CURRENT_CNT / 10; + regs_and_bypass->dtbclk = internal.CLK1_CLK4_CURRENT_CNT / 10; + + regs_and_bypass->dppclk_bypass = internal.CLK1_CLK1_BYPASS_CNTL & 0x0007; + if (regs_and_bypass->dppclk_bypass < 0 || regs_and_bypass->dppclk_bypass > 4) + regs_and_bypass->dppclk_bypass = 0; + regs_and_bypass->dcfclk_bypass = internal.CLK1_CLK3_BYPASS_CNTL & 0x0007; + if (regs_and_bypass->dcfclk_bypass < 0 || regs_and_bypass->dcfclk_bypass > 4) + regs_and_bypass->dcfclk_bypass = 0; + regs_and_bypass->dispclk_bypass = internal.CLK1_CLK0_BYPASS_CNTL & 0x0007; + if (regs_and_bypass->dispclk_bypass < 0 || regs_and_bypass->dispclk_bypass > 4) + regs_and_bypass->dispclk_bypass = 0; + regs_and_bypass->dprefclk_bypass = internal.CLK1_CLK2_BYPASS_CNTL & 0x0007; + if (regs_and_bypass->dprefclk_bypass < 0 || regs_and_bypass->dprefclk_bypass > 4) + regs_and_bypass->dprefclk_bypass = 0; + } static struct clk_bw_params dcn314_bw_params = { diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_clk_mgr.h b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_clk_mgr.h index 002c28e80720..0577eb527bc3 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_clk_mgr.h +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_clk_mgr.h @@ -65,4 +65,9 @@ void dcn314_clk_mgr_construct(struct dc_context *ctx, void dcn314_clk_mgr_destroy(struct clk_mgr_internal *clk_mgr_int); + +void dcn314_dump_clk_registers(struct clk_state_registers_and_bypass *regs_and_bypass, + struct clk_mgr *clk_mgr_base, struct clk_log_info *log_info); + + #endif //__DCN314_CLK_MGR_H__ diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn315/dcn315_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn315/dcn315_clk_mgr.c index e4d22f74f986..b315ed91e010 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn315/dcn315_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn315/dcn315_clk_mgr.c @@ -46,7 +46,7 @@ #define DC_LOGGER \ clk_mgr->base.base.ctx->logger -#include "link.h" +#include "link_service.h" #define TO_CLK_MGR_DCN315(clk_mgr)\ container_of(clk_mgr, struct clk_mgr_dcn315, base) diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn316/dcn316_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn316/dcn316_clk_mgr.c index 49efea0c8fcf..1769b1f26e75 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn316/dcn316_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn316/dcn316_clk_mgr.c @@ -39,7 +39,7 @@ #include "dcn316_smu.h" #include "dm_helpers.h" #include "dc_dmub_srv.h" -#include "link.h" +#include "link_service.h" // DCN316 this is CLK1 instance #define MAX_INSTANCE 7 diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn32/dcn32_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn32/dcn32_clk_mgr.c index 084994c650c4..7da7b41bd092 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn32/dcn32_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn32/dcn32_clk_mgr.c @@ -33,7 +33,7 @@ #include "reg_helper.h" #include "core_types.h" #include "dm_helpers.h" -#include "link.h" 
+#include "link_service.h" #include "dc_state_priv.h" #include "atomfirmware.h" #include "dcn32_smu13_driver_if.h" @@ -1047,11 +1047,8 @@ static void dcn32_get_memclk_states_from_smu(struct clk_mgr *clk_mgr_base) &num_entries_per_clk->num_fclk_levels); clk_mgr_base->bw_params->dc_mode_limit.fclk_mhz = dcn30_smu_get_dc_mode_max_dpm_freq(clk_mgr, PPCLK_FCLK); - if (num_entries_per_clk->num_memclk_levels >= num_entries_per_clk->num_fclk_levels) { - num_levels = num_entries_per_clk->num_memclk_levels; - } else { - num_levels = num_entries_per_clk->num_fclk_levels; - } + num_levels = max(num_entries_per_clk->num_memclk_levels, num_entries_per_clk->num_fclk_levels); + clk_mgr_base->bw_params->max_memclk_mhz = clk_mgr_base->bw_params->clk_table.entries[num_entries_per_clk->num_memclk_levels - 1].memclk_mhz; clk_mgr_base->bw_params->clk_table.num_entries = num_levels ? num_levels : 1; diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn32/dcn32_clk_mgr_smu_msg.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn32/dcn32_clk_mgr_smu_msg.c index cf2d35363e8b..5d80fdf63ffc 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn32/dcn32_clk_mgr_smu_msg.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn32/dcn32_clk_mgr_smu_msg.c @@ -63,7 +63,8 @@ static uint32_t dcn32_smu_wait_for_response(struct clk_mgr_internal *clk_mgr, un udelay(delay_us); } while (max_retries--); - TRACE_SMU_DELAY(delay_us * (initial_max_retries - max_retries), clk_mgr->base.ctx); + TRACE_SMU_MSG_DELAY(0, 0, delay_us * (initial_max_retries - max_retries), clk_mgr->base.ctx); + return reg; } @@ -120,7 +121,7 @@ static uint32_t dcn32_smu_wait_for_response_delay(struct clk_mgr_internal *clk_m *total_delay_us += delay_us; } while (max_retries--); - TRACE_SMU_DELAY(*total_delay_us, clk_mgr->base.ctx); + TRACE_SMU_MSG_DELAY(0, 0, *total_delay_us, clk_mgr->base.ctx); return reg; } diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c index bb1ac12a2b09..b11383fba35f 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c @@ -44,7 +44,7 @@ #include "dcn31/dcn31_clk_mgr.h" #include "dc_dmub_srv.h" -#include "link.h" +#include "link_service.h" #include "logger_types.h" #undef DC_LOGGER @@ -587,9 +587,118 @@ bool dcn35_are_clock_states_equal(struct dc_clocks *a, return true; } -static void dcn35_dump_clk_registers(struct clk_state_registers_and_bypass *regs_and_bypass, +static void dcn35_save_clk_registers_internal(struct dcn35_clk_internal *internal, struct clk_mgr *clk_mgr_base) +{ + struct clk_mgr_internal *clk_mgr = TO_CLK_MGR_INTERNAL(clk_mgr_base); + + // read dtbclk + internal->CLK1_CLK4_CURRENT_CNT = REG_READ(CLK1_CLK4_CURRENT_CNT); + internal->CLK1_CLK4_BYPASS_CNTL = REG_READ(CLK1_CLK4_BYPASS_CNTL); + + // read dcfclk + internal->CLK1_CLK3_CURRENT_CNT = REG_READ(CLK1_CLK3_CURRENT_CNT); + internal->CLK1_CLK3_BYPASS_CNTL = REG_READ(CLK1_CLK3_BYPASS_CNTL); + + // read dcf deep sleep divider + internal->CLK1_CLK3_DS_CNTL = REG_READ(CLK1_CLK3_DS_CNTL); + internal->CLK1_CLK3_ALLOW_DS = REG_READ(CLK1_CLK3_ALLOW_DS); + + // read dppclk + internal->CLK1_CLK1_CURRENT_CNT = REG_READ(CLK1_CLK1_CURRENT_CNT); + internal->CLK1_CLK1_BYPASS_CNTL = REG_READ(CLK1_CLK1_BYPASS_CNTL); + + // read dprefclk + internal->CLK1_CLK2_CURRENT_CNT = REG_READ(CLK1_CLK2_CURRENT_CNT); + internal->CLK1_CLK2_BYPASS_CNTL = REG_READ(CLK1_CLK2_BYPASS_CNTL); + + // read dispclk + 
internal->CLK1_CLK0_CURRENT_CNT = REG_READ(CLK1_CLK0_CURRENT_CNT); + internal->CLK1_CLK0_BYPASS_CNTL = REG_READ(CLK1_CLK0_BYPASS_CNTL); +} + +static void dcn35_save_clk_registers(struct clk_state_registers_and_bypass *regs_and_bypass, struct clk_mgr_dcn35 *clk_mgr) { + struct dcn35_clk_internal internal = {0}; + char *bypass_clks[5] = {"0x0 DFS", "0x1 REFCLK", "0x2 ERROR", "0x3 400 FCH", "0x4 600 FCH"}; + + dcn35_save_clk_registers_internal(&internal, &clk_mgr->base.base); + + regs_and_bypass->dcfclk = internal.CLK1_CLK3_CURRENT_CNT / 10; + regs_and_bypass->dcf_deep_sleep_divider = internal.CLK1_CLK3_DS_CNTL / 10; + regs_and_bypass->dcf_deep_sleep_allow = internal.CLK1_CLK3_ALLOW_DS; + regs_and_bypass->dprefclk = internal.CLK1_CLK2_CURRENT_CNT / 10; + regs_and_bypass->dispclk = internal.CLK1_CLK0_CURRENT_CNT / 10; + regs_and_bypass->dppclk = internal.CLK1_CLK1_CURRENT_CNT / 10; + regs_and_bypass->dtbclk = internal.CLK1_CLK4_CURRENT_CNT / 10; + + regs_and_bypass->dppclk_bypass = internal.CLK1_CLK1_BYPASS_CNTL & 0x0007; + if (regs_and_bypass->dppclk_bypass < 0 || regs_and_bypass->dppclk_bypass > 4) + regs_and_bypass->dppclk_bypass = 0; + regs_and_bypass->dcfclk_bypass = internal.CLK1_CLK3_BYPASS_CNTL & 0x0007; + if (regs_and_bypass->dcfclk_bypass < 0 || regs_and_bypass->dcfclk_bypass > 4) + regs_and_bypass->dcfclk_bypass = 0; + regs_and_bypass->dispclk_bypass = internal.CLK1_CLK0_BYPASS_CNTL & 0x0007; + if (regs_and_bypass->dispclk_bypass < 0 || regs_and_bypass->dispclk_bypass > 4) + regs_and_bypass->dispclk_bypass = 0; + regs_and_bypass->dprefclk_bypass = internal.CLK1_CLK2_BYPASS_CNTL & 0x0007; + if (regs_and_bypass->dprefclk_bypass < 0 || regs_and_bypass->dprefclk_bypass > 4) + regs_and_bypass->dprefclk_bypass = 0; + + if (clk_mgr->base.base.ctx->dc->debug.pstate_enabled) { + DC_LOG_SMU("clk_type,clk_value,deepsleep_cntl,deepsleep_allow,bypass\n"); + + DC_LOG_SMU("dcfclk,%d,%d,%d,%s\n", + regs_and_bypass->dcfclk, + regs_and_bypass->dcf_deep_sleep_divider, + regs_and_bypass->dcf_deep_sleep_allow, + bypass_clks[(int) regs_and_bypass->dcfclk_bypass]); + + DC_LOG_SMU("dprefclk,%d,N/A,N/A,%s\n", + regs_and_bypass->dprefclk, + bypass_clks[(int) regs_and_bypass->dprefclk_bypass]); + + DC_LOG_SMU("dispclk,%d,N/A,N/A,%s\n", + regs_and_bypass->dispclk, + bypass_clks[(int) regs_and_bypass->dispclk_bypass]); + + // REGISTER VALUES + DC_LOG_SMU("reg_name,value,clk_type"); + + DC_LOG_SMU("CLK1_CLK3_CURRENT_CNT,%d,dcfclk", + internal.CLK1_CLK3_CURRENT_CNT); + + DC_LOG_SMU("CLK1_CLK4_CURRENT_CNT,%d,dtbclk", + internal.CLK1_CLK4_CURRENT_CNT); + + DC_LOG_SMU("CLK1_CLK3_DS_CNTL,%d,dcf_deep_sleep_divider", + internal.CLK1_CLK3_DS_CNTL); + + DC_LOG_SMU("CLK1_CLK3_ALLOW_DS,%d,dcf_deep_sleep_allow", + internal.CLK1_CLK3_ALLOW_DS); + + DC_LOG_SMU("CLK1_CLK2_CURRENT_CNT,%d,dprefclk", + internal.CLK1_CLK2_CURRENT_CNT); + + DC_LOG_SMU("CLK1_CLK0_CURRENT_CNT,%d,dispclk", + internal.CLK1_CLK0_CURRENT_CNT); + + DC_LOG_SMU("CLK1_CLK1_CURRENT_CNT,%d,dppclk", + internal.CLK1_CLK1_CURRENT_CNT); + + DC_LOG_SMU("CLK1_CLK3_BYPASS_CNTL,%d,dcfclk_bypass", + internal.CLK1_CLK3_BYPASS_CNTL); + + DC_LOG_SMU("CLK1_CLK2_BYPASS_CNTL,%d,dprefclk_bypass", + internal.CLK1_CLK2_BYPASS_CNTL); + + DC_LOG_SMU("CLK1_CLK0_BYPASS_CNTL,%d,dispclk_bypass", + internal.CLK1_CLK0_BYPASS_CNTL); + + DC_LOG_SMU("CLK1_CLK1_BYPASS_CNTL,%d,dppclk_bypass", + internal.CLK1_CLK1_BYPASS_CNTL); + + } } static bool dcn35_is_spll_ssc_enabled(struct clk_mgr *clk_mgr_base) @@ -623,6 +732,7 @@ static void init_clk_states(struct clk_mgr *clk_mgr) void 
dcn35_init_clocks(struct clk_mgr *clk_mgr) { struct clk_mgr_internal *clk_mgr_int = TO_CLK_MGR_INTERNAL(clk_mgr); + struct clk_mgr_dcn35 *clk_mgr_dcn35 = TO_CLK_MGR_DCN35(clk_mgr_int); init_clk_states(clk_mgr); @@ -633,6 +743,13 @@ void dcn35_init_clocks(struct clk_mgr *clk_mgr) else clk_mgr->dp_dto_source_clock_in_khz = clk_mgr->dprefclk_khz; + dcn35_save_clk_registers(&clk_mgr->boot_snapshot, clk_mgr_dcn35); + + clk_mgr->clks.ref_dtbclk_khz = clk_mgr->boot_snapshot.dtbclk * 10; + if (clk_mgr->boot_snapshot.dtbclk > 59000) { + /*dtbclk enabled based on */ + clk_mgr->clks.dtbclk_en = true; + } } static struct clk_bw_params dcn35_bw_params = { .vram_type = Ddr4MemType, @@ -1323,7 +1440,7 @@ void dcn35_clk_mgr_construct( dcn35_bw_params.wm_table = ddr5_wm_table; } /* Saved clocks configured at boot for debug purposes */ - dcn35_dump_clk_registers(&clk_mgr->base.base.boot_snapshot, clk_mgr); + dcn35_save_clk_registers(&clk_mgr->base.base.boot_snapshot, clk_mgr); clk_mgr->base.base.dprefclk_khz = dcn35_smu_get_dprefclk(&clk_mgr->base); clk_mgr->base.base.clks.ref_dtbclk_khz = 600000; diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dcn401_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dcn401_clk_mgr.c index a3b8e3d4a429..306016c1f109 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dcn401_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dcn401_clk_mgr.c @@ -13,7 +13,7 @@ #include "reg_helper.h" #include "core_types.h" #include "dm_helpers.h" -#include "link.h" +#include "link_service.h" #include "dc_state_priv.h" #include "atomfirmware.h" @@ -22,8 +22,6 @@ #include "dcn/dcn_4_1_0_offset.h" #include "dcn/dcn_4_1_0_sh_mask.h" -#include "dml/dcn401/dcn401_fpu.h" - #define DCN_BASE__INST0_SEG1 0x000000C0 #define mmCLK01_CLK0_CLK_PLL_REQ 0x16E37 @@ -164,7 +162,7 @@ static void dcn401_init_single_clock(struct clk_mgr_internal *clk_mgr, PPCLK_e c unsigned int i; char *entry_i = (char *)entry_0; - uint32_t ret = dcn30_smu_get_dpm_freq_by_index(clk_mgr, clk, 0xFF); + uint32_t ret = dcn401_smu_get_dpm_freq_by_index(clk_mgr, clk, 0xFF); if (ret & (1 << 31)) /* fine-grained, only min and max */ @@ -176,50 +174,43 @@ static void dcn401_init_single_clock(struct clk_mgr_internal *clk_mgr, PPCLK_e c /* if the initial message failed, num_levels will be 0 */ for (i = 0; i < *num_levels && i < ARRAY_SIZE(clk_mgr->base.bw_params->clk_table.entries); i++) { - *((unsigned int *)entry_i) = (dcn30_smu_get_dpm_freq_by_index(clk_mgr, clk, i) & 0xFFFF); + *((unsigned int *)entry_i) = (dcn401_smu_get_dpm_freq_by_index(clk_mgr, clk, i) & 0xFFFF); entry_i += sizeof(clk_mgr->base.bw_params->clk_table.entries[0]); } } static void dcn401_build_wm_range_table(struct clk_mgr *clk_mgr) { - /* legacy */ - DC_FP_START(); - dcn401_build_wm_range_table_fpu(clk_mgr); - DC_FP_END(); - - if (clk_mgr->ctx->dc->debug.using_dml21) { - /* For min clocks use as reported by PM FW and report those as min */ - uint16_t min_uclk_mhz = clk_mgr->bw_params->clk_table.entries[0].memclk_mhz; - uint16_t min_dcfclk_mhz = clk_mgr->bw_params->clk_table.entries[0].dcfclk_mhz; - - /* Set A - Normal - default values */ - clk_mgr->bw_params->wm_table.nv_entries[WM_A].valid = true; - clk_mgr->bw_params->wm_table.nv_entries[WM_A].pmfw_breakdown.wm_type = WATERMARKS_CLOCK_RANGE; - clk_mgr->bw_params->wm_table.nv_entries[WM_A].pmfw_breakdown.min_dcfclk = min_dcfclk_mhz; - clk_mgr->bw_params->wm_table.nv_entries[WM_A].pmfw_breakdown.max_dcfclk = 0xFFFF; - 
clk_mgr->bw_params->wm_table.nv_entries[WM_A].pmfw_breakdown.min_uclk = min_uclk_mhz; - clk_mgr->bw_params->wm_table.nv_entries[WM_A].pmfw_breakdown.max_uclk = 0xFFFF; - - /* Set B - Unused on dcn4 */ - clk_mgr->bw_params->wm_table.nv_entries[WM_B].valid = false; - - /* Set 1A - Dummy P-State - P-State latency set to "dummy p-state" value */ - /* 'DalDummyClockChangeLatencyNs' registry key option set to 0x7FFFFFFF can be used to disable Set C for dummy p-state */ - if (clk_mgr->ctx->dc->bb_overrides.dummy_clock_change_latency_ns != 0x7FFFFFFF) { - clk_mgr->bw_params->wm_table.nv_entries[WM_1A].valid = true; - clk_mgr->bw_params->wm_table.nv_entries[WM_1A].pmfw_breakdown.wm_type = WATERMARKS_DUMMY_PSTATE; - clk_mgr->bw_params->wm_table.nv_entries[WM_1A].pmfw_breakdown.min_dcfclk = min_dcfclk_mhz; - clk_mgr->bw_params->wm_table.nv_entries[WM_1A].pmfw_breakdown.max_dcfclk = 0xFFFF; - clk_mgr->bw_params->wm_table.nv_entries[WM_1A].pmfw_breakdown.min_uclk = min_uclk_mhz; - clk_mgr->bw_params->wm_table.nv_entries[WM_1A].pmfw_breakdown.max_uclk = 0xFFFF; - } else { - clk_mgr->bw_params->wm_table.nv_entries[WM_1A].valid = false; - } - - /* Set 1B - Unused on dcn4 */ - clk_mgr->bw_params->wm_table.nv_entries[WM_1B].valid = false; + /* For min clocks use as reported by PM FW and report those as min */ + uint16_t min_uclk_mhz = clk_mgr->bw_params->clk_table.entries[0].memclk_mhz; + uint16_t min_dcfclk_mhz = clk_mgr->bw_params->clk_table.entries[0].dcfclk_mhz; + + /* Set A - Normal - default values */ + clk_mgr->bw_params->wm_table.nv_entries[WM_A].valid = true; + clk_mgr->bw_params->wm_table.nv_entries[WM_A].pmfw_breakdown.wm_type = WATERMARKS_CLOCK_RANGE; + clk_mgr->bw_params->wm_table.nv_entries[WM_A].pmfw_breakdown.min_dcfclk = min_dcfclk_mhz; + clk_mgr->bw_params->wm_table.nv_entries[WM_A].pmfw_breakdown.max_dcfclk = 0xFFFF; + clk_mgr->bw_params->wm_table.nv_entries[WM_A].pmfw_breakdown.min_uclk = min_uclk_mhz; + clk_mgr->bw_params->wm_table.nv_entries[WM_A].pmfw_breakdown.max_uclk = 0xFFFF; + + /* Set B - Unused on dcn4 */ + clk_mgr->bw_params->wm_table.nv_entries[WM_B].valid = false; + + /* Set 1A - Dummy P-State - P-State latency set to "dummy p-state" value */ + /* 'DalDummyClockChangeLatencyNs' registry key option set to 0x7FFFFFFF can be used to disable Set C for dummy p-state */ + if (clk_mgr->ctx->dc->bb_overrides.dummy_clock_change_latency_ns != 0x7FFFFFFF) { + clk_mgr->bw_params->wm_table.nv_entries[WM_1A].valid = true; + clk_mgr->bw_params->wm_table.nv_entries[WM_1A].pmfw_breakdown.wm_type = WATERMARKS_DUMMY_PSTATE; + clk_mgr->bw_params->wm_table.nv_entries[WM_1A].pmfw_breakdown.min_dcfclk = min_dcfclk_mhz; + clk_mgr->bw_params->wm_table.nv_entries[WM_1A].pmfw_breakdown.max_dcfclk = 0xFFFF; + clk_mgr->bw_params->wm_table.nv_entries[WM_1A].pmfw_breakdown.min_uclk = min_uclk_mhz; + clk_mgr->bw_params->wm_table.nv_entries[WM_1A].pmfw_breakdown.max_uclk = 0xFFFF; + } else { + clk_mgr->bw_params->wm_table.nv_entries[WM_1A].valid = false; } + + /* Set 1B - Unused on dcn4 */ + clk_mgr->bw_params->wm_table.nv_entries[WM_1B].valid = false; } void dcn401_init_clocks(struct clk_mgr *clk_mgr_base) @@ -240,20 +231,20 @@ void dcn401_init_clocks(struct clk_mgr *clk_mgr_base) clk_mgr->smu_present = false; clk_mgr->dpm_present = false; - if (!clk_mgr_base->force_smu_not_present && dcn30_smu_get_smu_version(clk_mgr, &clk_mgr->smu_ver)) + if (!clk_mgr_base->force_smu_not_present && dcn401_smu_get_smu_version(clk_mgr, &clk_mgr->smu_ver)) clk_mgr->smu_present = true; if (!clk_mgr->smu_present) return; - 
dcn30_smu_check_driver_if_version(clk_mgr); - dcn30_smu_check_msg_header_version(clk_mgr); + dcn401_smu_check_driver_if_version(clk_mgr); + dcn401_smu_check_msg_header_version(clk_mgr); /* DCFCLK */ dcn401_init_single_clock(clk_mgr, PPCLK_DCFCLK, &clk_mgr_base->bw_params->clk_table.entries[0].dcfclk_mhz, &num_entries_per_clk->num_dcfclk_levels); - clk_mgr_base->bw_params->dc_mode_limit.dcfclk_mhz = dcn30_smu_get_dc_mode_max_dpm_freq(clk_mgr, PPCLK_DCFCLK); + clk_mgr_base->bw_params->dc_mode_limit.dcfclk_mhz = dcn401_smu_get_dc_mode_max_dpm_freq(clk_mgr, PPCLK_DCFCLK); if (num_entries_per_clk->num_dcfclk_levels && clk_mgr_base->bw_params->dc_mode_limit.dcfclk_mhz == clk_mgr_base->bw_params->clk_table.entries[num_entries_per_clk->num_dcfclk_levels - 1].dcfclk_mhz) clk_mgr_base->bw_params->dc_mode_limit.dcfclk_mhz = 0; @@ -262,7 +253,7 @@ void dcn401_init_clocks(struct clk_mgr *clk_mgr_base) dcn401_init_single_clock(clk_mgr, PPCLK_SOCCLK, &clk_mgr_base->bw_params->clk_table.entries[0].socclk_mhz, &num_entries_per_clk->num_socclk_levels); - clk_mgr_base->bw_params->dc_mode_limit.socclk_mhz = dcn30_smu_get_dc_mode_max_dpm_freq(clk_mgr, PPCLK_SOCCLK); + clk_mgr_base->bw_params->dc_mode_limit.socclk_mhz = dcn401_smu_get_dc_mode_max_dpm_freq(clk_mgr, PPCLK_SOCCLK); if (num_entries_per_clk->num_socclk_levels && clk_mgr_base->bw_params->dc_mode_limit.socclk_mhz == clk_mgr_base->bw_params->clk_table.entries[num_entries_per_clk->num_socclk_levels - 1].socclk_mhz) clk_mgr_base->bw_params->dc_mode_limit.socclk_mhz = 0; @@ -272,7 +263,7 @@ void dcn401_init_clocks(struct clk_mgr *clk_mgr_base) dcn401_init_single_clock(clk_mgr, PPCLK_DTBCLK, &clk_mgr_base->bw_params->clk_table.entries[0].dtbclk_mhz, &num_entries_per_clk->num_dtbclk_levels); - clk_mgr_base->bw_params->dc_mode_limit.dtbclk_mhz = dcn30_smu_get_dc_mode_max_dpm_freq(clk_mgr, PPCLK_DTBCLK); + clk_mgr_base->bw_params->dc_mode_limit.dtbclk_mhz = dcn401_smu_get_dc_mode_max_dpm_freq(clk_mgr, PPCLK_DTBCLK); if (num_entries_per_clk->num_dtbclk_levels && clk_mgr_base->bw_params->dc_mode_limit.dtbclk_mhz == clk_mgr_base->bw_params->clk_table.entries[num_entries_per_clk->num_dtbclk_levels - 1].dtbclk_mhz) clk_mgr_base->bw_params->dc_mode_limit.dtbclk_mhz = 0; @@ -282,7 +273,7 @@ void dcn401_init_clocks(struct clk_mgr *clk_mgr_base) dcn401_init_single_clock(clk_mgr, PPCLK_DISPCLK, &clk_mgr_base->bw_params->clk_table.entries[0].dispclk_mhz, &num_entries_per_clk->num_dispclk_levels); - clk_mgr_base->bw_params->dc_mode_limit.dispclk_mhz = dcn30_smu_get_dc_mode_max_dpm_freq(clk_mgr, PPCLK_DISPCLK); + clk_mgr_base->bw_params->dc_mode_limit.dispclk_mhz = dcn401_smu_get_dc_mode_max_dpm_freq(clk_mgr, PPCLK_DISPCLK); if (num_entries_per_clk->num_dispclk_levels && clk_mgr_base->bw_params->dc_mode_limit.dispclk_mhz == clk_mgr_base->bw_params->clk_table.entries[num_entries_per_clk->num_dispclk_levels - 1].dispclk_mhz) clk_mgr_base->bw_params->dc_mode_limit.dispclk_mhz = 0; @@ -320,6 +311,25 @@ void dcn401_init_clocks(struct clk_mgr *clk_mgr_base) dcn401_build_wm_range_table(clk_mgr_base); } +bool dcn401_is_dc_mode_present(struct clk_mgr *clk_mgr_base) +{ + struct clk_mgr_internal *clk_mgr = TO_CLK_MGR_INTERNAL(clk_mgr_base); + + return clk_mgr->smu_present && clk_mgr->dpm_present && + ((clk_mgr_base->bw_params->clk_table.num_entries_per_clk.num_dcfclk_levels && + clk_mgr_base->bw_params->dc_mode_limit.dcfclk_mhz) || + (clk_mgr_base->bw_params->clk_table.num_entries_per_clk.num_dispclk_levels && + clk_mgr_base->bw_params->dc_mode_limit.dispclk_mhz) || + 
(clk_mgr_base->bw_params->clk_table.num_entries_per_clk.num_dtbclk_levels && + clk_mgr_base->bw_params->dc_mode_limit.dtbclk_mhz) || + (clk_mgr_base->bw_params->clk_table.num_entries_per_clk.num_fclk_levels && + clk_mgr_base->bw_params->dc_mode_limit.fclk_mhz) || + (clk_mgr_base->bw_params->clk_table.num_entries_per_clk.num_memclk_levels && + clk_mgr_base->bw_params->dc_mode_limit.memclk_mhz) || + (clk_mgr_base->bw_params->clk_table.num_entries_per_clk.num_socclk_levels && + clk_mgr_base->bw_params->dc_mode_limit.socclk_mhz)); +} + static void dcn401_dump_clk_registers(struct clk_state_registers_and_bypass *regs_and_bypass, struct clk_mgr *clk_mgr_base, struct clk_log_info *log_info) { @@ -1308,8 +1318,8 @@ static void dcn401_notify_wm_ranges(struct clk_mgr *clk_mgr_base) table->Watermarks.WatermarkRow[i].WmSetting = i; table->Watermarks.WatermarkRow[i].Flags = clk_mgr->base.bw_params->wm_table.nv_entries[i].pmfw_breakdown.wm_type; } - dcn30_smu_set_dram_addr_high(clk_mgr, clk_mgr->wm_range_table_addr >> 32); - dcn30_smu_set_dram_addr_low(clk_mgr, clk_mgr->wm_range_table_addr & 0xFFFFFFFF); + dcn401_smu_set_dram_addr_high(clk_mgr, clk_mgr->wm_range_table_addr >> 32); + dcn401_smu_set_dram_addr_low(clk_mgr, clk_mgr->wm_range_table_addr & 0xFFFFFFFF); dcn401_smu_transfer_wm_table_dram_2_smu(clk_mgr); } @@ -1380,7 +1390,7 @@ static void dcn401_get_memclk_states_from_smu(struct clk_mgr *clk_mgr_base) clk_mgr_base->bw_params->clk_table.entries[num_entries_per_clk->num_memclk_levels - 1].memclk_mhz; } - clk_mgr_base->bw_params->dc_mode_limit.memclk_mhz = dcn30_smu_get_dc_mode_max_dpm_freq(clk_mgr, PPCLK_UCLK); + clk_mgr_base->bw_params->dc_mode_limit.memclk_mhz = dcn401_smu_get_dc_mode_max_dpm_freq(clk_mgr, PPCLK_UCLK); if (num_entries_per_clk->num_memclk_levels && clk_mgr_base->bw_params->dc_mode_limit.memclk_mhz == clk_mgr_base->bw_params->clk_table.entries[num_entries_per_clk->num_memclk_levels - 1].memclk_mhz) clk_mgr_base->bw_params->dc_mode_limit.memclk_mhz = 0; @@ -1389,16 +1399,12 @@ static void dcn401_get_memclk_states_from_smu(struct clk_mgr *clk_mgr_base) dcn401_init_single_clock(clk_mgr, PPCLK_FCLK, &clk_mgr_base->bw_params->clk_table.entries[0].fclk_mhz, &num_entries_per_clk->num_fclk_levels); - clk_mgr_base->bw_params->dc_mode_limit.fclk_mhz = dcn30_smu_get_dc_mode_max_dpm_freq(clk_mgr, PPCLK_FCLK); + clk_mgr_base->bw_params->dc_mode_limit.fclk_mhz = dcn401_smu_get_dc_mode_max_dpm_freq(clk_mgr, PPCLK_FCLK); if (num_entries_per_clk->num_fclk_levels && clk_mgr_base->bw_params->dc_mode_limit.fclk_mhz == clk_mgr_base->bw_params->clk_table.entries[num_entries_per_clk->num_fclk_levels - 1].fclk_mhz) clk_mgr_base->bw_params->dc_mode_limit.fclk_mhz = 0; - if (num_entries_per_clk->num_memclk_levels >= num_entries_per_clk->num_fclk_levels) { - num_levels = num_entries_per_clk->num_memclk_levels; - } else { - num_levels = num_entries_per_clk->num_fclk_levels; - } + num_levels = max(num_entries_per_clk->num_memclk_levels, num_entries_per_clk->num_fclk_levels); clk_mgr_base->bw_params->clk_table.num_entries = num_levels ? 
num_levels : 1; @@ -1490,6 +1496,35 @@ static int dcn401_get_dispclk_from_dentist(struct clk_mgr *clk_mgr_base) return 0; } +unsigned int dcn401_get_max_clock_khz(struct clk_mgr *clk_mgr_base, enum clk_type clk_type) +{ + struct clk_mgr_internal *clk_mgr = TO_CLK_MGR_INTERNAL(clk_mgr_base); + + unsigned int num_clk_levels; + + switch (clk_type) { + case CLK_TYPE_DISPCLK: + num_clk_levels = clk_mgr->base.bw_params->clk_table.num_entries_per_clk.num_dispclk_levels; + return dcn401_is_ppclk_dpm_enabled(clk_mgr, PPCLK_DISPCLK) ? + clk_mgr->base.bw_params->clk_table.entries[num_clk_levels - 1].dispclk_mhz * 1000 : + clk_mgr->base.boot_snapshot.dispclk; + case CLK_TYPE_DPPCLK: + num_clk_levels = clk_mgr->base.bw_params->clk_table.num_entries_per_clk.num_dppclk_levels; + return dcn401_is_ppclk_dpm_enabled(clk_mgr, PPCLK_DPPCLK) ? + clk_mgr->base.bw_params->clk_table.entries[num_clk_levels - 1].dppclk_mhz * 1000 : + clk_mgr->base.boot_snapshot.dppclk; + case CLK_TYPE_DSCCLK: + num_clk_levels = clk_mgr->base.bw_params->clk_table.num_entries_per_clk.num_dispclk_levels; + return dcn401_is_ppclk_dpm_enabled(clk_mgr, PPCLK_DISPCLK) ? + clk_mgr->base.bw_params->clk_table.entries[num_clk_levels - 1].dispclk_mhz * 1000 / 3 : + clk_mgr->base.boot_snapshot.dispclk / 3; + default: + break; + } + + return 0; +} + static struct clk_mgr_funcs dcn401_funcs = { .get_dp_ref_clk_frequency = dce12_get_dp_ref_freq_khz, .get_dtb_ref_clk_frequency = dcn401_get_dtb_ref_freq_khz, @@ -1505,6 +1540,8 @@ static struct clk_mgr_funcs dcn401_funcs = { .get_dispclk_from_dentist = dcn401_get_dispclk_from_dentist, .get_hard_min_memclk = dcn401_get_hard_min_memclk, .get_hard_min_fclk = dcn401_get_hard_min_fclk, + .is_dc_mode_present = dcn401_is_dc_mode_present, + .get_max_clock_khz = dcn401_get_max_clock_khz, }; struct clk_mgr_internal *dcn401_clk_mgr_construct( @@ -1565,7 +1602,7 @@ struct clk_mgr_internal *dcn401_clk_mgr_construct( clk_mgr->base.bw_params = kzalloc(sizeof(*clk_mgr->base.bw_params), GFP_KERNEL); if (!clk_mgr->base.bw_params) { BREAK_TO_DEBUGGER(); - kfree(clk_mgr); + kfree(clk_mgr401); return NULL; } @@ -1576,6 +1613,7 @@ struct clk_mgr_internal *dcn401_clk_mgr_construct( if (!clk_mgr->wm_range_table) { BREAK_TO_DEBUGGER(); kfree(clk_mgr->base.bw_params); + kfree(clk_mgr401); return NULL; } diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dcn401_clk_mgr.h b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dcn401_clk_mgr.h index 6c9ae5ca2c7e..97a1ce1e8a9e 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dcn401_clk_mgr.h +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dcn401_clk_mgr.h @@ -105,10 +105,13 @@ struct dcn401_clk_mgr { }; void dcn401_init_clocks(struct clk_mgr *clk_mgr_base); +bool dcn401_is_dc_mode_present(struct clk_mgr *clk_mgr_base); struct clk_mgr_internal *dcn401_clk_mgr_construct(struct dc_context *ctx, struct dccg *dccg); void dcn401_clk_mgr_destroy(struct clk_mgr_internal *clk_mgr); +unsigned int dcn401_get_max_clock_khz(struct clk_mgr *clk_mgr_base, enum clk_type clk_type); + #endif /* __DCN401_CLK_MGR_H_ */ diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dcn401_clk_mgr_smu_msg.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dcn401_clk_mgr_smu_msg.c index 21c35528f61f..3a263840893e 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dcn401_clk_mgr_smu_msg.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dcn401_clk_mgr_smu_msg.c @@ -57,6 +57,8 @@ static bool dcn401_smu_send_msg_with_param(struct clk_mgr_internal *clk_mgr, uin /* Wait for response 
register to be ready */ dcn401_smu_wait_for_response(clk_mgr, 10, 200000); + TRACE_SMU_MSG_ENTER(msg_id, param_in, clk_mgr->base.ctx); + /* Clear response register */ REG_WRITE(DAL_RESP_REG, 0); @@ -71,9 +73,11 @@ static bool dcn401_smu_send_msg_with_param(struct clk_mgr_internal *clk_mgr, uin if (param_out) *param_out = REG_READ(DAL_ARG_REG); + TRACE_SMU_MSG_EXIT(true, param_out ? *param_out : 0, clk_mgr->base.ctx); return true; } + TRACE_SMU_MSG_EXIT(false, 0, clk_mgr->base.ctx); return false; } @@ -102,8 +106,6 @@ static uint32_t dcn401_smu_wait_for_response_delay(struct clk_mgr_internal *clk_ *total_delay_us += delay_us; } while (max_retries--); - TRACE_SMU_DELAY(*total_delay_us, clk_mgr->base.ctx); - return reg; } @@ -115,6 +117,8 @@ static bool dcn401_smu_send_msg_with_param_delay(struct clk_mgr_internal *clk_mg /* Wait for response register to be ready */ dcn401_smu_wait_for_response_delay(clk_mgr, 10, 200000, &delay1_us); + TRACE_SMU_MSG_ENTER(msg_id, param_in, clk_mgr->base.ctx); + /* Clear response register */ REG_WRITE(DAL_RESP_REG, 0); @@ -124,18 +128,71 @@ static bool dcn401_smu_send_msg_with_param_delay(struct clk_mgr_internal *clk_mg /* Trigger the message transaction by writing the message ID */ REG_WRITE(DAL_MSG_REG, msg_id); - TRACE_SMU_MSG(msg_id, param_in, clk_mgr->base.ctx); - /* Wait for response */ if (dcn401_smu_wait_for_response_delay(clk_mgr, 10, 200000, &delay2_us) == DALSMC_Result_OK) { if (param_out) *param_out = REG_READ(DAL_ARG_REG); *total_delay_us = delay1_us + delay2_us; + TRACE_SMU_MSG_EXIT(true, param_out ? *param_out : 0, clk_mgr->base.ctx); return true; } *total_delay_us = delay1_us + 2000000; + TRACE_SMU_MSG_EXIT(false, 0, clk_mgr->base.ctx); + return false; +} + +bool dcn401_smu_get_smu_version(struct clk_mgr_internal *clk_mgr, unsigned int *version) +{ + smu_print("SMU Get SMU version\n"); + + if (dcn401_smu_send_msg_with_param(clk_mgr, + DALSMC_MSG_GetSmuVersion, 0, version)) { + + smu_print("SMU version: %d\n", *version); + + return true; + } + + return false; +} + +/* Message output should match SMU11_DRIVER_IF_VERSION in smu11_driver_if.h */ +bool dcn401_smu_check_driver_if_version(struct clk_mgr_internal *clk_mgr) +{ + uint32_t response = 0; + + smu_print("SMU Check driver if version\n"); + + if (dcn401_smu_send_msg_with_param(clk_mgr, + DALSMC_MSG_GetDriverIfVersion, 0, &response)) { + + smu_print("SMU driver if version: %d\n", response); + + if (response == SMU14_DRIVER_IF_VERSION) + return true; + } + + return false; +} + +/* Message output should match DALSMC_VERSION in dalsmc.h */ +bool dcn401_smu_check_msg_header_version(struct clk_mgr_internal *clk_mgr) +{ + uint32_t response = 0; + + smu_print("SMU Check msg header version\n"); + + if (dcn401_smu_send_msg_with_param(clk_mgr, + DALSMC_MSG_GetMsgHeaderVersion, 0, &response)) { + + smu_print("SMU msg header version: %d\n", response); + + if (response == DALSMC_VERSION) + return true; + } + return false; } @@ -163,6 +220,22 @@ void dcn401_smu_send_cab_for_uclk_message(struct clk_mgr_internal *clk_mgr, unsi smu_print("Numways for SubVP : %d\n", num_ways); } +void dcn401_smu_set_dram_addr_high(struct clk_mgr_internal *clk_mgr, uint32_t addr_high) +{ + smu_print("SMU Set DRAM addr high: %d\n", addr_high); + + dcn401_smu_send_msg_with_param(clk_mgr, + DALSMC_MSG_SetDalDramAddrHigh, addr_high, NULL); +} + +void dcn401_smu_set_dram_addr_low(struct clk_mgr_internal *clk_mgr, uint32_t addr_low) +{ + smu_print("SMU Set DRAM addr low: %d\n", addr_low); + + dcn401_smu_send_msg_with_param(clk_mgr, + 
DALSMC_MSG_SetDalDramAddrLow, addr_low, NULL); +} + void dcn401_smu_transfer_wm_table_dram_2_smu(struct clk_mgr_internal *clk_mgr) { smu_print("SMU Transfer WM table DRAM 2 SMU\n"); @@ -348,3 +421,52 @@ unsigned int dcn401_smu_get_num_of_umc_channels(struct clk_mgr_internal *clk_mgr return response; } + +/* + * Frequency in MHz returned in lower 16 bits for valid DPM level + * + * Call with dpm_level = 0xFF to query features, return value will be: + * Bits 7:0 - number of DPM levels + * Bit 28 - 1 = auto DPM on + * Bit 29 - 1 = sweep DPM on + * Bit 30 - 1 = forced DPM on + * Bit 31 - 0 = discrete, 1 = fine-grained + * + * With fine-grained DPM, only min and max frequencies will be reported + * + * Returns 0 on failure + */ +unsigned int dcn401_smu_get_dpm_freq_by_index(struct clk_mgr_internal *clk_mgr, uint32_t clk, uint8_t dpm_level) +{ + uint32_t response = 0; + + /* bits 23:16 for clock type, lower 8 bits for DPM level */ + uint32_t param = (clk << 16) | dpm_level; + + smu_print("SMU Get dpm freq by index: clk = %d, dpm_level = %d\n", clk, dpm_level); + + dcn401_smu_send_msg_with_param(clk_mgr, + DALSMC_MSG_GetDpmFreqByIndex, param, &response); + + smu_print("SMU dpm freq: %d MHz\n", response); + + return response; +} + +/* Returns the max DPM frequency in DC mode in MHz, 0 on failure */ +unsigned int dcn401_smu_get_dc_mode_max_dpm_freq(struct clk_mgr_internal *clk_mgr, uint32_t clk) +{ + uint32_t response = 0; + + /* bits 23:16 for clock type */ + uint32_t param = clk << 16; + + smu_print("SMU Get DC mode max DPM freq: clk = %d\n", clk); + + dcn401_smu_send_msg_with_param(clk_mgr, + DALSMC_MSG_GetDcModeMaxDpmFreq, param, &response); + + smu_print("SMU DC mode max DPM freq: %d MHz\n", response); + + return response; +} diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dcn401_clk_mgr_smu_msg.h b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dcn401_clk_mgr_smu_msg.h index e02eb1294b37..4f5ac603e822 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dcn401_clk_mgr_smu_msg.h +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dcn401_clk_mgr_smu_msg.h @@ -7,11 +7,17 @@ #include "os_types.h" #include "core_types.h" -#include "dcn32/dcn32_clk_mgr_smu_msg.h" +struct clk_mgr_internal; + +bool dcn401_smu_get_smu_version(struct clk_mgr_internal *clk_mgr, unsigned int *version); +bool dcn401_smu_check_driver_if_version(struct clk_mgr_internal *clk_mgr); +bool dcn401_smu_check_msg_header_version(struct clk_mgr_internal *clk_mgr); void dcn401_smu_send_fclk_pstate_message(struct clk_mgr_internal *clk_mgr, bool support); void dcn401_smu_send_uclk_pstate_message(struct clk_mgr_internal *clk_mgr, bool support); void dcn401_smu_send_cab_for_uclk_message(struct clk_mgr_internal *clk_mgr, unsigned int num_ways); +void dcn401_smu_set_dram_addr_high(struct clk_mgr_internal *clk_mgr, uint32_t addr_high); +void dcn401_smu_set_dram_addr_low(struct clk_mgr_internal *clk_mgr, uint32_t addr_low); void dcn401_smu_transfer_wm_table_dram_2_smu(struct clk_mgr_internal *clk_mgr); void dcn401_smu_set_pme_workaround(struct clk_mgr_internal *clk_mgr); unsigned int dcn401_smu_set_hard_min_by_freq(struct clk_mgr_internal *clk_mgr, uint32_t clk, uint16_t freq_mhz); @@ -29,5 +35,7 @@ bool dcn401_smu_set_subvp_uclk_fclk_hardmin(struct clk_mgr_internal *clk_mgr, void dcn401_smu_set_min_deep_sleep_dcef_clk(struct clk_mgr_internal *clk_mgr, uint32_t freq_mhz); void dcn401_smu_set_num_of_displays(struct clk_mgr_internal *clk_mgr, uint32_t num_displays); unsigned int dcn401_smu_get_num_of_umc_channels(struct
clk_mgr_internal *clk_mgr); +unsigned int dcn401_smu_get_dc_mode_max_dpm_freq(struct clk_mgr_internal *clk_mgr, uint32_t clk); +unsigned int dcn401_smu_get_dpm_freq_by_index(struct clk_mgr_internal *clk_mgr, uint32_t clk, uint8_t dpm_level); #endif /* __DCN401_CLK_MGR_SMU_MSG_H_ */ diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c index 56d011a1323c..5f2d5638c819 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc.c @@ -60,7 +60,7 @@ #include "link_encoder.h" #include "link_enc_cfg.h" -#include "link.h" +#include "link_service.h" #include "dm_helpers.h" #include "mem_input.h" @@ -84,6 +84,7 @@ #if defined(CONFIG_DRM_AMD_DC_FP) #include "dml2/dml2_internal_types.h" +#include "soc_and_ip_translator.h" #endif #include "dce/dmub_outbox.h" @@ -217,11 +218,24 @@ static bool create_links( connectors_num, num_virtual_links); - // condition loop on link_count to allow skipping invalid indices + /* When getting the number of connectors, the VBIOS reports the number of valid indices, + * but it doesn't say which indices are valid, and not every index has an actual connector. + * So, if we don't find a connector on an index, that is not an error. + * + * - There is no guarantee that the first N indices will be valid + * - VBIOS may report a higher amount of valid indices than there are actual connectors + * - Some VBIOS have valid configurations for more connectors than there actually are + * on the card. This may be because the manufacturer used the same VBIOS for different + * variants of the same card. + */ for (i = 0; dc->link_count < connectors_num && i < MAX_LINKS; i++) { + struct graphics_object_id connector_id = bios->funcs->get_connector_id(bios, i); struct link_init_data link_init_params = {0}; struct dc_link *link; + if (connector_id.id == CONNECTOR_ID_UNKNOWN) + continue; + DC_LOG_DC("BIOS object table - printing link object info for connector number: %d, link_index: %d", i, dc->link_count); link_init_params.ctx = dc->ctx; @@ -241,6 +255,7 @@ static bool create_links( DC_LOG_DC("BIOS object table - end"); /* Create a link for each usb4 dpia port */ + dc->lowest_dpia_link_index = MAX_LINKS; for (i = 0; i < dc->res_pool->usb4_dpia_count; i++) { struct link_init_data link_init_params = {0}; struct dc_link *link; @@ -253,6 +268,9 @@ static bool create_links( link = dc->link_srv->create_link(&link_init_params); if (link) { + if (dc->lowest_dpia_link_index > dc->link_count) + dc->lowest_dpia_link_index = dc->link_count; + dc->links[dc->link_count] = link; link->dc = dc; ++dc->link_count; @@ -442,7 +460,9 @@ bool dc_stream_adjust_vmin_vmax(struct dc *dc, * avoid conflicting with firmware updates. 
*/ if (dc->ctx->dce_version > DCE_VERSION_MAX) { - if (dc->optimized_required || dc->wm_optimized_required) { + if (dc->optimized_required && + (stream->adjust.v_total_max != adjust->v_total_max || + stream->adjust.v_total_min != adjust->v_total_min)) { stream->adjust.timing_adjust_pending = true; return false; } @@ -930,21 +950,24 @@ static void dc_destruct(struct dc *dc) } dc_destroy_resource_pool(dc); - +#ifdef CONFIG_DRM_AMD_DC_FP + dc_destroy_soc_and_ip_translator(&dc->soc_and_ip_translator); +#endif if (dc->link_srv) link_destroy_link_service(&dc->link_srv); - if (dc->ctx->gpio_service) - dal_gpio_service_destroy(&dc->ctx->gpio_service); + if (dc->ctx) { + if (dc->ctx->gpio_service) + dal_gpio_service_destroy(&dc->ctx->gpio_service); - if (dc->ctx->created_bios) - dal_bios_parser_destroy(&dc->ctx->dc_bios); + if (dc->ctx->created_bios) + dal_bios_parser_destroy(&dc->ctx->dc_bios); + kfree(dc->ctx->logger); + dc_perf_trace_destroy(&dc->ctx->perf_trace); - kfree(dc->ctx->logger); - dc_perf_trace_destroy(&dc->ctx->perf_trace); - - kfree(dc->ctx); - dc->ctx = NULL; + kfree(dc->ctx); + dc->ctx = NULL; + } kfree(dc->bw_vbios); dc->bw_vbios = NULL; @@ -972,6 +995,8 @@ static bool dc_construct_ctx(struct dc *dc, if (!dc_ctx) return false; + dc_stream_init_rmcm_3dlut(dc); + dc_ctx->cgs_device = init_params->cgs_device; dc_ctx->driver_context = init_params->driver; dc_ctx->dc = dc; @@ -1131,6 +1156,9 @@ static bool dc_construct(struct dc *dc, dc->res_pool->funcs->update_bw_bounding_box(dc, dc->clk_mgr->bw_params); DC_FP_END(); } + dc->soc_and_ip_translator = dc_create_soc_and_ip_translator(dc_ctx->dce_version); + if (!dc->soc_and_ip_translator) + goto fail; #endif if (!create_links(dc, init_params->num_virtual_links)) @@ -2377,7 +2405,7 @@ enum dc_status dc_commit_streams(struct dc *dc, struct dc_commit_streams_params context->power_source = params->power_source; - res = dc_validate_with_context(dc, set, params->stream_count, context, false); + res = dc_validate_with_context(dc, set, params->stream_count, context, DC_VALIDATE_MODE_AND_PROGRAMMING); /* * Only update link encoder to stream assignment after bandwidth validation passed. 
@@ -2391,6 +2419,18 @@ enum dc_status dc_commit_streams(struct dc *dc, struct dc_commit_streams_params goto fail; } + /* + * If not already seamless, make transition seamless by inserting intermediate minimal transition + */ + if (dc->hwss.is_pipe_topology_transition_seamless && + !dc->hwss.is_pipe_topology_transition_seamless(dc, dc->current_state, context)) { + res = commit_minimal_transition_state(dc, context); + if (res != DC_OK) { + BREAK_TO_DEBUGGER(); + goto fail; + } + } + res = dc_commit_state_no_check(dc, context); for (i = 0; i < params->stream_count; i++) { @@ -2537,7 +2577,6 @@ void dc_post_update_surfaces_to_stream(struct dc *dc) } dc->optimized_required = false; - dc->wm_optimized_required = false; } bool dc_set_generic_gpio_for_stereo(bool enable, @@ -3016,8 +3055,6 @@ enum surface_update_type dc_check_update_surfaces_for_stream( } else if (memcmp(&dc->current_state->bw_ctx.bw.dcn.clk, &dc->clk_mgr->clks, offsetof(struct dc_clocks, prev_p_state_change_support)) != 0) { dc->optimized_required = true; } - - dc->optimized_required |= dc->wm_optimized_required; } return type; @@ -3273,6 +3310,9 @@ static void copy_stream_update_to_stream(struct dc *dc, if (update->adaptive_sync_infopacket) stream->adaptive_sync_infopacket = *update->adaptive_sync_infopacket; + if (update->avi_infopacket) + stream->avi_infopacket = *update->avi_infopacket; + if (update->dither_option) stream->dither_option = *update->dither_option; @@ -3300,7 +3340,8 @@ static void copy_stream_update_to_stream(struct dc *dc, if (dsc_validate_context) { stream->timing.dsc_cfg = *update->dsc_config; stream->timing.flags.DSC = enable_dsc; - if (dc->res_pool->funcs->validate_bandwidth(dc, dsc_validate_context, true) != DC_OK) { + if (dc->res_pool->funcs->validate_bandwidth(dc, dsc_validate_context, + DC_VALIDATE_MODE_ONLY) != DC_OK) { stream->timing.dsc_cfg = old_dsc_cfg; stream->timing.flags.DSC = old_dsc_enabled; update->dsc_config = NULL; @@ -3369,7 +3410,7 @@ static void update_seamless_boot_flags(struct dc *dc, int surface_count, struct dc_stream_state *stream) { - if (get_seamless_boot_stream_count(context) > 0 && surface_count > 0) { + if (get_seamless_boot_stream_count(context) > 0 && (surface_count > 0 || stream->dpms_off)) { /* Optimize seamless boot flag keeps clocks and watermarks high until * first flip. After first flip, optimization is required to lower * bandwidth. 
Important to note that it is expected UEFI will @@ -3522,7 +3563,7 @@ static bool update_planes_and_stream_state(struct dc *dc, } if (update_type == UPDATE_TYPE_FULL) { - if (dc->res_pool->funcs->validate_bandwidth(dc, context, false) != DC_OK) { + if (dc->res_pool->funcs->validate_bandwidth(dc, context, DC_VALIDATE_MODE_AND_PROGRAMMING) != DC_OK) { BREAK_TO_DEBUGGER(); goto fail; } @@ -3566,7 +3607,8 @@ static void commit_planes_do_stream_update(struct dc *dc, stream_update->vsp_infopacket || stream_update->hfvsif_infopacket || stream_update->adaptive_sync_infopacket || - stream_update->vtem_infopacket) { + stream_update->vtem_infopacket || + stream_update->avi_infopacket) { resource_build_info_frame(pipe_ctx); dc->hwss.update_info_frame(pipe_ctx); @@ -4128,7 +4170,7 @@ static void commit_planes_for_stream(struct dc *dc, } if (dc->hwseq->funcs.wait_for_pipe_update_if_needed) - dc->hwseq->funcs.wait_for_pipe_update_if_needed(dc, top_pipe_to_program, update_type == UPDATE_TYPE_FAST); + dc->hwseq->funcs.wait_for_pipe_update_if_needed(dc, top_pipe_to_program, update_type < UPDATE_TYPE_FULL); if (should_lock_all_pipes && dc->hwss.interdependent_update_lock) { if (dc->hwss.subvp_pipe_control_lock) @@ -4628,7 +4670,8 @@ static struct dc_state *create_minimal_transition_state(struct dc *dc, backup_and_set_minimal_pipe_split_policy(dc, base_context, policy); /* commit minimal state */ - if (dc->res_pool->funcs->validate_bandwidth(dc, minimal_transition_context, false) == DC_OK) { + if (dc->res_pool->funcs->validate_bandwidth(dc, minimal_transition_context, + DC_VALIDATE_MODE_AND_PROGRAMMING) == DC_OK) { /* prevent underflow and corruption when reconfiguring pipes */ force_vsync_flip_in_minimal_transition_context(minimal_transition_context); } else { @@ -5037,6 +5080,7 @@ static bool full_update_required(struct dc *dc, stream_update->hfvsif_infopacket || stream_update->vtem_infopacket || stream_update->adaptive_sync_infopacket || + stream_update->avi_infopacket || stream_update->dpms_off || stream_update->allow_freesync || stream_update->vrr_active_variable || @@ -5080,129 +5124,6 @@ static bool fast_update_only(struct dc *dc, && !full_update_required(dc, srf_updates, surface_count, stream_update, stream); } -static bool update_planes_and_stream_v1(struct dc *dc, - struct dc_surface_update *srf_updates, int surface_count, - struct dc_stream_state *stream, - struct dc_stream_update *stream_update, - struct dc_state *state) -{ - const struct dc_stream_status *stream_status; - enum surface_update_type update_type; - struct dc_state *context; - struct dc_context *dc_ctx = dc->ctx; - int i, j; - struct dc_fast_update fast_update[MAX_SURFACES] = {0}; - - dc_exit_ips_for_hw_access(dc); - - populate_fast_updates(fast_update, srf_updates, surface_count, stream_update); - stream_status = dc_stream_get_status(stream); - context = dc->current_state; - - update_type = dc_check_update_surfaces_for_stream( - dc, srf_updates, surface_count, stream_update, stream_status); - /* It is possible to receive a flip for one plane while there are multiple flip_immediate planes in the same stream. - * E.g. Desktop and MPO plane are flip_immediate but only the MPO plane received a flip - * Force the other flip_immediate planes to flip so GSL doesn't wait for a flip that won't come. 
- */ - force_immediate_gsl_plane_flip(dc, srf_updates, surface_count); - - if (update_type >= UPDATE_TYPE_FULL) { - - /* initialize scratch memory for building context */ - context = dc_state_create_copy(state); - if (context == NULL) { - DC_ERROR("Failed to allocate new validate context!\n"); - return false; - } - - for (i = 0; i < dc->res_pool->pipe_count; i++) { - struct pipe_ctx *new_pipe = &context->res_ctx.pipe_ctx[i]; - struct pipe_ctx *old_pipe = &dc->current_state->res_ctx.pipe_ctx[i]; - - if (new_pipe->plane_state && new_pipe->plane_state != old_pipe->plane_state) - new_pipe->plane_state->force_full_update = true; - } - } else if (update_type == UPDATE_TYPE_FAST) { - /* - * Previous frame finished and HW is ready for optimization. - */ - dc_post_update_surfaces_to_stream(dc); - } - - for (i = 0; i < surface_count; i++) { - struct dc_plane_state *surface = srf_updates[i].surface; - - copy_surface_update_to_plane(surface, &srf_updates[i]); - - if (update_type >= UPDATE_TYPE_MED) { - for (j = 0; j < dc->res_pool->pipe_count; j++) { - struct pipe_ctx *pipe_ctx = - &context->res_ctx.pipe_ctx[j]; - - if (pipe_ctx->plane_state != surface) - continue; - - resource_build_scaling_params(pipe_ctx); - } - } - } - - copy_stream_update_to_stream(dc, context, stream, stream_update); - - if (update_type >= UPDATE_TYPE_FULL) { - if (dc->res_pool->funcs->validate_bandwidth(dc, context, false) != DC_OK) { - DC_ERROR("Mode validation failed for stream update!\n"); - dc_state_release(context); - return false; - } - } - - TRACE_DC_PIPE_STATE(pipe_ctx, i, MAX_PIPES); - - if (fast_update_only(dc, fast_update, srf_updates, surface_count, stream_update, stream) && - !dc->debug.enable_legacy_fast_update) { - commit_planes_for_stream_fast(dc, - srf_updates, - surface_count, - stream, - stream_update, - update_type, - context); - } else { - commit_planes_for_stream( - dc, - srf_updates, - surface_count, - stream, - stream_update, - update_type, - context); - } - /*update current_State*/ - if (dc->current_state != context) { - - struct dc_state *old = dc->current_state; - - dc->current_state = context; - dc_state_release(old); - - for (i = 0; i < dc->res_pool->pipe_count; i++) { - struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[i]; - - if (pipe_ctx->plane_state && pipe_ctx->stream == stream) - pipe_ctx->plane_state->force_full_update = false; - } - } - - /* Legacy optimization path for DCE. 
*/ - if (update_type >= UPDATE_TYPE_FULL && dc_ctx->dce_version < DCE_VERSION_MAX) { - dc_post_update_surfaces_to_stream(dc); - TRACE_DCE_CLOCK_STATE(&context->bw_ctx.bw.dce); - } - return true; -} - static bool update_planes_and_stream_v2(struct dc *dc, struct dc_surface_update *srf_updates, int surface_count, struct dc_stream_state *stream, @@ -5435,8 +5356,8 @@ bool dc_update_planes_and_stream(struct dc *dc, else ret = update_planes_and_stream_v2(dc, srf_updates, surface_count, stream, stream_update); - - if (ret) + if (ret && (dc->ctx->dce_version >= DCN_VERSION_3_2 || + dc->ctx->dce_version == DCN_VERSION_3_01)) clear_update_flags(srf_updates, surface_count, stream); return ret; @@ -5460,14 +5381,12 @@ void dc_commit_updates_for_stream(struct dc *dc, if (dc->ctx->dce_version >= DCN_VERSION_4_01) { ret = update_planes_and_stream_v3(dc, srf_updates, surface_count, stream, stream_update); - } else if (dc->ctx->dce_version >= DCN_VERSION_3_2) { + } else { ret = update_planes_and_stream_v2(dc, srf_updates, surface_count, stream, stream_update); - } else - ret = update_planes_and_stream_v1(dc, srf_updates, surface_count, stream, - stream_update, state); + } - if (ret) + if (ret && dc->ctx->dce_version >= DCN_VERSION_3_2) clear_update_flags(srf_updates, surface_count, stream); } @@ -5540,6 +5459,15 @@ void dc_set_power_state(struct dc *dc, enum dc_acpi_cm_power_state power_state) dc->hwss.init_sys_ctx(dc->hwseq, dc, &dc->vm_pa_config); } break; + case DC_ACPI_CM_POWER_STATE_D3: + if (dc->caps.ips_support) + dc_dmub_srv_notify_fw_dc_power_state(dc->ctx->dmub_srv, DC_ACPI_CM_POWER_STATE_D3); + + if (dc->caps.ips_v2_support) { + if (dc->clk_mgr->funcs->set_low_power_state) + dc->clk_mgr->funcs->set_low_power_state(dc->clk_mgr); + } + break; default: ASSERT(dc->current_state->stream_count == 0); dc_dmub_srv_notify_fw_dc_power_state(dc->ctx->dmub_srv, power_state); @@ -5696,8 +5624,8 @@ void dc_allow_idle_optimizations_internal(struct dc *dc, bool allow, char const subvp_pipe_type[i] = dc_state_get_pipe_subvp_type(context, pipe); } } - - DC_LOG_DC("%s: allow_idle=%d\n HardMinUClk_Khz=%d HardMinDramclk_Khz=%d\n Pipe_0=%d Pipe_1=%d Pipe_2=%d Pipe_3=%d Pipe_4=%d Pipe_5=%d (caller=%s)\n", + if (!dc->caps.is_apu) + DC_LOG_DC("%s: allow_idle=%d\n HardMinUClk_Khz=%d HardMinDramclk_Khz=%d\n Pipe_0=%d Pipe_1=%d Pipe_2=%d Pipe_3=%d Pipe_4=%d Pipe_5=%d (caller=%s)\n", __func__, allow, idle_fclk_khz, idle_dramclk_khz, subvp_pipe_type[0], subvp_pipe_type[1], subvp_pipe_type[2], subvp_pipe_type[3], subvp_pipe_type[4], subvp_pipe_type[5], caller_name); @@ -6337,13 +6265,14 @@ void dc_set_edp_power(const struct dc *dc, struct dc_link *edp_link, edp_link->dc->link_srv->edp_set_panel_power(edp_link, powerOn); } -/* - ***************************************************************************** +/** * dc_get_power_profile_for_dc_state() - extracts power profile from dc state * * Called when DM wants to make power policy decisions based on dc_state * - ***************************************************************************** + * @context: Pointer to the dc_state from which the power profile is extracted. + * + * Return: The power profile structure containing the power level information. 
*/ struct dc_power_profile dc_get_power_profile_for_dc_state(const struct dc_state *context) { @@ -6359,13 +6288,14 @@ struct dc_power_profile dc_get_power_profile_for_dc_state(const struct dc_state return profile; } -/* - ********************************************************************************** +/** * dc_get_det_buffer_size_from_state() - extracts detile buffer size from dc state * - * Called when DM wants to log detile buffer size from dc_state + * This function is called to log the detile buffer size from the dc_state. * - ********************************************************************************** + * @context: a pointer to the dc_state from which the detile buffer size is extracted. + * + * Return: the size of the detile buffer, or 0 if not available. */ unsigned int dc_get_det_buffer_size_from_state(const struct dc_state *context) { @@ -6377,6 +6307,36 @@ unsigned int dc_get_det_buffer_size_from_state(const struct dc_state *context) return 0; } +/** + * dc_get_host_router_index: Get index of host router from a dpia link + * + * This function returns the host router index of the target link, provided the target link is a DPIA link. + * + * @link: Pointer to the target link (input) + * @host_router_index: Pointer to store the host router index of the target link (output). + * + * Return: true if the host router index is found and valid. + * + */ +bool dc_get_host_router_index(const struct dc_link *link, unsigned int *host_router_index) +{ + struct dc *dc; + + if (!link || !host_router_index || link->ep_type != DISPLAY_ENDPOINT_USB4_DPIA) + return false; + + dc = link->ctx->dc; + + if (link->link_index < dc->lowest_dpia_link_index) + return false; + + *host_router_index = (link->link_index - dc->lowest_dpia_link_index) / dc->caps.num_of_dpias_per_host_router; + if (*host_router_index < dc->caps.num_of_host_routers) + return true; + else + return false; +} + bool dc_is_cursor_limit_pending(struct dc *dc) { uint32_t i; @@ -6400,3 +6360,21 @@ bool dc_can_clear_cursor_limit(struct dc *dc) return false; } + +void dc_get_underflow_debug_data_for_otg(struct dc *dc, int primary_otg_inst, + struct dc_underflow_debug_data *out_data) +{ + struct timing_generator *tg = NULL; + + for (int i = 0; i < MAX_PIPES; i++) { + if (dc->res_pool->timing_generators[i] && + dc->res_pool->timing_generators[i]->inst == primary_otg_inst) { + tg = dc->res_pool->timing_generators[i]; + break; + } + } + + dc_exit_ips_for_hw_access(dc); + if (dc->hwss.get_underflow_debug_data) + dc->hwss.get_underflow_debug_data(dc, tg, out_data); +} diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_debug.c b/drivers/gpu/drm/amd/display/dc/core/dc_debug.c index 7551d0a3fe82..bbce751b485f 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_debug.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_debug.c @@ -268,6 +268,8 @@ char *dc_status_to_str(enum dc_status status) return "Insufficient DP link bandwidth"; case DC_FAIL_HW_CURSOR_SUPPORT: return "HW Cursor not supported"; + case DC_FAIL_DP_TUNNEL_BW_VALIDATE: + return "Fail DP Tunnel BW validation"; case DC_ERROR_UNEXPECTED: return "Unexpected error"; } diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/core/dc_hw_sequencer.c index 7014b8d000bb..d82b1cb467f4 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_hw_sequencer.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_hw_sequencer.c @@ -427,6 +427,32 @@ void get_hdr_visual_confirm_color( } } +/* Visual Confirm color definition for Smart Mux */ +void get_smartmux_visual_confirm_color(
+ struct dc *dc, + struct tg_color *color) +{ + uint32_t color_value = MAX_TG_COLOR_VALUE; + + const struct tg_color sm_ver_colors[5] = { + {0, 0, 0}, /* SMUX_MUXCONTROL_UNSUPPORTED - Black */ + {0, MAX_TG_COLOR_VALUE, 0}, /* SMUX_MUXCONTROL_v10 - Green */ + {0, MAX_TG_COLOR_VALUE, MAX_TG_COLOR_VALUE}, /* SMUX_MUXCONTROL_v15 - Cyan */ + {MAX_TG_COLOR_VALUE, MAX_TG_COLOR_VALUE, 0}, /* SMUX_MUXCONTROL_MDM - Yellow */ + {MAX_TG_COLOR_VALUE, 0, MAX_TG_COLOR_VALUE}, /* SMUX_MUXCONTROL_vUNKNOWN - Magenta*/ + }; + + if (dc->caps.is_apu) { + /* APU driving the eDP */ + *color = sm_ver_colors[dc->config.smart_mux_version]; + } else { + /* dGPU driving the eDP - red */ + color->color_r_cr = color_value; + color->color_g_y = 0; + color->color_b_cb = 0; + } +} + /* Visual Confirm color definition for VABC */ void get_vabc_visual_confirm_color( struct pipe_ctx *pipe_ctx, @@ -1151,6 +1177,8 @@ void hwss_wait_for_odm_update_pending_complete(struct dc *dc, struct dc_state *c tg = otg_master->stream_res.tg; if (tg->funcs->wait_odm_doublebuffer_pending_clear) tg->funcs->wait_odm_doublebuffer_pending_clear(tg); + if (tg->funcs->wait_otg_disable) + tg->funcs->wait_otg_disable(tg); } /* ODM update may require to reprogram blank pattern for each OPP */ diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link_enc_cfg.c b/drivers/gpu/drm/amd/display/dc/core/dc_link_enc_cfg.c index 814f68d76257..a180f68f711c 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link_enc_cfg.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link_enc_cfg.c @@ -24,7 +24,7 @@ #include "link_enc_cfg.h" #include "resource.h" -#include "link.h" +#include "link_service.h" #define DC_LOGGER dc->ctx->logger diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link_exports.c b/drivers/gpu/drm/amd/display/dc/core/dc_link_exports.c index 71e15da4bb69..9acd30019717 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link_exports.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link_exports.c @@ -33,8 +33,9 @@ * dc.h with detail interface documentation, then add function implementation * in this file which calls link functions. 
*/ -#include "link.h" +#include "link_service.h" #include "dce/dce_i2c.h" + struct dc_link *dc_get_link_at_index(struct dc *dc, uint32_t link_index) { if (link_index >= MAX_LINKS) @@ -515,7 +516,15 @@ void dc_link_enable_hpd_filter(struct dc_link *link, bool enable) link->dc->link_srv->enable_hpd_filter(link, enable); } -bool dc_link_dp_dpia_validate(struct dc *dc, const struct dc_stream_state *streams, const unsigned int count) +enum dc_status dc_link_validate_dp_tunneling_bandwidth(const struct dc *dc, const struct dc_state *new_ctx) { - return dc->link_srv->validate_dpia_bandwidth(streams, count); + return dc->link_srv->validate_dp_tunnel_bandwidth(dc, new_ctx); } + +void dc_link_get_alpm_support(struct dc_link *link, + bool *auxless_support, + bool *auxwake_support) +{ + link->dc->link_srv->edp_get_alpm_support(link, auxless_support, auxwake_support); +} + diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c index 3da25bd8b578..bc5dedf5f60c 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c @@ -40,7 +40,7 @@ #include "virtual/virtual_stream_encoder.h" #include "dpcd_defs.h" #include "link_enc_cfg.h" -#include "link.h" +#include "link_service.h" #include "clk_mgr.h" #include "dc_state_priv.h" #include "dc_stream_priv.h" @@ -95,7 +95,6 @@ #define DC_LOGGER \ dc->ctx->logger #define DC_LOGGER_INIT(logger) - #include "dml2/dml2_wrapper.h" #define UNABLE_TO_SPLIT -1 @@ -165,7 +164,13 @@ enum dce_version resource_parse_asic_id(struct hw_asic_id asic_id) case FAMILY_NV: dc_version = DCN_VERSION_2_0; - if (asic_id.chip_id == DEVICE_ID_NV_13FE || asic_id.chip_id == DEVICE_ID_NV_143F) { + if (asic_id.chip_id == DEVICE_ID_NV_13FE || + asic_id.chip_id == DEVICE_ID_NV_143F || + asic_id.chip_id == DEVICE_ID_NV_13F9 || + asic_id.chip_id == DEVICE_ID_NV_13FA || + asic_id.chip_id == DEVICE_ID_NV_13FB || + asic_id.chip_id == DEVICE_ID_NV_13FC || + asic_id.chip_id == DEVICE_ID_NV_13DB) { dc_version = DCN_VERSION_2_01; break; } @@ -2143,7 +2148,7 @@ int resource_get_odm_slice_dst_width(struct pipe_ctx *otg_master, h_active = timing->h_addressable + timing->h_border_left + timing->h_border_right + - otg_master->hblank_borrow; + otg_master->dsc_padding_params.dsc_hactive_padding; width = h_active / count; if (otg_master->stream_res.tg) @@ -3940,7 +3945,9 @@ enum dc_status resource_map_pool_resources( /* TODO: Add check if ASIC support and EDID audio */ if (!stream->converter_disable_audio && dc_is_audio_capable_signal(pipe_ctx->stream->signal) && - stream->audio_info.mode_count && stream->audio_info.flags.all) { + stream->audio_info.mode_count && + (stream->audio_info.flags.all || + (stream->sink && stream->sink->edid_caps.panel_patch.skip_audio_sab_check))) { pipe_ctx->stream_res.audio = find_first_free_audio( &context->res_ctx, pool, pipe_ctx->stream_res.stream_enc->id, dc_ctx->dce_version); @@ -4053,7 +4060,7 @@ static bool add_all_planes_for_stream( * @set: An array of dc_validation_set with all the current streams reference * @set_count: Total of streams * @context: New context - * @fast_validate: Enable or disable fast validation + * @validate_mode: identify the validation mode * * This function updates the potential new stream in the context object. It * creates multiple lists for the add, remove, and unchanged streams. 
In @@ -4068,7 +4075,7 @@ enum dc_status dc_validate_with_context(struct dc *dc, const struct dc_validation_set set[], int set_count, struct dc_state *context, - bool fast_validate) + enum dc_validate_mode validate_mode) { struct dc_stream_state *unchanged_streams[MAX_PIPES] = { 0 }; struct dc_stream_state *del_streams[MAX_PIPES] = { 0 }; @@ -4242,7 +4249,7 @@ enum dc_status dc_validate_with_context(struct dc *dc, dc_state_set_stream_subvp_cursor_limit(context->streams[i], context, false); } - res = dc_validate_global_state(dc, context, fast_validate); + res = dc_validate_global_state(dc, context, validate_mode); /* calculate pixel rate divider after deciding pxiel clock & odm combine */ if ((dc->hwss.calculate_pix_rate_divider) && (res == DC_OK)) { @@ -4259,39 +4266,33 @@ fail: return res; } +#if defined(CONFIG_DRM_AMD_DC_FP) +#endif /* CONFIG_DRM_AMD_DC_FP */ + /** - * decide_hblank_borrow - Decides the horizontal blanking borrow value for a given pipe context. + * calculate_timing_params_for_dsc_with_padding - Calculates timing parameters for DSC with padding. * @pipe_ctx: Pointer to the pipe context structure. * - * This function calculates the horizontal blanking borrow value for a given pipe context based on the + * This function calculates the timing parameters for a given pipe context based on the * display stream compression (DSC) configuration. If the horizontal active pixels (hactive) are less - * than the total width of the DSC slices, it sets the hblank_borrow value to the difference. If the - * total horizontal timing minus the hblank_borrow value is less than 32, it resets the hblank_borrow + * than the total width of the DSC slices, it sets the dsc_hactive_padding value to the difference. If the + * total horizontal timing minus the dsc_hactive_padding value is less than 32, it resets the dsc_hactive_padding * value to 0. */ -static void decide_hblank_borrow(struct pipe_ctx *pipe_ctx) +static void calculate_timing_params_for_dsc_with_padding(struct pipe_ctx *pipe_ctx) { - uint32_t hactive; - uint32_t ceil_slice_width; struct dc_stream_state *stream = NULL; if (!pipe_ctx) return; stream = pipe_ctx->stream; + pipe_ctx->dsc_padding_params.dsc_hactive_padding = 0; + pipe_ctx->dsc_padding_params.dsc_htotal_padding = 0; - if (stream->timing.flags.DSC) { - hactive = stream->timing.h_addressable + stream->timing.h_border_left + stream->timing.h_border_right; - - /* Assume if determined slices does not divide Hactive evenly, Hborrow is needed for padding*/ - if (hactive % stream->timing.dsc_cfg.num_slices_h != 0) { - ceil_slice_width = (hactive / stream->timing.dsc_cfg.num_slices_h) + 1; - pipe_ctx->hblank_borrow = ceil_slice_width * stream->timing.dsc_cfg.num_slices_h - hactive; + if (stream) + pipe_ctx->dsc_padding_params.dsc_pix_clk_100hz = stream->timing.pix_clk_100hz; - if (stream->timing.h_total - hactive - pipe_ctx->hblank_borrow < 32) - pipe_ctx->hblank_borrow = 0; - } - } } /** @@ -4299,7 +4300,7 @@ static void decide_hblank_borrow(struct pipe_ctx *pipe_ctx) * * @dc: dc struct for this driver * @new_ctx: state to be validated - * @fast_validate: set to true if only yes/no to support matters + * @validate_mode: identify the validation mode * * Checks hardware resource availability and bandwidth requirement. 
* @@ -4309,7 +4310,7 @@ static void decide_hblank_borrow(struct pipe_ctx *pipe_ctx) enum dc_status dc_validate_global_state( struct dc *dc, struct dc_state *new_ctx, - bool fast_validate) + enum dc_validate_mode validate_mode) { enum dc_status result = DC_ERROR_UNEXPECTED; int i, j; @@ -4334,7 +4335,7 @@ enum dc_status dc_validate_global_state( /* Decide whether hblank borrow is needed and save it in pipe_ctx */ if (dc->debug.enable_hblank_borrow) - decide_hblank_borrow(pipe_ctx); + calculate_timing_params_for_dsc_with_padding(pipe_ctx); if (dc->res_pool->funcs->patch_unknown_plane_state && pipe_ctx->plane_state && @@ -4368,7 +4369,7 @@ enum dc_status dc_validate_global_state( result = resource_build_scaling_params_for_context(dc, new_ctx); if (result == DC_OK) - result = dc->res_pool->funcs->validate_bandwidth(dc, new_ctx, fast_validate); + result = dc->res_pool->funcs->validate_bandwidth(dc, new_ctx, validate_mode); return result; } @@ -4409,8 +4410,14 @@ static void set_avi_info_frame( unsigned int fr_ind = pipe_ctx->stream->timing.fr_index; enum dc_timing_3d_format format; + if (stream->avi_infopacket.valid) { + *info_packet = stream->avi_infopacket; + return; + } + memset(&hdmi_info, 0, sizeof(union hdmi_info_packet)); + color_space = pipe_ctx->stream->output_color_space; if (color_space == COLOR_SPACE_UNKNOWN) color_space = (stream->timing.pixel_encoding == PIXEL_ENCODING_RGB) ? diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_stat.c b/drivers/gpu/drm/amd/display/dc/core/dc_stat.c index fe9f99f1bdf9..f976ffd6d466 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_stat.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_stat.c @@ -65,7 +65,7 @@ void dc_stat_get_dmub_notification(const struct dc *dc, struct dmub_notification notify->type == DMUB_NOTIFICATION_DPIA_NOTIFICATION || notify->type == DMUB_NOTIFICATION_SET_CONFIG_REPLY) { notify->link_index = - get_link_index_from_dpia_port_index(dc, notify->link_index); + get_link_index_from_dpia_port_index(dc, notify->instance); } } diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_state.c b/drivers/gpu/drm/amd/display/dc/core/dc_state.c index 4db7383720fd..c61300a7cb1c 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_state.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_state.c @@ -194,11 +194,6 @@ static void init_state(struct dc *dc, struct dc_state *state) struct dc_state *dc_state_create(struct dc *dc, struct dc_state_create_params *params) { struct dc_state *state; -#ifdef CONFIG_DRM_AMD_DC_FP - struct dml2_configuration_options *dml2_opt = &dc->dml2_tmp; - - memcpy(dml2_opt, &dc->dml2_options, sizeof(dc->dml2_options)); -#endif state = kvzalloc(sizeof(struct dc_state), GFP_KERNEL); @@ -211,14 +206,12 @@ struct dc_state *dc_state_create(struct dc *dc, struct dc_state_create_params *p #ifdef CONFIG_DRM_AMD_DC_FP if (dc->debug.using_dml2) { - dml2_opt->use_clock_dc_limits = false; - if (!dml2_create(dc, dml2_opt, &state->bw_ctx.dml2)) { + if (!dml2_create(dc, &dc->dml2_options, &state->bw_ctx.dml2)) { dc_state_release(state); return NULL; } - dml2_opt->use_clock_dc_limits = true; - if (!dml2_create(dc, dml2_opt, &state->bw_ctx.dml2_dc_power_source)) { + if (dc->caps.dcmode_power_limits_present && !dml2_create(dc, &dc->dml2_dc_power_options, &state->bw_ctx.dml2_dc_power_source)) { dc_state_release(state); return NULL; } @@ -434,6 +427,8 @@ enum dc_status dc_state_remove_stream( return DC_ERROR_UNEXPECTED; } + dc_stream_release_3dlut_for_stream(dc, stream); + dc_stream_release(state->streams[i]); state->stream_count--; diff --git 
a/drivers/gpu/drm/amd/display/dc/core/dc_stream.c b/drivers/gpu/drm/amd/display/dc/core/dc_stream.c index b883fb24fa12..9ac2d41f8fca 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_stream.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_stream.c @@ -316,6 +316,9 @@ bool dc_stream_set_cursor_attributes( { bool result = false; + if (!stream) + return false; + if (dc_stream_check_cursor_attributes(stream, stream->ctx->dc->current_state, attributes)) { stream->cursor_attributes = *attributes; result = true; @@ -331,7 +334,10 @@ bool dc_stream_program_cursor_attributes( struct dc *dc; bool reset_idle_optimizations = false; - dc = stream ? stream->ctx->dc : NULL; + if (!stream) + return false; + + dc = stream->ctx->dc; if (dc_stream_set_cursor_attributes(stream, attributes)) { dc_z10_restore(dc); @@ -857,6 +863,73 @@ void dc_stream_log(const struct dc *dc, const struct dc_stream_state *stream) } /* +* dc_stream_get_3dlut_for_stream() +* Requirements: +* 1. If the stream already owns an RMCM instance, return it. +* 2. If it doesn't and we don't need to allocate, return NULL. +* 3. If there's a free RMCM instance, assign to stream and return it. +* 4. If no free RMCM instances, return NULL. +*/ + +struct dc_rmcm_3dlut *dc_stream_get_3dlut_for_stream( + const struct dc *dc, + const struct dc_stream_state *stream, + bool allocate_one) +{ + unsigned int num_rmcm = dc->caps.color.mpc.num_rmcm_3dluts; + + // see if one is allocated for this stream + for (int i = 0; i < num_rmcm; i++) { + if (dc->res_pool->rmcm_3dlut[i].isInUse && + dc->res_pool->rmcm_3dlut[i].stream == stream) + return &dc->res_pool->rmcm_3dlut[i]; + } + + //case: none found, and we don't need to allocate + if (!allocate_one) + return NULL; + + //see if there is an unused 3dlut, allocate + for (int i = 0; i < num_rmcm; i++) { + if (!dc->res_pool->rmcm_3dlut[i].isInUse) { + dc->res_pool->rmcm_3dlut[i].isInUse = true; + dc->res_pool->rmcm_3dlut[i].stream = stream; + return &dc->res_pool->rmcm_3dlut[i]; + } + } + + //don't have a 3dlut + return NULL; +} + + +void dc_stream_release_3dlut_for_stream( + const struct dc *dc, + const struct dc_stream_state *stream) +{ + struct dc_rmcm_3dlut *rmcm_3dlut = + dc_stream_get_3dlut_for_stream(dc, stream, false); + + if (rmcm_3dlut) { + rmcm_3dlut->isInUse = false; + rmcm_3dlut->stream = NULL; + rmcm_3dlut->protection_bits = 0; + } +} + + +void dc_stream_init_rmcm_3dlut(struct dc *dc) +{ + unsigned int num_rmcm = dc->caps.color.mpc.num_rmcm_3dluts; + + for (int i = 0; i < num_rmcm; i++) { + dc->res_pool->rmcm_3dlut[i].isInUse = false; + dc->res_pool->rmcm_3dlut[i].stream = NULL; + dc->res_pool->rmcm_3dlut[i].protection_bits = 0; + } +} + +/* * Finds the greatest index in refresh_rate_hz that contains a value <= refresh */ static int dc_stream_get_nearest_smallest_index(struct dc_stream_state *stream, int refresh) diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h index 1d917be36fc4..98f0b6b3c213 100644 --- a/drivers/gpu/drm/amd/display/dc/dc.h +++ b/drivers/gpu/drm/amd/display/dc/dc.h @@ -46,6 +46,8 @@ #include "dmub/inc/dmub_cmd.h" +#include "sspl/dc_spl_types.h" + struct abm_save_restore; /* forward declaration */ @@ -53,7 +55,7 @@ struct aux_payload; struct set_config_cmd_payload; struct dmub_notification; -#define DC_VER "3.2.334" +#define DC_VER "3.2.351" /** * MAX_SURFACES - representative of the upper bound of surfaces that can be piped to a single CRTC @@ -66,7 +68,11 @@ struct dmub_notification; #define MAX_STREAMS 6 #define MIN_VIEWPORT_SIZE 12 #define MAX_NUM_EDP 2
-#define MAX_HOST_ROUTERS_NUM 2 +#define MAX_SUPPORTED_FORMATS 7 + +#define MAX_HOST_ROUTERS_NUM 3 +#define MAX_DPIA_PER_HOST_ROUTER 3 +#define MAX_DPIA_NUM (MAX_HOST_ROUTERS_NUM * MAX_DPIA_PER_HOST_ROUTER) /* Display Core Interfaces */ struct dc_versions { @@ -192,6 +198,34 @@ struct dpp_color_caps { struct rom_curve_caps ogam_rom_caps; }; +/* Below structure is to describe the HW support for mem layout, extend support + range to match what OS could handle in the roadmap */ +struct lut3d_caps { + uint32_t dma_3d_lut : 1; /*< DMA mode support for 3D LUT */ + struct { + uint32_t swizzle_3d_rgb : 1; + uint32_t swizzle_3d_bgr : 1; + uint32_t linear_1d : 1; + } mem_layout_support; + struct { + uint32_t unorm_12msb : 1; + uint32_t unorm_12lsb : 1; + uint32_t float_fp1_5_10 : 1; + } mem_format_support; + struct { + uint32_t order_rgba : 1; + uint32_t order_bgra : 1; + } mem_pixel_order_support; + /*< size options are 9, 17, 33, 45, 65 */ + struct { + uint32_t dim_9 : 1; /* 3D LUT support for 9x9x9 */ + uint32_t dim_17 : 1; /* 3D LUT support for 17x17x17 */ + uint32_t dim_33 : 1; /* 3D LUT support for 33x33x33 */ + uint32_t dim_45 : 1; /* 3D LUT support for 45x45x45 */ + uint32_t dim_65 : 1; /* 3D LUT support for 65x65x65 */ + } lut_dim_caps; +}; + /** * struct mpc_color_caps - color pipeline capabilities for multiple pipe and * plane combined blocks @@ -200,17 +234,25 @@ struct dpp_color_caps { * @ogam_ram: programmable out gamma LUT * @ocsc: output color space conversion matrix * @num_3dluts: MPC 3D LUT; always assumes a preceding shaper LUT + * @num_rmcm_3dluts: number of RMCM 3D LUTS; always assumes a preceding shaper LUT * @shared_3d_lut: shared 3D LUT flag. Can be either DPP or MPC, but single * instance * @ogam_rom_caps: pre-definied curve caps for regamma 1D LUT + * @mcm_3d_lut_caps: HW support cap for MCM LUT memory + * @rmcm_3d_lut_caps: HW support cap for RMCM LUT memory + * @preblend: whether color manager supports preblend with MPC */ struct mpc_color_caps { uint16_t gamut_remap : 1; uint16_t ogam_ram : 1; uint16_t ocsc : 1; uint16_t num_3dluts : 3; + uint16_t num_rmcm_3dluts : 3; uint16_t shared_3d_lut:1; struct rom_curve_caps ogam_rom_caps; + struct lut3d_caps mcm_3d_lut_caps; + struct lut3d_caps rmcm_3d_lut_caps; + bool preblend; }; /** @@ -270,6 +312,7 @@ struct dc_caps { bool dmcub_support; bool zstate_support; bool ips_support; + bool ips_v2_support; uint32_t num_of_internal_disp; enum dp_protocol_version max_dp_protocol_version; unsigned int mall_size_per_mem_channel; @@ -305,6 +348,10 @@ struct dc_caps { /* Conservative limit for DCC cases which require ODM4:1 to support*/ uint32_t dcc_plane_width_limit; struct dc_scl_caps scl_caps; + uint8_t num_of_host_routers; + uint8_t num_of_dpias_per_host_router; + /* limit of the ODM only, could be limited by other factors (like pipe count)*/ + uint8_t max_odm_combine_factor; }; struct dc_bug_wa { @@ -459,6 +506,7 @@ struct dc_config { bool use_spl; bool prefer_easf; bool use_pipe_ctx_sync_logic; + int smart_mux_version; bool ignore_dpref_ss; bool enable_mipi_converter_optimization; bool use_default_clock_table; @@ -469,6 +517,7 @@ struct dc_config { bool EnableMinDispClkODM; bool enable_auto_dpm_test_logs; unsigned int disable_ips; + unsigned int disable_ips_rcg; unsigned int disable_ips_in_vpb; bool disable_ips_in_dpms_off; bool usb4_bw_alloc_support; @@ -481,6 +530,8 @@ struct dc_config { bool set_pipe_unlock_order; bool enable_dpia_pre_training; bool unify_link_enc_assignment; + struct spl_sharpness_range dcn_sharpness_range; + 
struct spl_sharpness_range dcn_override_sharpness_range; }; enum visual_confirm { @@ -492,6 +543,7 @@ enum visual_confirm { VISUAL_CONFIRM_SWAPCHAIN = 6, VISUAL_CONFIRM_FAMS = 7, VISUAL_CONFIRM_SWIZZLE = 9, + VISUAL_CONFIRM_SMARTMUX_DGPU = 10, VISUAL_CONFIRM_REPLAY = 12, VISUAL_CONFIRM_SUBVP = 14, VISUAL_CONFIRM_MCLK_SWITCH = 16, @@ -643,6 +695,15 @@ struct dc_clocks { int idle_fclk_khz; int subvp_prefetch_dramclk_khz; int subvp_prefetch_fclk_khz; + + /* Stutter efficiency is technically not clock values + * but stored here so the values are part of the update_clocks call similar to num_ways + * Efficiencies are stored as percentage (0-100) + */ + struct { + uint8_t base_efficiency; //LP1 + uint8_t low_power_efficiency; //LP2 + } stutter_efficiency; }; struct dc_bw_validation_profile { @@ -770,6 +831,7 @@ enum pg_hw_resources { PG_DCHVM, PG_DWB, PG_HPO, + PG_DCOH, PG_HW_RESOURCES_NUM_ELEMENT }; @@ -786,9 +848,8 @@ union dpia_debug_options { uint32_t disable_mst_dsc_work_around:1; /* bit 3 */ uint32_t enable_force_tbt3_work_around:1; /* bit 4 */ uint32_t disable_usb4_pm_support:1; /* bit 5 */ - uint32_t enable_consolidated_dpia_dp_lt:1; /* bit 6 */ - uint32_t enable_dpia_pre_training:1; /* bit 7 */ - uint32_t unify_link_enc_assignment:1; /* bit 8 */ + uint32_t enable_usb4_bw_zero_alloc_patch:1; /* bit 6 */ + uint32_t enable_bw_allocation_mode:1; /* bit 7 */ uint32_t reserved:24; } bits; uint32_t raw; @@ -915,6 +976,9 @@ struct dc_debug_options { bool disable_dsc_power_gate; bool disable_optc_power_gate; bool disable_hpo_power_gate; + bool disable_io_clk_power_gate; + bool disable_mem_power_gate; + bool disable_dio_power_gate; int dsc_min_slice_height_override; int dsc_bpp_increment_div; bool disable_pplib_wm_range; @@ -1019,6 +1083,7 @@ struct dc_debug_options { unsigned int force_mall_ss_num_ways; bool alloc_extra_way_for_cursor; uint32_t subvp_extra_lines; + bool disable_force_pstate_allow_on_hw_release; bool force_usr_allow; /* uses value at boot and disables switch */ bool disable_dtb_ref_clk_switch; @@ -1092,6 +1157,13 @@ struct dc_debug_options { bool enable_hblank_borrow; bool force_subvp_df_throttle; uint32_t acpi_transition_bitmasks[MAX_PIPES]; + bool enable_pg_cntl_debug_logs; + unsigned int auxless_alpm_lfps_setup_ns; + unsigned int auxless_alpm_lfps_period_ns; + unsigned int auxless_alpm_lfps_silence_ns; + unsigned int auxless_alpm_lfps_t1t2_us; + short auxless_alpm_lfps_t1t2_offset_us; + bool disable_stutter_for_wm_program; }; @@ -1151,7 +1223,7 @@ struct dc_init_data { uint32_t *dcn_reg_offsets; uint32_t *nbio_reg_offsets; uint32_t *clk_reg_offsets; - struct dml2_soc_bb *bb_from_dmub; + void *bb_from_dmub; }; struct dc_callback_init { @@ -1252,6 +1324,38 @@ union dc_3dlut_state { }; +#define MATRIX_9C__DIM_128_ALIGNED_LEN 16 // 9+8 : 9 * 8 + 7 * 8 = 72 + 56 = 128 % 128 = 0 +#define MATRIX_17C__DIM_128_ALIGNED_LEN 32 //17+15: 17 * 8 + 15 * 8 = 136 + 120 = 256 % 128 = 0 +#define MATRIX_33C__DIM_128_ALIGNED_LEN 64 //17+47: 17 * 8 + 47 * 8 = 136 + 376 = 512 % 128 = 0 + +struct lut_rgb { + uint16_t b; + uint16_t g; + uint16_t r; + uint16_t padding; +}; + +//this structure maps directly to how the lut will read it from memory +struct lut_mem_mapping { + union { + //NATIVE MODE 1, 2 + //RGB layout [b][g][r] //red is 128 byte aligned + //BGR layout [r][g][b] //blue is 128 byte aligned + struct lut_rgb rgb_17c[17][17][MATRIX_17C__DIM_128_ALIGNED_LEN]; + struct lut_rgb rgb_33c[33][33][MATRIX_33C__DIM_128_ALIGNED_LEN]; + + //TRANSFORMED + uint16_t linear_rgb[(33*33*33*4/128+1)*128]; + }; 
+ uint16_t size; +}; + +struct dc_rmcm_3dlut { + bool isInUse; + const struct dc_stream_state *stream; + uint8_t protection_bits; +}; + struct dc_3dlut { struct kref refcount; struct tetrahedral_params lut_3d; @@ -1288,7 +1392,6 @@ union surface_update_flags { uint32_t in_transfer_func_change:1; uint32_t input_csc_change:1; uint32_t coeff_reduction_change:1; - uint32_t output_tf_change:1; uint32_t pixel_format_change:1; uint32_t plane_size_change:1; uint32_t gamut_remap_change:1; @@ -1389,6 +1492,8 @@ struct dc_plane_state { int sharpness_level; enum linear_light_scaling linear_light_scaling; unsigned int sdr_white_level_nits; + struct spl_sharpness_range sharpness_range; + enum sharpness_range_source sharpness_source; }; struct dc_plane_info { @@ -1570,6 +1675,7 @@ struct dc_scratch_space { bool blank_stream_on_ocs_change; bool read_dpcd204h_on_irq_hpd; bool force_dp_ffe_preset; + bool skip_phy_ssc_reduction; } wa_flags; union dc_dp_ffe_preset forced_dp_ffe_preset; struct link_mst_stream_allocation_table mst_stream_alloc_table; @@ -1579,6 +1685,8 @@ struct dc_scratch_space { struct gpio *hpd_gpio; enum dc_link_fec_state fec_state; + bool is_dds; + bool is_display_mux_present; bool link_powered_externally; // Used to bypass hardware sequencing delays when panel is powered down forcibly struct dc_panel_config panel_config; @@ -1603,6 +1711,7 @@ struct dc { uint8_t link_count; struct dc_link *links[MAX_LINKS]; + uint8_t lowest_dpia_link_index; struct link_service *link_srv; struct dc_state *current_state; @@ -1626,12 +1735,15 @@ struct dc { /* Require to optimize clocks and bandwidth for added/removed planes */ bool optimized_required; - bool wm_optimized_required; bool idle_optimizations_allowed; bool enable_c20_dtm_b0; /* Require to maintain clocks and bandwidth for UEFI enabled HW */ + /* For eDP to know the switching state of SmartMux */ + bool is_switch_in_progress_orig; + bool is_switch_in_progress_dest; + /* FBC compressor */ struct compressor *fbc_compressor; @@ -1662,9 +1774,9 @@ struct dc { } scratch; struct dml2_configuration_options dml2_options; - struct dml2_configuration_options dml2_tmp; + struct dml2_configuration_options dml2_dc_power_options; enum dc_acpi_cm_power_state power_state; - + struct soc_and_ip_translator *soc_and_ip_translator; }; struct dc_scaling_info { @@ -1717,6 +1829,23 @@ struct dc_surface_update { struct dc_bias_and_scale bias_and_scale; }; +struct dc_underflow_debug_data { + uint32_t otg_inst; + uint32_t otg_underflow; + uint32_t h_position; + uint32_t v_position; + uint32_t otg_frame_count; + struct dc_underflow_per_hubp_debug_data { + uint32_t hubp_underflow; + uint32_t hubp_in_blank; + uint32_t hubp_readline; + uint32_t det_config_error; + } hubps[MAX_PIPES]; + uint32_t curr_det_sizes[MAX_PIPES]; + uint32_t target_det_sizes[MAX_PIPES]; + uint32_t compbuf_config_error; +}; + /* * Create a new surface with default parameters; */ @@ -1735,8 +1864,6 @@ void dc_3dlut_func_retain(struct dc_3dlut *lut); void dc_post_update_surfaces_to_stream( struct dc *dc); -#include "dc_stream.h" - /** * struct dc_validation_set - Struct to store surface/stream associations for validation */ @@ -1767,19 +1894,15 @@ enum dc_status dc_validate_with_context(struct dc *dc, const struct dc_validation_set set[], int set_count, struct dc_state *context, - bool fast_validate); + enum dc_validate_mode validate_mode); bool dc_set_generic_gpio_for_stereo(bool enable, struct gpio_service *gpio_service); -/* - * fast_validate: we return after determining if we can support the new 
state, - * but before we populate the programming info - */ enum dc_status dc_validate_global_state( struct dc *dc, struct dc_state *new_ctx, - bool fast_validate); + enum dc_validate_mode validate_mode); bool dc_acquire_release_mpc_3dlut( struct dc *dc, bool acquire, @@ -2375,17 +2498,18 @@ void dc_link_dp_dpia_handle_usb4_bandwidth_allocation_for_link( struct dc_link *link, int peak_bw); /* - * Validate the BW of all the valid DPIA links to make sure it doesn't exceed - * available BW for each host router - * - * @dc: pointer to dc struct - * @stream: pointer to all possible streams - * @count: number of valid DPIA streams + * Calculates the DP tunneling bandwidth required for the stream timing + * and aggregates the stream bandwidth for the respective DP tunneling link * - * return: TRUE if bw used by DPIAs doesn't exceed available BW else return FALSE + * return: dc_status */ -bool dc_link_dp_dpia_validate(struct dc *dc, const struct dc_stream_state *streams, - const unsigned int count); +enum dc_status dc_link_validate_dp_tunneling_bandwidth(const struct dc *dc, const struct dc_state *new_ctx); + +/* + * Get if ALPM is supported by the link + */ +void dc_link_get_alpm_support(struct dc_link *link, bool *auxless_support, + bool *auxwake_support); /* Sink Interfaces - A sink corresponds to a display output device */ @@ -2595,6 +2719,8 @@ struct dc_power_profile dc_get_power_profile_for_dc_state(const struct dc_state unsigned int dc_get_det_buffer_size_from_state(const struct dc_state *context); +bool dc_get_host_router_index(const struct dc_link *link, unsigned int *host_router_index); + /* DSC Interfaces */ #include "dc_dsc.h" @@ -2612,4 +2738,17 @@ bool dc_is_timing_changed(struct dc_stream_state *cur_stream, bool dc_is_cursor_limit_pending(struct dc *dc); bool dc_can_clear_cursor_limit(struct dc *dc); +/** + * dc_get_underflow_debug_data_for_otg() - Retrieve underflow debug data. + * + * @dc: Pointer to the display core context. + * @primary_otg_inst: Instance index of the primary OTG that underflowed. + * @out_data: Pointer to a dc_underflow_debug_data struct to be filled with debug information. + * + * This function collects and logs underflow-related HW states when underflow happens, + * including OTG underflow status, current read positions, frame count, and per-HUBP debug data. + * The results are stored in the provided out_data structure for further analysis or logging. 
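 *
 * Editor's illustration, not part of the patch: a hypothetical caller in an
 * underflow handler might use the helper roughly as follows, assuming otg_inst
 * has already been decoded from the interrupt source:
 *
 *	struct dc_underflow_debug_data data = {0};
 *
 *	dc_get_underflow_debug_data_for_otg(dc, otg_inst, &data);
 *	DC_LOG_WARNING("OTG%u underflow=%u frame=%u",
 *			data.otg_inst, data.otg_underflow, data.otg_frame_count);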
+ */ +void dc_get_underflow_debug_data_for_otg(struct dc *dc, int primary_otg_inst, struct dc_underflow_debug_data *out_data); + #endif /* DC_INTERFACE_H_ */ diff --git a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c index afbcf866520e..53a088ebddef 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c +++ b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c @@ -1269,12 +1269,16 @@ static void dc_dmub_srv_notify_idle(const struct dc *dc, bool allow_idle) new_signals.bits.allow_ips1 = 1; new_signals.bits.allow_ips2 = 1; new_signals.bits.allow_z10 = 1; + // New in IPSv2.0 + new_signals.bits.allow_ips1z8 = 1; } else if (dc->config.disable_ips == DMUB_IPS_DISABLE_IPS1) { new_signals.bits.allow_ips1 = 1; } else if (dc->config.disable_ips == DMUB_IPS_DISABLE_IPS2) { + // IPSv1.0 only new_signals.bits.allow_pg = 1; new_signals.bits.allow_ips1 = 1; } else if (dc->config.disable_ips == DMUB_IPS_DISABLE_IPS2_Z10) { + // IPSv1.0 only new_signals.bits.allow_pg = 1; new_signals.bits.allow_ips1 = 1; new_signals.bits.allow_ips2 = 1; @@ -1286,6 +1290,8 @@ static void dc_dmub_srv_notify_idle(const struct dc *dc, bool allow_idle) new_signals.bits.allow_ips1 = 1; new_signals.bits.allow_ips2 = 1; new_signals.bits.allow_z10 = 1; + // New in IPSv2.0 + new_signals.bits.allow_ips1z8 = 1; } else { /* RCG only */ new_signals.bits.allow_pg = 0; @@ -1293,8 +1299,28 @@ static void dc_dmub_srv_notify_idle(const struct dc *dc, bool allow_idle) new_signals.bits.allow_ips2 = 0; new_signals.bits.allow_z10 = 0; } + } else if (dc->config.disable_ips == DMUB_IPS_DISABLE_Z8_RETENTION) { + new_signals.bits.allow_pg = 1; + new_signals.bits.allow_ips1 = 1; + new_signals.bits.allow_ips2 = 1; + new_signals.bits.allow_z10 = 1; + } + // Setting RCG allow bits (IPSv2.0) + if (dc->config.disable_ips_rcg == DMUB_IPS_RCG_ENABLE) { + new_signals.bits.allow_ips0_rcg = 1; + new_signals.bits.allow_ips1_rcg = 1; + } else if (dc->config.disable_ips_rcg == DMUB_IPS0_RCG_DISABLE) { + new_signals.bits.allow_ips1_rcg = 1; + } else if (dc->config.disable_ips_rcg == DMUB_IPS1_RCG_DISABLE) { + new_signals.bits.allow_ips0_rcg = 1; + } + // IPS dynamic allow bits (IPSv2 change, vpb use case) + if (dc->config.disable_ips_in_vpb == DMUB_IPS_VPB_ENABLE_IPS1_AND_RCG) { + new_signals.bits.allow_dynamic_ips1 = 1; + } else if (dc->config.disable_ips_in_vpb == DMUB_IPS_VPB_ENABLE_ALL) { + new_signals.bits.allow_dynamic_ips1 = 1; + new_signals.bits.allow_dynamic_ips1_z8 = 1; } - ips_driver->signals = new_signals; dc_dmub_srv->driver_signals = ips_driver->signals; } @@ -1318,7 +1344,7 @@ static void dc_dmub_srv_notify_idle(const struct dc *dc, bool allow_idle) static void dc_dmub_srv_exit_low_power_state(const struct dc *dc) { struct dc_dmub_srv *dc_dmub_srv; - uint32_t rcg_exit_count = 0, ips1_exit_count = 0, ips2_exit_count = 0; + uint32_t rcg_exit_count = 0, ips1_exit_count = 0, ips2_exit_count = 0, ips1z8_exit_count = 0; if (dc->debug.dmcub_emulation) return; @@ -1338,31 +1364,34 @@ static void dc_dmub_srv_exit_low_power_state(const struct dc *dc) rcg_exit_count = ips_fw->rcg_exit_count; ips1_exit_count = ips_fw->ips1_exit_count; ips2_exit_count = ips_fw->ips2_exit_count; + ips1z8_exit_count = ips_fw->ips1_z8ret_exit_count; ips_driver->signals.all = 0; dc_dmub_srv->driver_signals = ips_driver->signals; DC_LOG_IPS( - "%s (allow ips1=%u ips2=%u) (commit ips1=%u ips2=%u) (count rcg=%u ips1=%u ips2=%u)", + "%s (allow ips1=%u ips2=%u) (commit ips1=%u ips2=%u ips1z8=%u) (count rcg=%u ips1=%u ips2=%u ips1_z8=%u)", 
__func__, ips_driver->signals.bits.allow_ips1, ips_driver->signals.bits.allow_ips2, ips_fw->signals.bits.ips1_commit, ips_fw->signals.bits.ips2_commit, + ips_fw->signals.bits.ips1z8_commit, ips_fw->rcg_entry_count, ips_fw->ips1_entry_count, - ips_fw->ips2_entry_count); + ips_fw->ips2_entry_count, + ips_fw->ips1_z8ret_entry_count); /* Note: register access has technically not resumed for DCN here, but we * need to be message PMFW through our standard register interface. */ dc_dmub_srv->needs_idle_wake = false; - if ((prev_driver_signals.bits.allow_ips2 || prev_driver_signals.all == 0) && + if (!dc->caps.ips_v2_support && ((prev_driver_signals.bits.allow_ips2 || prev_driver_signals.all == 0) && (!dc->debug.optimize_ips_handshake || - ips_fw->signals.bits.ips2_commit || !ips_fw->signals.bits.in_idle)) { + ips_fw->signals.bits.ips2_commit || !ips_fw->signals.bits.in_idle))) { DC_LOG_IPS( - "wait IPS2 eval (ips1_commit=%u ips2_commit=%u)", + "wait IPS2 eval (ips1_commit=%u ips2_commit=%u )", ips_fw->signals.bits.ips1_commit, ips_fw->signals.bits.ips2_commit); @@ -1422,28 +1451,31 @@ static void dc_dmub_srv_exit_low_power_state(const struct dc *dc) dc_dmub_srv_notify_idle(dc, false); if (prev_driver_signals.bits.allow_ips1 || prev_driver_signals.all == 0) { DC_LOG_IPS( - "wait for IPS1 commit clear (ips1_commit=%u ips2_commit=%u)", + "wait for IPS1 commit clear (ips1_commit=%u ips2_commit=%u ips1z8=%u)", ips_fw->signals.bits.ips1_commit, - ips_fw->signals.bits.ips2_commit); + ips_fw->signals.bits.ips2_commit, + ips_fw->signals.bits.ips1z8_commit); while (ips_fw->signals.bits.ips1_commit) udelay(1); DC_LOG_IPS( - "wait for IPS1 commit clear done (ips1_commit=%u ips2_commit=%u)", + "wait for IPS1 commit clear done (ips1_commit=%u ips2_commit=%u ips1z8=%u)", ips_fw->signals.bits.ips1_commit, - ips_fw->signals.bits.ips2_commit); + ips_fw->signals.bits.ips2_commit, + ips_fw->signals.bits.ips1z8_commit); } } if (!dc_dmub_srv_is_hw_pwr_up(dc->ctx->dmub_srv, true)) ASSERT(0); - DC_LOG_IPS("%s exit (count rcg=%u ips1=%u ips2=%u)", + DC_LOG_IPS("%s exit (count rcg=%u ips1=%u ips2=%u ips1z8=%u)", __func__, rcg_exit_count, ips1_exit_count, - ips2_exit_count); + ips2_exit_count, + ips1z8_exit_count); } void dc_dmub_srv_set_power_state(struct dc_dmub_srv *dc_dmub_srv, enum dc_acpi_cm_power_state power_state) @@ -1656,7 +1688,7 @@ bool dc_wake_and_execute_gpint(const struct dc_context *ctx, enum dmub_gpint_com return result; } -void dc_dmub_srv_fams2_update_config(struct dc *dc, +static void dc_dmub_srv_rb_based_fams2_update_config(struct dc *dc, struct dc_state *context, bool enable) { @@ -1722,6 +1754,63 @@ void dc_dmub_srv_fams2_update_config(struct dc *dc, dm_execute_dmub_cmd_list(dc->ctx, num_cmds, cmd, DM_DMUB_WAIT_TYPE_WAIT); } +static void dc_dmub_srv_ib_based_fams2_update_config(struct dc *dc, + struct dc_state *context, + bool enable) +{ + struct dmub_fams2_config_v2 *config = (struct dmub_fams2_config_v2 *)dc->ctx->dmub_srv->dmub->ib_mem_gart.cpu_addr; + union dmub_rb_cmd cmd; + uint32_t i; + + memset(config, 0, sizeof(*config)); + memset(&cmd, 0, sizeof(cmd)); + + cmd.ib_fams2_config.header.type = DMUB_CMD__FW_ASSISTED_MCLK_SWITCH; + cmd.ib_fams2_config.header.sub_type = DMUB_CMD__FAMS2_IB_CONFIG; + + cmd.ib_fams2_config.ib_data.src.quad_part = dc->ctx->dmub_srv->dmub->ib_mem_gart.gpu_addr; + cmd.ib_fams2_config.ib_data.size = sizeof(*config); + + if (enable && context->bw_ctx.bw.dcn.fams2_global_config.features.bits.enable) { + /* copy static feature configuration overrides */ + 
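/*
 * Editor's note, not part of the patch: unlike the ring-buffer based path above,
 * this indirect-buffer variant stages the whole dmub_fams2_config_v2 in GART-backed
 * memory and hands DMUB only its GPU address and size in a single command, so the
 * per-stream configuration is fetched by firmware out of band.
 */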
config->global.features.bits.enable_stall_recovery = dc->debug.fams2_config.bits.enable_stall_recovery; + config->global.features.bits.enable_offload_flip = dc->debug.fams2_config.bits.enable_offload_flip; + config->global.features.bits.enable_debug = dc->debug.fams2_config.bits.enable_debug; + + /* send global configuration parameters */ + memcpy(&config->global, &context->bw_ctx.bw.dcn.fams2_global_config, + sizeof(struct dmub_cmd_fams2_global_config)); + + /* construct per-stream configs */ + for (i = 0; i < context->bw_ctx.bw.dcn.fams2_global_config.num_streams; i++) { + /* copy stream static base state */ + memcpy(&config->stream_v1[i].base, + &context->bw_ctx.bw.dcn.fams2_stream_base_params[i], + sizeof(config->stream_v1[i].base)); + + /* copy stream static sub-state */ + memcpy(&config->stream_v1[i].sub_state, + &context->bw_ctx.bw.dcn.fams2_stream_sub_params_v2[i], + sizeof(config->stream_v1[i].sub_state)); + } + } + + config->global.features.bits.enable_visual_confirm = dc->debug.visual_confirm == VISUAL_CONFIRM_FAMS2; + config->global.features.bits.enable = enable; + + dm_execute_dmub_cmd_list(dc->ctx, 1, &cmd, DM_DMUB_WAIT_TYPE_WAIT); +} + +void dc_dmub_srv_fams2_update_config(struct dc *dc, + struct dc_state *context, + bool enable) +{ + if (dc->debug.fams_version.major == 2) + dc_dmub_srv_rb_based_fams2_update_config(dc, context, enable); + if (dc->debug.fams_version.major == 3) + dc_dmub_srv_ib_based_fams2_update_config(dc, context, enable); +} + void dc_dmub_srv_fams2_drr_update(struct dc *dc, uint32_t tg_inst, uint32_t vtotal_min, @@ -1847,83 +1936,315 @@ void dc_dmub_srv_fams2_passthrough_flip( } } -bool dc_dmub_srv_ips_residency_cntl(struct dc_dmub_srv *dc_dmub_srv, bool start_measurement) + +bool dc_dmub_srv_ips_residency_cntl(const struct dc_context *ctx, uint8_t panel_inst, bool start_measurement) +{ + union dmub_rb_cmd cmd; + + memset(&cmd, 0, sizeof(cmd)); + + cmd.ips_residency_cntl.header.type = DMUB_CMD__IPS; + cmd.ips_residency_cntl.header.sub_type = DMUB_CMD__IPS_RESIDENCY_CNTL; + cmd.ips_residency_cntl.header.payload_bytes = sizeof(struct dmub_cmd_ips_residency_cntl_data); + + // only panel_inst=0 is supported at the moment + cmd.ips_residency_cntl.cntl_data.panel_inst = panel_inst; + cmd.ips_residency_cntl.cntl_data.start_measurement = start_measurement; + + if (!dc_wake_and_execute_dmub_cmd(ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT_WITH_REPLY)) + return false; + + return true; +} + +bool dc_dmub_srv_ips_query_residency_info(const struct dc_context *ctx, uint8_t panel_inst, struct dmub_ips_residency_info *driver_info, + enum ips_residency_mode ips_mode) { - bool result; + union dmub_rb_cmd cmd; + uint32_t bytes = sizeof(struct dmub_ips_residency_info); - if (!dc_dmub_srv || !dc_dmub_srv->dmub) + dmub_flush_buffer_mem(&ctx->dmub_srv->dmub->scratch_mem_fb); + memset(&cmd, 0, sizeof(cmd)); + + cmd.ips_query_residency_info.header.type = DMUB_CMD__IPS; + cmd.ips_query_residency_info.header.sub_type = DMUB_CMD__IPS_QUERY_RESIDENCY_INFO; + cmd.ips_query_residency_info.header.payload_bytes = sizeof(struct dmub_cmd_ips_query_residency_info_data); + + cmd.ips_query_residency_info.info_data.dest.quad_part = ctx->dmub_srv->dmub->scratch_mem_fb.gpu_addr; + cmd.ips_query_residency_info.info_data.size = bytes; + cmd.ips_query_residency_info.info_data.panel_inst = panel_inst; + cmd.ips_query_residency_info.info_data.ips_mode = (uint32_t)ips_mode; + + if (!dc_wake_and_execute_dmub_cmd(ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT_WITH_REPLY) || + cmd.ips_query_residency_info.header.ret_status == 
0) return false; - result = dc_wake_and_execute_gpint(dc_dmub_srv->ctx, DMUB_GPINT__IPS_RESIDENCY, - start_measurement, NULL, DM_DMUB_WAIT_TYPE_WAIT); + // copy the result to the output since ret_status != 0 means the command returned data + memcpy(driver_info, ctx->dmub_srv->dmub->scratch_mem_fb.cpu_addr, bytes); + + return true; +} + +bool dmub_lsdma_init(struct dc_dmub_srv *dc_dmub_srv) +{ + struct dc_context *dc_ctx = dc_dmub_srv->ctx; + union dmub_rb_cmd cmd; + enum dm_dmub_wait_type wait_type; + struct dmub_cmd_lsdma_data *lsdma_data = &cmd.lsdma.lsdma_data; + bool result; + + memset(&cmd, 0, sizeof(cmd)); + + cmd.cmd_common.header.type = DMUB_CMD__LSDMA; + cmd.cmd_common.header.sub_type = DMUB_CMD__LSDMA_INIT_CONFIG; + wait_type = DM_DMUB_WAIT_TYPE_NO_WAIT; + + lsdma_data->u.init_data.gpu_addr_base.quad_part = dc_ctx->dmub_srv->dmub->lsdma_rb_fb.gpu_addr; + lsdma_data->u.init_data.ring_size = dc_ctx->dmub_srv->dmub->lsdma_rb_fb.size; + + result = dc_wake_and_execute_dmub_cmd(dc_ctx, &cmd, wait_type); + + if (!result) + DC_ERROR("LSDMA Init failed in DMUB"); return result; } -void dc_dmub_srv_ips_query_residency_info(struct dc_dmub_srv *dc_dmub_srv, struct ips_residency_info *output) +bool dmub_lsdma_send_linear_copy_command( + struct dc_dmub_srv *dc_dmub_srv, + uint64_t src_addr, + uint64_t dst_addr, + uint32_t count +) { - uint32_t i; - enum dmub_gpint_command command_code; + struct dc_context *dc_ctx = dc_dmub_srv->ctx; + union dmub_rb_cmd cmd; + enum dm_dmub_wait_type wait_type; + struct dmub_cmd_lsdma_data *lsdma_data = &cmd.lsdma.lsdma_data; + bool result; - if (!dc_dmub_srv || !dc_dmub_srv->dmub) - return; + memset(&cmd, 0, sizeof(cmd)); - switch (output->ips_mode) { - case DMUB_IPS_MODE_IPS1_MAX: - command_code = DMUB_GPINT__GET_IPS1_HISTOGRAM_COUNTER; - break; - case DMUB_IPS_MODE_IPS2: - command_code = DMUB_GPINT__GET_IPS2_HISTOGRAM_COUNTER; - break; - case DMUB_IPS_MODE_IPS1_RCG: - command_code = DMUB_GPINT__GET_IPS1_RCG_HISTOGRAM_COUNTER; - break; - case DMUB_IPS_MODE_IPS1_ONO2_ON: - command_code = DMUB_GPINT__GET_IPS1_ONO2_ON_HISTOGRAM_COUNTER; - break; - default: - command_code = DMUB_GPINT__INVALID_COMMAND; - break; - } + cmd.cmd_common.header.type = DMUB_CMD__LSDMA; + cmd.cmd_common.header.sub_type = DMUB_CMD__LSDMA_LINEAR_COPY; + wait_type = DM_DMUB_WAIT_TYPE_NO_WAIT; - if (command_code == DMUB_GPINT__INVALID_COMMAND) + lsdma_data->u.linear_copy_data.count = count - 1; // LSDMA controller expects bytes to copy -1 + lsdma_data->u.linear_copy_data.src_lo = src_addr & 0xFFFFFFFF; + lsdma_data->u.linear_copy_data.src_hi = (src_addr >> 32) & 0xFFFFFFFF; + lsdma_data->u.linear_copy_data.dst_lo = dst_addr & 0xFFFFFFFF; + lsdma_data->u.linear_copy_data.dst_hi = (dst_addr >> 32) & 0xFFFFFFFF; + + result = dc_wake_and_execute_dmub_cmd(dc_ctx, &cmd, wait_type); + + if (!result) + DC_ERROR("LSDMA Linear Copy failed in DMUB"); + + return result; +} + +bool dmub_lsdma_send_linear_sub_window_copy_command( + struct dc_dmub_srv *dc_dmub_srv, + struct lsdma_linear_sub_window_copy_params copy_data +) +{ + struct dc_context *dc_ctx = dc_dmub_srv->ctx; + union dmub_rb_cmd cmd; + enum dm_dmub_wait_type wait_type; + struct dmub_cmd_lsdma_data *lsdma_data = &cmd.lsdma.lsdma_data; + bool result; + + memset(&cmd, 0, sizeof(cmd)); + + cmd.cmd_common.header.type = DMUB_CMD__LSDMA; + cmd.cmd_common.header.sub_type = DMUB_CMD__LSDMA_LINEAR_SUB_WINDOW_COPY; + wait_type = DM_DMUB_WAIT_TYPE_NO_WAIT; + + lsdma_data->u.linear_sub_window_copy_data.tmz = copy_data.tmz; + 
lsdma_data->u.linear_sub_window_copy_data.element_size = copy_data.element_size; + lsdma_data->u.linear_sub_window_copy_data.src_lo = copy_data.src_lo; + lsdma_data->u.linear_sub_window_copy_data.src_hi = copy_data.src_hi; + lsdma_data->u.linear_sub_window_copy_data.src_x = copy_data.src_x; + lsdma_data->u.linear_sub_window_copy_data.src_y = copy_data.src_y; + lsdma_data->u.linear_sub_window_copy_data.src_pitch = copy_data.src_pitch; + lsdma_data->u.linear_sub_window_copy_data.src_slice_pitch = copy_data.src_slice_pitch; + lsdma_data->u.linear_sub_window_copy_data.dst_lo = copy_data.dst_lo; + lsdma_data->u.linear_sub_window_copy_data.dst_hi = copy_data.dst_hi; + lsdma_data->u.linear_sub_window_copy_data.dst_x = copy_data.dst_x; + lsdma_data->u.linear_sub_window_copy_data.dst_y = copy_data.dst_y; + lsdma_data->u.linear_sub_window_copy_data.dst_pitch = copy_data.dst_pitch; + lsdma_data->u.linear_sub_window_copy_data.dst_slice_pitch = copy_data.dst_slice_pitch; + lsdma_data->u.linear_sub_window_copy_data.rect_x = copy_data.rect_x; + lsdma_data->u.linear_sub_window_copy_data.rect_y = copy_data.rect_y; + lsdma_data->u.linear_sub_window_copy_data.src_cache_policy = copy_data.src_cache_policy; + lsdma_data->u.linear_sub_window_copy_data.dst_cache_policy = copy_data.dst_cache_policy; + + result = dc_wake_and_execute_dmub_cmd(dc_ctx, &cmd, wait_type); + + if (!result) + DC_ERROR("LSDMA Linear Sub Window Copy failed in DMUB"); + + return result; +} + +bool dmub_lsdma_send_tiled_to_tiled_copy_command( + struct dc_dmub_srv *dc_dmub_srv, + struct lsdma_send_tiled_to_tiled_copy_command_params params +) +{ + struct dc_context *dc_ctx = dc_dmub_srv->ctx; + union dmub_rb_cmd cmd; + enum dm_dmub_wait_type wait_type; + struct dmub_cmd_lsdma_data *lsdma_data = &cmd.lsdma.lsdma_data; + bool result; + + memset(&cmd, 0, sizeof(cmd)); + + cmd.cmd_common.header.type = DMUB_CMD__LSDMA; + cmd.cmd_common.header.sub_type = DMUB_CMD__LSDMA_TILED_TO_TILED_COPY; + wait_type = DM_DMUB_WAIT_TYPE_NO_WAIT; + + lsdma_data->u.tiled_copy_data.src_addr_lo = params.src_addr & 0xFFFFFFFF; + lsdma_data->u.tiled_copy_data.src_addr_hi = (params.src_addr >> 32) & 0xFFFFFFFF; + lsdma_data->u.tiled_copy_data.dst_addr_lo = params.dst_addr & 0xFFFFFFFF; + lsdma_data->u.tiled_copy_data.dst_addr_hi = (params.dst_addr >> 32) & 0xFFFFFFFF; + lsdma_data->u.tiled_copy_data.src_x = params.src_x; + lsdma_data->u.tiled_copy_data.src_y = params.src_y; + lsdma_data->u.tiled_copy_data.dst_x = params.dst_x; + lsdma_data->u.tiled_copy_data.dst_y = params.dst_y; + lsdma_data->u.tiled_copy_data.src_width = params.src_width; + lsdma_data->u.tiled_copy_data.dst_width = params.dst_width; + lsdma_data->u.tiled_copy_data.src_swizzle_mode = params.swizzle_mode; + lsdma_data->u.tiled_copy_data.dst_swizzle_mode = params.swizzle_mode; + lsdma_data->u.tiled_copy_data.src_element_size = params.element_size; + lsdma_data->u.tiled_copy_data.dst_element_size = params.element_size; + lsdma_data->u.tiled_copy_data.rect_x = params.rect_x; + lsdma_data->u.tiled_copy_data.rect_y = params.rect_y; + lsdma_data->u.tiled_copy_data.dcc = params.dcc; + lsdma_data->u.tiled_copy_data.tmz = params.tmz; + lsdma_data->u.tiled_copy_data.read_compress = params.read_compress; + lsdma_data->u.tiled_copy_data.write_compress = params.write_compress; + lsdma_data->u.tiled_copy_data.src_height = params.src_height; + lsdma_data->u.tiled_copy_data.dst_height = params.dst_height; + lsdma_data->u.tiled_copy_data.data_format = params.data_format; + lsdma_data->u.tiled_copy_data.max_com = 
params.max_com; + lsdma_data->u.tiled_copy_data.max_uncom = params.max_uncom; + + result = dc_wake_and_execute_dmub_cmd(dc_ctx, &cmd, wait_type); + + if (!result) + DC_ERROR("LSDMA Tiled to Tiled Copy failed in DMUB"); + + return result; +} + +bool dmub_lsdma_send_pio_copy_command( + struct dc_dmub_srv *dc_dmub_srv, + uint64_t src_addr, + uint64_t dst_addr, + uint32_t byte_count, + uint32_t overlap_disable +) +{ + struct dc_context *dc_ctx = dc_dmub_srv->ctx; + union dmub_rb_cmd cmd; + enum dm_dmub_wait_type wait_type; + struct dmub_cmd_lsdma_data *lsdma_data = &cmd.lsdma.lsdma_data; + bool result; + + memset(&cmd, 0, sizeof(cmd)); + + cmd.cmd_common.header.type = DMUB_CMD__LSDMA; + cmd.cmd_common.header.sub_type = DMUB_CMD__LSDMA_PIO_COPY; + wait_type = DM_DMUB_WAIT_TYPE_NO_WAIT; + + lsdma_data->u.pio_copy_data.packet.fields.byte_count = byte_count; + lsdma_data->u.pio_copy_data.packet.fields.overlap_disable = overlap_disable; + lsdma_data->u.pio_copy_data.src_lo = src_addr & 0xFFFFFFFF; + lsdma_data->u.pio_copy_data.src_hi = (src_addr >> 32) & 0xFFFFFFFF; + lsdma_data->u.pio_copy_data.dst_lo = dst_addr & 0xFFFFFFFF; + lsdma_data->u.pio_copy_data.dst_hi = (dst_addr >> 32) & 0xFFFFFFFF; + + result = dc_wake_and_execute_dmub_cmd(dc_ctx, &cmd, wait_type); + + if (!result) + DC_ERROR("LSDMA PIO Copy failed in DMUB"); + + return result; +} + +bool dmub_lsdma_send_pio_constfill_command( + struct dc_dmub_srv *dc_dmub_srv, + uint64_t dst_addr, + uint32_t byte_count, + uint32_t data +) +{ + struct dc_context *dc_ctx = dc_dmub_srv->ctx; + union dmub_rb_cmd cmd; + enum dm_dmub_wait_type wait_type; + struct dmub_cmd_lsdma_data *lsdma_data = &cmd.lsdma.lsdma_data; + bool result; + + memset(&cmd, 0, sizeof(cmd)); + + cmd.cmd_common.header.type = DMUB_CMD__LSDMA; + cmd.cmd_common.header.sub_type = DMUB_CMD__LSDMA_PIO_CONSTFILL; + wait_type = DM_DMUB_WAIT_TYPE_NO_WAIT; + + lsdma_data->u.pio_constfill_data.packet.fields.constant_fill = 1; + lsdma_data->u.pio_constfill_data.packet.fields.byte_count = byte_count; + lsdma_data->u.pio_constfill_data.dst_lo = dst_addr & 0xFFFFFFFF; + lsdma_data->u.pio_constfill_data.dst_hi = (dst_addr >> 32) & 0xFFFFFFFF; + lsdma_data->u.pio_constfill_data.data = data; + + result = dc_wake_and_execute_dmub_cmd(dc_ctx, &cmd, wait_type); + + if (!result) + DC_ERROR("LSDMA PIO Constfill failed in DMUB"); + + return result; +} + +bool dmub_lsdma_send_poll_reg_write_command(struct dc_dmub_srv *dc_dmub_srv, uint32_t reg_addr, uint32_t reg_data) +{ + struct dc_context *dc_ctx = dc_dmub_srv->ctx; + union dmub_rb_cmd cmd; + enum dm_dmub_wait_type wait_type; + struct dmub_cmd_lsdma_data *lsdma_data = &cmd.lsdma.lsdma_data; + bool result; + + memset(&cmd, 0, sizeof(cmd)); + + cmd.cmd_common.header.type = DMUB_CMD__LSDMA; + cmd.cmd_common.header.sub_type = DMUB_CMD__LSDMA_POLL_REG_WRITE; + wait_type = DM_DMUB_WAIT_TYPE_NO_WAIT; + + lsdma_data->u.reg_write_data.reg_addr = reg_addr; + lsdma_data->u.reg_write_data.reg_data = reg_data; + + result = dc_wake_and_execute_dmub_cmd(dc_ctx, &cmd, wait_type); + + if (!result) + DC_ERROR("LSDMA Poll Reg failed in DMUB"); + + return result; +} + +void dc_dmub_srv_release_hw(const struct dc *dc) +{ + struct dc_dmub_srv *dc_dmub_srv = dc->ctx->dmub_srv; + union dmub_rb_cmd cmd = {0}; + + if (!dc_dmub_srv || !dc_dmub_srv->dmub) return; - for (i = 0; i < GPINT_RETRY_NUM; i++) { - // false could mean GPINT timeout, in which case we should retry - if (dc_wake_and_execute_gpint(dc_dmub_srv->ctx, DMUB_GPINT__GET_IPS_RESIDENCY_PERCENT, - 
(uint16_t)(output->ips_mode), &output->residency_percent, - DM_DMUB_WAIT_TYPE_WAIT_WITH_REPLY)) - break; - udelay(100); - } + memset(&cmd, 0, sizeof(cmd)); + cmd.idle_opt_notify_idle.header.type = DMUB_CMD__IDLE_OPT; + cmd.idle_opt_notify_idle.header.sub_type = DMUB_CMD__IDLE_OPT_RELEASE_HW; + cmd.idle_opt_notify_idle.header.payload_bytes = + sizeof(cmd.idle_opt_notify_idle) - + sizeof(cmd.idle_opt_notify_idle.header); - if (!dc_wake_and_execute_gpint(dc_dmub_srv->ctx, DMUB_GPINT__GET_IPS_RESIDENCY_ENTRY_COUNTER, - (uint16_t)(output->ips_mode), - &output->entry_counter, DM_DMUB_WAIT_TYPE_WAIT_WITH_REPLY)) - output->entry_counter = 0; - - if (!dc_wake_and_execute_gpint(dc_dmub_srv->ctx, DMUB_GPINT__GET_IPS_RESIDENCY_DURATION_US_LO, - (uint16_t)(output->ips_mode), - &output->total_active_time_us[0], DM_DMUB_WAIT_TYPE_WAIT_WITH_REPLY)) - output->total_active_time_us[0] = 0; - if (!dc_wake_and_execute_gpint(dc_dmub_srv->ctx, DMUB_GPINT__GET_IPS_RESIDENCY_DURATION_US_HI, - (uint16_t)(output->ips_mode), - &output->total_active_time_us[1], DM_DMUB_WAIT_TYPE_WAIT_WITH_REPLY)) - output->total_active_time_us[1] = 0; - - if (!dc_wake_and_execute_gpint(dc_dmub_srv->ctx, DMUB_GPINT__GET_IPS_INACTIVE_RESIDENCY_DURATION_US_LO, - (uint16_t)(output->ips_mode), - &output->total_inactive_time_us[0], DM_DMUB_WAIT_TYPE_WAIT_WITH_REPLY)) - output->total_inactive_time_us[0] = 0; - if (!dc_wake_and_execute_gpint(dc_dmub_srv->ctx, DMUB_GPINT__GET_IPS_INACTIVE_RESIDENCY_DURATION_US_HI, - (uint16_t)(output->ips_mode), - &output->total_inactive_time_us[1], DM_DMUB_WAIT_TYPE_WAIT_WITH_REPLY)) - output->total_inactive_time_us[1] = 0; - - // NUM_IPS_HISTOGRAM_BUCKETS = 16 - for (i = 0; i < 16; i++) - if (!dc_wake_and_execute_gpint(dc_dmub_srv->ctx, command_code, i, &output->histogram[i], - DM_DMUB_WAIT_TYPE_WAIT_WITH_REPLY)) - output->histogram[i] = 0; + dm_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT); } diff --git a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.h b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.h index ada5c2fb2db3..7ef93444ef3c 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.h +++ b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.h @@ -210,6 +210,94 @@ void dc_dmub_srv_fams2_passthrough_flip( struct dc_surface_update *srf_updates, int surface_count); +bool dmub_lsdma_init(struct dc_dmub_srv *dc_dmub_srv); +bool dmub_lsdma_send_linear_copy_command( + struct dc_dmub_srv *dc_dmub_srv, + uint64_t src_addr, + uint64_t dst_addr, + uint32_t count); + +struct lsdma_linear_sub_window_copy_params { + uint32_t src_lo; + uint32_t src_hi; + + uint32_t dst_lo; + uint32_t dst_hi; + + uint32_t src_x : 16; + uint32_t src_y : 16; + + uint32_t dst_x : 16; + uint32_t dst_y : 16; + + uint32_t rect_x : 16; + uint32_t rect_y : 16; + + uint32_t src_pitch : 16; + uint32_t dst_pitch : 16; + + uint32_t src_slice_pitch; + uint32_t dst_slice_pitch; + + uint32_t tmz : 1; + uint32_t element_size : 3; + uint32_t src_cache_policy : 3; + uint32_t dst_cache_policy : 3; + uint32_t padding : 22; +}; + +bool dmub_lsdma_send_linear_sub_window_copy_command( + struct dc_dmub_srv *dc_dmub_srv, + struct lsdma_linear_sub_window_copy_params copy_data +); +bool dmub_lsdma_send_pio_copy_command( + struct dc_dmub_srv *dc_dmub_srv, + uint64_t src_addr, + uint64_t dst_addr, + uint32_t byte_count, + uint32_t overlap_disable); +bool dmub_lsdma_send_pio_constfill_command( + struct dc_dmub_srv *dc_dmub_srv, + uint64_t dst_addr, + uint32_t byte_count, + uint32_t data); + +struct lsdma_send_tiled_to_tiled_copy_command_params { + uint64_t src_addr; + 
uint64_t dst_addr; + + uint32_t src_x : 16; + uint32_t src_y : 16; + + uint32_t dst_x : 16; + uint32_t dst_y : 16; + + uint32_t src_width : 16; + uint32_t dst_width : 16; + + uint32_t rect_x : 16; + uint32_t rect_y : 16; + + uint32_t src_height : 16; + uint32_t dst_height : 16; + + uint32_t data_format : 6; + uint32_t swizzle_mode : 5; + uint32_t element_size : 3; + uint32_t dcc : 1; + uint32_t tmz : 1; + uint32_t read_compress : 2; + uint32_t write_compress : 2; + uint32_t max_com : 2; + uint32_t max_uncom : 1; + uint32_t padding : 9; +}; + +bool dmub_lsdma_send_tiled_to_tiled_copy_command( + struct dc_dmub_srv *dc_dmub_srv, + struct lsdma_send_tiled_to_tiled_copy_command_params params); +bool dmub_lsdma_send_poll_reg_write_command(struct dc_dmub_srv *dc_dmub_srv, uint32_t reg_addr, uint32_t reg_data); + /** * struct ips_residency_info - struct containing info from dmub_ips_residency_stats * @@ -223,7 +311,7 @@ void dc_dmub_srv_fams2_passthrough_flip( * @histogram: Histogram of given IPS state durations - bucket definitions in dmub_ips.c */ struct ips_residency_info { - enum dmub_ips_mode ips_mode; + enum ips_residency_mode ips_mode; unsigned int residency_percent; unsigned int entry_counter; unsigned int total_active_time_us[2]; @@ -231,21 +319,16 @@ struct ips_residency_info { unsigned int histogram[16]; }; -/** - * bool dc_dmub_srv_ips_residency_cntl() - Controls IPS residency measurement status - * - * @dc_dmub_srv: The DC DMUB service pointer - * @start_measurement: Describes whether to start or stop measurement - * - * Return: true if GPINT was sent successfully, false otherwise - */ -bool dc_dmub_srv_ips_residency_cntl(struct dc_dmub_srv *dc_dmub_srv, bool start_measurement); +bool dc_dmub_srv_ips_residency_cntl(const struct dc_context *ctx, uint8_t panel_inst, bool start_measurement); + +bool dc_dmub_srv_ips_query_residency_info(const struct dc_context *ctx, uint8_t panel_inst, + struct dmub_ips_residency_info *driver_info, + enum ips_residency_mode ips_mode); /** - * bool dc_dmub_srv_ips_query_residency_info() - Queries DMCUB for residency info + * dc_dmub_srv_release_hw() - Notifies DMUB service that HW access is no longer required. * - * @dc_dmub_srv: The DC DMUB service pointer - * @output: Output struct to copy the the residency info to + * @dc - pointer to DC object */ -void dc_dmub_srv_ips_query_residency_info(struct dc_dmub_srv *dc_dmub_srv, struct ips_residency_info *output); +void dc_dmub_srv_release_hw(const struct dc *dc); #endif /* _DMUB_DC_SRV_H_ */ diff --git a/drivers/gpu/drm/amd/display/dc/dc_dp_types.h b/drivers/gpu/drm/amd/display/dc/dc_dp_types.h index 0bad8304ccf6..db669ccb1d58 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_dp_types.h +++ b/drivers/gpu/drm/amd/display/dc/dc_dp_types.h @@ -162,6 +162,11 @@ struct dc_link_settings { struct dc_tunnel_settings { bool should_enable_dp_tunneling; bool should_use_dp_bw_allocation; + uint8_t cm_id; + uint8_t group_id; + uint32_t bw_granularity; + uint32_t estimated_bw; + uint32_t allocated_bw; }; union dc_dp_ffe_preset { @@ -957,11 +962,21 @@ union usb4_driver_bw_cap { uint8_t raw; }; +/* DPCD[0xE0021] DP_IN_ADAPTER_TUNNEL_INFORMATION register. 
*/ +union dpia_tunnel_info { + struct { + uint8_t group_id :3; + uint8_t rsvd :5; + } bits; + uint8_t raw; +}; + /* DP Tunneling over USB4 */ struct dpcd_usb4_dp_tunneling_info { union dp_tun_cap_support dp_tun_cap; union dpia_info dpia_info; union usb4_driver_bw_cap driver_bw_cap; + union dpia_tunnel_info dpia_tunnel_info; uint8_t usb4_driver_id; uint8_t usb4_topology_id[DPCD_USB4_TOPOLOGY_ID_LEN]; }; @@ -1006,7 +1021,8 @@ union dp_128b_132b_supported_lttpr_link_rates { union dp_alpm_lttpr_cap { struct { uint8_t AUX_LESS_ALPM_SUPPORTED :1; - uint8_t RESERVED :7; + uint8_t ASSR_SUPPORTED :1; + uint8_t RESERVED :6; } bits; uint8_t raw; }; @@ -1104,10 +1120,11 @@ union dp_128b_132b_training_aux_rd_interval { union edp_alpm_caps { struct { - uint8_t AUX_WAKE_ALPM_CAP :1; - uint8_t PM_STATE_2A_SUPPORT :1; - uint8_t AUX_LESS_ALPM_CAP :1; - uint8_t RESERVED :5; + uint8_t AUX_WAKE_ALPM_CAP :1; + uint8_t PM_STATE_2A_SUPPORT :1; + uint8_t AUX_LESS_ALPM_CAP :1; + uint8_t AUX_LESS_ALPM_ML_PHY_SLEEP_STATUS_SUPPORTED :1; + uint8_t RESERVED :4; } bits; uint8_t raw; }; @@ -1172,8 +1189,8 @@ struct dc_lttpr_caps { union dp_128b_132b_supported_lttpr_link_rates supported_128b_132b_rates; union dp_alpm_lttpr_cap alpm; uint8_t aux_rd_interval[MAX_REPEATER_CNT - 1]; - uint8_t lttpr_ieee_oui[3]; - uint8_t lttpr_device_id[6]; + uint8_t lttpr_ieee_oui[3]; // Always read from closest LTTPR to host + uint8_t lttpr_device_id[6]; // Always read from closest LTTPR to host }; struct dc_dongle_dfp_cap_ext { @@ -1267,6 +1284,7 @@ struct dpcd_caps { union dp_receive_port0_cap receive_port0_cap; /* Indicates the number of SST links supported by MSO (Multi-Stream Output) */ uint8_t mso_cap_sst_links_supported; + uint8_t dp_edp_general_cap_2; }; union dpcd_sink_ext_caps { @@ -1332,7 +1350,9 @@ union dpcd_alpm_configuration { struct { unsigned char ENABLE : 1; unsigned char IRQ_HPD_ENABLE : 1; - unsigned char RESERVED : 6; + unsigned char ALPM_MODE_SEL : 1; + unsigned char ACDS_PERIOD_DURATION : 1; + unsigned char RESERVED : 4; } bits; unsigned char raw; }; diff --git a/drivers/gpu/drm/amd/display/dc/dc_helper.c b/drivers/gpu/drm/amd/display/dc/dc_helper.c index 7217de258851..5a365bd19933 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_helper.c +++ b/drivers/gpu/drm/amd/display/dc/dc_helper.c @@ -732,7 +732,7 @@ char *dce_version_to_string(const int version) case DCN_VERSION_3_03: return "DCN 3.0.3"; case DCN_VERSION_3_1: - return "DCN 3.1"; + return "DCN 3.1.2"; case DCN_VERSION_3_14: return "DCN 3.1.4"; case DCN_VERSION_3_15: @@ -755,3 +755,8 @@ char *dce_version_to_string(const int version) return "Unknown"; } } + +bool dc_supports_vrr(const enum dce_version v) +{ + return v >= DCE_VERSION_8_0; +} diff --git a/drivers/gpu/drm/amd/display/dc/dc_hw_types.h b/drivers/gpu/drm/amd/display/dc/dc_hw_types.h index d562ddeca512..667852517246 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_hw_types.h +++ b/drivers/gpu/drm/amd/display/dc/dc_hw_types.h @@ -68,7 +68,7 @@ enum dc_plane_addr_type { struct dc_plane_address { enum dc_plane_addr_type type; - bool tmz_surface; + uint8_t tmz_surface; union { struct{ PHYSICAL_ADDRESS_LOC addr; @@ -974,6 +974,7 @@ struct dc_crtc_timing { uint32_t pix_clk_100hz; uint32_t min_refresh_in_uhz; + uint32_t max_refresh_in_uhz; uint32_t vic; uint32_t hdmi_vic; @@ -1103,7 +1104,8 @@ enum mpcc_gamut_remap_mode_select { enum mpcc_gamut_remap_id { MPCC_OGAM_GAMUT_REMAP, MPCC_MCM_FIRST_GAMUT_REMAP, - MPCC_MCM_SECOND_GAMUT_REMAP + MPCC_MCM_SECOND_GAMUT_REMAP, + MPCC_RMCM_GAMUT_REMAP, }; enum cursor_matrix_mode 
{ diff --git a/drivers/gpu/drm/amd/display/dc/dc_spl_translate.c b/drivers/gpu/drm/amd/display/dc/dc_spl_translate.c index e3a8283b4098..55704d4457ef 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_spl_translate.c +++ b/drivers/gpu/drm/amd/display/dc/dc_spl_translate.c @@ -128,7 +128,7 @@ void translate_SPL_in_params_from_pipe_ctx(struct pipe_ctx *pipe_ctx, struct spl spl_in->odm_slice_index = resource_get_odm_slice_index(pipe_ctx); // Make spl input basic out info output_size width point to stream h active spl_in->basic_out.output_size.width = - stream->timing.h_addressable + stream->timing.h_border_left + stream->timing.h_border_right + pipe_ctx->hblank_borrow; + stream->timing.h_addressable + stream->timing.h_border_left + stream->timing.h_border_right + pipe_ctx->dsc_padding_params.dsc_hactive_padding; // Make spl input basic out info output_size height point to v active spl_in->basic_out.output_size.height = stream->timing.v_addressable + stream->timing.v_border_bottom + stream->timing.v_border_top; @@ -156,15 +156,16 @@ void translate_SPL_in_params_from_pipe_ctx(struct pipe_ctx *pipe_ctx, struct spl spl_in->adaptive_sharpness.enable = true; spl_in->adaptive_sharpness.sharpness_level = 0; } else if (sharpness_setting == SHARPNESS_CUSTOM) { - spl_in->adaptive_sharpness.sharpness_range.sdr_rgb_min = 0; - spl_in->adaptive_sharpness.sharpness_range.sdr_rgb_max = 1750; - spl_in->adaptive_sharpness.sharpness_range.sdr_rgb_mid = 750; - spl_in->adaptive_sharpness.sharpness_range.sdr_yuv_min = 0; - spl_in->adaptive_sharpness.sharpness_range.sdr_yuv_max = 3500; - spl_in->adaptive_sharpness.sharpness_range.sdr_yuv_mid = 1500; - spl_in->adaptive_sharpness.sharpness_range.hdr_rgb_min = 0; - spl_in->adaptive_sharpness.sharpness_range.hdr_rgb_max = 2750; - spl_in->adaptive_sharpness.sharpness_range.hdr_rgb_mid = 1500; + /* SAT: read harpness_range from dc_plane_state */ + spl_in->adaptive_sharpness.sharpness_range.sdr_rgb_min = plane_state->sharpness_range.sdr_rgb_min; + spl_in->adaptive_sharpness.sharpness_range.sdr_rgb_max = plane_state->sharpness_range.sdr_rgb_max; + spl_in->adaptive_sharpness.sharpness_range.sdr_rgb_mid = plane_state->sharpness_range.sdr_rgb_mid; + spl_in->adaptive_sharpness.sharpness_range.sdr_yuv_min = plane_state->sharpness_range.sdr_yuv_min; + spl_in->adaptive_sharpness.sharpness_range.sdr_yuv_max = plane_state->sharpness_range.sdr_yuv_max; + spl_in->adaptive_sharpness.sharpness_range.sdr_yuv_mid = plane_state->sharpness_range.sdr_yuv_mid; + spl_in->adaptive_sharpness.sharpness_range.hdr_rgb_min = plane_state->sharpness_range.hdr_rgb_min; + spl_in->adaptive_sharpness.sharpness_range.hdr_rgb_max = plane_state->sharpness_range.hdr_rgb_max; + spl_in->adaptive_sharpness.sharpness_range.hdr_rgb_mid = plane_state->sharpness_range.hdr_rgb_mid; if (force_sharpness_level > 0) { if (force_sharpness_level > 10) diff --git a/drivers/gpu/drm/amd/display/dc/dc_stream.h b/drivers/gpu/drm/amd/display/dc/dc_stream.h index 341d2ffb64b1..76cf9fdedab0 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_stream.h +++ b/drivers/gpu/drm/amd/display/dc/dc_stream.h @@ -203,6 +203,7 @@ struct dc_stream_state { struct dc_info_packet hfvsif_infopacket; struct dc_info_packet vtem_infopacket; struct dc_info_packet adaptive_sync_infopacket; + struct dc_info_packet avi_infopacket; uint8_t dsc_packed_pps[128]; struct rect src; /* composition area */ struct rect dst; /* stream addressable area */ @@ -335,6 +336,8 @@ struct dc_stream_update { struct dc_info_packet *hfvsif_infopacket; struct dc_info_packet 
*vtem_infopacket; struct dc_info_packet *adaptive_sync_infopacket; + struct dc_info_packet *avi_infopacket; + bool *dpms_off; bool integer_scaling_update; bool *allow_freesync; @@ -579,6 +582,17 @@ bool dc_stream_set_gamut_remap(struct dc *dc, bool dc_stream_program_csc_matrix(struct dc *dc, struct dc_stream_state *stream); +struct dc_rmcm_3dlut *dc_stream_get_3dlut_for_stream( + const struct dc *dc, + const struct dc_stream_state *stream, + bool allocate_one); + +void dc_stream_release_3dlut_for_stream( + const struct dc *dc, + const struct dc_stream_state *stream); + +void dc_stream_init_rmcm_3dlut(struct dc *dc); + struct pipe_ctx *dc_stream_get_pipe_ctx(struct dc_stream_state *stream); void dc_dmub_update_dirty_rect(struct dc *dc, diff --git a/drivers/gpu/drm/amd/display/dc/dc_types.h b/drivers/gpu/drm/amd/display/dc/dc_types.h index a4cd0eb39a3a..b5aa03a3e39c 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_types.h +++ b/drivers/gpu/drm/amd/display/dc/dc_types.h @@ -175,6 +175,7 @@ struct dc_panel_patch { unsigned int embedded_tiled_slave; unsigned int disable_fams; unsigned int skip_avmute; + unsigned int skip_audio_sab_check; unsigned int mst_start_top_delay; unsigned int remove_sink_ext_caps; unsigned int disable_colorimetry; @@ -263,6 +264,7 @@ enum dc_timing_source { TIMING_SOURCE_EDID_4BYTE, TIMING_SOURCE_EDID_CEA_DISPLAYID_VTDB, TIMING_SOURCE_EDID_CEA_RID, + TIMING_SOURCE_EDID_DISPLAYID_TYPE5, TIMING_SOURCE_VBIOS, TIMING_SOURCE_CV, TIMING_SOURCE_TV, @@ -561,6 +563,12 @@ struct dc_info_packet_128 { uint8_t sb[128]; }; +struct dc_edid_read_policy { + uint32_t max_retry_count; + uint32_t delay_time_ms; + uint32_t ignore_checksum; +}; + #define DC_PLANE_UPDATE_TIMES_MAX 10 struct dc_plane_flip_time { @@ -569,6 +577,12 @@ struct dc_plane_flip_time { unsigned int prev_update_time_in_us; }; +enum dc_alpm_mode { + DC_ALPM_AUXWAKE = 0, + DC_ALPM_AUXLESS = 1, + DC_ALPM_UNSUPPORTED = 0xF, +}; + enum dc_psr_state { PSR_STATE0 = 0x0, PSR_STATE1, @@ -614,6 +628,7 @@ struct psr_config { unsigned int line_time_in_us; uint8_t rate_control_caps; uint16_t dsc_slice_height; + bool os_request_force_ffu; }; union dmcu_psr_level { @@ -726,6 +741,7 @@ struct psr_context { unsigned int line_time_in_us; uint8_t rate_control_caps; uint16_t dsc_slice_height; + bool os_request_force_ffu; }; struct colorspace_transform { @@ -1135,6 +1151,10 @@ struct replay_config { bool low_rr_supported; /* Replay Video Conferencing Optimization Enabled */ bool replay_video_conferencing_optimization_enabled; + /* Replay alpm mode */ + enum dc_alpm_mode alpm_mode; + /* Replay full screen only */ + bool os_request_force_ffu; }; /* Replay feature flags*/ @@ -1197,6 +1217,7 @@ struct dc_panel_config { bool rc_disable; bool rc_allow_static_screen; bool rc_allow_fullscreen_VPB; + bool read_psrcap_again; unsigned int replay_enable_option; } psr; /* ABM */ @@ -1255,7 +1276,6 @@ enum dc_cm2_gpu_mem_layout { enum dc_cm2_gpu_mem_pixel_component_order { DC_CM2_GPU_MEM_PIXEL_COMPONENT_ORDER_RGBA, - DC_CM2_GPU_MEM_PIXEL_COMPONENT_ORDER_BGRA }; enum dc_cm2_gpu_mem_format { @@ -1277,7 +1297,6 @@ struct dc_cm2_gpu_mem_format_parameters { enum dc_cm2_gpu_mem_size { DC_CM2_GPU_MEM_SIZE_171717, - DC_CM2_GPU_MEM_SIZE_333333, DC_CM2_GPU_MEM_SIZE_TRANSFORMED, }; @@ -1315,6 +1334,7 @@ struct dc_cm2_func_luts { bool mpc_3dlut_enable; bool rmcm_3dlut_enable; bool mpc_mcm_post_blend; + uint8_t rmcm_tmz; } lut3d_data; const struct dc_transfer_func *lut1d_func; }; @@ -1372,4 +1392,19 @@ struct set_backlight_level_params { uint8_t aux_inst; }; +enum 
dc_validate_mode { + /* validate the mode and program HW */ + DC_VALIDATE_MODE_AND_PROGRAMMING = 0, + /* only validate the mode */ + DC_VALIDATE_MODE_ONLY = 1, + /* validate the mode and get the max state (voltage level) */ + DC_VALIDATE_MODE_AND_STATE_INDEX = 2, +}; + +struct dc_validation_dpia_set { + const struct dc_link *link; + const struct dc_tunnel_settings *tunnel_settings; + uint32_t required_bw; +}; + #endif /* DC_TYPES_H_ */ diff --git a/drivers/gpu/drm/amd/display/dc/dccg/dcn35/dcn35_dccg.c b/drivers/gpu/drm/amd/display/dc/dccg/dcn35/dcn35_dccg.c index 58c84f555c0f..de6d62401362 100644 --- a/drivers/gpu/drm/amd/display/dc/dccg/dcn35/dcn35_dccg.c +++ b/drivers/gpu/drm/amd/display/dc/dccg/dcn35/dcn35_dccg.c @@ -39,6 +39,7 @@ #define CTX \ dccg_dcn->base.ctx +#include "logger_types.h" #define DC_LOGGER \ dccg->ctx->logger @@ -133,30 +134,34 @@ enum dsc_clk_source { }; -static void dccg35_set_dsc_clk_rcg(struct dccg *dccg, int inst, bool enable) +static void dccg35_set_dsc_clk_rcg(struct dccg *dccg, int inst, bool allow_rcg) { struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg); - if (!dccg->ctx->dc->debug.root_clock_optimization.bits.dsc && enable) + if (!dccg->ctx->dc->debug.root_clock_optimization.bits.dsc && allow_rcg) return; switch (inst) { case 0: - REG_UPDATE(DCCG_GATE_DISABLE_CNTL6, DSCCLK0_ROOT_GATE_DISABLE, enable ? 0 : 1); + REG_UPDATE(DCCG_GATE_DISABLE_CNTL6, DSCCLK0_ROOT_GATE_DISABLE, allow_rcg ? 0 : 1); break; case 1: - REG_UPDATE(DCCG_GATE_DISABLE_CNTL6, DSCCLK1_ROOT_GATE_DISABLE, enable ? 0 : 1); + REG_UPDATE(DCCG_GATE_DISABLE_CNTL6, DSCCLK1_ROOT_GATE_DISABLE, allow_rcg ? 0 : 1); break; case 2: - REG_UPDATE(DCCG_GATE_DISABLE_CNTL6, DSCCLK2_ROOT_GATE_DISABLE, enable ? 0 : 1); + REG_UPDATE(DCCG_GATE_DISABLE_CNTL6, DSCCLK2_ROOT_GATE_DISABLE, allow_rcg ? 0 : 1); break; case 3: - REG_UPDATE(DCCG_GATE_DISABLE_CNTL6, DSCCLK3_ROOT_GATE_DISABLE, enable ? 0 : 1); + REG_UPDATE(DCCG_GATE_DISABLE_CNTL6, DSCCLK3_ROOT_GATE_DISABLE, allow_rcg ? 0 : 1); break; default: BREAK_TO_DEBUGGER(); return; } + + /* Wait for clock to ramp */ + if (!allow_rcg) + udelay(10); } static void dccg35_set_symclk32_se_rcg( @@ -385,35 +390,34 @@ static void dccg35_set_dtbclk_p_rcg(struct dccg *dccg, int inst, bool enable) } } -static void dccg35_set_dppclk_rcg(struct dccg *dccg, - int inst, bool enable) +static void dccg35_set_dppclk_rcg(struct dccg *dccg, int inst, bool allow_rcg) { - struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg); - - if (!dccg->ctx->dc->debug.root_clock_optimization.bits.dpp && enable) + if (!dccg->ctx->dc->debug.root_clock_optimization.bits.dpp && allow_rcg) return; switch (inst) { case 0: - REG_UPDATE(DCCG_GATE_DISABLE_CNTL6, DPPCLK0_ROOT_GATE_DISABLE, enable ? 0 : 1); + REG_UPDATE(DCCG_GATE_DISABLE_CNTL6, DPPCLK0_ROOT_GATE_DISABLE, allow_rcg ? 0 : 1); break; case 1: - REG_UPDATE(DCCG_GATE_DISABLE_CNTL6, DPPCLK1_ROOT_GATE_DISABLE, enable ? 0 : 1); + REG_UPDATE(DCCG_GATE_DISABLE_CNTL6, DPPCLK1_ROOT_GATE_DISABLE, allow_rcg ? 0 : 1); break; case 2: - REG_UPDATE(DCCG_GATE_DISABLE_CNTL6, DPPCLK2_ROOT_GATE_DISABLE, enable ? 0 : 1); + REG_UPDATE(DCCG_GATE_DISABLE_CNTL6, DPPCLK2_ROOT_GATE_DISABLE, allow_rcg ? 0 : 1); break; case 3: - REG_UPDATE(DCCG_GATE_DISABLE_CNTL6, DPPCLK3_ROOT_GATE_DISABLE, enable ? 0 : 1); + REG_UPDATE(DCCG_GATE_DISABLE_CNTL6, DPPCLK3_ROOT_GATE_DISABLE, allow_rcg ? 0 : 1); break; default: BREAK_TO_DEBUGGER(); break; } - //DC_LOG_DEBUG("%s: inst(%d) DPPCLK rcg_disable: %d\n", __func__, inst, enable ? 
0 : 1); + /* Wait for clock to ramp */ + if (!allow_rcg) + udelay(10); } static void dccg35_set_dpstreamclk_rcg( @@ -1133,7 +1137,7 @@ static void dcn35_set_dppclk_enable(struct dccg *dccg, default: break; } - //DC_LOG_DEBUG("%s: dpp_inst(%d) DPPCLK_EN = %d\n", __func__, dpp_inst, enable); + DC_LOG_DEBUG("%s: dpp_inst(%d) DPPCLK_EN = %d\n", __func__, dpp_inst, enable); } @@ -1177,32 +1181,34 @@ static void dccg35_update_dpp_dto(struct dccg *dccg, int dpp_inst, } static void dccg35_set_dppclk_root_clock_gating(struct dccg *dccg, - uint32_t dpp_inst, uint32_t enable) + uint32_t dpp_inst, uint32_t disallow_rcg) { struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg); - if (!dccg->ctx->dc->debug.root_clock_optimization.bits.dpp) + if (!dccg->ctx->dc->debug.root_clock_optimization.bits.dpp && !disallow_rcg) return; switch (dpp_inst) { case 0: - REG_UPDATE(DCCG_GATE_DISABLE_CNTL6, DPPCLK0_ROOT_GATE_DISABLE, enable); + REG_UPDATE(DCCG_GATE_DISABLE_CNTL6, DPPCLK0_ROOT_GATE_DISABLE, disallow_rcg); break; case 1: - REG_UPDATE(DCCG_GATE_DISABLE_CNTL6, DPPCLK1_ROOT_GATE_DISABLE, enable); + REG_UPDATE(DCCG_GATE_DISABLE_CNTL6, DPPCLK1_ROOT_GATE_DISABLE, disallow_rcg); break; case 2: - REG_UPDATE(DCCG_GATE_DISABLE_CNTL6, DPPCLK2_ROOT_GATE_DISABLE, enable); + REG_UPDATE(DCCG_GATE_DISABLE_CNTL6, DPPCLK2_ROOT_GATE_DISABLE, disallow_rcg); break; case 3: - REG_UPDATE(DCCG_GATE_DISABLE_CNTL6, DPPCLK3_ROOT_GATE_DISABLE, enable); + REG_UPDATE(DCCG_GATE_DISABLE_CNTL6, DPPCLK3_ROOT_GATE_DISABLE, disallow_rcg); break; default: break; } - //DC_LOG_DEBUG("%s: dpp_inst(%d) rcg: %d\n", __func__, dpp_inst, enable); + /* Wait for clock to ramp */ + if (disallow_rcg) + udelay(10); } static void dccg35_get_pixel_rate_div( @@ -1401,6 +1407,10 @@ static void dccg35_set_dtbclk_dto( * PIPEx_DTO_SRC_SEL should not be programmed during DTBCLK update since OTG may still be on, and the * programming is handled in program_pix_clk() regardless, so it can be removed from here. */ + DC_LOG_DEBUG("%s: OTG%d DTBCLK DTO enabled: pixclk_khz=%d, ref_dtbclk_khz=%d, req_dtbclk_khz=%d, phase=%d, modulo=%d\n", + __func__, params->otg_inst, params->pixclk_khz, + params->ref_dtbclk_khz, req_dtbclk_khz, phase, modulo); + } else { switch (params->otg_inst) { case 0: @@ -1426,6 +1436,8 @@ static void dccg35_set_dtbclk_dto( REG_WRITE(DTBCLK_DTO_MODULO[params->otg_inst], 0); REG_WRITE(DTBCLK_DTO_PHASE[params->otg_inst], 0); + + DC_LOG_DEBUG("%s: OTG%d DTBCLK DTO disabled\n", __func__, params->otg_inst); } } @@ -1470,6 +1482,8 @@ static void dccg35_set_dpstreamclk( BREAK_TO_DEBUGGER(); return; } + DC_LOG_DEBUG("%s: dp_hpo_inst(%d) DPSTREAMCLK_EN = %d, DPSTREAMCLK_SRC_SEL = %d\n", + __func__, dp_hpo_inst, (src == REFCLK) ? 0 : 1, otg_inst); } @@ -1509,6 +1523,8 @@ static void dccg35_set_dpstreamclk_root_clock_gating( BREAK_TO_DEBUGGER(); return; } + DC_LOG_DEBUG("%s: dp_hpo_inst(%d) DPSTREAMCLK_ROOT_GATE_DISABLE = %d\n", + __func__, dp_hpo_inst, enable ? 1 : 0); } @@ -1548,7 +1564,7 @@ static void dccg35_set_physymclk_root_clock_gating( BREAK_TO_DEBUGGER(); return; } - //DC_LOG_DEBUG("%s: dpp_inst(%d) PHYESYMCLK_ROOT_GATE_DISABLE:\n", __func__, phy_inst, enable ? 0 : 1); + DC_LOG_DEBUG("%s: dpp_inst(%d) PHYESYMCLK_ROOT_GATE_DISABLE: %d\n", __func__, phy_inst, enable ? 0 : 1); } @@ -1621,6 +1637,8 @@ static void dccg35_set_physymclk( BREAK_TO_DEBUGGER(); return; } + DC_LOG_DEBUG("%s: phy_inst(%d) PHYxSYMCLK_EN = %d, PHYxSYMCLK_SRC_SEL = %d\n", + __func__, phy_inst, force_enable ? 
1 : 0, clk_src); } static void dccg35_set_valid_pixel_rate( @@ -1668,6 +1686,7 @@ static void dccg35_dpp_root_clock_control( } dccg->dpp_clock_gated[dpp_inst] = !clock_on; + DC_LOG_DEBUG("%s: dpp_inst(%d) clock_on = %d\n", __func__, dpp_inst, clock_on); } static void dccg35_disable_symclk32_se( @@ -1726,6 +1745,7 @@ static void dccg35_disable_symclk32_se( BREAK_TO_DEBUGGER(); return; } + } static void dccg35_init_cb(struct dccg *dccg) @@ -1733,7 +1753,6 @@ static void dccg35_init_cb(struct dccg *dccg) (void)dccg; /* Any RCG should be done when driver enter low power mode*/ } - void dccg35_init(struct dccg *dccg) { int otg_inst; @@ -1748,6 +1767,8 @@ void dccg35_init(struct dccg *dccg) for (otg_inst = 0; otg_inst < 2; otg_inst++) { dccg31_disable_symclk32_le(dccg, otg_inst); dccg31_set_symclk32_le_root_clock_gating(dccg, otg_inst, false); + DC_LOG_DEBUG("%s: OTG%d SYMCLK32_LE disabled and root clock gating disabled\n", + __func__, otg_inst); } // if (dccg->ctx->dc->debug.root_clock_optimization.bits.symclk32_se) @@ -1760,6 +1781,8 @@ void dccg35_init(struct dccg *dccg) dccg35_set_dpstreamclk(dccg, REFCLK, otg_inst, otg_inst); dccg35_set_dpstreamclk_root_clock_gating(dccg, otg_inst, false); + DC_LOG_DEBUG("%s: OTG%d DPSTREAMCLK disabled and root clock gating disabled\n", + __func__, otg_inst); } /* @@ -1782,8 +1805,7 @@ static void dccg35_enable_dscclk(struct dccg *dccg, int inst) //Disable DTO switch (inst) { case 0: - if (dccg->ctx->dc->debug.root_clock_optimization.bits.dsc) - REG_UPDATE(DCCG_GATE_DISABLE_CNTL6, DSCCLK0_ROOT_GATE_DISABLE, 1); + REG_UPDATE(DCCG_GATE_DISABLE_CNTL6, DSCCLK0_ROOT_GATE_DISABLE, 1); REG_UPDATE_2(DSCCLK0_DTO_PARAM, DSCCLK0_DTO_PHASE, 0, @@ -1791,8 +1813,7 @@ static void dccg35_enable_dscclk(struct dccg *dccg, int inst) REG_UPDATE(DSCCLK_DTO_CTRL, DSCCLK0_EN, 1); break; case 1: - if (dccg->ctx->dc->debug.root_clock_optimization.bits.dsc) - REG_UPDATE(DCCG_GATE_DISABLE_CNTL6, DSCCLK1_ROOT_GATE_DISABLE, 1); + REG_UPDATE(DCCG_GATE_DISABLE_CNTL6, DSCCLK1_ROOT_GATE_DISABLE, 1); REG_UPDATE_2(DSCCLK1_DTO_PARAM, DSCCLK1_DTO_PHASE, 0, @@ -1800,8 +1821,7 @@ static void dccg35_enable_dscclk(struct dccg *dccg, int inst) REG_UPDATE(DSCCLK_DTO_CTRL, DSCCLK1_EN, 1); break; case 2: - if (dccg->ctx->dc->debug.root_clock_optimization.bits.dsc) - REG_UPDATE(DCCG_GATE_DISABLE_CNTL6, DSCCLK2_ROOT_GATE_DISABLE, 1); + REG_UPDATE(DCCG_GATE_DISABLE_CNTL6, DSCCLK2_ROOT_GATE_DISABLE, 1); REG_UPDATE_2(DSCCLK2_DTO_PARAM, DSCCLK2_DTO_PHASE, 0, @@ -1809,8 +1829,7 @@ static void dccg35_enable_dscclk(struct dccg *dccg, int inst) REG_UPDATE(DSCCLK_DTO_CTRL, DSCCLK2_EN, 1); break; case 3: - if (dccg->ctx->dc->debug.root_clock_optimization.bits.dsc) - REG_UPDATE(DCCG_GATE_DISABLE_CNTL6, DSCCLK3_ROOT_GATE_DISABLE, 1); + REG_UPDATE(DCCG_GATE_DISABLE_CNTL6, DSCCLK3_ROOT_GATE_DISABLE, 1); REG_UPDATE_2(DSCCLK3_DTO_PARAM, DSCCLK3_DTO_PHASE, 0, @@ -1821,6 +1840,9 @@ static void dccg35_enable_dscclk(struct dccg *dccg, int inst) BREAK_TO_DEBUGGER(); return; } + + /* Wait for clock to ramp */ + udelay(10); } static void dccg35_disable_dscclk(struct dccg *dccg, @@ -1864,6 +1886,9 @@ static void dccg35_disable_dscclk(struct dccg *dccg, default: return; } + + /* Wait for clock ramp */ + udelay(10); } static void dccg35_enable_symclk_se(struct dccg *dccg, uint32_t stream_enc_inst, uint32_t link_enc_inst) @@ -2349,10 +2374,7 @@ static void dccg35_disable_symclk_se_cb( void dccg35_root_gate_disable_control(struct dccg *dccg, uint32_t pipe_idx, uint32_t disable_clock_gating) { - - if 
(dccg->ctx->dc->debug.root_clock_optimization.bits.dpp) { - dccg35_set_dppclk_root_clock_gating(dccg, pipe_idx, disable_clock_gating); - } + dccg35_set_dppclk_root_clock_gating(dccg, pipe_idx, disable_clock_gating); } static const struct dccg_funcs dccg35_funcs_new = { diff --git a/drivers/gpu/drm/amd/display/dc/dccg/dcn401/dcn401_dccg.c b/drivers/gpu/drm/amd/display/dc/dccg/dcn401/dcn401_dccg.c index ffd172231fdf..0b8ed9b94d3c 100644 --- a/drivers/gpu/drm/amd/display/dc/dccg/dcn401/dcn401_dccg.c +++ b/drivers/gpu/drm/amd/display/dc/dccg/dcn401/dcn401_dccg.c @@ -619,7 +619,7 @@ void dccg401_set_dp_dto( dto_integer = div_u64(params->pixclk_hz, dto_modulo_hz); dto_phase_hz = params->pixclk_hz - dto_integer * dto_modulo_hz; - if (dto_phase_hz <= 0) { + if (dto_phase_hz <= 0 && dto_integer <= 0) { /* negative pixel rate should never happen */ BREAK_TO_DEBUGGER(); return; @@ -727,7 +727,7 @@ void dccg401_init(struct dccg *dccg) } } -void dccg401_set_dto_dscclk(struct dccg *dccg, uint32_t inst) +void dccg401_set_dto_dscclk(struct dccg *dccg, uint32_t inst, uint32_t num_slices_h) { struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg); diff --git a/drivers/gpu/drm/amd/display/dc/dccg/dcn401/dcn401_dccg.h b/drivers/gpu/drm/amd/display/dc/dccg/dcn401/dcn401_dccg.h index 55e8718aad22..5947a35363aa 100644 --- a/drivers/gpu/drm/amd/display/dc/dccg/dcn401/dcn401_dccg.h +++ b/drivers/gpu/drm/amd/display/dc/dccg/dcn401/dcn401_dccg.h @@ -209,7 +209,7 @@ void dccg401_disable_symclk32_le( struct dccg *dccg, int hpo_le_inst); void dccg401_disable_dpstreamclk(struct dccg *dccg, int dp_hpo_inst); -void dccg401_set_dto_dscclk(struct dccg *dccg, uint32_t inst); +void dccg401_set_dto_dscclk(struct dccg *dccg, uint32_t inst, uint32_t num_slices_h); void dccg401_set_ref_dscclk(struct dccg *dccg, uint32_t dsc_inst); void dccg401_set_src_sel( @@ -230,7 +230,6 @@ void dccg401_set_dp_dto( const struct dp_dto_params *params); void dccg401_enable_symclk_se(struct dccg *dccg, uint32_t stream_enc_inst, uint32_t link_enc_inst); void dccg401_disable_symclk_se(struct dccg *dccg, uint32_t stream_enc_inst, uint32_t link_enc_inst); -void dccg401_set_dto_dscclk(struct dccg *dccg, uint32_t inst); void dccg401_set_dtbclk_p_src( struct dccg *dccg, enum streamclk_source src, diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_aux.c b/drivers/gpu/drm/amd/display/dc/dce/dce_aux.c index bb4ac5042c80..673bb87d2c17 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dce_aux.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dce_aux.c @@ -725,14 +725,18 @@ bool dce_aux_transfer_with_retries(struct ddc_service *ddc, for (i = 0; i < AUX_MAX_RETRIES; i++) { DC_TRACE_LEVEL_MESSAGE(DAL_TRACE_LEVEL_INFORMATION, LOG_FLAG_I2cAux_DceAux, - "dce_aux_transfer_with_retries: link_index=%u: START: retry %d of %d: address=0x%04x length=%u write=%d mot=%d", + "dce_aux_transfer_with_retries: link_index=%u: START: retry %d of %d: " + "address=0x%04x length=%u write=%d mot=%d is_i2c=%d is_dpia=%d ddc_hw_inst=%d", ddc && ddc->link ? ddc->link->link_index : UINT_MAX, i + 1, (int)AUX_MAX_RETRIES, payload->address, payload->length, (unsigned int) payload->write, - (unsigned int) payload->mot); + (unsigned int) payload->mot, + payload->i2c_over_aux, + (ddc->link->ep_type == DISPLAY_ENDPOINT_USB4_DPIA) ? 
true : false, + ddc->link->ddc_hw_inst); if (payload->write) dce_aux_log_payload(" write", payload->data, payload->length, 16); @@ -746,7 +750,9 @@ bool dce_aux_transfer_with_retries(struct ddc_service *ddc, DC_TRACE_LEVEL_MESSAGE(DAL_TRACE_LEVEL_INFORMATION, LOG_FLAG_I2cAux_DceAux, - "dce_aux_transfer_with_retries: link_index=%u: END: retry %d of %d: address=0x%04x length=%u write=%d mot=%d: ret=%d operation_result=%d payload->reply=%u", + "dce_aux_transfer_with_retries: link_index=%u: END: retry %d of %d: " + "address=0x%04x length=%u write=%d mot=%d: ret=%d operation_result=%d " + "payload->reply=%u is_i2c=%d is_dpia=%d ddc_hw_inst=%d", ddc && ddc->link ? ddc->link->link_index : UINT_MAX, i + 1, (int)AUX_MAX_RETRIES, @@ -756,7 +762,10 @@ bool dce_aux_transfer_with_retries(struct ddc_service *ddc, (unsigned int) payload->mot, ret, (int)operation_result, - (unsigned int) *payload->reply); + (unsigned int) *payload->reply, + payload->i2c_over_aux, + (ddc->link->ep_type == DISPLAY_ENDPOINT_USB4_DPIA) ? true : false, + ddc->link->ddc_hw_inst); if (!payload->write) dce_aux_log_payload(" read", payload->data, ret > 0 ? ret : 0, 16); diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_i2c_hw.c b/drivers/gpu/drm/amd/display/dc/dce/dce_i2c_hw.c index d28826c3ae5f..365dd2e37aea 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dce_i2c_hw.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dce_i2c_hw.c @@ -292,9 +292,35 @@ static void set_speed( FN(DC_I2C_DDC1_SPEED, DC_I2C_DDC1_THRESHOLD), 2); } +static bool acquire_engine(struct dce_i2c_hw *dce_i2c_hw) +{ + uint32_t arbitrate = 0; + + REG_GET(DC_I2C_ARBITRATION, DC_I2C_REG_RW_CNTL_STATUS, &arbitrate); + switch (arbitrate) { + case DC_I2C_STATUS__DC_I2C_STATUS_USED_BY_SW: + return true; + case DC_I2C_STATUS__DC_I2C_STATUS_USED_BY_HW: + return false; + case DC_I2C_STATUS__DC_I2C_STATUS_IDLE: + default: + break; + } + + REG_UPDATE(DC_I2C_ARBITRATION, DC_I2C_SW_USE_I2C_REG_REQ, true); + REG_GET(DC_I2C_ARBITRATION, DC_I2C_REG_RW_CNTL_STATUS, &arbitrate); + if (arbitrate != DC_I2C_STATUS__DC_I2C_STATUS_USED_BY_SW) + return false; + + return true; +} + static bool setup_engine( struct dce_i2c_hw *dce_i2c_hw) { + // Deassert soft reset to unblock I2C engine registers + REG_UPDATE(DC_I2C_CONTROL, DC_I2C_SOFT_RESET, false); + uint32_t i2c_setup_limit = I2C_SETUP_TIME_LIMIT_DCE; uint32_t reset_length = 0; @@ -309,8 +335,8 @@ static bool setup_engine( REG_UPDATE_N(SETUP, 1, FN(DC_I2C_DDC1_SETUP, DC_I2C_DDC1_CLK_EN), 1); - /* we have checked I2c not used by DMCU, set SW use I2C REQ to 1 to indicate SW using it*/ - REG_UPDATE(DC_I2C_ARBITRATION, DC_I2C_SW_USE_I2C_REG_REQ, 1); + if (!acquire_engine(dce_i2c_hw)) + return false; /*set SW requested I2c speed to default, if API calls in it will be override later*/ set_speed(dce_i2c_hw, dce_i2c_hw->ctx->dc->caps.i2c_speed_in_khz); @@ -319,9 +345,8 @@ static bool setup_engine( i2c_setup_limit = dce_i2c_hw->setup_limit; /* Program pin select */ - REG_UPDATE_6(DC_I2C_CONTROL, + REG_UPDATE_5(DC_I2C_CONTROL, DC_I2C_GO, 0, - DC_I2C_SOFT_RESET, 0, DC_I2C_SEND_RESET, 0, DC_I2C_SW_STATUS_RESET, 1, DC_I2C_TRANSACTION_COUNT, 0, @@ -351,6 +376,32 @@ static bool setup_engine( return true; } +/** + * cntl_stuck_hw_workaround - Workaround for I2C engine stuck state + * @dce_i2c_hw: Pointer to dce_i2c_hw structure + * + * If we boot without an HDMI display, the I2C engine does not get initialized + * correctly. One of its symptoms is that SW_USE_I2C does not get cleared after + * acquire. 
After setting SW_DONE_USING_I2C on release, the engine gets + * immediately reacquired by SW, preventing DMUB from using it. + * + * This function checks the I2C arbitration status and applies a release + * workaround if necessary. + */ +static void cntl_stuck_hw_workaround(struct dce_i2c_hw *dce_i2c_hw) +{ + uint32_t arbitrate = 0; + + REG_GET(DC_I2C_ARBITRATION, DC_I2C_REG_RW_CNTL_STATUS, &arbitrate); + if (arbitrate != DC_I2C_STATUS__DC_I2C_STATUS_USED_BY_SW) + return; + + // Still acquired after release, release again as a workaround + REG_UPDATE(DC_I2C_ARBITRATION, DC_I2C_SW_DONE_USING_I2C_REG, true); + REG_GET(DC_I2C_ARBITRATION, DC_I2C_REG_RW_CNTL_STATUS, &arbitrate); + ASSERT(arbitrate != DC_I2C_STATUS__DC_I2C_STATUS_USED_BY_SW); +} + static void release_engine( struct dce_i2c_hw *dce_i2c_hw) { @@ -378,9 +429,9 @@ static void release_engine( /*for HW HDCP Ri polling failure w/a test*/ set_speed(dce_i2c_hw, dce_i2c_hw->ctx->dc->caps.i2c_speed_in_khz_hdcp); - /* Release I2C after reset, so HW or DMCU could use it */ - REG_UPDATE_2(DC_I2C_ARBITRATION, DC_I2C_SW_DONE_USING_I2C_REG, 1, - DC_I2C_SW_USE_I2C_REG_REQ, 0); + // Release I2C engine so it can be used by HW or DMCU, automatically clears SW_USE_I2C + REG_UPDATE(DC_I2C_ARBITRATION, DC_I2C_SW_DONE_USING_I2C_REG, true); + cntl_stuck_hw_workaround(dce_i2c_hw); if (dce_i2c_hw->ctx->dc->debug.enable_mem_low_power.bits.i2c) { if (dce_i2c_hw->regs->DIO_MEM_PWR_CTRL) @@ -540,7 +591,7 @@ static bool dce_i2c_hw_engine_submit_payload(struct dce_i2c_hw *dce_i2c_hw, DCE_I2C_TRANSACTION_ACTION_I2C_WRITE; - request.address = (uint8_t) ((payload->address << 1) | !payload->write); + request.address = (uint8_t) ((payload->address << 1) | (payload->write ? 0 : 1)); request.length = payload->length; request.data = payload->data; diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_i2c_sw.c b/drivers/gpu/drm/amd/display/dc/dce/dce_i2c_sw.c index e188447c8156..2d73b94c515c 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dce_i2c_sw.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dce_i2c_sw.c @@ -451,7 +451,7 @@ static bool dce_i2c_sw_engine_submit_payload(struct dce_i2c_sw *engine, DCE_I2C_TRANSACTION_ACTION_I2C_WRITE_MOT : DCE_I2C_TRANSACTION_ACTION_I2C_WRITE; - request.address = (uint8_t) ((payload->address << 1) | !payload->write); + request.address = (uint8_t) ((payload->address << 1) | (payload->write ? 
0 : 1)); request.length = payload->length; request.data = payload->data; diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_link_encoder.c b/drivers/gpu/drm/amd/display/dc/dce/dce_link_encoder.c index 4a9d07c31bc5..0c50fe266c8a 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dce_link_encoder.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dce_link_encoder.c @@ -896,13 +896,13 @@ void dce110_link_encoder_construct( enc110->base.id, &bp_cap_info); /* Override features with DCE-specific values */ - if (BP_RESULT_OK == result) { + if (result == BP_RESULT_OK) { enc110->base.features.flags.bits.IS_HBR2_CAPABLE = bp_cap_info.DP_HBR2_EN; enc110->base.features.flags.bits.IS_HBR3_CAPABLE = bp_cap_info.DP_HBR3_EN; enc110->base.features.flags.bits.HDMI_6GB_EN = bp_cap_info.HDMI_6GB_EN; - } else { + } else if (result != BP_RESULT_NORECORD) { DC_LOG_WARNING("%s: Failed to get encoder_cap_info from VBIOS with error code %d!\n", __func__, result); @@ -1798,13 +1798,13 @@ void dce60_link_encoder_construct( enc110->base.id, &bp_cap_info); /* Override features with DCE-specific values */ - if (BP_RESULT_OK == result) { + if (result == BP_RESULT_OK) { enc110->base.features.flags.bits.IS_HBR2_CAPABLE = bp_cap_info.DP_HBR2_EN; enc110->base.features.flags.bits.IS_HBR3_CAPABLE = bp_cap_info.DP_HBR3_EN; enc110->base.features.flags.bits.HDMI_6GB_EN = bp_cap_info.HDMI_6GB_EN; - } else { + } else if (result != BP_RESULT_NORECORD) { DC_LOG_WARNING("%s: Failed to get encoder_cap_info from VBIOS with error code %d!\n", __func__, result); diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_transform.c b/drivers/gpu/drm/amd/display/dc/dce/dce_transform.c index 2b1673d69ea8..1ab5ae9b5ea5 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dce_transform.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dce_transform.c @@ -154,10 +154,13 @@ static bool dce60_setup_scaling_configuration( REG_SET(SCL_BYPASS_CONTROL, 0, SCL_BYPASS_MODE, 0); if (data->taps.h_taps + data->taps.v_taps <= 2) { - /* Set bypass */ - - /* DCE6 has no SCL_MODE register, skip scale mode programming */ + /* Disable scaler functionality */ + REG_WRITE(SCL_SCALER_ENABLE, 0); + /* Clear registers that can cause glitches even when the scaler is off */ + REG_WRITE(SCL_TAP_CONTROL, 0); + REG_WRITE(SCL_AUTOMATIC_MODE_CONTROL, 0); + REG_WRITE(SCL_F_SHARP_CONTROL, 0); return false; } @@ -165,7 +168,7 @@ static bool dce60_setup_scaling_configuration( SCL_H_NUM_OF_TAPS, data->taps.h_taps - 1, SCL_V_NUM_OF_TAPS, data->taps.v_taps - 1); - /* DCE6 has no SCL_MODE register, skip scale mode programming */ + REG_WRITE(SCL_SCALER_ENABLE, 1); /* DCE6 has no SCL_BOUNDARY_MODE bit, skip replace out of bound pixels */ @@ -502,6 +505,8 @@ static void dce60_transform_set_scaler( REG_SET(DC_LB_MEM_SIZE, 0, DC_LB_MEM_SIZE, xfm_dce->lb_memory_size); + REG_WRITE(SCL_UPDATE, 0x00010000); + /* Clear SCL_F_SHARP_CONTROL value to 0 */ REG_WRITE(SCL_F_SHARP_CONTROL, 0); @@ -527,8 +532,7 @@ static void dce60_transform_set_scaler( if (coeffs_v != xfm_dce->filter_v || coeffs_h != xfm_dce->filter_h) { /* 4. Program vertical filters */ if (xfm_dce->filter_v == NULL) - REG_SET(SCL_VERT_FILTER_CONTROL, 0, - SCL_V_2TAP_HARDCODE_COEF_EN, 0); + REG_WRITE(SCL_VERT_FILTER_CONTROL, 0); program_multi_taps_filter( xfm_dce, data->taps.v_taps, @@ -542,8 +546,7 @@ static void dce60_transform_set_scaler( /* 5. 
Program horizontal filters */ if (xfm_dce->filter_h == NULL) - REG_SET(SCL_HORZ_FILTER_CONTROL, 0, - SCL_H_2TAP_HARDCODE_COEF_EN, 0); + REG_WRITE(SCL_HORZ_FILTER_CONTROL, 0); program_multi_taps_filter( xfm_dce, data->taps.h_taps, @@ -566,6 +569,8 @@ static void dce60_transform_set_scaler( /* DCE6 has no SCL_COEF_UPDATE_COMPLETE bit to flip to new coefficient memory */ /* DCE6 DATA_FORMAT register does not support ALPHA_EN */ + + REG_WRITE(SCL_UPDATE, 0); } #endif diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_transform.h b/drivers/gpu/drm/amd/display/dc/dce/dce_transform.h index cbce194ec7b8..eb716e8337e2 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dce_transform.h +++ b/drivers/gpu/drm/amd/display/dc/dce/dce_transform.h @@ -155,6 +155,9 @@ SRI(SCL_COEF_RAM_TAP_DATA, SCL, id), \ SRI(VIEWPORT_START, SCL, id), \ SRI(VIEWPORT_SIZE, SCL, id), \ + SRI(SCL_SCALER_ENABLE, SCL, id), \ + SRI(SCL_HORZ_FILTER_INIT_RGB_LUMA, SCL, id), \ + SRI(SCL_HORZ_FILTER_INIT_CHROMA, SCL, id), \ SRI(SCL_HORZ_FILTER_SCALE_RATIO, SCL, id), \ SRI(SCL_VERT_FILTER_SCALE_RATIO, SCL, id), \ SRI(SCL_VERT_FILTER_INIT, SCL, id), \ @@ -590,6 +593,7 @@ struct dce_transform_registers { uint32_t SCL_VERT_FILTER_SCALE_RATIO; uint32_t SCL_HORZ_FILTER_INIT; #if defined(CONFIG_DRM_AMD_DC_SI) + uint32_t SCL_SCALER_ENABLE; uint32_t SCL_HORZ_FILTER_INIT_RGB_LUMA; uint32_t SCL_HORZ_FILTER_INIT_CHROMA; #endif diff --git a/drivers/gpu/drm/amd/display/dc/dce/dmub_psr.c b/drivers/gpu/drm/amd/display/dc/dce/dmub_psr.c index ff3b8244ba3d..87af4fdc04a6 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dmub_psr.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dmub_psr.c @@ -391,7 +391,7 @@ static bool dmub_psr_copy_settings(struct dmub_psr *dmub, sizeof(DP_SINK_DEVICE_STR_ID_1))) link->psr_settings.force_ffu_mode = 1; - copy_settings_data->force_ffu_mode = link->psr_settings.force_ffu_mode; + copy_settings_data->force_ffu_mode = link->psr_settings.force_ffu_mode || psr_context->os_request_force_ffu; if (((link->dpcd_caps.fec_cap.bits.FEC_CAPABLE && !link->dc->debug.disable_fec) && diff --git a/drivers/gpu/drm/amd/display/dc/dce/dmub_replay.c b/drivers/gpu/drm/amd/display/dc/dce/dmub_replay.c index fcd3d86ad517..f9542edff14b 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dmub_replay.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dmub_replay.c @@ -3,6 +3,7 @@ // Copyright 2024 Advanced Micro Devices, Inc. 
#include "dc.h" +#include "link_service.h" #include "dc_dmub_srv.h" #include "dmub/dmub_srv.h" #include "core_types.h" @@ -168,6 +169,7 @@ static bool dmub_replay_copy_settings(struct dmub_replay *dmub, copy_settings_data->max_deviation_line = link->dpcd_caps.pr_info.max_deviation_line; copy_settings_data->smu_optimizations_en = link->replay_settings.replay_smu_opt_enable; copy_settings_data->replay_timing_sync_supported = link->replay_settings.config.replay_timing_sync_supported; + copy_settings_data->replay_support_fast_resync_in_ultra_sleep_mode = link->replay_settings.config.replay_support_fast_resync_in_ultra_sleep_mode; copy_settings_data->debug.bitfields.enable_ips_visual_confirm = dc->dc->debug.enable_ips_visual_confirm; @@ -189,6 +191,18 @@ static bool dmub_replay_copy_settings(struct dmub_replay *dmub, else copy_settings_data->flags.bitfields.force_wakeup_by_tps3 = 0; + copy_settings_data->flags.bitfields.alpm_mode = (enum dmub_alpm_mode)link->replay_settings.config.alpm_mode; + if (link->replay_settings.config.alpm_mode == DC_ALPM_AUXLESS) { + copy_settings_data->auxless_alpm_data.lfps_setup_ns = dc->dc->debug.auxless_alpm_lfps_setup_ns; + copy_settings_data->auxless_alpm_data.lfps_period_ns = dc->dc->debug.auxless_alpm_lfps_period_ns; + copy_settings_data->auxless_alpm_data.lfps_silence_ns = dc->dc->debug.auxless_alpm_lfps_silence_ns; + copy_settings_data->auxless_alpm_data.lfps_t1_t2_override_us = + dc->dc->debug.auxless_alpm_lfps_t1t2_us; + copy_settings_data->auxless_alpm_data.lfps_t1_t2_offset_us = + dc->dc->debug.auxless_alpm_lfps_t1t2_offset_us; + copy_settings_data->auxless_alpm_data.lttpr_count = link->dc->link_srv->dp_get_lttpr_count(link); + } + dc_wake_and_execute_dmub_cmd(dc, &cmd, DM_DMUB_WAIT_TYPE_WAIT); return true; diff --git a/drivers/gpu/drm/amd/display/dc/dio/dcn10/dcn10_link_encoder.c b/drivers/gpu/drm/amd/display/dc/dio/dcn10/dcn10_link_encoder.c index e0558a78b11c..1c1228116487 100644 --- a/drivers/gpu/drm/amd/display/dc/dio/dcn10/dcn10_link_encoder.c +++ b/drivers/gpu/drm/amd/display/dc/dio/dcn10/dcn10_link_encoder.c @@ -812,7 +812,7 @@ bool dcn10_link_encoder_validate_output_with_stream( enc10, &stream->timing); break; case SIGNAL_TYPE_EDP: - is_valid = (stream->timing.pixel_encoding == PIXEL_ENCODING_RGB) ? 
true : false; + is_valid = stream->timing.pixel_encoding == PIXEL_ENCODING_RGB; break; case SIGNAL_TYPE_VIRTUAL: is_valid = true; diff --git a/drivers/gpu/drm/amd/display/dc/dio/dcn10/dcn10_stream_encoder.c b/drivers/gpu/drm/amd/display/dc/dio/dcn10/dcn10_stream_encoder.c index 22e66b375a7f..d928b4dcf6b8 100644 --- a/drivers/gpu/drm/amd/display/dc/dio/dcn10/dcn10_stream_encoder.c +++ b/drivers/gpu/drm/amd/display/dc/dio/dcn10/dcn10_stream_encoder.c @@ -28,7 +28,7 @@ #include "dcn10_stream_encoder.h" #include "reg_helper.h" #include "hw_shared.h" -#include "link.h" +#include "link_service.h" #include "dpcd_defs.h" #include "dcn30/dcn30_afmt.h" diff --git a/drivers/gpu/drm/amd/display/dc/dio/dcn20/dcn20_stream_encoder.c b/drivers/gpu/drm/amd/display/dc/dio/dcn20/dcn20_stream_encoder.c index 0b47aeb60e79..bec0b4aaeb2b 100644 --- a/drivers/gpu/drm/amd/display/dc/dio/dcn20/dcn20_stream_encoder.c +++ b/drivers/gpu/drm/amd/display/dc/dio/dcn20/dcn20_stream_encoder.c @@ -29,7 +29,7 @@ #include "dcn20_stream_encoder.h" #include "reg_helper.h" #include "hw_shared.h" -#include "link.h" +#include "link_service.h" #include "dpcd_defs.h" #define DC_LOGGER \ diff --git a/drivers/gpu/drm/amd/display/dc/dio/dcn31/dcn31_dio_link_encoder.c b/drivers/gpu/drm/amd/display/dc/dio/dcn31/dcn31_dio_link_encoder.c index 9a92f73d5b7f..84cc2ddc52fe 100644 --- a/drivers/gpu/drm/amd/display/dc/dio/dcn31/dcn31_dio_link_encoder.c +++ b/drivers/gpu/drm/amd/display/dc/dio/dcn31/dcn31_dio_link_encoder.c @@ -37,7 +37,7 @@ #include "link_enc_cfg.h" #include "dc_dmub_srv.h" #include "dal_asic_id.h" -#include "link.h" +#include "link_service.h" #define CTX \ enc10->base.ctx diff --git a/drivers/gpu/drm/amd/display/dc/dio/dcn314/dcn314_dio_stream_encoder.c b/drivers/gpu/drm/amd/display/dc/dio/dcn314/dcn314_dio_stream_encoder.c index ae81451a3a72..3e85e9c3d2cb 100644 --- a/drivers/gpu/drm/amd/display/dc/dio/dcn314/dcn314_dio_stream_encoder.c +++ b/drivers/gpu/drm/amd/display/dc/dio/dcn314/dcn314_dio_stream_encoder.c @@ -30,7 +30,7 @@ #include "dcn314_dio_stream_encoder.h" #include "reg_helper.h" #include "hw_shared.h" -#include "link.h" +#include "link_service.h" #include "dpcd_defs.h" #define DC_LOGGER \ diff --git a/drivers/gpu/drm/amd/display/dc/dio/dcn32/dcn32_dio_stream_encoder.c b/drivers/gpu/drm/amd/display/dc/dio/dcn32/dcn32_dio_stream_encoder.c index 1a9bb614c41e..3523d1cdc1a3 100644 --- a/drivers/gpu/drm/amd/display/dc/dio/dcn32/dcn32_dio_stream_encoder.c +++ b/drivers/gpu/drm/amd/display/dc/dio/dcn32/dcn32_dio_stream_encoder.c @@ -29,7 +29,7 @@ #include "dcn32_dio_stream_encoder.h" #include "reg_helper.h" #include "hw_shared.h" -#include "link.h" +#include "link_service.h" #include "dpcd_defs.h" #define DC_LOGGER \ diff --git a/drivers/gpu/drm/amd/display/dc/dio/dcn35/dcn35_dio_stream_encoder.c b/drivers/gpu/drm/amd/display/dc/dio/dcn35/dcn35_dio_stream_encoder.c index 6ab2a218b769..fd5d1dbf9dc6 100644 --- a/drivers/gpu/drm/amd/display/dc/dio/dcn35/dcn35_dio_stream_encoder.c +++ b/drivers/gpu/drm/amd/display/dc/dio/dcn35/dcn35_dio_stream_encoder.c @@ -29,7 +29,7 @@ #include "dcn35_dio_stream_encoder.h" #include "reg_helper.h" #include "hw_shared.h" -#include "link.h" +#include "link_service.h" #include "dpcd_defs.h" #define DC_LOGGER \ @@ -397,7 +397,7 @@ static bool enc35_is_fifo_enabled(struct stream_encoder *enc) uint32_t reset_val; REG_GET(DIG_FIFO_CTRL0, DIG_FIFO_ENABLE, &reset_val); - return (reset_val == 0) ? 
false : true; + return reset_val != 0; } void enc35_disable_fifo(struct stream_encoder *enc) { diff --git a/drivers/gpu/drm/amd/display/dc/dio/dcn401/dcn401_dio_stream_encoder.c b/drivers/gpu/drm/amd/display/dc/dio/dcn401/dcn401_dio_stream_encoder.c index d5fa551dd3c9..99aab70ef3e1 100644 --- a/drivers/gpu/drm/amd/display/dc/dio/dcn401/dcn401_dio_stream_encoder.c +++ b/drivers/gpu/drm/amd/display/dc/dio/dcn401/dcn401_dio_stream_encoder.c @@ -32,7 +32,7 @@ #include "dcn401_dio_stream_encoder.h" #include "reg_helper.h" #include "hw_shared.h" -#include "link.h" +#include "link_service.h" #include "dpcd_defs.h" #define DC_LOGGER \ diff --git a/drivers/gpu/drm/amd/display/dc/dm_services.h b/drivers/gpu/drm/amd/display/dc/dm_services.h index 7b9c22c45453..fbbf9c757b3c 100644 --- a/drivers/gpu/drm/amd/display/dc/dm_services.h +++ b/drivers/gpu/drm/amd/display/dc/dm_services.h @@ -277,12 +277,13 @@ void dm_perf_trace_timestamp(const char *func_name, unsigned int line, struct dc /* * SMU message tracing */ -void dm_trace_smu_msg(uint32_t msg_id, uint32_t param_in, struct dc_context *ctx); -void dm_trace_smu_delay(uint32_t delay, struct dc_context *ctx); - -#define TRACE_SMU_MSG(msg_id, param_in, ctx) dm_trace_smu_msg(msg_id, param_in, ctx) -#define TRACE_SMU_DELAY(response_delay, ctx) dm_trace_smu_delay(response_delay, ctx) +void dm_trace_smu_enter(uint32_t msg_id, uint32_t param_in, unsigned int delay, struct dc_context *ctx); +void dm_trace_smu_exit(bool success, uint32_t response, struct dc_context *ctx); +#define TRACE_SMU_MSG_DELAY(msg_id, param_in, delay, ctx) dm_trace_smu_enter(msg_id, param_in, delay, ctx) +#define TRACE_SMU_MSG(msg_id, param_in, ctx) dm_trace_smu_enter(msg_id, param_in, 0, ctx) +#define TRACE_SMU_MSG_ENTER(msg_id, param_in, ctx) dm_trace_smu_enter(msg_id, param_in, 0, ctx) +#define TRACE_SMU_MSG_EXIT(success, response, ctx) dm_trace_smu_exit(success, response, ctx) /* * DMUB Interfaces @@ -311,4 +312,6 @@ void dm_dtn_log_end(struct dc_context *ctx, char *dce_version_to_string(const int version); +bool dc_supports_vrr(const enum dce_version v); + #endif /* __DM_SERVICES_H__ */ diff --git a/drivers/gpu/drm/amd/display/dc/dm_services_types.h b/drivers/gpu/drm/amd/display/dc/dm_services_types.h index bf63da266a18..3b093b8699ab 100644 --- a/drivers/gpu/drm/amd/display/dc/dm_services_types.h +++ b/drivers/gpu/drm/amd/display/dc/dm_services_types.h @@ -127,7 +127,7 @@ struct dm_pp_single_disp_config { uint32_t src_height; uint32_t src_width; uint32_t v_refresh; - uint32_t sym_clock; /* HDMI only */ + uint32_t pixel_clock; /* Pixel clock in KHz (for HDMI only: normalized) */ struct dc_link_settings link_settings; /* DP only */ }; diff --git a/drivers/gpu/drm/amd/display/dc/dml/Makefile b/drivers/gpu/drm/amd/display/dc/dml/Makefile index e1d500633dfa..b357683b4255 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dml/Makefile @@ -114,9 +114,6 @@ CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/calcs/dcn_calcs.o := $(dml_rcflags) CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/calcs/dcn_calc_auto.o := $(dml_rcflags) CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/calcs/dcn_calc_math.o := $(dml_rcflags) -CFLAGS_$(AMDDALPATH)/dc/dml/dcn401/dcn401_fpu.o := $(dml_ccflags) -CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn401/dcn401_fpu.o := $(dml_rcflags) - ifdef CONFIG_DRM_AMD_DC_FP DML += display_mode_lib.o display_rq_dlg_helpers.o dml1_display_rq_dlg_calc.o DML += dcn10/dcn10_fpu.o @@ -137,7 +134,6 @@ DML += dcn303/dcn303_fpu.o DML += dcn314/dcn314_fpu.o DML += dcn35/dcn35_fpu.o DML += 
dcn351/dcn351_fpu.o -DML += dcn401/dcn401_fpu.o DML += dsc/rc_calc_fpu.o DML += calcs/dcn_calcs.o calcs/dcn_calc_math.o calcs/dcn_calc_auto.o endif diff --git a/drivers/gpu/drm/amd/display/dc/dml/calcs/dcn_calcs.c b/drivers/gpu/drm/amd/display/dc/dml/calcs/dcn_calcs.c index f1235bf9a596..74962791302f 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/calcs/dcn_calcs.c +++ b/drivers/gpu/drm/amd/display/dc/dml/calcs/dcn_calcs.c @@ -748,7 +748,7 @@ static unsigned int get_highest_allowed_voltage_level(bool is_vmin_only_asic) bool dcn_validate_bandwidth( struct dc *dc, struct dc_state *context, - bool fast_validate) + enum dc_validate_mode validate_mode) { /* * we want a breakdown of the various stages of validation, which the @@ -1119,7 +1119,7 @@ bool dcn_validate_bandwidth( BW_VAL_TRACE_END_VOLTAGE_LEVEL(); - if (v->voltage_level != number_of_states_plus_one && !fast_validate) { + if (v->voltage_level != number_of_states_plus_one && validate_mode == DC_VALIDATE_MODE_AND_PROGRAMMING) { float bw_consumed = v->total_bandwidth_consumed_gbyte_per_second; if (bw_consumed < v->fabric_and_dram_bandwidth_vmin0p65) @@ -1286,7 +1286,7 @@ bool dcn_validate_bandwidth( } } else if (v->voltage_level == number_of_states_plus_one) { BW_VAL_TRACE_SKIP(fail); - } else if (fast_validate) { + } else if (validate_mode != DC_VALIDATE_MODE_AND_PROGRAMMING) { BW_VAL_TRACE_SKIP(fast); } diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c index e9fea9c2162e..7aaf13bbd4e4 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c @@ -30,8 +30,7 @@ #include "dcn20/dcn20_resource.h" #include "dcn21/dcn21_resource.h" #include "clk_mgr/dcn21/rn_clk_mgr.h" - -#include "link.h" +#include "link_service.h" #include "dcn20_fpu.h" #include "dc_state_priv.h" @@ -1315,7 +1314,7 @@ static void swizzle_to_dml_params( int dcn20_populate_dml_pipes_from_context(struct dc *dc, struct dc_state *context, display_e2e_pipe_params_st *pipes, - bool fast_validate) + enum dc_validate_mode validate_mode) { int pipe_cnt, i; bool synchronized_vblank = true; @@ -1733,7 +1732,7 @@ void dcn20_calculate_wm(struct dc *dc, struct dc_state *context, int *out_pipe_cnt, int *pipe_split_from, int vlevel, - bool fast_validate) + enum dc_validate_mode validate_mode) { int pipe_cnt, i, pipe_idx; @@ -1780,10 +1779,10 @@ void dcn20_calculate_wm(struct dc *dc, struct dc_state *context, if (pipe_cnt != pipe_idx) { if (dc->res_pool->funcs->populate_dml_pipes) pipe_cnt = dc->res_pool->funcs->populate_dml_pipes(dc, - context, pipes, fast_validate); + context, pipes, validate_mode); else pipe_cnt = dcn20_populate_dml_pipes_from_context(dc, - context, pipes, fast_validate); + context, pipes, validate_mode); } *out_pipe_cnt = pipe_cnt; @@ -2027,7 +2026,7 @@ void dcn20_patch_bounding_box(struct dc *dc, struct _vcs_dpi_soc_bounding_box_st } static bool dcn20_validate_bandwidth_internal(struct dc *dc, struct dc_state *context, - bool fast_validate, display_e2e_pipe_params_st *pipes) + enum dc_validate_mode validate_mode, display_e2e_pipe_params_st *pipes) { bool out = false; @@ -2040,7 +2039,7 @@ static bool dcn20_validate_bandwidth_internal(struct dc *dc, struct dc_state *co BW_VAL_TRACE_COUNT(); - out = dcn20_fast_validate_bw(dc, context, pipes, &pipe_cnt, pipe_split_from, &vlevel, fast_validate); + out = dcn20_fast_validate_bw(dc, context, pipes, &pipe_cnt, pipe_split_from, &vlevel, validate_mode); if (pipe_cnt == 0) goto validate_out; @@ 
-2050,12 +2049,12 @@ static bool dcn20_validate_bandwidth_internal(struct dc *dc, struct dc_state *co BW_VAL_TRACE_END_VOLTAGE_LEVEL(); - if (fast_validate) { + if (validate_mode != DC_VALIDATE_MODE_AND_PROGRAMMING) { BW_VAL_TRACE_SKIP(fast); goto validate_out; } - dcn20_calculate_wm(dc, context, pipes, &pipe_cnt, pipe_split_from, vlevel, fast_validate); + dcn20_calculate_wm(dc, context, pipes, &pipe_cnt, pipe_split_from, vlevel, validate_mode); dcn20_calculate_dlg_params(dc, context, pipes, pipe_cnt, vlevel); BW_VAL_TRACE_END_WATERMARKS(); @@ -2077,7 +2076,7 @@ validate_out: } bool dcn20_validate_bandwidth_fp(struct dc *dc, struct dc_state *context, - bool fast_validate, display_e2e_pipe_params_st *pipes) + enum dc_validate_mode validate_mode, display_e2e_pipe_params_st *pipes) { bool voltage_supported = false; bool full_pstate_supported = false; @@ -2095,12 +2094,11 @@ bool dcn20_validate_bandwidth_fp(struct dc *dc, struct dc_state *context, /*Unsafe due to current pipe merge and split logic*/ ASSERT(context != dc->current_state); - if (fast_validate) { - return dcn20_validate_bandwidth_internal(dc, context, true, pipes); - } + if (validate_mode != DC_VALIDATE_MODE_AND_PROGRAMMING) + return dcn20_validate_bandwidth_internal(dc, context, validate_mode, pipes); // Best case, we support full UCLK switch latency - voltage_supported = dcn20_validate_bandwidth_internal(dc, context, false, pipes); + voltage_supported = dcn20_validate_bandwidth_internal(dc, context, DC_VALIDATE_MODE_AND_PROGRAMMING, pipes); full_pstate_supported = context->bw_ctx.bw.dcn.clk.p_state_change_support; if (context->bw_ctx.dml.soc.dummy_pstate_latency_us == 0 || @@ -2113,7 +2111,7 @@ bool dcn20_validate_bandwidth_fp(struct dc *dc, struct dc_state *context, context->bw_ctx.dml.soc.dram_clock_change_latency_us = context->bw_ctx.dml.soc.dummy_pstate_latency_us; memset(pipes, 0, dc->res_pool->pipe_count * sizeof(display_e2e_pipe_params_st)); - voltage_supported = dcn20_validate_bandwidth_internal(dc, context, false, pipes); + voltage_supported = dcn20_validate_bandwidth_internal(dc, context, DC_VALIDATE_MODE_AND_PROGRAMMING, pipes); dummy_pstate_supported = context->bw_ctx.bw.dcn.clk.p_state_change_support; if (voltage_supported && (dummy_pstate_supported || !(context->stream_count))) { @@ -2156,14 +2154,14 @@ void dcn20_fpu_adjust_dppclk(struct vba_vars_st *v, int dcn21_populate_dml_pipes_from_context(struct dc *dc, struct dc_state *context, display_e2e_pipe_params_st *pipes, - bool fast_validate) + enum dc_validate_mode validate_mode) { uint32_t pipe_cnt; int i; dc_assert_fp_enabled(); - pipe_cnt = dcn20_populate_dml_pipes_from_context(dc, context, pipes, fast_validate); + pipe_cnt = dcn20_populate_dml_pipes_from_context(dc, context, pipes, validate_mode); for (i = 0; i < pipe_cnt; i++) { @@ -2239,7 +2237,7 @@ static void dcn21_calculate_wm(struct dc *dc, struct dc_state *context, int *out_pipe_cnt, int *pipe_split_from, int vlevel_req, - bool fast_validate) + enum dc_validate_mode validate_mode) { int pipe_cnt, i, pipe_idx; int vlevel, vlevel_max; @@ -2281,10 +2279,10 @@ static void dcn21_calculate_wm(struct dc *dc, struct dc_state *context, if (pipe_cnt != pipe_idx) { if (dc->res_pool->funcs->populate_dml_pipes) pipe_cnt = dc->res_pool->funcs->populate_dml_pipes(dc, - context, pipes, fast_validate); + context, pipes, validate_mode); else pipe_cnt = dcn21_populate_dml_pipes_from_context(dc, - context, pipes, fast_validate); + context, pipes, validate_mode); } *out_pipe_cnt = pipe_cnt; @@ -2319,7 +2317,7 @@ static void 
dcn21_calculate_wm(struct dc *dc, struct dc_state *context, } bool dcn21_validate_bandwidth_fp(struct dc *dc, struct dc_state *context, - bool fast_validate, display_e2e_pipe_params_st *pipes) + enum dc_validate_mode validate_mode, display_e2e_pipe_params_st *pipes) { bool out = false; @@ -2337,7 +2335,7 @@ bool dcn21_validate_bandwidth_fp(struct dc *dc, struct dc_state *context, /*Unsafe due to current pipe merge and split logic*/ ASSERT(context != dc->current_state); - out = dcn21_fast_validate_bw(dc, context, pipes, &pipe_cnt, pipe_split_from, &vlevel, fast_validate); + out = dcn21_fast_validate_bw(dc, context, pipes, &pipe_cnt, pipe_split_from, &vlevel, validate_mode); if (pipe_cnt == 0) goto validate_out; @@ -2347,12 +2345,12 @@ bool dcn21_validate_bandwidth_fp(struct dc *dc, struct dc_state *context, BW_VAL_TRACE_END_VOLTAGE_LEVEL(); - if (fast_validate) { + if (validate_mode != DC_VALIDATE_MODE_AND_PROGRAMMING) { BW_VAL_TRACE_SKIP(fast); goto validate_out; } - dcn21_calculate_wm(dc, context, pipes, &pipe_cnt, pipe_split_from, vlevel, fast_validate); + dcn21_calculate_wm(dc, context, pipes, &pipe_cnt, pipe_split_from, vlevel, validate_mode); dcn20_calculate_dlg_params(dc, context, pipes, pipe_cnt, vlevel); BW_VAL_TRACE_END_WATERMARKS(); diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.h b/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.h index b6c34198ddc8..aed00039ca62 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.h +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.h @@ -44,14 +44,14 @@ void dcn20_calculate_dlg_params(struct dc *dc, int dcn20_populate_dml_pipes_from_context(struct dc *dc, struct dc_state *context, display_e2e_pipe_params_st *pipes, - bool fast_validate); + enum dc_validate_mode validate_mode); void dcn20_calculate_wm(struct dc *dc, struct dc_state *context, display_e2e_pipe_params_st *pipes, int *out_pipe_cnt, int *pipe_split_from, int vlevel, - bool fast_validate); + enum dc_validate_mode validate_mode); void dcn20_cap_soc_clocks(struct _vcs_dpi_soc_bounding_box_st *bb, struct pp_smu_nv_clock_table max_clocks); void dcn20_update_bounding_box(struct dc *dc, @@ -62,7 +62,7 @@ void dcn20_update_bounding_box(struct dc *dc, void dcn20_patch_bounding_box(struct dc *dc, struct _vcs_dpi_soc_bounding_box_st *bb); bool dcn20_validate_bandwidth_fp(struct dc *dc, struct dc_state *context, - bool fast_validate, display_e2e_pipe_params_st *pipes); + enum dc_validate_mode validate_mode, display_e2e_pipe_params_st *pipes); void dcn20_fpu_set_wm_ranges(int i, struct pp_smu_wm_range_sets *ranges, struct _vcs_dpi_soc_bounding_box_st *loaded_bb); @@ -75,9 +75,9 @@ void dcn20_fpu_adjust_dppclk(struct vba_vars_st *v, int dcn21_populate_dml_pipes_from_context(struct dc *dc, struct dc_state *context, display_e2e_pipe_params_st *pipes, - bool fast_validate); -bool dcn21_validate_bandwidth_fp(struct dc *dc, struct dc_state *context, bool - fast_validate, display_e2e_pipe_params_st *pipes); + enum dc_validate_mode validate_mode); +bool dcn21_validate_bandwidth_fp(struct dc *dc, struct dc_state *context, enum + dc_validate_mode, display_e2e_pipe_params_st *pipes); void dcn21_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_params); void dcn21_clk_mgr_set_bw_params_wm_table(struct clk_bw_params *bw_params); diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_rq_dlg_calc_20.c b/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_rq_dlg_calc_20.c index 390c1a77fda6..9c58ff1069d6 100644 --- 
a/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_rq_dlg_calc_20.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_rq_dlg_calc_20.c @@ -646,7 +646,7 @@ static void get_meta_and_pte_attr(struct display_mode_lib *mode_lib, // the dpte_group_bytes is reduced for the specific case of vertical // access of a tile surface that has dpte request of 8x1 ptes. - if (!surf_linear & (log2_dpte_req_height_ptes == 0) & surf_vert) //reduced, in this case, will have page fault within a group + if (!surf_linear && (log2_dpte_req_height_ptes == 0) && surf_vert) //reduced, in this case, will have page fault within a group rq_sizing_param->dpte_group_bytes = 512; else //full size diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_rq_dlg_calc_20v2.c b/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_rq_dlg_calc_20v2.c index 843d6004258c..570e6e39eb45 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_rq_dlg_calc_20v2.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_rq_dlg_calc_20v2.c @@ -646,7 +646,7 @@ static void get_meta_and_pte_attr(struct display_mode_lib *mode_lib, // the dpte_group_bytes is reduced for the specific case of vertical // access of a tile surface that has dpte request of 8x1 ptes. - if (!surf_linear & (log2_dpte_req_height_ptes == 0) & surf_vert) //reduced, in this case, will have page fault within a group + if (!surf_linear && (log2_dpte_req_height_ptes == 0) && surf_vert) //reduced, in this case, will have page fault within a group rq_sizing_param->dpte_group_bytes = 512; else //full size diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn21/display_rq_dlg_calc_21.c b/drivers/gpu/drm/amd/display/dc/dml/dcn21/display_rq_dlg_calc_21.c index 5718000627b0..f549da082c01 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn21/display_rq_dlg_calc_21.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn21/display_rq_dlg_calc_21.c @@ -652,7 +652,7 @@ static void get_meta_and_pte_attr( if (hostvm_enable) rq_sizing_param->dpte_group_bytes = 512; else { - if (!surf_linear & (log2_dpte_req_height_ptes == 0) & surf_vert) //reduced, in this case, will have page fault within a group + if (!surf_linear && (log2_dpte_req_height_ptes == 0) && surf_vert) //reduced, in this case, will have page fault within a group rq_sizing_param->dpte_group_bytes = 512; else //full size diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn30/dcn30_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn30/dcn30_fpu.c index 88789987bdbc..e5f5c0663750 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn30/dcn30_fpu.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn30/dcn30_fpu.c @@ -339,7 +339,8 @@ void dcn30_fpu_calculate_wm_and_dlg( * newly found dummy_latency_index */ context->bw_ctx.dml.soc.dram_clock_change_latency_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_A].dml_input.pstate_latency_us; - dcn30_internal_validate_bw(dc, context, pipes, &pipe_cnt, &vlevel, false, true); + dcn30_internal_validate_bw(dc, context, pipes, &pipe_cnt, &vlevel, + DC_VALIDATE_MODE_AND_PROGRAMMING, true); maxMpcComb = context->bw_ctx.dml.vba.maxMpcComb; dcfclk = context->bw_ctx.dml.vba.DCFCLKState[vlevel][context->bw_ctx.dml.vba.maxMpcComb]; pstate_en = context->bw_ctx.dml.vba.DRAMClockChangeSupport[vlevel][maxMpcComb] != dm_dram_clock_change_unsupported; @@ -630,7 +631,8 @@ int dcn30_find_dummy_latency_index_for_fw_based_mclk_switch(struct dc *dc, while (dummy_latency_index < max_latency_table_entries) { context->bw_ctx.dml.soc.dram_clock_change_latency_us = 
dc->clk_mgr->bw_params->dummy_pstate_table[dummy_latency_index].dummy_pstate_latency_us; - dcn30_internal_validate_bw(dc, context, pipes, &pipe_cnt, &vlevel, false, true); + dcn30_internal_validate_bw(dc, context, pipes, &pipe_cnt, &vlevel, + DC_VALIDATE_MODE_AND_PROGRAMMING, true); if (context->bw_ctx.dml.soc.allow_dram_self_refresh_or_dram_clock_change_in_vblank == dm_allow_self_refresh_and_mclk_switch) diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn30/display_rq_dlg_calc_30.c b/drivers/gpu/drm/amd/display/dc/dml/dcn30/display_rq_dlg_calc_30.c index 8d4873f80df0..4fb37df54d59 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn30/display_rq_dlg_calc_30.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn30/display_rq_dlg_calc_30.c @@ -620,7 +620,7 @@ static void get_meta_and_pte_attr(struct display_mode_lib *mode_lib, if (hostvm_enable) rq_sizing_param->dpte_group_bytes = 512; else { - if (!surf_linear & (log2_dpte_req_height_ptes == 0) & surf_vert) //reduced, in this case, will have page fault within a group + if (!surf_linear && (log2_dpte_req_height_ptes == 0) && surf_vert) //reduced, in this case, will have page fault within a group rq_sizing_param->dpte_group_bytes = 512; else rq_sizing_param->dpte_group_bytes = 2048; diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn301/dcn301_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn301/dcn301_fpu.c index 0c0b2d67c9cd..1aaa77265eed 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn301/dcn301_fpu.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn301/dcn301_fpu.c @@ -326,7 +326,7 @@ void dcn301_fpu_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_p struct dcn301_resource_pool *pool = TO_DCN301_RES_POOL(dc->res_pool); struct clk_limit_table *clk_table = &bw_params->clk_table; unsigned int i, closest_clk_lvl; - int j; + int j = 0, max_dispclk_mhz = 0, max_dppclk_mhz = 0; dc_assert_fp_enabled(); @@ -338,6 +338,15 @@ void dcn301_fpu_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_p dcn3_01_soc.num_chans = bw_params->num_channels; ASSERT(clk_table->num_entries); + + /* Prepass to find max clocks independent of voltage level. */ + for (i = 0; i < clk_table->num_entries; ++i) { + if (clk_table->entries[i].dispclk_mhz > max_dispclk_mhz) + max_dispclk_mhz = clk_table->entries[i].dispclk_mhz; + if (clk_table->entries[i].dppclk_mhz > max_dppclk_mhz) + max_dppclk_mhz = clk_table->entries[i].dppclk_mhz; + } + for (i = 0; i < clk_table->num_entries; i++) { /* loop backwards*/ for (closest_clk_lvl = 0, j = dcn3_01_soc.num_states - 1; j >= 0; j--) { @@ -353,8 +362,13 @@ void dcn301_fpu_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_p s[i].socclk_mhz = clk_table->entries[i].socclk_mhz; s[i].dram_speed_mts = clk_table->entries[i].memclk_mhz * 2; - s[i].dispclk_mhz = dcn3_01_soc.clock_limits[closest_clk_lvl].dispclk_mhz; - s[i].dppclk_mhz = dcn3_01_soc.clock_limits[closest_clk_lvl].dppclk_mhz; + /* Clocks independent of voltage level. */ + s[i].dispclk_mhz = max_dispclk_mhz ? max_dispclk_mhz : + dcn3_01_soc.clock_limits[closest_clk_lvl].dispclk_mhz; + + s[i].dppclk_mhz = max_dppclk_mhz ? 
max_dppclk_mhz : + dcn3_01_soc.clock_limits[closest_clk_lvl].dppclk_mhz; + s[i].dram_bw_per_chan_gbps = dcn3_01_soc.clock_limits[closest_clk_lvl].dram_bw_per_chan_gbps; s[i].dscclk_mhz = dcn3_01_soc.clock_limits[closest_clk_lvl].dscclk_mhz; @@ -435,12 +449,12 @@ void dcn301_fpu_calculate_wm_and_dlg(struct dc *dc, &context->bw_ctx.dml, pipes, pipe_cnt); /* WM Set C */ table_entry = &bw_params->wm_table.entries[WM_C]; - vlevel = min(max(vlevel_req, 2), vlevel_max); + vlevel = clamp(vlevel_req, 2, vlevel_max); calculate_wm_set_for_vlevel(vlevel, table_entry, &context->bw_ctx.bw.dcn.watermarks.c, &context->bw_ctx.dml, pipes, pipe_cnt); /* WM Set B */ table_entry = &bw_params->wm_table.entries[WM_B]; - vlevel = min(max(vlevel_req, 1), vlevel_max); + vlevel = clamp(vlevel_req, 1, vlevel_max); calculate_wm_set_for_vlevel(vlevel, table_entry, &context->bw_ctx.bw.dcn.watermarks.b, &context->bw_ctx.dml, pipes, pipe_cnt); diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn302/dcn302_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn302/dcn302_fpu.c index 8da97a96b1ce..8d7c59ec701d 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn302/dcn302_fpu.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn302/dcn302_fpu.c @@ -280,7 +280,7 @@ void dcn302_fpu_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_p j = 0; /* create the final dcfclk and uclk table */ while (i < num_dcfclk_sta_targets && j < num_uclk_states && num_states < DC__VOLTAGE_STATES) { - if (dcfclk_sta_targets[i] < optimal_dcfclk_for_uclk[j] && i < num_dcfclk_sta_targets) { + if (dcfclk_sta_targets[i] < optimal_dcfclk_for_uclk[j]) { dcfclk_mhz[num_states] = dcfclk_sta_targets[i]; dram_speed_mts[num_states++] = optimal_uclk_for_dcfclk_sta_targets[i++]; } else { diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn303/dcn303_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn303/dcn303_fpu.c index e968870a4b81..b5d3fd4c3694 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn303/dcn303_fpu.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn303/dcn303_fpu.c @@ -285,7 +285,7 @@ void dcn303_fpu_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_p j = 0; /* create the final dcfclk and uclk table */ while (i < num_dcfclk_sta_targets && j < num_uclk_states && num_states < DC__VOLTAGE_STATES) { - if (dcfclk_sta_targets[i] < optimal_dcfclk_for_uclk[j] && i < num_dcfclk_sta_targets) { + if (dcfclk_sta_targets[i] < optimal_dcfclk_for_uclk[j]) { dcfclk_mhz[num_states] = dcfclk_sta_targets[i]; dram_speed_mts[num_states++] = optimal_uclk_for_dcfclk_sta_targets[i++]; } else { diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn31/dcn31_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn31/dcn31_fpu.c index 17a21bcbde17..1a28061bb9ff 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn31/dcn31_fpu.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn31/dcn31_fpu.c @@ -808,6 +808,8 @@ void dcn316_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_param int dcn_get_max_non_odm_pix_rate_100hz(struct _vcs_dpi_soc_bounding_box_st *soc) { + dc_assert_fp_enabled(); + return soc->clock_limits[0].dispclk_mhz * 10000.0 / (1.0 + soc->dcn_downspread_percent / 100.0); } @@ -815,6 +817,8 @@ int dcn_get_approx_det_segs_required_for_pstate( struct _vcs_dpi_soc_bounding_box_st *soc, int pix_clk_100hz, int bpp, int seg_size_kb) { + dc_assert_fp_enabled(); + /* Roughly calculate required crb to hide latency. 
In practice there is slightly * more buffer available for latency hiding */ diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn31/dcn31_fpu.h b/drivers/gpu/drm/amd/display/dc/dml/dcn31/dcn31_fpu.h index d2ae43a82ba5..dfcc5d50071e 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn31/dcn31_fpu.h +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn31/dcn31_fpu.h @@ -55,5 +55,5 @@ int dcn_get_approx_det_segs_required_for_pstate( int dcn31x_populate_dml_pipes_from_context(struct dc *dc, struct dc_state *context, display_e2e_pipe_params_st *pipes, - bool fast_validate); + enum dc_validate_mode validate_mode); #endif /* __DCN31_FPU_H__*/ diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_rq_dlg_calc_31.c b/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_rq_dlg_calc_31.c index c46bda2141ac..bfeb01477f0c 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_rq_dlg_calc_31.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_rq_dlg_calc_31.c @@ -615,7 +615,7 @@ static void get_meta_and_pte_attr( if (hostvm_enable) rq_sizing_param->dpte_group_bytes = 512; else { - if (!surf_linear & (log2_dpte_req_height_ptes == 0) & surf_vert) //reduced, in this case, will have page fault within a group + if (!surf_linear && (log2_dpte_req_height_ptes == 0) && surf_vert) //reduced, in this case, will have page fault within a group rq_sizing_param->dpte_group_bytes = 512; else rq_sizing_param->dpte_group_bytes = 2048; diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn314/dcn314_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn314/dcn314_fpu.c index 5ed117e11aa2..df9d50b9b57c 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn314/dcn314_fpu.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn314/dcn314_fpu.c @@ -306,7 +306,7 @@ static unsigned int get_vertical_back_porch(struct dc_crtc_timing *timing) int dcn314_populate_dml_pipes_from_context_fpu(struct dc *dc, struct dc_state *context, display_e2e_pipe_params_st *pipes, - bool fast_validate) + enum dc_validate_mode validate_mode) { int i, pipe_cnt; struct resource_context *res_ctx = &context->res_ctx; @@ -316,7 +316,7 @@ int dcn314_populate_dml_pipes_from_context_fpu(struct dc *dc, struct dc_state *c dc_assert_fp_enabled(); - dcn31x_populate_dml_pipes_from_context(dc, context, pipes, fast_validate); + dcn31x_populate_dml_pipes_from_context(dc, context, pipes, validate_mode); for (i = 0, pipe_cnt = 0; i < dc->res_pool->pipe_count; i++) { struct dc_crtc_timing *timing; diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn314/dcn314_fpu.h b/drivers/gpu/drm/amd/display/dc/dml/dcn314/dcn314_fpu.h index d32c5bb99f4c..362ac79184ea 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn314/dcn314_fpu.h +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn314/dcn314_fpu.h @@ -35,6 +35,6 @@ void dcn314_update_bw_bounding_box_fpu(struct dc *dc, struct clk_bw_params *bw_params); int dcn314_populate_dml_pipes_from_context_fpu(struct dc *dc, struct dc_state *context, display_e2e_pipe_params_st *pipes, - bool fast_validate); + enum dc_validate_mode validate_mode); #endif diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn314/display_rq_dlg_calc_314.c b/drivers/gpu/drm/amd/display/dc/dml/dcn314/display_rq_dlg_calc_314.c index b7d2a0caec11..04df263ff65e 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn314/display_rq_dlg_calc_314.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn314/display_rq_dlg_calc_314.c @@ -703,7 +703,7 @@ static void get_meta_and_pte_attr( if (hostvm_enable) rq_sizing_param->dpte_group_bytes = 512; else { - if (!surf_linear & (log2_dpte_req_height_ptes == 0) & 
surf_vert) //reduced, in this case, will have page fault within a group + if (!surf_linear && (log2_dpte_req_height_ptes == 0) && surf_vert) //reduced, in this case, will have page fault within a group rq_sizing_param->dpte_group_bytes = 512; else rq_sizing_param->dpte_group_bytes = 2048; diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c index b0fc1fd20208..8a0f128722b0 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c @@ -31,7 +31,7 @@ // We need this includes for WATERMARKS_* defines #include "clk_mgr/dcn32/dcn32_smu13_driver_if.h" #include "dcn30/dcn30_resource.h" -#include "link.h" +#include "link_service.h" #include "dc_state_priv.h" #define DC_LOGGER_INIT(logger) @@ -290,7 +290,7 @@ int dcn32_find_dummy_latency_index_for_fw_based_mclk_switch(struct dc *dc, vba->DRAMClockChangeSupport[vlevel][context->bw_ctx.dml.vba.maxMpcComb] = temp_clock_change_support; context->bw_ctx.dml.soc.dram_clock_change_latency_us = dc->clk_mgr->bw_params->dummy_pstate_table[dummy_latency_index].dummy_pstate_latency_us; - dcn32_internal_validate_bw(dc, context, pipes, &pipe_cnt, &vlevel, false); + dcn32_internal_validate_bw(dc, context, pipes, &pipe_cnt, &vlevel, DC_VALIDATE_MODE_AND_PROGRAMMING); /* for subvp + DRR case, if subvp pipes are still present we support pstate */ if (vba->DRAMClockChangeSupport[vlevel][vba->maxMpcComb] == dm_dram_clock_change_unsupported && @@ -1479,7 +1479,7 @@ static bool dcn32_full_validate_bw_helper(struct dc *dc, /* Conditions for setting up phantom pipes for SubVP: * 1. Not force disable SubVP - * 2. Full update (i.e. !fast_validate) + * 2. Full update (i.e. DC_VALIDATE_MODE_AND_PROGRAMMING) * 3. Enough pipes are available to support SubVP (TODO: Which pipes will use VACTIVE / VBLANK / SUBVP?) * 4. Display configuration passes validation * 5. (Config doesn't support MCLK in VACTIVE/VBLANK || dc->debug.force_subvp_mclk_switch) @@ -1517,7 +1517,8 @@ static bool dcn32_full_validate_bw_helper(struct dc *dc, dc->res_pool->funcs->add_phantom_pipes(dc, context, pipes, *pipe_cnt, dc_pipe_idx); - *pipe_cnt = dc->res_pool->funcs->populate_dml_pipes(dc, context, pipes, false); + *pipe_cnt = dc->res_pool->funcs->populate_dml_pipes(dc, context, pipes, + DC_VALIDATE_MODE_AND_PROGRAMMING); // Populate dppclk to trigger a recalculate in dml_get_voltage_level // so the phantom pipe DLG params can be assigned correctly. 
pipes[0].clks_cfg.dppclk_mhz = get_dppclk_calculated(&context->bw_ctx.dml, pipes, *pipe_cnt, 0); @@ -1560,7 +1561,8 @@ static bool dcn32_full_validate_bw_helper(struct dc *dc, dc_state_remove_phantom_streams_and_planes(dc, context); dc_state_release_phantom_streams_and_planes(dc, context); vba->DRAMClockChangeSupport[*vlevel][vba->maxMpcComb] = dm_dram_clock_change_unsupported; - *pipe_cnt = dc->res_pool->funcs->populate_dml_pipes(dc, context, pipes, false); + *pipe_cnt = dc->res_pool->funcs->populate_dml_pipes(dc, context, pipes, + DC_VALIDATE_MODE_AND_PROGRAMMING); *vlevel = dml_get_voltage_level(&context->bw_ctx.dml, pipes, *pipe_cnt); /* This may adjust vlevel and maxMpcComb */ @@ -2138,7 +2140,7 @@ bool dcn32_internal_validate_bw(struct dc *dc, display_e2e_pipe_params_st *pipes, int *pipe_cnt_out, int *vlevel_out, - bool fast_validate) + enum dc_validate_mode validate_mode) { bool out = false; bool repopulate_pipes = false; @@ -2162,7 +2164,7 @@ bool dcn32_internal_validate_bw(struct dc *dc, for (i = 0; i < context->stream_count; i++) resource_update_pipes_for_stream_with_slice_count(context, dc->current_state, dc->res_pool, context->streams[i], 1); - pipe_cnt = dc->res_pool->funcs->populate_dml_pipes(dc, context, pipes, fast_validate); + pipe_cnt = dc->res_pool->funcs->populate_dml_pipes(dc, context, pipes, validate_mode); if (!pipe_cnt) { out = true; @@ -2172,13 +2174,13 @@ bool dcn32_internal_validate_bw(struct dc *dc, dml_log_pipe_params(&context->bw_ctx.dml, pipes, pipe_cnt); context->bw_ctx.dml.soc.max_vratio_pre = dcn32_determine_max_vratio_prefetch(dc, context); - if (!fast_validate) { + if (validate_mode == DC_VALIDATE_MODE_AND_PROGRAMMING) { if (!dcn32_full_validate_bw_helper(dc, context, pipes, &vlevel, split, merge, &pipe_cnt, &repopulate_pipes)) goto validate_fail; } - if (fast_validate || + if (validate_mode != DC_VALIDATE_MODE_AND_PROGRAMMING || (dc->debug.dml_disallow_alternate_prefetch_modes && (vlevel == context->bw_ctx.dml.soc.num_states || vba->DRAMClockChangeSupport[vlevel][vba->maxMpcComb] == dm_dram_clock_change_unsupported))) { @@ -2195,7 +2197,7 @@ bool dcn32_internal_validate_bw(struct dc *dc, context->bw_ctx.dml.soc.allow_for_pstate_or_stutter_in_vblank_final = dm_prefetch_support_none; - context->bw_ctx.dml.validate_max_state = fast_validate; + context->bw_ctx.dml.validate_max_state = (validate_mode != DC_VALIDATE_MODE_AND_PROGRAMMING); vlevel = dml_get_voltage_level(&context->bw_ctx.dml, pipes, pipe_cnt); context->bw_ctx.dml.validate_max_state = false; @@ -2247,7 +2249,7 @@ bool dcn32_internal_validate_bw(struct dc *dc, int flag_vlevel = vlevel; int i; - pipe_cnt = dc->res_pool->funcs->populate_dml_pipes(dc, context, pipes, fast_validate); + pipe_cnt = dc->res_pool->funcs->populate_dml_pipes(dc, context, pipes, validate_mode); if (!dc->config.enable_windowed_mpo_odm) dcn32_update_dml_pipes_odm_policy_based_on_context(dc, context, pipes); @@ -2343,7 +2345,7 @@ void dcn32_calculate_wm_and_dlg_fpu(struct dc *dc, struct dc_state *context, } context->bw_ctx.dml.soc.dram_clock_change_latency_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_A].dml_input.pstate_latency_us; - dcn32_internal_validate_bw(dc, context, pipes, &pipe_cnt, &vlevel, false); + dcn32_internal_validate_bw(dc, context, pipes, &pipe_cnt, &vlevel, DC_VALIDATE_MODE_AND_PROGRAMMING); maxMpcComb = context->bw_ctx.dml.vba.maxMpcComb; if (is_subvp_p_drr) { context->bw_ctx.dml.vba.DRAMClockChangeSupport[vlevel][maxMpcComb] = dm_dram_clock_change_vblank_w_mall_sub_vp; @@ -2389,7 +2391,8 @@ void 
dcn32_calculate_wm_and_dlg_fpu(struct dc *dc, struct dc_state *context, context->bw_ctx.dml.soc.fclk_change_latency_us = dc->clk_mgr->bw_params->dummy_pstate_table[dummy_latency_index].dummy_pstate_latency_us; } - dcn32_internal_validate_bw(dc, context, pipes, &pipe_cnt, &vlevel_temp, false); + dcn32_internal_validate_bw(dc, context, pipes, &pipe_cnt, &vlevel_temp, + DC_VALIDATE_MODE_AND_PROGRAMMING); if (vlevel_temp < vlevel) { vlevel = vlevel_temp; maxMpcComb = context->bw_ctx.dml.vba.maxMpcComb; @@ -2410,7 +2413,8 @@ void dcn32_calculate_wm_and_dlg_fpu(struct dc *dc, struct dc_state *context, stream_status->fpo_in_use = false; } context->bw_ctx.dml.soc.fclk_change_latency_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_A].dml_input.fclk_change_latency_us; - dcn32_internal_validate_bw(dc, context, pipes, &pipe_cnt, &vlevel, false); + dcn32_internal_validate_bw(dc, context, pipes, &pipe_cnt, &vlevel, + DC_VALIDATE_MODE_AND_PROGRAMMING); } } } @@ -3225,7 +3229,7 @@ void dcn32_update_bw_bounding_box_fpu(struct dc *dc, struct clk_bw_params *bw_pa j = 0; // create the final dcfclk and uclk table while (i < num_dcfclk_sta_targets && j < num_uclk_states && num_states < DC__VOLTAGE_STATES) { - if (dcfclk_sta_targets[i] < optimal_dcfclk_for_uclk[j] && i < num_dcfclk_sta_targets) { + if (dcfclk_sta_targets[i] < optimal_dcfclk_for_uclk[j]) { dcfclk_mhz[num_states] = dcfclk_sta_targets[i]; dram_speed_mts[num_states++] = optimal_uclk_for_dcfclk_sta_targets[i++]; } else { @@ -3397,7 +3401,7 @@ bool dcn32_allow_subvp_with_active_margin(struct pipe_ctx *pipe) uint32_t height = subvp_active_margin_list.res[i].height; refresh_rate = (pipe->stream->timing.pix_clk_100hz * (uint64_t)100 + - pipe->stream->timing.v_total * pipe->stream->timing.h_total - (uint64_t)1); + (uint64_t)pipe->stream->timing.v_total * pipe->stream->timing.h_total - (uint64_t)1); refresh_rate = div_u64(refresh_rate, pipe->stream->timing.v_total); refresh_rate = div_u64(refresh_rate, pipe->stream->timing.h_total); diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.h b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.h index 276e90e4e0ce..273d2bd79d85 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.h +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.h @@ -49,7 +49,7 @@ bool dcn32_internal_validate_bw(struct dc *dc, display_e2e_pipe_params_st *pipes, int *pipe_cnt_out, int *vlevel_out, - bool fast_validate); + enum dc_validate_mode validate_mode); void dcn32_calculate_wm_and_dlg_fpu(struct dc *dc, struct dc_state *context, display_e2e_pipe_params_st *pipes, diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_rq_dlg_calc_32.c b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_rq_dlg_calc_32.c index 9ba6cb67655f..6c75aa82327a 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_rq_dlg_calc_32.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_rq_dlg_calc_32.c @@ -139,7 +139,6 @@ void dml32_rq_dlg_get_rq_reg(display_rq_regs_st *rq_regs, if (dual_plane) { unsigned int p1_pte_row_height_linear = get_dpte_row_height_linear_c(mode_lib, e2e_pipe_param, num_pipes, pipe_idx); - ; if (src->sw_mode == dm_sw_linear) ASSERT(p1_pte_row_height_linear >= 8); diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn321/dcn321_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn321/dcn321_fpu.c index 8839faf42207..e0a1dc89ce43 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn321/dcn321_fpu.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn321/dcn321_fpu.c @@ -779,7 +779,7 @@ void 
dcn321_update_bw_bounding_box_fpu(struct dc *dc, struct clk_bw_params *bw_p j = 0; // create the final dcfclk and uclk table while (i < num_dcfclk_sta_targets && j < num_uclk_states && num_states < DC__VOLTAGE_STATES) { - if (dcfclk_sta_targets[i] < optimal_dcfclk_for_uclk[j] && i < num_dcfclk_sta_targets) { + if (dcfclk_sta_targets[i] < optimal_dcfclk_for_uclk[j]) { dcfclk_mhz[num_states] = dcfclk_sta_targets[i]; dram_speed_mts[num_states++] = optimal_uclk_for_dcfclk_sta_targets[i++]; } else { diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.c index 92f0a099d089..817a370e80a7 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.c @@ -31,7 +31,7 @@ #include "dml/dcn31/dcn31_fpu.h" #include "dml/dml_inline_defs.h" -#include "link.h" +#include "link_service.h" #define DC_LOGGER_INIT(logger) @@ -437,7 +437,7 @@ static unsigned int get_vertical_back_porch(struct dc_crtc_timing *timing) int dcn35_populate_dml_pipes_from_context_fpu(struct dc *dc, struct dc_state *context, display_e2e_pipe_params_st *pipes, - bool fast_validate) + enum dc_validate_mode validate_mode) { int i, pipe_cnt; struct resource_context *res_ctx = &context->res_ctx; @@ -445,8 +445,10 @@ int dcn35_populate_dml_pipes_from_context_fpu(struct dc *dc, bool upscaled = false; const unsigned int max_allowed_vblank_nom = 1023; + dc_assert_fp_enabled(); + dcn31_populate_dml_pipes_from_context(dc, context, pipes, - fast_validate); + validate_mode); for (i = 0, pipe_cnt = 0; i < dc->res_pool->pipe_count; i++) { struct dc_crtc_timing *timing; @@ -498,9 +500,7 @@ int dcn35_populate_dml_pipes_from_context_fpu(struct dc *dc, pipes[pipe_cnt].pipe.src.unbounded_req_mode = false; - DC_FP_START(); dcn31_zero_pipe_dcc_fraction(pipes, pipe_cnt); - DC_FP_END(); pipes[pipe_cnt].pipe.dest.vfront_porch = timing->v_front_porch; pipes[pipe_cnt].pipe.src.dcc_rate = 3; @@ -581,6 +581,8 @@ void dcn35_decide_zstate_support(struct dc *dc, struct dc_state *context) unsigned int i, plane_count = 0; DC_LOGGER_INIT(dc->ctx->logger); + dc_assert_fp_enabled(); + for (i = 0; i < dc->res_pool->pipe_count; i++) { if (context->res_ctx.pipe_ctx[i].plane_state) plane_count++; diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.h b/drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.h index 067480fc3691..d121c5afce71 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.h +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.h @@ -37,7 +37,7 @@ void dcn35_update_bw_bounding_box_fpu(struct dc *dc, int dcn35_populate_dml_pipes_from_context_fpu(struct dc *dc, struct dc_state *context, display_e2e_pipe_params_st *pipes, - bool fast_validate); + enum dc_validate_mode validate_mode); void dcn35_decide_zstate_support(struct dc *dc, struct dc_state *context); diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn351/dcn351_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn351/dcn351_fpu.c index 17d0b4923b0c..77023b619f1e 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn351/dcn351_fpu.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn351/dcn351_fpu.c @@ -10,7 +10,7 @@ #include "dml/dcn35/dcn35_fpu.h" #include "dml/dml_inline_defs.h" -#include "link.h" +#include "link_service.h" #define DC_LOGGER_INIT(logger) @@ -470,7 +470,7 @@ static unsigned int get_vertical_back_porch(struct dc_crtc_timing *timing) int dcn351_populate_dml_pipes_from_context_fpu(struct dc *dc, struct dc_state *context, display_e2e_pipe_params_st *pipes, - 
bool fast_validate) + enum dc_validate_mode validate_mode) { int i, pipe_cnt; struct resource_context *res_ctx = &context->res_ctx; @@ -478,8 +478,10 @@ int dcn351_populate_dml_pipes_from_context_fpu(struct dc *dc, bool upscaled = false; const unsigned int max_allowed_vblank_nom = 1023; + dc_assert_fp_enabled(); + dcn31_populate_dml_pipes_from_context(dc, context, pipes, - fast_validate); + validate_mode); for (i = 0, pipe_cnt = 0; i < dc->res_pool->pipe_count; i++) { struct dc_crtc_timing *timing; @@ -531,9 +533,7 @@ int dcn351_populate_dml_pipes_from_context_fpu(struct dc *dc, pipes[pipe_cnt].pipe.src.unbounded_req_mode = false; - DC_FP_START(); dcn31_zero_pipe_dcc_fraction(pipes, pipe_cnt); - DC_FP_END(); pipes[pipe_cnt].pipe.dest.vfront_porch = timing->v_front_porch; pipes[pipe_cnt].pipe.src.dcc_rate = 3; diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn351/dcn351_fpu.h b/drivers/gpu/drm/amd/display/dc/dml/dcn351/dcn351_fpu.h index f93efab9a668..f71d9d8d0759 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn351/dcn351_fpu.h +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn351/dcn351_fpu.h @@ -12,7 +12,7 @@ void dcn351_update_bw_bounding_box_fpu(struct dc *dc, int dcn351_populate_dml_pipes_from_context_fpu(struct dc *dc, struct dc_state *context, display_e2e_pipe_params_st *pipes, - bool fast_validate); + enum dc_validate_mode validate_mode); void dcn351_decide_zstate_support(struct dc *dc, struct dc_state *context); diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn401/dcn401_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn401/dcn401_fpu.c deleted file mode 100644 index 4fbecb5ff349..000000000000 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn401/dcn401_fpu.c +++ /dev/null @@ -1,239 +0,0 @@ -// SPDX-License-Identifier: MIT -// -// Copyright 2024 Advanced Micro Devices, Inc. 
- -#include "dcn401_fpu.h" -#include "dcn401/dcn401_resource.h" -// We need this includes for WATERMARKS_* defines -#include "clk_mgr/dcn401/dcn401_smu14_driver_if.h" -#include "link.h" - -#define DC_LOGGER_INIT(logger) - -void dcn401_build_wm_range_table_fpu(struct clk_mgr *clk_mgr) -{ - /* defaults */ - double pstate_latency_us = clk_mgr->ctx->dc->dml.soc.dram_clock_change_latency_us; - double fclk_change_latency_us = clk_mgr->ctx->dc->dml.soc.fclk_change_latency_us; - double sr_exit_time_us = clk_mgr->ctx->dc->dml.soc.sr_exit_time_us; - double sr_enter_plus_exit_time_us = clk_mgr->ctx->dc->dml.soc.sr_enter_plus_exit_time_us; - /* For min clocks use as reported by PM FW and report those as min */ - uint16_t min_uclk_mhz = clk_mgr->bw_params->clk_table.entries[0].memclk_mhz; - uint16_t min_dcfclk_mhz = clk_mgr->bw_params->clk_table.entries[0].dcfclk_mhz; - uint16_t setb_min_uclk_mhz = min_uclk_mhz; - uint16_t dcfclk_mhz_for_the_second_state = clk_mgr->ctx->dc->dml.soc.clock_limits[2].dcfclk_mhz; - - dc_assert_fp_enabled(); - - /* For Set B ranges use min clocks state 2 when available, and report those to PM FW */ - if (dcfclk_mhz_for_the_second_state) - clk_mgr->bw_params->wm_table.nv_entries[WM_B].pmfw_breakdown.min_dcfclk = dcfclk_mhz_for_the_second_state; - else - clk_mgr->bw_params->wm_table.nv_entries[WM_B].pmfw_breakdown.min_dcfclk = clk_mgr->bw_params->clk_table.entries[0].dcfclk_mhz; - - if (clk_mgr->bw_params->clk_table.entries[2].memclk_mhz) - setb_min_uclk_mhz = clk_mgr->bw_params->clk_table.entries[2].memclk_mhz; - - /* Set A - Normal - default values */ - clk_mgr->bw_params->wm_table.nv_entries[WM_A].valid = true; - clk_mgr->bw_params->wm_table.nv_entries[WM_A].dml_input.pstate_latency_us = pstate_latency_us; - clk_mgr->bw_params->wm_table.nv_entries[WM_A].dml_input.fclk_change_latency_us = fclk_change_latency_us; - clk_mgr->bw_params->wm_table.nv_entries[WM_A].dml_input.sr_exit_time_us = sr_exit_time_us; - clk_mgr->bw_params->wm_table.nv_entries[WM_A].dml_input.sr_enter_plus_exit_time_us = sr_enter_plus_exit_time_us; - clk_mgr->bw_params->wm_table.nv_entries[WM_A].pmfw_breakdown.wm_type = WATERMARKS_CLOCK_RANGE; - clk_mgr->bw_params->wm_table.nv_entries[WM_A].pmfw_breakdown.min_dcfclk = min_dcfclk_mhz; - clk_mgr->bw_params->wm_table.nv_entries[WM_A].pmfw_breakdown.max_dcfclk = 0xFFFF; - clk_mgr->bw_params->wm_table.nv_entries[WM_A].pmfw_breakdown.min_uclk = min_uclk_mhz; - clk_mgr->bw_params->wm_table.nv_entries[WM_A].pmfw_breakdown.max_uclk = 0xFFFF; - - /* Set B - Performance - higher clocks, using DPM[2] DCFCLK and UCLK */ - clk_mgr->bw_params->wm_table.nv_entries[WM_B].valid = true; - clk_mgr->bw_params->wm_table.nv_entries[WM_B].dml_input.pstate_latency_us = pstate_latency_us; - clk_mgr->bw_params->wm_table.nv_entries[WM_B].dml_input.fclk_change_latency_us = fclk_change_latency_us; - clk_mgr->bw_params->wm_table.nv_entries[WM_B].dml_input.sr_exit_time_us = sr_exit_time_us; - clk_mgr->bw_params->wm_table.nv_entries[WM_B].dml_input.sr_enter_plus_exit_time_us = sr_enter_plus_exit_time_us; - clk_mgr->bw_params->wm_table.nv_entries[WM_B].pmfw_breakdown.wm_type = WATERMARKS_CLOCK_RANGE; - clk_mgr->bw_params->wm_table.nv_entries[WM_B].pmfw_breakdown.max_dcfclk = 0xFFFF; - clk_mgr->bw_params->wm_table.nv_entries[WM_B].pmfw_breakdown.min_uclk = setb_min_uclk_mhz; - clk_mgr->bw_params->wm_table.nv_entries[WM_B].pmfw_breakdown.max_uclk = 0xFFFF; - - /* Set C - Dummy P-State - P-State latency set to "dummy p-state" value */ - /* 'DalDummyClockChangeLatencyNs' registry key option 
set to 0x7FFFFFFF can be used to disable Set C for dummy p-state */ - if (clk_mgr->ctx->dc->bb_overrides.dummy_clock_change_latency_ns != 0x7FFFFFFF) { - clk_mgr->bw_params->wm_table.nv_entries[WM_C].valid = true; - clk_mgr->bw_params->wm_table.nv_entries[WM_C].dml_input.pstate_latency_us = 50; - clk_mgr->bw_params->wm_table.nv_entries[WM_C].dml_input.fclk_change_latency_us = fclk_change_latency_us; - clk_mgr->bw_params->wm_table.nv_entries[WM_C].dml_input.sr_exit_time_us = sr_exit_time_us; - clk_mgr->bw_params->wm_table.nv_entries[WM_C].dml_input.sr_enter_plus_exit_time_us = sr_enter_plus_exit_time_us; - clk_mgr->bw_params->wm_table.nv_entries[WM_C].pmfw_breakdown.wm_type = WATERMARKS_DUMMY_PSTATE; - clk_mgr->bw_params->wm_table.nv_entries[WM_C].pmfw_breakdown.min_dcfclk = min_dcfclk_mhz; - clk_mgr->bw_params->wm_table.nv_entries[WM_C].pmfw_breakdown.max_dcfclk = 0xFFFF; - clk_mgr->bw_params->wm_table.nv_entries[WM_C].pmfw_breakdown.min_uclk = min_uclk_mhz; - clk_mgr->bw_params->wm_table.nv_entries[WM_C].pmfw_breakdown.max_uclk = 0xFFFF; - clk_mgr->bw_params->dummy_pstate_table[0].dram_speed_mts = clk_mgr->bw_params->clk_table.entries[0].memclk_mhz * 16; - clk_mgr->bw_params->dummy_pstate_table[0].dummy_pstate_latency_us = 50; - clk_mgr->bw_params->dummy_pstate_table[1].dram_speed_mts = clk_mgr->bw_params->clk_table.entries[1].memclk_mhz * 16; - clk_mgr->bw_params->dummy_pstate_table[1].dummy_pstate_latency_us = 9; - clk_mgr->bw_params->dummy_pstate_table[2].dram_speed_mts = clk_mgr->bw_params->clk_table.entries[2].memclk_mhz * 16; - clk_mgr->bw_params->dummy_pstate_table[2].dummy_pstate_latency_us = 8; - clk_mgr->bw_params->dummy_pstate_table[3].dram_speed_mts = clk_mgr->bw_params->clk_table.entries[3].memclk_mhz * 16; - clk_mgr->bw_params->dummy_pstate_table[3].dummy_pstate_latency_us = 5; - } - /* Set D - MALL - SR enter and exit time specific to MALL, TBD after bringup or later phase for now use DRAM values / 2 */ - /* For MALL DRAM clock change latency is N/A, for watermak calculations use lowest value dummy P state latency */ - clk_mgr->bw_params->wm_table.nv_entries[WM_D].valid = true; - clk_mgr->bw_params->wm_table.nv_entries[WM_D].dml_input.pstate_latency_us = clk_mgr->bw_params->dummy_pstate_table[3].dummy_pstate_latency_us; - clk_mgr->bw_params->wm_table.nv_entries[WM_D].dml_input.fclk_change_latency_us = fclk_change_latency_us; - clk_mgr->bw_params->wm_table.nv_entries[WM_D].dml_input.sr_exit_time_us = sr_exit_time_us / 2; // TBD - clk_mgr->bw_params->wm_table.nv_entries[WM_D].dml_input.sr_enter_plus_exit_time_us = sr_enter_plus_exit_time_us / 2; // TBD - clk_mgr->bw_params->wm_table.nv_entries[WM_D].pmfw_breakdown.wm_type = WATERMARKS_MALL; - clk_mgr->bw_params->wm_table.nv_entries[WM_D].pmfw_breakdown.min_dcfclk = min_dcfclk_mhz; - clk_mgr->bw_params->wm_table.nv_entries[WM_D].pmfw_breakdown.max_dcfclk = 0xFFFF; - clk_mgr->bw_params->wm_table.nv_entries[WM_D].pmfw_breakdown.min_uclk = min_uclk_mhz; - clk_mgr->bw_params->wm_table.nv_entries[WM_D].pmfw_breakdown.max_uclk = 0xFFFF; -} - -/* - * dcn401_update_bw_bounding_box - * - * This would override some dcn4_01 ip_or_soc initial parameters hardcoded from - * spreadsheet with actual values as per dGPU SKU: - * - with passed few options from dc->config - * - with dentist_vco_frequency from Clk Mgr (currently hardcoded, but might - * need to get it from PM FW) - * - with passed latency values (passed in ns units) in dc-> bb override for - * debugging purposes - * - with passed latencies from VBIOS (in 100_ns units) if available 
for - * certain dGPU SKU - * - with number of DRAM channels from VBIOS (which differ for certain dGPU SKU - * of the same ASIC) - * - clocks levels with passed clk_table entries from Clk Mgr as reported by PM - * FW for different clocks (which might differ for certain dGPU SKU of the - * same ASIC) - */ -void dcn401_update_bw_bounding_box_fpu(struct dc *dc, struct clk_bw_params *bw_params) -{ - dc_assert_fp_enabled(); - - /* Override from passed dc->bb_overrides if available*/ - if (dc->bb_overrides.sr_exit_time_ns) - dc->dml2_options.bbox_overrides.sr_exit_latency_us = - dc->bb_overrides.sr_exit_time_ns / 1000.0; - - if (dc->bb_overrides.sr_enter_plus_exit_time_ns) - dc->dml2_options.bbox_overrides.sr_enter_plus_exit_latency_us = - dc->bb_overrides.sr_enter_plus_exit_time_ns / 1000.0; - - if (dc->bb_overrides.urgent_latency_ns) - dc->dml2_options.bbox_overrides.urgent_latency_us = - dc->bb_overrides.urgent_latency_ns / 1000.0; - - if (dc->bb_overrides.dram_clock_change_latency_ns) - dc->dml2_options.bbox_overrides.dram_clock_change_latency_us = - dc->bb_overrides.dram_clock_change_latency_ns / 1000.0; - - if (dc->bb_overrides.fclk_clock_change_latency_ns) - dc->dml2_options.bbox_overrides.fclk_change_latency_us = - dc->bb_overrides.fclk_clock_change_latency_ns / 1000; - - /* Override from VBIOS if VBIOS bb_info available */ - if (dc->ctx->dc_bios->funcs->get_soc_bb_info) { - struct bp_soc_bb_info bb_info = {0}; - if (dc->ctx->dc_bios->funcs->get_soc_bb_info(dc->ctx->dc_bios, &bb_info) == BP_RESULT_OK) { - if (bb_info.dram_clock_change_latency_100ns > 0) - dc->dml2_options.bbox_overrides.dram_clock_change_latency_us = - bb_info.dram_clock_change_latency_100ns * 10; - - if (bb_info.dram_sr_enter_exit_latency_100ns > 0) - dc->dml2_options.bbox_overrides.sr_enter_plus_exit_latency_us = - bb_info.dram_sr_enter_exit_latency_100ns * 10; - - if (bb_info.dram_sr_exit_latency_100ns > 0) - dc->dml2_options.bbox_overrides.sr_exit_latency_us = - bb_info.dram_sr_exit_latency_100ns * 10; - } - } - - /* Override from VBIOS for num_chan */ - if (dc->ctx->dc_bios->vram_info.num_chans) { - dc->dml2_options.bbox_overrides.dram_num_chan = - dc->ctx->dc_bios->vram_info.num_chans; - - } - - if (dc->ctx->dc_bios->vram_info.dram_channel_width_bytes) - dc->dml2_options.bbox_overrides.dram_chanel_width_bytes = - dc->ctx->dc_bios->vram_info.dram_channel_width_bytes; - - dc->dml2_options.bbox_overrides.disp_pll_vco_speed_mhz = dc->clk_mgr->dentist_vco_freq_khz / 1000.0; - dc->dml2_options.bbox_overrides.xtalclk_mhz = dc->ctx->dc_bios->fw_info.pll_info.crystal_frequency / 1000.0; - dc->dml2_options.bbox_overrides.dchub_refclk_mhz = dc->res_pool->ref_clocks.dchub_ref_clock_inKhz / 1000.0; - dc->dml2_options.bbox_overrides.dprefclk_mhz = dc->clk_mgr->dprefclk_khz / 1000.0; - - if (dc->clk_mgr->bw_params->clk_table.num_entries > 1) { - unsigned int i = 0; - - dc->dml2_options.bbox_overrides.clks_table.num_states = dc->clk_mgr->bw_params->clk_table.num_entries; - - dc->dml2_options.bbox_overrides.clks_table.num_entries_per_clk.num_dcfclk_levels = - dc->clk_mgr->bw_params->clk_table.num_entries_per_clk.num_dcfclk_levels; - - dc->dml2_options.bbox_overrides.clks_table.num_entries_per_clk.num_fclk_levels = - dc->clk_mgr->bw_params->clk_table.num_entries_per_clk.num_fclk_levels; - - dc->dml2_options.bbox_overrides.clks_table.num_entries_per_clk.num_memclk_levels = - dc->clk_mgr->bw_params->clk_table.num_entries_per_clk.num_memclk_levels; - - dc->dml2_options.bbox_overrides.clks_table.num_entries_per_clk.num_socclk_levels = - 
dc->clk_mgr->bw_params->clk_table.num_entries_per_clk.num_socclk_levels; - - dc->dml2_options.bbox_overrides.clks_table.num_entries_per_clk.num_dtbclk_levels = - dc->clk_mgr->bw_params->clk_table.num_entries_per_clk.num_dtbclk_levels; - - dc->dml2_options.bbox_overrides.clks_table.num_entries_per_clk.num_dispclk_levels = - dc->clk_mgr->bw_params->clk_table.num_entries_per_clk.num_dispclk_levels; - - dc->dml2_options.bbox_overrides.clks_table.num_entries_per_clk.num_dppclk_levels = - dc->clk_mgr->bw_params->clk_table.num_entries_per_clk.num_dppclk_levels; - - for (i = 0; i < dc->clk_mgr->bw_params->clk_table.num_entries_per_clk.num_dcfclk_levels; i++) { - if (dc->clk_mgr->bw_params->clk_table.entries[i].dcfclk_mhz) - dc->dml2_options.bbox_overrides.clks_table.clk_entries[i].dcfclk_mhz = - dc->clk_mgr->bw_params->clk_table.entries[i].dcfclk_mhz; - } - - for (i = 0; i < dc->clk_mgr->bw_params->clk_table.num_entries_per_clk.num_fclk_levels; i++) { - if (dc->clk_mgr->bw_params->clk_table.entries[i].fclk_mhz) - dc->dml2_options.bbox_overrides.clks_table.clk_entries[i].fclk_mhz = - dc->clk_mgr->bw_params->clk_table.entries[i].fclk_mhz; - } - - for (i = 0; i < dc->clk_mgr->bw_params->clk_table.num_entries_per_clk.num_memclk_levels; i++) { - if (dc->clk_mgr->bw_params->clk_table.entries[i].memclk_mhz) - dc->dml2_options.bbox_overrides.clks_table.clk_entries[i].memclk_mhz = - dc->clk_mgr->bw_params->clk_table.entries[i].memclk_mhz; - } - - for (i = 0; i < dc->clk_mgr->bw_params->clk_table.num_entries_per_clk.num_socclk_levels; i++) { - if (dc->clk_mgr->bw_params->clk_table.entries[i].socclk_mhz) - dc->dml2_options.bbox_overrides.clks_table.clk_entries[i].socclk_mhz = - dc->clk_mgr->bw_params->clk_table.entries[i].socclk_mhz; - } - - for (i = 0; i < dc->clk_mgr->bw_params->clk_table.num_entries_per_clk.num_dtbclk_levels; i++) { - if (dc->clk_mgr->bw_params->clk_table.entries[i].dtbclk_mhz) - dc->dml2_options.bbox_overrides.clks_table.clk_entries[i].dtbclk_mhz = - dc->clk_mgr->bw_params->clk_table.entries[i].dtbclk_mhz; - } - - for (i = 0; i < dc->clk_mgr->bw_params->clk_table.num_entries_per_clk.num_dispclk_levels; i++) { - if (dc->clk_mgr->bw_params->clk_table.entries[i].dispclk_mhz) { - dc->dml2_options.bbox_overrides.clks_table.clk_entries[i].dispclk_mhz = - dc->clk_mgr->bw_params->clk_table.entries[i].dispclk_mhz; - dc->dml2_options.bbox_overrides.clks_table.clk_entries[i].dppclk_mhz = - dc->clk_mgr->bw_params->clk_table.entries[i].dispclk_mhz; - } - } - } -} - diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn401/dcn401_fpu.h b/drivers/gpu/drm/amd/display/dc/dml/dcn401/dcn401_fpu.h deleted file mode 100644 index 329f1788843c..000000000000 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn401/dcn401_fpu.h +++ /dev/null @@ -1,14 +0,0 @@ -// SPDX-License-Identifier: MIT -// -// Copyright 2024 Advanced Micro Devices, Inc. 
- -#ifndef __DCN401_FPU_H__ -#define __DCN401_FPU_H__ - -#include "clk_mgr.h" - -void dcn401_build_wm_range_table_fpu(struct clk_mgr *clk_mgr); - -void dcn401_update_bw_bounding_box_fpu(struct dc *dc, struct clk_bw_params *bw_params); - -#endif diff --git a/drivers/gpu/drm/amd/display/dc/dml2/Makefile b/drivers/gpu/drm/amd/display/dc/dml2/Makefile index 157ecf008d6c..4c21ce42054c 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dml2/Makefile @@ -81,10 +81,11 @@ AMD_DAL_DML2 = $(addprefix $(AMDDALPATH)/dc/dml2/,$(DML2)) AMD_DISPLAY_FILES += $(AMD_DAL_DML2) -CFLAGS_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_top/dml2_top_interfaces.o := $(dml2_ccflags) -CFLAGS_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_top/dml2_top_soc15.o := $(dml2_ccflags) CFLAGS_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4.o := $(dml2_ccflags) CFLAGS_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.o := $(dml2_ccflags) $(frame_warn_flag) +CFLAGS_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_core/dml2_core_utils.o := $(dml2_ccflags) $(frame_warn_flag) +CFLAGS_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_top/dml2_top_interfaces.o := $(dml2_ccflags) +CFLAGS_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_top/dml2_top_soc15.o := $(dml2_ccflags) CFLAGS_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_core/dml2_core_factory.o := $(dml2_ccflags) CFLAGS_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_dpmm/dml2_dpmm_dcn4.o := $(dml2_ccflags) CFLAGS_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_dpmm/dml2_dpmm_factory.o := $(dml2_ccflags) @@ -94,17 +95,16 @@ CFLAGS_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn3.o := $(dml2_ccflag CFLAGS_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn4_fams2.o := $(dml2_ccflags) CFLAGS_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_factory.o := $(dml2_ccflags) CFLAGS_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_standalone_libraries/lib_float_math.o := $(dml2_ccflags) - - CFLAGS_$(AMDDALPATH)/dc/dml2/dml21/src/dml21_wrapper.o := $(dml2_ccflags) CFLAGS_$(AMDDALPATH)/dc/dml2/dml21/dml21_translation_helper.o := $(dml2_ccflags) CFLAGS_$(AMDDALPATH)/dc/dml2/dml21/dml21_utils.o := $(dml2_ccflags) -CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_top/dml2_top_interfaces.o := $(dml2_rcflags) -CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_top/dml2_top_soc15.o := $(dml2_rcflags) CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4.o := $(dml2_rcflags) CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.o := $(dml2_rcflags) CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_core/dml2_core_factory.o := $(dml2_rcflags) +CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_core/dml2_core_utils.o := $(dml2_rcflags) +CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_top/dml2_top_interfaces.o := $(dml2_rcflags) +CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_top/dml2_top_soc15.o := $(dml2_rcflags) CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_dpmm/dml2_dpmm_dcn4.o := $(dml2_rcflags) CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_dpmm/dml2_dpmm_factory.o := $(dml2_rcflags) CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_mcg/dml2_mcg_dcn4.o := $(dml2_rcflags) @@ -120,6 +120,7 @@ CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml2/dml21/dml21_utils.o := $(dml2_rcflags) DML21 := src/dml2_top/dml2_top_interfaces.o DML21 += src/dml2_top/dml2_top_soc15.o DML21 += src/dml2_core/dml2_core_dcn4.o +DML21 += src/dml2_core/dml2_core_utils.o DML21 += src/dml2_core/dml2_core_factory.o DML21 += src/dml2_core/dml2_core_dcn4_calcs.o DML21 += src/dml2_dpmm/dml2_dpmm_dcn4.o diff 
--git a/drivers/gpu/drm/amd/display/dc/dml2/display_mode_core.c b/drivers/gpu/drm/amd/display/dc/dml2/display_mode_core.c index 7ae9c0ba0c9e..4b9b2e84d381 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/display_mode_core.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/display_mode_core.c @@ -6529,7 +6529,7 @@ static noinline_for_stack void dml_prefetch_check(struct display_mode_lib_st *mo mode_lib->ms.TotImmediateFlipBytes = 0; for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) { if (!(mode_lib->ms.policy.ImmediateFlipRequirement[k] == dml_immediate_flip_not_required)) { - mode_lib->ms.TotImmediateFlipBytes = mode_lib->ms.TotImmediateFlipBytes + mode_lib->ms.NoOfDPP[j][k] * mode_lib->ms.PDEAndMetaPTEBytesPerFrame[j][k] + mode_lib->ms.MetaRowBytes[j][k]; + mode_lib->ms.TotImmediateFlipBytes = mode_lib->ms.TotImmediateFlipBytes + mode_lib->ms.NoOfDPP[j][k] * (mode_lib->ms.PDEAndMetaPTEBytesPerFrame[j][k] + mode_lib->ms.MetaRowBytes[j][k]); if (mode_lib->ms.use_one_row_for_frame_flip[j][k]) { mode_lib->ms.TotImmediateFlipBytes = mode_lib->ms.TotImmediateFlipBytes + mode_lib->ms.NoOfDPP[j][k] * (2 * mode_lib->ms.DPTEBytesPerRow[j][k]); } else { @@ -10189,7 +10189,7 @@ dml_uint_t dml_mode_support_ex(struct dml_mode_support_ex_params_st *in_out_para result = mode_support_pwr_states(&in_out_params->out_lowest_state_idx, in_out_params->mode_lib, in_out_params->in_display_cfg, - 0, + in_out_params->in_start_state_idx, in_out_params->mode_lib->states.num_states - 1); if (result) diff --git a/drivers/gpu/drm/amd/display/dc/dml2/display_mode_core_structs.h b/drivers/gpu/drm/amd/display/dc/dml2/display_mode_core_structs.h index 0670e4dc4fd9..dbeb08466092 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/display_mode_core_structs.h +++ b/drivers/gpu/drm/amd/display/dc/dml2/display_mode_core_structs.h @@ -1917,6 +1917,7 @@ struct display_mode_lib_st { struct dml_mode_support_ex_params_st { struct display_mode_lib_st *mode_lib; const struct dml_display_cfg_st *in_display_cfg; + dml_uint_t in_start_state_idx; dml_uint_t out_lowest_state_idx; struct dml_mode_support_info_st *out_evaluation_info; }; diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c index d47cacfdb695..bf5e7f4e0416 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c @@ -2,338 +2,73 @@ // // Copyright 2024 Advanced Micro Devices, Inc. 
- #include "dml21_wrapper.h" #include "dml2_core_dcn4_calcs.h" #include "dml2_internal_shared_types.h" #include "dml2_internal_types.h" #include "dml21_utils.h" #include "dml21_translation_helper.h" -#include "bounding_boxes/dcn4_soc_bb.h" - -static void dml21_init_socbb_params(struct dml2_initialize_instance_in_out *dml_init, - const struct dml2_configuration_options *config, - const struct dc *in_dc) -{ - const struct dml2_soc_bb *soc_bb; - const struct dml2_soc_qos_parameters *qos_params; - - switch (in_dc->ctx->dce_version) { - case DCN_VERSION_4_01: - default: - if (config->bb_from_dmub) - soc_bb = config->bb_from_dmub; - else - soc_bb = &dml2_socbb_dcn401; - - qos_params = &dml_dcn4_variant_a_soc_qos_params; - } +#include "soc_and_ip_translator.h" - /* patch soc bb */ - memcpy(&dml_init->soc_bb, soc_bb, sizeof(struct dml2_soc_bb)); - - /* patch qos params */ - memcpy(&dml_init->soc_bb.qos_parameters, qos_params, sizeof(struct dml2_soc_qos_parameters)); -} - -static void dml21_external_socbb_params(struct dml2_initialize_instance_in_out *dml_init, - const struct dml2_configuration_options *config) -{ - memcpy(&dml_init->soc_bb, &config->external_socbb_ip_params->soc_bb, sizeof(struct dml2_soc_bb)); -} - -static void dml21_external_ip_params(struct dml2_initialize_instance_in_out *dml_init, +static void dml21_populate_pmo_options(struct dml2_pmo_options *pmo_options, + const struct dc *in_dc, const struct dml2_configuration_options *config) { - memcpy(&dml_init->ip_caps, &config->external_socbb_ip_params->ip_params, sizeof(struct dml2_ip_capabilities)); + bool disable_fams2 = !in_dc->debug.fams2_config.bits.enable; + + /* ODM options */ + pmo_options->disable_dyn_odm = !config->minimize_dispclk_using_odm; + pmo_options->disable_dyn_odm_for_multi_stream = true; + pmo_options->disable_dyn_odm_for_stream_with_svp = true; + + pmo_options->disable_vblank = ((in_dc->debug.dml21_disable_pstate_method_mask >> 1) & 1); + + /* NOTE: DRR and SubVP Require FAMS2 */ + pmo_options->disable_svp = ((in_dc->debug.dml21_disable_pstate_method_mask >> 2) & 1) || + in_dc->debug.force_disable_subvp || + disable_fams2; + pmo_options->disable_drr_clamped = ((in_dc->debug.dml21_disable_pstate_method_mask >> 3) & 1) || + disable_fams2; + pmo_options->disable_drr_var = ((in_dc->debug.dml21_disable_pstate_method_mask >> 4) & 1) || + disable_fams2; + pmo_options->disable_fams2 = disable_fams2; + + pmo_options->disable_drr_var_when_var_active = in_dc->debug.disable_fams_gaming == INGAME_FAMS_DISABLE || + in_dc->debug.disable_fams_gaming == INGAME_FAMS_MULTI_DISP_CLAMPED_ONLY; + pmo_options->disable_drr_clamped_when_var_active = in_dc->debug.disable_fams_gaming == INGAME_FAMS_DISABLE; } -static void dml21_init_ip_params(struct dml2_initialize_instance_in_out *dml_init, - const struct dml2_configuration_options *config, - const struct dc *in_dc) +static enum dml2_project_id dml21_dcn_revision_to_dml2_project_id(enum dce_version dcn_version) { - const struct dml2_ip_capabilities *ip_caps; - - switch (in_dc->ctx->dce_version) { + enum dml2_project_id project_id; + switch (dcn_version) { case DCN_VERSION_4_01: + project_id = dml2_project_dcn4x_stage2_auto_drr_svp; + break; default: - ip_caps = &dml2_dcn401_max_ip_caps; + project_id = dml2_project_invalid; + DC_ERR("unsupported dcn version for DML21!"); + break; } - memcpy(&dml_init->ip_caps, ip_caps, sizeof(struct dml2_ip_capabilities)); + return project_id; } -void dml21_initialize_soc_bb_params(struct dml2_initialize_instance_in_out *dml_init, +void 
dml21_populate_dml_init_params(struct dml2_initialize_instance_in_out *dml_init, const struct dml2_configuration_options *config, const struct dc *in_dc) { - if (config->use_native_soc_bb_construction) - dml21_init_socbb_params(dml_init, config, in_dc); - else - dml21_external_socbb_params(dml_init, config); -} - -void dml21_initialize_ip_params(struct dml2_initialize_instance_in_out *dml_init, - const struct dml2_configuration_options *config, - const struct dc *in_dc) -{ - if (config->use_native_soc_bb_construction) - dml21_init_ip_params(dml_init, config, in_dc); - else - dml21_external_ip_params(dml_init, config); -} - -void dml21_apply_soc_bb_overrides(struct dml2_initialize_instance_in_out *dml_init, - const struct dml2_configuration_options *config, const struct dc *in_dc) -{ - int i; - - const struct clk_bw_params *dc_bw_params = in_dc->clk_mgr->bw_params; - const struct clk_limit_table *dc_clk_table = &dc_bw_params->clk_table; - struct dml2_soc_bb *dml_soc_bb = &dml_init->soc_bb; - struct dml2_soc_state_table *dml_clk_table = &dml_soc_bb->clk_table; - - /* override clocks if smu is present */ - if (in_dc->clk_mgr->funcs->is_smu_present && in_dc->clk_mgr->funcs->is_smu_present(in_dc->clk_mgr)) { - /* dcfclk */ - if (dc_clk_table->num_entries_per_clk.num_dcfclk_levels) { - dml_clk_table->dcfclk.num_clk_values = dc_clk_table->num_entries_per_clk.num_dcfclk_levels; - for (i = 0; i < min(DML_MAX_CLK_TABLE_SIZE, MAX_NUM_DPM_LVL); i++) { - if (i < dml_clk_table->dcfclk.num_clk_values) { - if (config->use_clock_dc_limits && dc_bw_params->dc_mode_limit.dcfclk_mhz && - dc_clk_table->entries[i].dcfclk_mhz > dc_bw_params->dc_mode_limit.dcfclk_mhz) { - if (i == 0 || dc_clk_table->entries[i-1].dcfclk_mhz < dc_bw_params->dc_mode_limit.dcfclk_mhz) { - dml_clk_table->dcfclk.clk_values_khz[i] = dc_bw_params->dc_mode_limit.dcfclk_mhz * 1000; - dml_clk_table->dcfclk.num_clk_values = i + 1; - } else { - dml_clk_table->dcfclk.clk_values_khz[i] = 0; - dml_clk_table->dcfclk.num_clk_values = i; - } - } else { - dml_clk_table->dcfclk.clk_values_khz[i] = dc_clk_table->entries[i].dcfclk_mhz * 1000; - } - } else { - dml_clk_table->dcfclk.clk_values_khz[i] = 0; - } - } - } - - /* fclk */ - if (dc_clk_table->num_entries_per_clk.num_fclk_levels) { - dml_clk_table->fclk.num_clk_values = dc_clk_table->num_entries_per_clk.num_fclk_levels; - for (i = 0; i < min(DML_MAX_CLK_TABLE_SIZE, MAX_NUM_DPM_LVL); i++) { - if (i < dml_clk_table->fclk.num_clk_values) { - if (config->use_clock_dc_limits && dc_bw_params->dc_mode_limit.fclk_mhz && - dc_clk_table->entries[i].fclk_mhz > dc_bw_params->dc_mode_limit.fclk_mhz) { - if (i == 0 || dc_clk_table->entries[i-1].fclk_mhz < dc_bw_params->dc_mode_limit.fclk_mhz) { - dml_clk_table->fclk.clk_values_khz[i] = dc_bw_params->dc_mode_limit.fclk_mhz * 1000; - dml_clk_table->fclk.num_clk_values = i + 1; - } else { - dml_clk_table->fclk.clk_values_khz[i] = 0; - dml_clk_table->fclk.num_clk_values = i; - } - } else { - dml_clk_table->fclk.clk_values_khz[i] = dc_clk_table->entries[i].fclk_mhz * 1000; - } - } else { - dml_clk_table->fclk.clk_values_khz[i] = 0; - } - } - } + dml_init->options.project_id = dml21_dcn_revision_to_dml2_project_id(in_dc->ctx->dce_version); - /* uclk */ - if (dc_clk_table->num_entries_per_clk.num_memclk_levels) { - dml_clk_table->uclk.num_clk_values = dc_clk_table->num_entries_per_clk.num_memclk_levels; - for (i = 0; i < min(DML_MAX_CLK_TABLE_SIZE, MAX_NUM_DPM_LVL); i++) { - if (i < dml_clk_table->uclk.num_clk_values) { - if (config->use_clock_dc_limits && 
dc_bw_params->dc_mode_limit.memclk_mhz && - dc_clk_table->entries[i].memclk_mhz > dc_bw_params->dc_mode_limit.memclk_mhz) { - if (i == 0 || dc_clk_table->entries[i-1].memclk_mhz < dc_bw_params->dc_mode_limit.memclk_mhz) { - dml_clk_table->uclk.clk_values_khz[i] = dc_bw_params->dc_mode_limit.memclk_mhz * 1000; - dml_clk_table->uclk.num_clk_values = i + 1; - } else { - dml_clk_table->uclk.clk_values_khz[i] = 0; - dml_clk_table->uclk.num_clk_values = i; - } - } else { - dml_clk_table->uclk.clk_values_khz[i] = dc_clk_table->entries[i].memclk_mhz * 1000; - } - } else { - dml_clk_table->uclk.clk_values_khz[i] = 0; - } - } - } - - /* dispclk */ - if (dc_clk_table->num_entries_per_clk.num_dispclk_levels) { - dml_clk_table->dispclk.num_clk_values = dc_clk_table->num_entries_per_clk.num_dispclk_levels; - for (i = 0; i < min(DML_MAX_CLK_TABLE_SIZE, MAX_NUM_DPM_LVL); i++) { - if (i < dml_clk_table->dispclk.num_clk_values) { - if (config->use_clock_dc_limits && dc_bw_params->dc_mode_limit.dispclk_mhz && - dc_clk_table->entries[i].dispclk_mhz > dc_bw_params->dc_mode_limit.dispclk_mhz) { - if (i == 0 || dc_clk_table->entries[i-1].dispclk_mhz < dc_bw_params->dc_mode_limit.dispclk_mhz) { - dml_clk_table->dispclk.clk_values_khz[i] = dc_bw_params->dc_mode_limit.dispclk_mhz * 1000; - dml_clk_table->dispclk.num_clk_values = i + 1; - } else { - dml_clk_table->dispclk.clk_values_khz[i] = 0; - dml_clk_table->dispclk.num_clk_values = i; - } - } else { - dml_clk_table->dispclk.clk_values_khz[i] = dc_clk_table->entries[i].dispclk_mhz * 1000; - } - } else { - dml_clk_table->dispclk.clk_values_khz[i] = 0; - } - } - } - - /* dppclk */ - if (dc_clk_table->num_entries_per_clk.num_dppclk_levels) { - dml_clk_table->dppclk.num_clk_values = dc_clk_table->num_entries_per_clk.num_dppclk_levels; - for (i = 0; i < min(DML_MAX_CLK_TABLE_SIZE, MAX_NUM_DPM_LVL); i++) { - if (i < dml_clk_table->dppclk.num_clk_values) { - if (config->use_clock_dc_limits && dc_bw_params->dc_mode_limit.dppclk_mhz && - dc_clk_table->entries[i].dppclk_mhz > dc_bw_params->dc_mode_limit.dppclk_mhz) { - if (i == 0 || dc_clk_table->entries[i-1].dppclk_mhz < dc_bw_params->dc_mode_limit.dppclk_mhz) { - dml_clk_table->dppclk.clk_values_khz[i] = dc_bw_params->dc_mode_limit.dppclk_mhz * 1000; - dml_clk_table->dppclk.num_clk_values = i + 1; - } else { - dml_clk_table->dppclk.clk_values_khz[i] = 0; - dml_clk_table->dppclk.num_clk_values = i; - } - } else { - dml_clk_table->dppclk.clk_values_khz[i] = dc_clk_table->entries[i].dppclk_mhz * 1000; - } - } else { - dml_clk_table->dppclk.clk_values_khz[i] = 0; - } - } - } - - /* dtbclk */ - if (dc_clk_table->num_entries_per_clk.num_dtbclk_levels) { - dml_clk_table->dtbclk.num_clk_values = dc_clk_table->num_entries_per_clk.num_dtbclk_levels; - for (i = 0; i < min(DML_MAX_CLK_TABLE_SIZE, MAX_NUM_DPM_LVL); i++) { - if (i < dml_clk_table->dtbclk.num_clk_values) { - if (config->use_clock_dc_limits && dc_bw_params->dc_mode_limit.dtbclk_mhz && - dc_clk_table->entries[i].dtbclk_mhz > dc_bw_params->dc_mode_limit.dtbclk_mhz) { - if (i == 0 || dc_clk_table->entries[i-1].dtbclk_mhz < dc_bw_params->dc_mode_limit.dtbclk_mhz) { - dml_clk_table->dtbclk.clk_values_khz[i] = dc_bw_params->dc_mode_limit.dtbclk_mhz * 1000; - dml_clk_table->dtbclk.num_clk_values = i + 1; - } else { - dml_clk_table->dtbclk.clk_values_khz[i] = 0; - dml_clk_table->dtbclk.num_clk_values = i; - } - } else { - dml_clk_table->dtbclk.clk_values_khz[i] = dc_clk_table->entries[i].dtbclk_mhz * 1000; - } - } else { - dml_clk_table->dtbclk.clk_values_khz[i] = 0; - } - } - 
} - - /* socclk */ - if (dc_clk_table->num_entries_per_clk.num_socclk_levels) { - dml_clk_table->socclk.num_clk_values = dc_clk_table->num_entries_per_clk.num_socclk_levels; - for (i = 0; i < min(DML_MAX_CLK_TABLE_SIZE, MAX_NUM_DPM_LVL); i++) { - if (i < dml_clk_table->socclk.num_clk_values) { - if (config->use_clock_dc_limits && dc_bw_params->dc_mode_limit.socclk_mhz && - dc_clk_table->entries[i].socclk_mhz > dc_bw_params->dc_mode_limit.socclk_mhz) { - if (i == 0 || dc_clk_table->entries[i-1].socclk_mhz < dc_bw_params->dc_mode_limit.socclk_mhz) { - dml_clk_table->socclk.clk_values_khz[i] = dc_bw_params->dc_mode_limit.socclk_mhz * 1000; - dml_clk_table->socclk.num_clk_values = i + 1; - } else { - dml_clk_table->socclk.clk_values_khz[i] = 0; - dml_clk_table->socclk.num_clk_values = i; - } - } else { - dml_clk_table->socclk.clk_values_khz[i] = dc_clk_table->entries[i].socclk_mhz * 1000; - } - } else { - dml_clk_table->socclk.clk_values_khz[i] = 0; - } - } - } - - /* do not override phyclks for now */ - /* phyclk */ - // dml_clk_table->phyclk.num_clk_values = dc_clk_table->num_entries_per_clk.num_phyclk_levels; - // for (i = 0; i < DML_MAX_CLK_TABLE_SIZE; i++) { - // dml_clk_table->phyclk.clk_values_khz[i] = dc_clk_table->entries[i].phyclk_mhz * 1000; - // } - - /* phyclk_d18 */ - // dml_clk_table->phyclk_d18.num_clk_values = dc_clk_table->num_entries_per_clk.num_phyclk_d18_levels; - // for (i = 0; i < DML_MAX_CLK_TABLE_SIZE; i++) { - // dml_clk_table->phyclk_d18.clk_values_khz[i] = dc_clk_table->entries[i].phyclk_d18_mhz * 1000; - // } - - /* phyclk_d32 */ - // dml_clk_table->phyclk_d32.num_clk_values = dc_clk_table->num_entries_per_clk.num_phyclk_d32_levels; - // for (i = 0; i < DML_MAX_CLK_TABLE_SIZE; i++) { - // dml_clk_table->phyclk_d32.clk_values_khz[i] = dc_clk_table->entries[i].phyclk_d32_mhz * 1000; - // } - } - - dml_soc_bb->dchub_refclk_mhz = in_dc->res_pool->ref_clocks.dchub_ref_clock_inKhz / 1000; - dml_soc_bb->dprefclk_mhz = in_dc->clk_mgr->dprefclk_khz / 1000; - dml_soc_bb->xtalclk_mhz = in_dc->ctx->dc_bios->fw_info.pll_info.crystal_frequency / 1000; - dml_soc_bb->dispclk_dppclk_vco_speed_mhz = in_dc->clk_mgr->dentist_vco_freq_khz / 1000.0; - - /* override bounding box paramters from VBIOS */ - if (in_dc->ctx->dc_bios->bb_info.dram_clock_change_latency_100ns > 0) - dml_soc_bb->power_management_parameters.dram_clk_change_blackout_us = - (in_dc->ctx->dc_bios->bb_info.dram_clock_change_latency_100ns + 9) / 10; - - if (in_dc->ctx->dc_bios->bb_info.dram_sr_enter_exit_latency_100ns > 0) - dml_soc_bb->power_management_parameters.stutter_enter_plus_exit_latency_us = - (in_dc->ctx->dc_bios->bb_info.dram_sr_enter_exit_latency_100ns + 9) / 10; - - if (in_dc->ctx->dc_bios->bb_info.dram_sr_exit_latency_100ns > 0) - dml_soc_bb->power_management_parameters.stutter_exit_latency_us = - (in_dc->ctx->dc_bios->bb_info.dram_sr_exit_latency_100ns + 9) / 10; - - if (dc_bw_params->num_channels) { - dml_clk_table->dram_config.channel_count = dc_bw_params->num_channels; - dml_soc_bb->mall_allocated_for_dcn_mbytes = in_dc->caps.mall_size_total / 1048576; - } else if (in_dc->ctx->dc_bios->vram_info.num_chans) { - dml_clk_table->dram_config.channel_count = in_dc->ctx->dc_bios->vram_info.num_chans; - dml_soc_bb->mall_allocated_for_dcn_mbytes = in_dc->caps.mall_size_total / 1048576; - } - - if (dc_bw_params->dram_channel_width_bytes) { - dml_clk_table->dram_config.channel_width_bytes = dc_bw_params->dram_channel_width_bytes; - } else if (in_dc->ctx->dc_bios->vram_info.dram_channel_width_bytes) { - 
dml_clk_table->dram_config.channel_width_bytes = in_dc->ctx->dc_bios->vram_info.dram_channel_width_bytes; - } - - /* override bounding box paramters from DC config */ - if (in_dc->bb_overrides.sr_exit_time_ns) { - dml_soc_bb->power_management_parameters.stutter_exit_latency_us = - in_dc->bb_overrides.sr_exit_time_ns / 1000.0; - } - - if (in_dc->bb_overrides.sr_enter_plus_exit_time_ns) { - dml_soc_bb->power_management_parameters.stutter_enter_plus_exit_latency_us = - in_dc->bb_overrides.sr_enter_plus_exit_time_ns / 1000.0; - } - - if (in_dc->bb_overrides.dram_clock_change_latency_ns) { - dml_soc_bb->power_management_parameters.dram_clk_change_blackout_us = - in_dc->bb_overrides.dram_clock_change_latency_ns / 1000.0; - } - - if (in_dc->bb_overrides.fclk_clock_change_latency_ns) { - dml_soc_bb->power_management_parameters.fclk_change_blackout_us = - in_dc->bb_overrides.fclk_clock_change_latency_ns / 1000.0; + if (config->use_native_soc_bb_construction) { + in_dc->soc_and_ip_translator->translator_funcs->get_soc_bb(&dml_init->soc_bb, in_dc, config); + in_dc->soc_and_ip_translator->translator_funcs->get_ip_caps(&dml_init->ip_caps); + } else { + dml_init->soc_bb = config->external_socbb_ip_params->soc_bb; + dml_init->ip_caps = config->external_socbb_ip_params->ip_params; } - //TODO - // if (in_dc->bb_overrides.dummy_clock_change_latency_ns) { - // dml_soc_bb->power_management_parameters.dram_clk_change_blackout_us = - // in_dc->bb_overrides.dram_clock_change_latency_ns / 1000.0; - // } + dml21_populate_pmo_options(&dml_init->options.pmo_options, in_dc, config); } static unsigned int calc_max_hardware_v_total(const struct dc_stream_state *stream) @@ -349,25 +84,29 @@ static unsigned int calc_max_hardware_v_total(const struct dc_stream_state *stre static void populate_dml21_timing_config_from_stream_state(struct dml2_timing_cfg *timing, struct dc_stream_state *stream, + struct pipe_ctx *pipe_ctx, struct dml2_context *dml_ctx) { unsigned int hblank_start, vblank_start, min_hardware_refresh_in_uhz; + uint32_t pix_clk_100hz; - timing->h_active = stream->timing.h_addressable + stream->timing.h_border_left + stream->timing.h_border_right; + timing->h_active = stream->timing.h_addressable + stream->timing.h_border_left + stream->timing.h_border_right + pipe_ctx->dsc_padding_params.dsc_hactive_padding; timing->v_active = stream->timing.v_addressable + stream->timing.v_border_bottom + stream->timing.v_border_top; timing->h_front_porch = stream->timing.h_front_porch; timing->v_front_porch = stream->timing.v_front_porch; timing->pixel_clock_khz = stream->timing.pix_clk_100hz / 10; + if (pipe_ctx->dsc_padding_params.dsc_hactive_padding != 0) + timing->pixel_clock_khz = pipe_ctx->dsc_padding_params.dsc_pix_clk_100hz / 10; if (stream->timing.timing_3d_format == TIMING_3D_FORMAT_HW_FRAME_PACKING) timing->pixel_clock_khz *= 2; - timing->h_total = stream->timing.h_total; + timing->h_total = stream->timing.h_total + pipe_ctx->dsc_padding_params.dsc_htotal_padding; timing->v_total = stream->timing.v_total; timing->h_sync_width = stream->timing.h_sync_width; timing->interlaced = stream->timing.flags.INTERLACE; hblank_start = stream->timing.h_total - stream->timing.h_front_porch; - timing->h_blank_end = hblank_start - stream->timing.h_addressable + timing->h_blank_end = hblank_start - stream->timing.h_addressable - pipe_ctx->dsc_padding_params.dsc_hactive_padding - stream->timing.h_border_left - stream->timing.h_border_right; if (hblank_start < stream->timing.h_addressable) @@ -386,15 +125,16 @@ static void 
populate_dml21_timing_config_from_stream_state(struct dml2_timing_cf /* limit min refresh rate to DC cap */ min_hardware_refresh_in_uhz = stream->timing.min_refresh_in_uhz; if (stream->ctx->dc->caps.max_v_total != 0) { - min_hardware_refresh_in_uhz = div64_u64((stream->timing.pix_clk_100hz * 100000000ULL), - (stream->timing.h_total * (long long)calc_max_hardware_v_total(stream))); + if (pipe_ctx->dsc_padding_params.dsc_hactive_padding != 0) { + pix_clk_100hz = pipe_ctx->dsc_padding_params.dsc_pix_clk_100hz; + } else { + pix_clk_100hz = stream->timing.pix_clk_100hz; + } + min_hardware_refresh_in_uhz = div64_u64((pix_clk_100hz * 100000000ULL), + (timing->h_total * (long long)calc_max_hardware_v_total(stream))); } - if (stream->timing.min_refresh_in_uhz > min_hardware_refresh_in_uhz) { - timing->drr_config.min_refresh_uhz = stream->timing.min_refresh_in_uhz; - } else { - timing->drr_config.min_refresh_uhz = min_hardware_refresh_in_uhz; - } + timing->drr_config.min_refresh_uhz = max(stream->timing.min_refresh_in_uhz, min_hardware_refresh_in_uhz); if (dml_ctx->config.callbacks.get_max_flickerless_instant_vtotal_increase && stream->ctx->dc->config.enable_fpo_flicker_detection == 1) @@ -442,21 +182,6 @@ static void populate_dml21_timing_config_from_stream_state(struct dml2_timing_cf timing->vblank_nom = timing->v_total - timing->v_active; } -/** - * adjust_dml21_hblank_timing_config_from_pipe_ctx - Adjusts the horizontal blanking timing configuration - * based on the pipe context. - * @timing: Pointer to the dml2_timing_cfg structure to be adjusted. - * @pipe: Pointer to the pipe_ctx structure containing the horizontal blanking borrow value. - * - * This function modifies the horizontal active and blank end timings by adding and subtracting - * the horizontal blanking borrow value from the pipe context, respectively. 
- */ -static void adjust_dml21_hblank_timing_config_from_pipe_ctx(struct dml2_timing_cfg *timing, struct pipe_ctx *pipe) -{ - timing->h_active += pipe->hblank_borrow; - timing->h_blank_end -= pipe->hblank_borrow; -} - static void populate_dml21_output_config_from_stream_state(struct dml2_link_output_cfg *output, struct dc_stream_state *stream, const struct pipe_ctx *pipe) { @@ -726,7 +451,6 @@ static void populate_dml21_surface_config_from_plane_state( switch (plane_state->tiling_info.gfxversion) { case DcGfxVersion7: case DcGfxVersion8: - // Placeholder for programming the array_mode break; case DcGfxVersion9: case DcGfxVersion10: @@ -757,7 +481,9 @@ static const struct scaler_data *get_scaler_data_for_plane( temp_pipe->plane_state = pipe->plane_state; temp_pipe->plane_res.scl_data.taps = pipe->plane_res.scl_data.taps; temp_pipe->stream_res = pipe->stream_res; - temp_pipe->hblank_borrow = pipe->hblank_borrow; + temp_pipe->dsc_padding_params.dsc_hactive_padding = pipe->dsc_padding_params.dsc_hactive_padding; + temp_pipe->dsc_padding_params.dsc_htotal_padding = pipe->dsc_padding_params.dsc_htotal_padding; + temp_pipe->dsc_padding_params.dsc_pix_clk_100hz = pipe->dsc_padding_params.dsc_pix_clk_100hz; dml_ctx->config.callbacks.build_scaling_params(temp_pipe); break; } @@ -788,6 +514,7 @@ static void populate_dml21_plane_config_from_plane_state(struct dml2_context *dm plane->pixel_format = dml2_420_10; break; case SURFACE_PIXEL_FORMAT_GRPH_ARGB16161616: + case SURFACE_PIXEL_FORMAT_GRPH_ABGR16161616: case SURFACE_PIXEL_FORMAT_GRPH_ARGB16161616F: case SURFACE_PIXEL_FORMAT_GRPH_ABGR16161616F: plane->pixel_format = dml2_444_64; @@ -888,10 +615,8 @@ static void populate_dml21_plane_config_from_plane_state(struct dml2_context *dm case DC_CM2_GPU_MEM_SIZE_171717: plane->tdlut.tdlut_width_mode = dml2_tdlut_width_17_cube; break; - case DC_CM2_GPU_MEM_SIZE_333333: - plane->tdlut.tdlut_width_mode = dml2_tdlut_width_33_cube; - break; case DC_CM2_GPU_MEM_SIZE_TRANSFORMED: + default: //plane->tdlut.tdlut_width_mode = dml2_tdlut_width_flatten; // dml2_tdlut_width_flatten undefined break; } @@ -1026,8 +751,7 @@ bool dml21_map_dc_state_into_dml_display_cfg(const struct dc *in_dc, struct dc_s disp_cfg_stream_location = dml_dispcfg->num_streams++; ASSERT(disp_cfg_stream_location >= 0 && disp_cfg_stream_location < __DML2_WRAPPER_MAX_STREAMS_PLANES__); - populate_dml21_timing_config_from_stream_state(&dml_dispcfg->stream_descriptors[disp_cfg_stream_location].timing, context->streams[stream_index], dml_ctx); - adjust_dml21_hblank_timing_config_from_pipe_ctx(&dml_dispcfg->stream_descriptors[disp_cfg_stream_location].timing, &context->res_ctx.pipe_ctx[stream_index]); + populate_dml21_timing_config_from_stream_state(&dml_dispcfg->stream_descriptors[disp_cfg_stream_location].timing, context->streams[stream_index], &context->res_ctx.pipe_ctx[stream_index], dml_ctx); populate_dml21_output_config_from_stream_state(&dml_dispcfg->stream_descriptors[disp_cfg_stream_location].output, context->streams[stream_index], &context->res_ctx.pipe_ctx[stream_index]); populate_dml21_stream_overrides_from_stream_state(&dml_dispcfg->stream_descriptors[disp_cfg_stream_location], context->streams[stream_index], &context->stream_status[stream_index]); @@ -1094,6 +818,8 @@ void dml21_copy_clocks_to_dc_state(struct dml2_context *in_ctx, struct dc_state context->bw_ctx.bw.dcn.clk.socclk_khz = in_ctx->v21.mode_programming.programming->min_clocks.dcn4x.socclk_khz; context->bw_ctx.bw.dcn.clk.subvp_prefetch_dramclk_khz = 
in_ctx->v21.mode_programming.programming->min_clocks.dcn4x.svp_prefetch_no_throttle.uclk_khz; context->bw_ctx.bw.dcn.clk.subvp_prefetch_fclk_khz = in_ctx->v21.mode_programming.programming->min_clocks.dcn4x.svp_prefetch_no_throttle.fclk_khz; + context->bw_ctx.bw.dcn.clk.stutter_efficiency.base_efficiency = in_ctx->v21.mode_programming.programming->stutter.base_percent_efficiency; + context->bw_ctx.bw.dcn.clk.stutter_efficiency.low_power_efficiency = in_ctx->v21.mode_programming.programming->stutter.low_power_percent_efficiency; } static struct dml2_dchub_watermark_regs *wm_set_index_to_dc_wm_set(union dcn_watermark_set *watermarks, const enum dml2_dchub_watermark_reg_set_index wm_index) diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.h index 73a013be1e48..9880d3e0398e 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.h +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.h @@ -17,9 +17,7 @@ struct dml2_context; struct dml2_configuration_options; struct dml2_initialize_instance_in_out; -void dml21_apply_soc_bb_overrides(struct dml2_initialize_instance_in_out *dml_init, const struct dml2_configuration_options *config, const struct dc *in_dc); -void dml21_initialize_soc_bb_params(struct dml2_initialize_instance_in_out *dml_init, const struct dml2_configuration_options *config, const struct dc *in_dc); -void dml21_initialize_ip_params(struct dml2_initialize_instance_in_out *dml_init, const struct dml2_configuration_options *config, const struct dc *in_dc); +void dml21_populate_dml_init_params(struct dml2_initialize_instance_in_out *dml_init, const struct dml2_configuration_options *config, const struct dc *in_dc); bool dml21_map_dc_state_into_dml_display_cfg(const struct dc *in_dc, struct dc_state *context, struct dml2_context *dml_ctx); void dml21_copy_clocks_to_dc_state(struct dml2_context *in_ctx, struct dc_state *context); void dml21_extract_watermark_sets(const struct dc *in_dc, union dcn_watermark_set *watermarks, struct dml2_context *in_ctx); diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_utils.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_utils.c index 930e86cdb88a..ee721606b883 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_utils.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_utils.c @@ -384,6 +384,7 @@ void dml21_build_fams2_programming(const struct dc *dc, /* reset fams2 data */ memset(&context->bw_ctx.bw.dcn.fams2_stream_base_params, 0, sizeof(union dmub_cmd_fams2_config) * DML2_MAX_PLANES); memset(&context->bw_ctx.bw.dcn.fams2_stream_sub_params, 0, sizeof(union dmub_cmd_fams2_config) * DML2_MAX_PLANES); + memset(&context->bw_ctx.bw.dcn.fams2_stream_sub_params_v2, 0, sizeof(union dmub_fams2_stream_static_sub_state_v2) * DML2_MAX_PLANES); memset(&context->bw_ctx.bw.dcn.fams2_global_config, 0, sizeof(struct dmub_cmd_fams2_global_config)); if (dml_ctx->v21.mode_programming.programming->fams2_required) { @@ -414,9 +415,16 @@ void dml21_build_fams2_programming(const struct dc *dc, memcpy(static_base_state, &dml_ctx->v21.mode_programming.programming->stream_programming[dml_stream_idx].fams2_base_params, sizeof(union dmub_cmd_fams2_config)); - memcpy(static_sub_state, - &dml_ctx->v21.mode_programming.programming->stream_programming[dml_stream_idx].fams2_sub_params, - sizeof(union dmub_cmd_fams2_config)); + + if (dc->debug.fams_version.major == 3) { + 
memcpy(&context->bw_ctx.bw.dcn.fams2_stream_sub_params_v2[num_fams2_streams], + &dml_ctx->v21.mode_programming.programming->stream_programming[dml_stream_idx].fams2_sub_params_v2, + sizeof(union dmub_fams2_stream_static_sub_state_v2)); + } else { + memcpy(static_sub_state, + &dml_ctx->v21.mode_programming.programming->stream_programming[dml_stream_idx].fams2_sub_params, + sizeof(union dmub_cmd_fams2_config)); + } switch (dc->debug.fams_version.minor) { case 1: diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_wrapper.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_wrapper.c index 208d3651b6ba..08f7f03b1023 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_wrapper.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_wrapper.c @@ -2,8 +2,6 @@ // // Copyright 2024 Advanced Micro Devices, Inc. -#include <linux/vmalloc.h> - #include "dml2_internal_types.h" #include "dml_top.h" #include "dml2_core_dcn4_calcs.h" @@ -37,15 +35,11 @@ static bool dml21_allocate_memory(struct dml2_context **dml_ctx) return true; } -static void dml21_apply_debug_options(const struct dc *in_dc, struct dml2_context *dml_ctx, const struct dml2_configuration_options *config) +static void dml21_populate_configuration_options(const struct dc *in_dc, + struct dml2_context *dml_ctx, + const struct dml2_configuration_options *config) { - bool disable_fams2; - struct dml2_pmo_options *pmo_options = &dml_ctx->v21.dml_init.options.pmo_options; - - /* ODM options */ - pmo_options->disable_dyn_odm = !config->minimize_dispclk_using_odm; - pmo_options->disable_dyn_odm_for_multi_stream = true; - pmo_options->disable_dyn_odm_for_stream_with_svp = true; + dml_ctx->config = *config; /* UCLK P-State options */ if (in_dc->debug.dml21_force_pstate_method) { @@ -55,52 +49,20 @@ static void dml21_apply_debug_options(const struct dc *in_dc, struct dml2_contex } else { dml_ctx->config.pmo.force_pstate_method_enable = false; } - - pmo_options->disable_vblank = ((in_dc->debug.dml21_disable_pstate_method_mask >> 1) & 1); - - /* NOTE: DRR and SubVP Require FAMS2 */ - disable_fams2 = !in_dc->debug.fams2_config.bits.enable; - pmo_options->disable_svp = ((in_dc->debug.dml21_disable_pstate_method_mask >> 2) & 1) || - in_dc->debug.force_disable_subvp || - disable_fams2; - pmo_options->disable_drr_clamped = ((in_dc->debug.dml21_disable_pstate_method_mask >> 3) & 1) || - disable_fams2; - pmo_options->disable_drr_var = ((in_dc->debug.dml21_disable_pstate_method_mask >> 4) & 1) || - disable_fams2; - pmo_options->disable_fams2 = disable_fams2; - - pmo_options->disable_drr_var_when_var_active = in_dc->debug.disable_fams_gaming == INGAME_FAMS_DISABLE || - in_dc->debug.disable_fams_gaming == INGAME_FAMS_MULTI_DISP_CLAMPED_ONLY; - pmo_options->disable_drr_clamped_when_var_active = in_dc->debug.disable_fams_gaming == INGAME_FAMS_DISABLE; } -static void dml21_init(const struct dc *in_dc, struct dml2_context **dml_ctx, const struct dml2_configuration_options *config) +static void dml21_init(const struct dc *in_dc, struct dml2_context *dml_ctx, const struct dml2_configuration_options *config) { - switch (in_dc->ctx->dce_version) { - case DCN_VERSION_4_01: - (*dml_ctx)->v21.dml_init.options.project_id = dml2_project_dcn4x_stage2_auto_drr_svp; - break; - default: - (*dml_ctx)->v21.dml_init.options.project_id = dml2_project_invalid; - } - (*dml_ctx)->architecture = dml2_architecture_21; + dml_ctx->architecture = dml2_architecture_21; - /* Store configuration options */ - (*dml_ctx)->config = *config; + 
dml21_populate_configuration_options(in_dc, dml_ctx, config); DC_FP_START(); - /*Initialize SOCBB and DCNIP params */ - dml21_initialize_soc_bb_params(&(*dml_ctx)->v21.dml_init, config, in_dc); - dml21_initialize_ip_params(&(*dml_ctx)->v21.dml_init, config, in_dc); - dml21_apply_soc_bb_overrides(&(*dml_ctx)->v21.dml_init, config, in_dc); - - /* apply debug overrides */ - dml21_apply_debug_options(in_dc, *dml_ctx, config); + dml21_populate_dml_init_params(&dml_ctx->v21.dml_init, &dml_ctx->config, in_dc); - /*Initialize DML21 instance */ - dml2_initialize_instance(&(*dml_ctx)->v21.dml_init); + dml2_initialize_instance(&dml_ctx->v21.dml_init); DC_FP_END(); } @@ -111,7 +73,7 @@ bool dml21_create(const struct dc *in_dc, struct dml2_context **dml_ctx, const s if (!dml21_allocate_memory(dml_ctx)) return false; - dml21_init(in_dc, dml_ctx, config); + dml21_init(in_dc, *dml_ctx, config); return true; } @@ -262,7 +224,9 @@ static bool dml21_mode_check_and_programming(const struct dc *in_dc, struct dc_s dml_ctx->config.svp_pstate.callbacks.release_phantom_streams_and_planes(in_dc, context); /* Populate stream, plane mappings and other fields in display config. */ + DC_FP_START(); result = dml21_map_dc_state_into_dml_display_cfg(in_dc, context, dml_ctx); + DC_FP_END(); if (!result) return false; @@ -317,7 +281,9 @@ static bool dml21_check_mode_support(const struct dc *in_dc, struct dc_state *co dml_ctx->config.svp_pstate.callbacks.release_phantom_streams_and_planes(in_dc, context); mode_support->dml2_instance = dml_init->dml2_instance; + DC_FP_START(); dml21_map_dc_state_into_dml_display_cfg(in_dc, context, dml_ctx); + DC_FP_END(); dml_ctx->v21.mode_programming.dml2_instance->scratch.build_mode_programming_locals.mode_programming_params.programming = dml_ctx->v21.mode_programming.programming; DC_FP_START(); is_supported = dml2_check_mode_supported(mode_support); @@ -328,12 +294,13 @@ static bool dml21_check_mode_support(const struct dc *in_dc, struct dc_state *co return true; } -bool dml21_validate(const struct dc *in_dc, struct dc_state *context, struct dml2_context *dml_ctx, bool fast_validate) +bool dml21_validate(const struct dc *in_dc, struct dc_state *context, struct dml2_context *dml_ctx, + enum dc_validate_mode validate_mode) { bool out = false; - /* Use dml_validate_only for fast_validate path */ - if (fast_validate) + /* Use dml21_check_mode_support for DC_VALIDATE_MODE_ONLY and DC_VALIDATE_MODE_AND_STATE_INDEX path */ + if (validate_mode != DC_VALIDATE_MODE_AND_PROGRAMMING) out = dml21_check_mode_support(in_dc, context, dml_ctx); else out = dml21_mode_check_and_programming(in_dc, context, dml_ctx); @@ -496,7 +463,7 @@ bool dml21_create_copy(struct dml2_context **dst_dml_ctx, return true; } -void dml21_reinit(const struct dc *in_dc, struct dml2_context **dml_ctx, const struct dml2_configuration_options *config) +void dml21_reinit(const struct dc *in_dc, struct dml2_context *dml_ctx, const struct dml2_configuration_options *config) { dml21_init(in_dc, dml_ctx, config); } diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_wrapper.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_wrapper.h index 42e715024bc9..15f92029d2e5 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_wrapper.h +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_wrapper.h @@ -14,6 +14,7 @@ struct dc; struct dc_state; struct dml2_configuration_options; struct dml2_context; +enum dc_validate_mode; /** * dml2_create - Creates dml21_context. 
@@ -33,22 +34,23 @@ void dml21_copy(struct dml2_context *dst_dml_ctx, struct dml2_context *src_dml_ctx); bool dml21_create_copy(struct dml2_context **dst_dml_ctx, struct dml2_context *src_dml_ctx); -void dml21_reinit(const struct dc *in_dc, struct dml2_context **dml_ctx, const struct dml2_configuration_options *config); +void dml21_reinit(const struct dc *in_dc, struct dml2_context *dml_ctx, const struct dml2_configuration_options *config); /** * dml21_validate - Determines if a display configuration is supported or not. * @in_dc: dc. * @context: dc_state to be validated. - * @fast_validate: Fast validate will not populate context.res_ctx. + * @validate_mode: DC_VALIDATE_MODE_ONLY and DC_VALIDATE_MODE_AND_STATE_INDEX + * will not populate context.res_ctx. * * Based on fast_validate option internally would call: * - * -dml21_mode_check_and_programming - for non fast_validate option + * -dml21_mode_check_and_programming - for DC_VALIDATE_MODE_AND_PROGRAMMING option * Calculates if dc_state can be supported on the input display * configuration. If supported, generates the necessary HW * programming for the new dc_state. * - * -dml21_check_mode_support - for fast_validate option + * -dml21_check_mode_support - for DC_VALIDATE_MODE_ONLY and DC_VALIDATE_MODE_AND_STATE_INDEX option * Calculates if dc_state can be supported for the input display * config. @@ -56,7 +58,8 @@ void dml21_reinit(const struct dc *in_dc, struct dml2_context **dml_ctx, const s * separate dc_states for validation. * Return: True if mode is supported, false otherwise. */ -bool dml21_validate(const struct dc *in_dc, struct dc_state *context, struct dml2_context *dml_ctx, bool fast_validate); +bool dml21_validate(const struct dc *in_dc, struct dc_state *context, struct dml2_context *dml_ctx, + enum dc_validate_mode validate_mode); /* Prepare hubp mcache_regs for hubp mcache ID and split coordinate programming */ void dml21_prepare_mcache_programming(struct dc *in_dc, struct dc_state *context, struct dml2_context *dml_ctx); diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top.h index c047d56527c4..a64ec4dcf11a 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top.h +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top.h @@ -43,5 +43,4 @@ bool dml2_build_mode_programming(struct dml2_build_mode_programming_in_out *in_o */ bool dml2_build_mcache_programming(struct dml2_build_mcache_programming_in_out *in_out); - #endif diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_dchub_registers.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_dchub_registers.h index 84c90050668c..91955bbe24b8 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_dchub_registers.h +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_dchub_registers.h @@ -46,6 +46,7 @@ struct dml2_display_dlg_regs { uint32_t dst_y_delta_drq_limit; uint32_t refcyc_per_vm_dmdata; uint32_t dmdata_dl_delta; + uint32_t dst_y_svp_drq_limit; // MRQ uint32_t refcyc_per_meta_chunk_vblank_l; @@ -158,6 +159,8 @@ struct dml2_dchub_watermark_regs { uint32_t sr_exit; uint32_t sr_enter_z8; uint32_t sr_exit_z8; + uint32_t sr_enter_low_power; + uint32_t sr_exit_low_power; uint32_t uclk_pstate; uint32_t fclk_pstate; uint32_t temp_read_or_ppt; diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_display_cfg_types.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_display_cfg_types.h index 255f05de362c..e8dc6471c0be 100644 --- 
a/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_display_cfg_types.h +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_display_cfg_types.h @@ -222,6 +222,7 @@ struct dml2_composition_cfg { struct { bool enabled; + bool upsp_enabled; struct { double h_ratio; double v_ratio; @@ -426,6 +427,7 @@ struct dml2_stream_parameters { struct dml2_display_cfg { bool gpuvm_enable; + bool ffbm_enable; bool hostvm_enable; // Allocate DET proportionally between streams based on pixel rate diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_soc_parameter_types.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_soc_parameter_types.h index 5f0bc42d1d2f..176f55947664 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_soc_parameter_types.h +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_soc_parameter_types.h @@ -93,12 +93,17 @@ struct dml2_soc_power_management_parameters { double dram_clk_change_write_only_us; double fclk_change_blackout_us; double g7_ppt_blackout_us; + double g7_temperature_read_blackout_us; double stutter_enter_plus_exit_latency_us; double stutter_exit_latency_us; + double low_power_stutter_enter_plus_exit_latency_us; + double low_power_stutter_exit_latency_us; double z8_stutter_enter_plus_exit_latency_us; double z8_stutter_exit_latency_us; double z8_min_idle_time; double g6_temp_read_blackout_us[DML_MAX_CLK_TABLE_SIZE]; + double type_b_dram_clk_change_blackout_us; + double type_b_ppt_blackout_us; }; struct dml2_clk_table { @@ -130,6 +135,7 @@ struct dml2_soc_state_table { struct dml2_soc_vmin_clock_limits { unsigned long dispclk_khz; + unsigned long dcfclk_khz; }; struct dml2_soc_bb { @@ -138,6 +144,7 @@ struct dml2_soc_bb { struct dml2_soc_power_management_parameters power_management_parameters; struct dml2_soc_vmin_clock_limits vmin_limit; + double lower_bound_bandwidth_dchub; unsigned int dprefclk_mhz; unsigned int xtalclk_mhz; unsigned int pcie_refclk_mhz; diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_types.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_types.h index 0dbf886d8926..41adb1104d0f 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_types.h +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_types.h @@ -16,9 +16,9 @@ struct dml2_instance; enum dml2_project_id { dml2_project_invalid = 0, - dml2_project_dcn4x_stage1 = 1, - dml2_project_dcn4x_stage2 = 2, - dml2_project_dcn4x_stage2_auto_drr_svp = 3, + dml2_project_dcn4x_stage1, + dml2_project_dcn4x_stage2, + dml2_project_dcn4x_stage2_auto_drr_svp, }; enum dml2_pstate_change_support { @@ -53,7 +53,9 @@ enum dml2_output_type_and_rate__rate { dml2_output_rate_hdmi_rate_6x4 = 9, dml2_output_rate_hdmi_rate_8x4 = 10, dml2_output_rate_hdmi_rate_10x4 = 11, - dml2_output_rate_hdmi_rate_12x4 = 12 + dml2_output_rate_hdmi_rate_12x4 = 12, + dml2_output_rate_hdmi_rate_16x4 = 13, + dml2_output_rate_hdmi_rate_20x4 = 14 }; struct dml2_pmo_options { @@ -279,7 +281,10 @@ struct dml2_per_stream_programming { } phantom_stream; union dmub_cmd_fams2_config fams2_base_params; - union dmub_cmd_fams2_config fams2_sub_params; + union { + union dmub_cmd_fams2_config fams2_sub_params; + union dmub_fams2_stream_static_sub_state_v2 fams2_sub_params_v2; + }; }; //----------------- @@ -412,6 +417,8 @@ struct dml2_display_cfg_programming { struct { bool supported_in_blank; // Changing to configurations where this is false requires stutter to be disabled during the transition + uint8_t base_percent_efficiency; //LP1 + uint8_t 
low_power_percent_efficiency; //LP2 } stutter; struct { @@ -674,9 +681,14 @@ struct dml2_display_cfg_programming { // unlimited # of mcache struct dml2_mcache_surface_allocation non_optimized_mcache_allocation[DML2_MAX_PLANES]; + bool failed_prefetch; + bool failed_uclk_pstate; bool failed_mcache_validation; bool failed_dpmm; bool failed_mode_programming; + bool failed_mode_programming_dcfclk; + bool failed_mode_programming_prefetch; + bool failed_mode_programming_flip; bool failed_map_watermarks; } informative; }; diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.c index c4dad7164d31..bf62d42b3f78 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.c @@ -1238,18 +1238,27 @@ static void CalculateDETBufferSize( static double CalculateRequiredDispclk( enum dml2_odm_mode ODMMode, - double PixelClock) + double PixelClock, + bool isTMDS420) { + double DispClk; if (ODMMode == dml2_odm_mode_combine_4to1) { - return PixelClock / 4.0; + DispClk = PixelClock / 4.0; } else if (ODMMode == dml2_odm_mode_combine_3to1) { - return PixelClock / 3.0; + DispClk = PixelClock / 3.0; } else if (ODMMode == dml2_odm_mode_combine_2to1) { - return PixelClock / 2.0; + DispClk = PixelClock / 2.0; } else { - return PixelClock; + DispClk = PixelClock; + } + + if (isTMDS420) { + double TMDS420MinPixClock = PixelClock / 2.0; + DispClk = math_max2(DispClk, TMDS420MinPixClock); } + + return DispClk; } static double TruncToValidBPP( @@ -4122,11 +4131,12 @@ static noinline_for_stack void CalculateODMMode( bool success; bool UseDSC = DSCEnable && (NumberOfDSCSlices > 0); enum dml2_odm_mode DecidedODMMode; + bool isTMDS420 = (OutFormat == dml2_420 && Output == dml2_hdmi); - SurfaceRequiredDISPCLKWithoutODMCombine = CalculateRequiredDispclk(dml2_odm_mode_bypass, PixelClock); - SurfaceRequiredDISPCLKWithODMCombineTwoToOne = CalculateRequiredDispclk(dml2_odm_mode_combine_2to1, PixelClock); - SurfaceRequiredDISPCLKWithODMCombineThreeToOne = CalculateRequiredDispclk(dml2_odm_mode_combine_3to1, PixelClock); - SurfaceRequiredDISPCLKWithODMCombineFourToOne = CalculateRequiredDispclk(dml2_odm_mode_combine_4to1, PixelClock); + SurfaceRequiredDISPCLKWithoutODMCombine = CalculateRequiredDispclk(dml2_odm_mode_bypass, PixelClock, isTMDS420); + SurfaceRequiredDISPCLKWithODMCombineTwoToOne = CalculateRequiredDispclk(dml2_odm_mode_combine_2to1, PixelClock, isTMDS420); + SurfaceRequiredDISPCLKWithODMCombineThreeToOne = CalculateRequiredDispclk(dml2_odm_mode_combine_3to1, PixelClock, isTMDS420); + SurfaceRequiredDISPCLKWithODMCombineFourToOne = CalculateRequiredDispclk(dml2_odm_mode_combine_4to1, PixelClock, isTMDS420); #ifdef __DML_VBA_DEBUG__ DML_LOG_VERBOSE("DML::%s: ODMUse = %d\n", __func__, ODMUse); DML_LOG_VERBOSE("DML::%s: Output = %d\n", __func__, Output); @@ -4685,7 +4695,10 @@ static void calculate_tdlut_setting( //the tdlut is fetched during the 2 row times of prefetch. 
if (p->setup_for_tdlut) { *p->tdlut_groups_per_2row_ub = (unsigned int)math_ceil2((double) *p->tdlut_bytes_per_frame / *p->tdlut_bytes_per_group, 1); - *p->tdlut_opt_time = (*p->tdlut_bytes_per_frame - p->cursor_buffer_size * 1024) / tdlut_drain_rate; + if (*p->tdlut_bytes_per_frame > p->cursor_buffer_size * 1024) + *p->tdlut_opt_time = (*p->tdlut_bytes_per_frame - p->cursor_buffer_size * 1024) / tdlut_drain_rate; + else + *p->tdlut_opt_time = 0; *p->tdlut_drain_time = p->cursor_buffer_size * 1024 / tdlut_drain_rate; *p->tdlut_bytes_to_deliver = (unsigned int) (p->cursor_buffer_size * 1024.0); } @@ -4858,7 +4871,7 @@ static double get_urgent_bandwidth_required( double ReadBandwidthChroma[], double PrefetchBandwidthLuma[], double PrefetchBandwidthChroma[], - double PrefetchBandwidthOto[], + double PrefetchBandwidthMax[], double excess_vactive_fill_bw_l[], double excess_vactive_fill_bw_c[], double cursor_bw[], @@ -4922,9 +4935,9 @@ static double get_urgent_bandwidth_required( l->vm_row_bw = NumberOfDPP[k] * prefetch_vmrow_bw[k]; l->flip_and_active_bw = l->per_plane_flip_bw[k] + ReadBandwidthLuma[k] * l->adj_factor_p0 + ReadBandwidthChroma[k] * l->adj_factor_p1 + cursor_bw[k] * l->adj_factor_cur; l->flip_and_prefetch_bw = l->per_plane_flip_bw[k] + NumberOfDPP[k] * (PrefetchBandwidthLuma[k] * l->adj_factor_p0_pre + PrefetchBandwidthChroma[k] * l->adj_factor_p1_pre) + prefetch_cursor_bw[k] * l->adj_factor_cur_pre; - l->flip_and_prefetch_bw_oto = l->per_plane_flip_bw[k] + NumberOfDPP[k] * (PrefetchBandwidthOto[k] * l->adj_factor_p0_pre + PrefetchBandwidthChroma[k] * l->adj_factor_p1_pre) + prefetch_cursor_bw[k] * l->adj_factor_cur_pre; + l->flip_and_prefetch_bw_max = l->per_plane_flip_bw[k] + NumberOfDPP[k] * (PrefetchBandwidthMax[k] * l->adj_factor_p0_pre + PrefetchBandwidthChroma[k] * l->adj_factor_p1_pre) + prefetch_cursor_bw[k] * l->adj_factor_cur_pre; l->active_and_excess_bw = (ReadBandwidthLuma[k] + excess_vactive_fill_bw_l[k]) * l->tmp_nom_adj_factor_p0 + (ReadBandwidthChroma[k] + excess_vactive_fill_bw_c[k]) * l->tmp_nom_adj_factor_p1 + dpte_row_bw[k] + meta_row_bw[k]; - surface_required_bw[k] = math_max5(l->vm_row_bw, l->flip_and_active_bw, l->flip_and_prefetch_bw, l->active_and_excess_bw, l->flip_and_prefetch_bw_oto); + surface_required_bw[k] = math_max5(l->vm_row_bw, l->flip_and_active_bw, l->flip_and_prefetch_bw, l->active_and_excess_bw, l->flip_and_prefetch_bw_max); /* export peak required bandwidth for the surface */ surface_peak_required_bw[k] = math_max2(surface_required_bw[k], surface_peak_required_bw[k]); @@ -5122,7 +5135,7 @@ static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch s->Tsw_est3 = 0.0; s->cursor_prefetch_bytes = 0; *p->prefetch_cursor_bw = 0; - *p->RequiredPrefetchBWOTO = 0.0; + *p->RequiredPrefetchBWMax = 0.0; dcc_mrq_enable = (p->dcc_enable && p->mrq_present); @@ -5353,7 +5366,7 @@ static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch * mp will fail if ms decides to use equ schedule and mp decides to use oto schedule * and the required bandwidth increases when going from ms to mp */ - *p->RequiredPrefetchBWOTO = s->prefetch_bw_oto; + *p->RequiredPrefetchBWMax = s->prefetch_bw_oto; #ifdef __DML_VBA_DEBUG__ DML_LOG_VERBOSE("DML::%s: vactive_sw_bw_l = %f\n", __func__, p->vactive_sw_bw_l); @@ -5715,8 +5728,14 @@ static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch s->TimeForFetchingVM = s->Tvm_equ; s->TimeForFetchingRowInVBlank = s->Tr0_equ; - *p->dst_y_per_vm_vblank = math_ceil2(4.0 * 
s->TimeForFetchingVM / s->LineTime, 1.0) / 4.0; - *p->dst_y_per_row_vblank = math_ceil2(4.0 * s->TimeForFetchingRowInVBlank / s->LineTime, 1.0) / 4.0; + *p->dst_y_per_vm_vblank = math_ceil2(4.0 * s->TimeForFetchingVM / s->LineTime, 1.0) / 4.0; + *p->dst_y_per_row_vblank = math_ceil2(4.0 * s->TimeForFetchingRowInVBlank / s->LineTime, 1.0) / 4.0; + + /* equ bw should be propagated so a ceiling of the equ bw is accounted for prior to mode programming. + * Overall bandwidth may be lower when going from mode support to mode programming but final pixel data + * bandwidth may end up higher than what was calculated in mode support. + */ + *p->RequiredPrefetchBWMax = math_max2(s->prefetch_bw_equ, *p->RequiredPrefetchBWMax); #ifdef __DML_VBA_DEBUG__ DML_LOG_VERBOSE("DML::%s: Using equ bw scheduling for prefetch\n", __func__); @@ -6112,7 +6131,7 @@ static void calculate_peak_bandwidth_required( p->surface_read_bandwidth_c, l->zero_array, //PrefetchBandwidthLuma, l->zero_array, //PrefetchBandwidthChroma, - l->zero_array, //PrefetchBWOTO + l->zero_array, //PrefetchBWMax l->zero_array, l->zero_array, l->zero_array, @@ -6149,7 +6168,7 @@ static void calculate_peak_bandwidth_required( p->surface_read_bandwidth_c, l->zero_array, //PrefetchBandwidthLuma, l->zero_array, //PrefetchBandwidthChroma, - l->zero_array, //PrefetchBWOTO + l->zero_array, //PrefetchBWMax p->excess_vactive_fill_bw_l, p->excess_vactive_fill_bw_c, p->cursor_bw, @@ -6186,7 +6205,7 @@ static void calculate_peak_bandwidth_required( p->surface_read_bandwidth_c, p->prefetch_bandwidth_l, p->prefetch_bandwidth_c, - p->prefetch_bandwidth_oto, // to prevent ms/mp mismatch when oto bw > total vactive bw + p->prefetch_bandwidth_max, // to prevent ms/mp mismatches where mp prefetch bw > ms prefetch bw p->excess_vactive_fill_bw_l, p->excess_vactive_fill_bw_c, p->cursor_bw, @@ -6223,7 +6242,7 @@ static void calculate_peak_bandwidth_required( p->surface_read_bandwidth_c, p->prefetch_bandwidth_l, p->prefetch_bandwidth_c, - p->prefetch_bandwidth_oto, // to prevent ms/mp mismatch when oto bw > total vactive bw + p->prefetch_bandwidth_max, // to prevent ms/mp mismatch where mp prefetch bw > ms prefetch bw p->excess_vactive_fill_bw_l, p->excess_vactive_fill_bw_c, p->cursor_bw, @@ -6260,7 +6279,7 @@ static void calculate_peak_bandwidth_required( p->surface_read_bandwidth_c, p->prefetch_bandwidth_l, p->prefetch_bandwidth_c, - p->prefetch_bandwidth_oto, // to prevent ms/mp mismatch when oto bw > total vactive bw + p->prefetch_bandwidth_max, // to prevent ms/mp mismatches where mp prefetch bw > ms prefetch bw p->excess_vactive_fill_bw_l, p->excess_vactive_fill_bw_c, p->cursor_bw, @@ -7487,7 +7506,7 @@ static noinline_for_stack void dml_core_ms_prefetch_check(struct dml2_core_inter CalculatePrefetchSchedule_params->VRatioPrefetchC = &mode_lib->ms.VRatioPreC[k]; CalculatePrefetchSchedule_params->RequiredPrefetchPixelDataBWLuma = &mode_lib->ms.RequiredPrefetchPixelDataBWLuma[k]; // prefetch_sw_bw_l CalculatePrefetchSchedule_params->RequiredPrefetchPixelDataBWChroma = &mode_lib->ms.RequiredPrefetchPixelDataBWChroma[k]; // prefetch_sw_bw_c - CalculatePrefetchSchedule_params->RequiredPrefetchBWOTO = &mode_lib->ms.RequiredPrefetchBWOTO[k]; + CalculatePrefetchSchedule_params->RequiredPrefetchBWMax = &mode_lib->ms.RequiredPrefetchBWMax[k]; CalculatePrefetchSchedule_params->NotEnoughTimeForDynamicMetadata = &mode_lib->ms.NoTimeForDynamicMetadata[k]; CalculatePrefetchSchedule_params->Tno_bw = &mode_lib->ms.Tno_bw[k]; CalculatePrefetchSchedule_params->Tno_bw_flip = 
&mode_lib->ms.Tno_bw_flip[k]; @@ -7632,7 +7651,7 @@ static noinline_for_stack void dml_core_ms_prefetch_check(struct dml2_core_inter calculate_peak_bandwidth_params->surface_read_bandwidth_c = mode_lib->ms.vactive_sw_bw_c; calculate_peak_bandwidth_params->prefetch_bandwidth_l = mode_lib->ms.RequiredPrefetchPixelDataBWLuma; calculate_peak_bandwidth_params->prefetch_bandwidth_c = mode_lib->ms.RequiredPrefetchPixelDataBWChroma; - calculate_peak_bandwidth_params->prefetch_bandwidth_oto = mode_lib->ms.RequiredPrefetchBWOTO; + calculate_peak_bandwidth_params->prefetch_bandwidth_max = mode_lib->ms.RequiredPrefetchBWMax; calculate_peak_bandwidth_params->excess_vactive_fill_bw_l = mode_lib->ms.excess_vactive_fill_bw_l; calculate_peak_bandwidth_params->excess_vactive_fill_bw_c = mode_lib->ms.excess_vactive_fill_bw_c; calculate_peak_bandwidth_params->cursor_bw = mode_lib->ms.cursor_bw; @@ -7799,7 +7818,7 @@ static noinline_for_stack void dml_core_ms_prefetch_check(struct dml2_core_inter calculate_peak_bandwidth_params->surface_read_bandwidth_c = mode_lib->ms.vactive_sw_bw_c; calculate_peak_bandwidth_params->prefetch_bandwidth_l = mode_lib->ms.RequiredPrefetchPixelDataBWLuma; calculate_peak_bandwidth_params->prefetch_bandwidth_c = mode_lib->ms.RequiredPrefetchPixelDataBWChroma; - calculate_peak_bandwidth_params->prefetch_bandwidth_oto = mode_lib->ms.RequiredPrefetchBWOTO; + calculate_peak_bandwidth_params->prefetch_bandwidth_max = mode_lib->ms.RequiredPrefetchBWMax; calculate_peak_bandwidth_params->excess_vactive_fill_bw_l = mode_lib->ms.excess_vactive_fill_bw_l; calculate_peak_bandwidth_params->excess_vactive_fill_bw_c = mode_lib->ms.excess_vactive_fill_bw_c; calculate_peak_bandwidth_params->cursor_bw = mode_lib->ms.cursor_bw; @@ -7905,6 +7924,7 @@ static noinline_for_stack void dml_core_ms_prefetch_check(struct dml2_core_inter } + static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out_params) { struct dml2_core_internal_display_mode_lib *mode_lib = in_out_params->mode_lib; @@ -11253,7 +11273,7 @@ static bool dml_core_mode_programming(struct dml2_core_calcs_mode_programming_ex CalculatePrefetchSchedule_params->VRatioPrefetchC = &mode_lib->mp.VRatioPrefetchC[k]; CalculatePrefetchSchedule_params->RequiredPrefetchPixelDataBWLuma = &mode_lib->mp.RequiredPrefetchPixelDataBWLuma[k]; CalculatePrefetchSchedule_params->RequiredPrefetchPixelDataBWChroma = &mode_lib->mp.RequiredPrefetchPixelDataBWChroma[k]; - CalculatePrefetchSchedule_params->RequiredPrefetchBWOTO = &s->dummy_single_array[0][k]; + CalculatePrefetchSchedule_params->RequiredPrefetchBWMax = &s->dummy_single_array[0][k]; CalculatePrefetchSchedule_params->NotEnoughTimeForDynamicMetadata = &mode_lib->mp.NotEnoughTimeForDynamicMetadata[k]; CalculatePrefetchSchedule_params->Tno_bw = &mode_lib->mp.Tno_bw[k]; CalculatePrefetchSchedule_params->Tno_bw_flip = &mode_lib->mp.Tno_bw_flip[k]; @@ -11396,7 +11416,7 @@ static bool dml_core_mode_programming(struct dml2_core_calcs_mode_programming_ex calculate_peak_bandwidth_params->surface_read_bandwidth_c = mode_lib->mp.vactive_sw_bw_c; calculate_peak_bandwidth_params->prefetch_bandwidth_l = mode_lib->mp.RequiredPrefetchPixelDataBWLuma; calculate_peak_bandwidth_params->prefetch_bandwidth_c = mode_lib->mp.RequiredPrefetchPixelDataBWChroma; - calculate_peak_bandwidth_params->prefetch_bandwidth_oto = s->dummy_single_array[0]; + calculate_peak_bandwidth_params->prefetch_bandwidth_max = s->dummy_single_array[0]; calculate_peak_bandwidth_params->excess_vactive_fill_bw_l = 
mode_lib->mp.excess_vactive_fill_bw_l; calculate_peak_bandwidth_params->excess_vactive_fill_bw_c = mode_lib->mp.excess_vactive_fill_bw_c; calculate_peak_bandwidth_params->cursor_bw = mode_lib->mp.cursor_bw; @@ -11536,7 +11556,7 @@ static bool dml_core_mode_programming(struct dml2_core_calcs_mode_programming_ex calculate_peak_bandwidth_params->meta_row_bw = mode_lib->mp.meta_row_bw; calculate_peak_bandwidth_params->prefetch_cursor_bw = mode_lib->mp.prefetch_cursor_bw; calculate_peak_bandwidth_params->prefetch_vmrow_bw = mode_lib->mp.prefetch_vmrow_bw; - calculate_peak_bandwidth_params->prefetch_bandwidth_oto = s->dummy_single_array[0]; + calculate_peak_bandwidth_params->prefetch_bandwidth_max = s->dummy_single_array[0]; calculate_peak_bandwidth_params->flip_bw = mode_lib->mp.final_flip_bw; calculate_peak_bandwidth_params->urgent_burst_factor_l = mode_lib->mp.UrgentBurstFactorLuma; calculate_peak_bandwidth_params->urgent_burst_factor_c = mode_lib->mp.UrgentBurstFactorChroma; @@ -11880,7 +11900,7 @@ static bool dml_core_mode_programming(struct dml2_core_calcs_mode_programming_ex } //Maximum Bandwidth Used - s->TotalWRBandwidth = 0; + mode_lib->mp.TotalWRBandwidth = 0; for (k = 0; k < display_cfg->num_streams; ++k) { s->WRBandwidth = 0; if (display_cfg->stream_descriptors[k].writeback.active_writebacks_per_stream > 0) { @@ -11889,7 +11909,7 @@ static bool dml_core_mode_programming(struct dml2_core_calcs_mode_programming_ex (display_cfg->stream_descriptors[k].timing.h_total * display_cfg->stream_descriptors[k].writeback.writeback_stream[0].input_height / ((double)display_cfg->stream_descriptors[k].timing.pixel_clock_khz / 1000)) * (display_cfg->stream_descriptors[k].writeback.writeback_stream[0].pixel_format == dml2_444_32 ? 4.0 : 8.0); - s->TotalWRBandwidth = s->TotalWRBandwidth + s->WRBandwidth; + mode_lib->mp.TotalWRBandwidth = mode_lib->mp.TotalWRBandwidth + s->WRBandwidth; } } @@ -13059,6 +13079,10 @@ void dml2_core_calcs_get_informative(const struct dml2_core_internal_display_mod out->informative.mode_support_info.OutputRate[k] = dml2_output_rate_hdmi_rate_10x4; else if (mode_lib->ms.support.OutputRate[k] == dml2_core_internal_output_rate_hdmi_rate_12x4) out->informative.mode_support_info.OutputRate[k] = dml2_output_rate_hdmi_rate_12x4; + else if (mode_lib->ms.support.OutputRate[k] == dml2_core_internal_output_rate_hdmi_rate_16x4) + out->informative.mode_support_info.OutputRate[k] = dml2_output_rate_hdmi_rate_16x4; + else if (mode_lib->ms.support.OutputRate[k] == dml2_core_internal_output_rate_hdmi_rate_20x4) + out->informative.mode_support_info.OutputRate[k] = dml2_output_rate_hdmi_rate_20x4; out->informative.mode_support_info.AlignedYPitch[k] = mode_lib->ms.support.AlignedYPitch[k]; out->informative.mode_support_info.AlignedCPitch[k] = mode_lib->ms.support.AlignedCPitch[k]; @@ -13243,7 +13267,7 @@ void dml2_core_calcs_get_informative(const struct dml2_core_internal_display_mod out->informative.misc.DisplayPipeLineDeliveryTimeLumaPrefetch[k] = mode_lib->mp.DisplayPipeLineDeliveryTimeLumaPrefetch[k]; out->informative.misc.DisplayPipeLineDeliveryTimeChromaPrefetch[k] = mode_lib->mp.DisplayPipeLineDeliveryTimeChromaPrefetch[k]; - out->informative.misc.WritebackRequiredBandwidth = mode_lib->scratch.dml_core_mode_programming_locals.TotalWRBandwidth / 1000.0; + out->informative.misc.WritebackRequiredBandwidth = mode_lib->mp.TotalWRBandwidth / 1000.0; out->informative.misc.WritebackAllowDRAMClockChangeEndPosition[k] = mode_lib->mp.WritebackAllowDRAMClockChangeEndPosition[k]; 
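A worked illustration, with assumed numbers, of the quarter-line rounding applied to dst_y_per_vm_vblank and dst_y_per_row_vblank in the prefetch-schedule hunk above:

	/* With an assumed LineTime of 8.0 us and TimeForFetchingVM of 9.0 us:
	 *   exact cost     = 9.0 / 8.0                          = 1.125 lines
	 *   quantized cost = math_ceil2(4.0 * 9.0 / 8.0, 1.0) / 4.0
	 *                  = math_ceil2(4.5, 1.0) / 4.0 = 5.0 / 4.0 = 1.25 lines
	 * i.e. the VM fetch time is always rounded up to the next quarter line of vblank. */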
out->informative.misc.WritebackAllowFCLKChangeEndPosition[k] = mode_lib->mp.WritebackAllowFCLKChangeEndPosition[k]; out->informative.misc.DSCCLK_calculated[k] = mode_lib->mp.DSCCLK[k]; diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_factory.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_factory.c index 28394de02885..640087e862f8 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_factory.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_factory.c @@ -10,7 +10,7 @@ bool dml2_core_create(enum dml2_project_id project_id, struct dml2_core_instance { bool result = false; - if (out == 0) + if (!out) return false; memset(out, 0, sizeof(struct dml2_core_instance)); diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_shared_types.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_shared_types.h index bdee6ad7bc59..ffb8c09f37a5 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_shared_types.h +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_shared_types.h @@ -102,6 +102,7 @@ struct dml2_core_internal_DmlPipe { double DCFClkDeepSleep; unsigned int DPPPerSurface; bool ScalerEnabled; + bool UPSPEnabled; enum dml2_rotation_angle RotationAngle; bool mirrored; unsigned int ViewportHeight; @@ -186,7 +187,9 @@ enum dml2_core_internal_output_type_rate { dml2_core_internal_output_rate_hdmi_rate_6x4 = 9, dml2_core_internal_output_rate_hdmi_rate_8x4 = 10, dml2_core_internal_output_rate_hdmi_rate_10x4 = 11, - dml2_core_internal_output_rate_hdmi_rate_12x4 = 12 + dml2_core_internal_output_rate_hdmi_rate_12x4 = 12, + dml2_core_internal_output_rate_hdmi_rate_16x4 = 13, + dml2_core_internal_output_rate_hdmi_rate_20x4 = 14 }; struct dml2_core_internal_watermarks { @@ -198,6 +201,8 @@ struct dml2_core_internal_watermarks { double WritebackFCLKChangeWatermark; double StutterExitWatermark; double StutterEnterPlusExitWatermark; + double LowPowerStutterExitWatermark; + double LowPowerStutterEnterPlusExitWatermark; double Z8StutterExitWatermark; double Z8StutterEnterPlusExitWatermark; double USRRetrainingWatermark; @@ -260,12 +265,14 @@ struct dml2_core_internal_mode_support_info { bool AvgBandwidthSupport; bool UrgVactiveBandwidthSupport; bool EnoughUrgentLatencyHidingSupport; + bool PrefetchScheduleSupported; bool PrefetchSupported; bool PrefetchBandwidthSupported; bool DynamicMetadataSupported; bool VRatioInPrefetchSupported; bool DISPCLK_DPPCLK_Support; bool TotalAvailablePipesSupport; + bool ODMSupport; bool ModeSupport; bool ViewportSizeSupport; @@ -314,9 +321,7 @@ struct dml2_core_internal_mode_support_info { double non_urg_bandwidth_required[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max]; // same as urg_bandwidth, except not scaled by urg burst factor double non_urg_bandwidth_required_flip[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max]; - bool avg_bandwidth_support_ok[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max]; - double max_urgent_latency_us; double max_non_urgent_latency_us; double avg_non_urgent_latency_us; @@ -329,6 +334,8 @@ struct dml2_core_internal_mode_support_info { bool temp_read_or_ppt_support; struct dml2_core_internal_watermarks watermarks; + bool dcfclk_support; + bool qos_bandwidth_support; }; struct dml2_core_internal_mode_support { @@ -350,9 +357,11 @@ struct dml2_core_internal_mode_support { double SOCCLK; /// <brief Basically just the clock freq at the 
min (or given) state double DCFCLK; /// <brief Basically just the clock freq at the min (or given) state and max combine setting double GlobalDPPCLK; /// <brief the Max DPPCLK freq out of all pipes + double GlobalDTBCLK; /// <brief the Max DTBCLK freq out of all pipes double uclk_freq_mhz; double dram_bw_mbps; double max_dram_bw_mbps; + double min_available_urgent_bandwidth_MBps; /// <brief Minimum guaranteed available urgent return bandwidth in MBps double MaxFabricClock; /// <brief Basically just the clock freq at the min (or given) state double MaxDCFCLK; /// <brief Basically just the clock freq at the min (or given) state and max combine setting @@ -394,9 +403,13 @@ struct dml2_core_internal_mode_support { double TWait[DML2_MAX_PLANES]; bool UnboundedRequestEnabled; + unsigned int compbuf_reserved_space_64b; + bool hw_debug5; unsigned int CompressedBufferSizeInkByte; double VRatioPreY[DML2_MAX_PLANES]; double VRatioPreC[DML2_MAX_PLANES]; + unsigned int req_per_swath_ub_l[DML2_MAX_PLANES]; + unsigned int req_per_swath_ub_c[DML2_MAX_PLANES]; unsigned int swath_width_luma_ub[DML2_MAX_PLANES]; unsigned int swath_width_chroma_ub[DML2_MAX_PLANES]; unsigned int RequiredSlots[DML2_MAX_PLANES]; @@ -417,8 +430,8 @@ struct dml2_core_internal_mode_support { double dst_y_prefetch[DML2_MAX_PLANES]; double LinesForVM[DML2_MAX_PLANES]; double LinesForDPTERow[DML2_MAX_PLANES]; - double SwathWidthYSingleDPP[DML2_MAX_PLANES]; - double SwathWidthCSingleDPP[DML2_MAX_PLANES]; + unsigned int SwathWidthYSingleDPP[DML2_MAX_PLANES]; + unsigned int SwathWidthCSingleDPP[DML2_MAX_PLANES]; unsigned int BytePerPixelY[DML2_MAX_PLANES]; unsigned int BytePerPixelC[DML2_MAX_PLANES]; double BytePerPixelInDETY[DML2_MAX_PLANES]; @@ -469,13 +482,58 @@ struct dml2_core_internal_mode_support { double mall_prefetch_sdp_overhead_factor[DML2_MAX_PLANES]; // overhead to the imall or phantom pipe double mall_prefetch_dram_overhead_factor[DML2_MAX_PLANES]; + bool is_using_mall_for_ss[DML2_MAX_PLANES]; + unsigned int meta_row_width_chroma[DML2_MAX_PLANES]; + unsigned int PixelPTEReqHeightC[DML2_MAX_PLANES]; + bool PTE_BUFFER_MODE[DML2_MAX_PLANES]; + unsigned int meta_req_height_chroma[DML2_MAX_PLANES]; + unsigned int meta_pte_bytes_per_frame_ub_c[DML2_MAX_PLANES]; + unsigned int dpde0_bytes_per_frame_ub_c[DML2_MAX_PLANES]; + unsigned int dpte_row_width_luma_ub[DML2_MAX_PLANES]; + unsigned int meta_req_width[DML2_MAX_PLANES]; + unsigned int meta_row_width[DML2_MAX_PLANES]; + unsigned int PixelPTEReqWidthY[DML2_MAX_PLANES]; + unsigned int dpte_row_height_linear[DML2_MAX_PLANES]; + unsigned int PTERequestSizeY[DML2_MAX_PLANES]; + unsigned int dpte_row_width_chroma_ub[DML2_MAX_PLANES]; + unsigned int PixelPTEReqWidthC[DML2_MAX_PLANES]; + unsigned int meta_pte_bytes_per_frame_ub_l[DML2_MAX_PLANES]; + unsigned int dpte_row_height_linear_chroma[DML2_MAX_PLANES]; + unsigned int PTERequestSizeC[DML2_MAX_PLANES]; + unsigned int meta_req_height[DML2_MAX_PLANES]; + unsigned int dpde0_bytes_per_frame_ub_l[DML2_MAX_PLANES]; + unsigned int meta_req_width_chroma[DML2_MAX_PLANES]; + unsigned int PixelPTEReqHeightY[DML2_MAX_PLANES]; + unsigned int BIGK_FRAGMENT_SIZE[DML2_MAX_PLANES]; + unsigned int vm_group_bytes[DML2_MAX_PLANES]; + unsigned int VReadyOffsetPix[DML2_MAX_PLANES]; + unsigned int VUpdateOffsetPix[DML2_MAX_PLANES]; + unsigned int VUpdateWidthPix[DML2_MAX_PLANES]; + double TSetup[DML2_MAX_PLANES]; + double Tdmdl_vm_raw[DML2_MAX_PLANES]; + double Tdmdl_raw[DML2_MAX_PLANES]; + unsigned int VStartupMin[DML2_MAX_PLANES]; /// <brief Minimum 
vstartup to meet the prefetch schedule (i.e. the prefetch solution can be found at this vstartup time); not the actual global sync vstartup pos. + double MaxActiveDRAMClockChangeLatencySupported[DML2_MAX_PLANES]; + double MaxActiveFCLKChangeLatencySupported; + // Backend bool RequiresDSC[DML2_MAX_PLANES]; bool RequiresFEC[DML2_MAX_PLANES]; double OutputBpp[DML2_MAX_PLANES]; + double DesiredOutputBpp[DML2_MAX_PLANES]; + double PixelClockBackEnd[DML2_MAX_PLANES]; unsigned int DSCDelay[DML2_MAX_PLANES]; enum dml2_core_internal_output_type OutputType[DML2_MAX_PLANES]; enum dml2_core_internal_output_type_rate OutputRate[DML2_MAX_PLANES]; + bool TotalAvailablePipesSupportNoDSC; + bool TotalAvailablePipesSupportDSC; + unsigned int NumberOfDPPNoDSC; + unsigned int NumberOfDPPDSC; + enum dml2_odm_mode ODMModeNoDSC; + enum dml2_odm_mode ODMModeDSC; + double RequiredDISPCLKPerSurfaceNoDSC; + double RequiredDISPCLKPerSurfaceDSC; + unsigned int EstimatedNumberOfDSCSlices[DML2_MAX_PLANES]; // Bandwidth Related Info double BandwidthAvailableForImmediateFlip; @@ -484,8 +542,14 @@ struct dml2_core_internal_mode_support { double WriteBandwidth[DML2_MAX_PLANES][DML2_MAX_WRITEBACK]; double RequiredPrefetchPixelDataBWLuma[DML2_MAX_PLANES]; double RequiredPrefetchPixelDataBWChroma[DML2_MAX_PLANES]; - /* oto bw should also be considered when calculating peak urgent bw to avoid situations oto/equ mismatches between ms and mp */ - double RequiredPrefetchBWOTO[DML2_MAX_PLANES]; + /* Max bandwidth calculated from prefetch schedule should be considered in addition to the pixel data bw to avoid ms/mp mismatches. + * 1. oto bw should also be considered when calculating peak urgent bw to avoid situations oto/equ mismatches between ms and mp + * + * 2. equ bandwidth needs to be considered for calculating peak urgent bw when equ schedule is used in mode support. 
+ * Some slight difference in variables may cause the pixel data bandwidth to be higher + * even though overall equ prefetch bandwidths can be lower going from ms to mp + */ + double RequiredPrefetchBWMax[DML2_MAX_PLANES]; double cursor_bw[DML2_MAX_PLANES]; double prefetch_cursor_bw[DML2_MAX_PLANES]; double prefetch_vmrow_bw[DML2_MAX_PLANES]; @@ -538,7 +602,44 @@ struct dml2_core_internal_mode_support { bool mall_comb_mcache_c[DML2_MAX_PLANES]; bool lc_comb_mcache[DML2_MAX_PLANES]; + unsigned int vmpg_width_y[DML2_MAX_PLANES]; + unsigned int vmpg_height_y[DML2_MAX_PLANES]; + unsigned int vmpg_width_c[DML2_MAX_PLANES]; + unsigned int vmpg_height_c[DML2_MAX_PLANES]; + + unsigned int meta_row_height_luma[DML2_MAX_PLANES]; + unsigned int meta_row_height_chroma[DML2_MAX_PLANES]; + unsigned int meta_row_bytes_per_row_ub_l[DML2_MAX_PLANES]; + unsigned int meta_row_bytes_per_row_ub_c[DML2_MAX_PLANES]; + unsigned int dpte_row_bytes_per_row_l[DML2_MAX_PLANES]; + unsigned int dpte_row_bytes_per_row_c[DML2_MAX_PLANES]; + + unsigned int pstate_bytes_required_l[DML2_MAX_PLANES]; + unsigned int pstate_bytes_required_c[DML2_MAX_PLANES]; + unsigned int cursor_bytes_per_chunk[DML2_MAX_PLANES]; + unsigned int cursor_bytes_per_line[DML2_MAX_PLANES]; + + unsigned int MaximumVStartup[DML2_MAX_PLANES]; + + double HostVMInefficiencyFactor; + double HostVMInefficiencyFactorPrefetch; + + unsigned int tdlut_pte_bytes_per_frame[DML2_MAX_PLANES]; + unsigned int tdlut_bytes_per_frame[DML2_MAX_PLANES]; + unsigned int tdlut_groups_per_2row_ub[DML2_MAX_PLANES]; + double tdlut_opt_time[DML2_MAX_PLANES]; + double tdlut_drain_time[DML2_MAX_PLANES]; + unsigned int tdlut_bytes_per_group[DML2_MAX_PLANES]; + + double Tvm_trips_flip[DML2_MAX_PLANES]; + double Tr0_trips_flip[DML2_MAX_PLANES]; + double Tvm_trips_flip_rounded[DML2_MAX_PLANES]; + double Tr0_trips_flip_rounded[DML2_MAX_PLANES]; + unsigned int DSTYAfterScaler[DML2_MAX_PLANES]; + unsigned int DSTXAfterScaler[DML2_MAX_PLANES]; + + enum dml2_pstate_method pstate_switch_modes[DML2_MAX_PLANES]; }; /// @brief A mega structure that houses various info for model programming step. 
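The RequiredPrefetchBWOTO to RequiredPrefetchBWMax rename implements the rule described in the comment above; a minimal sketch of that rule, using a hypothetical helper name and simplified from the CalculatePrefetchSchedule logic earlier in this patch:

static double required_prefetch_bw_max(double prefetch_bw_oto, double prefetch_bw_equ,
					bool used_equ_schedule)
{
	/* the oto estimate is always accounted for ... */
	double bw_max = prefetch_bw_oto;

	/* ... and when the equ schedule is chosen its (possibly larger) bandwidth is
	 * propagated as well, so mode programming never sees a higher prefetch
	 * requirement than mode support budgeted for */
	if (used_equ_schedule && prefetch_bw_equ > bw_max)
		bw_max = prefetch_bw_equ;

	return bw_max;
}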
@@ -548,6 +649,7 @@ struct dml2_core_internal_mode_program { double FabricClock; /// <brief Basically just the clock freq at the min (or given) state //double DCFCLK; /// <brief Basically just the clock freq at the min (or given) state and max combine setting double dram_bw_mbps; + double min_available_urgent_bandwidth_MBps; /// <brief Minimum guaranteed available urgent return bandwidth in MBps double uclk_freq_mhz; unsigned int NoOfDPP[DML2_MAX_PLANES]; enum dml2_odm_mode ODMMode[DML2_MAX_PLANES]; @@ -599,6 +701,8 @@ struct dml2_core_internal_mode_program { unsigned int MacroTileHeightC[DML2_MAX_PLANES]; unsigned int MacroTileWidthY[DML2_MAX_PLANES]; unsigned int MacroTileWidthC[DML2_MAX_PLANES]; + double MaximumSwathWidthLuma[DML2_MAX_PLANES]; + double MaximumSwathWidthChroma[DML2_MAX_PLANES]; bool surf_linear128_l[DML2_MAX_PLANES]; bool surf_linear128_c[DML2_MAX_PLANES]; @@ -631,6 +735,14 @@ struct dml2_core_internal_mode_program { double UrgentBurstFactorChroma[DML2_MAX_PLANES]; double UrgentBurstFactorChromaPre[DML2_MAX_PLANES]; + double MaximumSwathWidthInLineBufferLuma; + double MaximumSwathWidthInLineBufferChroma; + + unsigned int vmpg_width_y[DML2_MAX_PLANES]; + unsigned int vmpg_height_y[DML2_MAX_PLANES]; + unsigned int vmpg_width_c[DML2_MAX_PLANES]; + unsigned int vmpg_height_c[DML2_MAX_PLANES]; + double meta_row_bw[DML2_MAX_PLANES]; unsigned int meta_row_bytes[DML2_MAX_PLANES]; unsigned int meta_req_width[DML2_MAX_PLANES]; @@ -652,7 +764,9 @@ struct dml2_core_internal_mode_program { unsigned int PTERequestSizeC[DML2_MAX_PLANES]; double TWait[DML2_MAX_PLANES]; + double Tdmdl_vm_raw[DML2_MAX_PLANES]; double Tdmdl_vm[DML2_MAX_PLANES]; + double Tdmdl_raw[DML2_MAX_PLANES]; double Tdmdl[DML2_MAX_PLANES]; double TSetup[DML2_MAX_PLANES]; unsigned int dpde0_bytes_per_frame_ub_l[DML2_MAX_PLANES]; @@ -684,6 +798,38 @@ struct dml2_core_internal_mode_program { double TCalc; unsigned int TotImmediateFlipBytes; + unsigned int MaxTotalDETInKByte; + unsigned int NomDETInKByte; + unsigned int MinCompressedBufferSizeInKByte; + double PixelClockBackEnd[DML2_MAX_PLANES]; + double OutputBpp[DML2_MAX_PLANES]; + bool dsc_enable[DML2_MAX_PLANES]; + unsigned int num_dsc_slices[DML2_MAX_PLANES]; + unsigned int meta_row_bytes_per_row_ub_l[DML2_MAX_PLANES]; + unsigned int meta_row_bytes_per_row_ub_c[DML2_MAX_PLANES]; + unsigned int dpte_row_bytes_per_row_l[DML2_MAX_PLANES]; + unsigned int dpte_row_bytes_per_row_c[DML2_MAX_PLANES]; + unsigned int cursor_bytes_per_chunk[DML2_MAX_PLANES]; + unsigned int cursor_bytes_per_line[DML2_MAX_PLANES]; + unsigned int MaxVStartupLines[DML2_MAX_PLANES]; /// <brief more like vblank for the plane's OTG + double HostVMInefficiencyFactor; + double HostVMInefficiencyFactorPrefetch; + unsigned int tdlut_pte_bytes_per_frame[DML2_MAX_PLANES]; + unsigned int tdlut_bytes_per_frame[DML2_MAX_PLANES]; + unsigned int tdlut_groups_per_2row_ub[DML2_MAX_PLANES]; + double tdlut_opt_time[DML2_MAX_PLANES]; + double tdlut_drain_time[DML2_MAX_PLANES]; + unsigned int tdlut_bytes_per_group[DML2_MAX_PLANES]; + double Tvm_trips_flip[DML2_MAX_PLANES]; + double Tr0_trips_flip[DML2_MAX_PLANES]; + double Tvm_trips_flip_rounded[DML2_MAX_PLANES]; + double Tr0_trips_flip_rounded[DML2_MAX_PLANES]; + bool immediate_flip_required; // any pipes need immediate flip + double SOCCLK; /// <brief Basically just the clock freq at the min (or given) state + double TotalWRBandwidth; + double max_urgent_latency_us; + double df_response_time_us; + // ------------------- // Output // ------------------- @@ -694,9 
+840,12 @@ struct dml2_core_internal_mode_program { // Support bool UrgVactiveBandwidthSupport; + bool PrefetchScheduleSupported; + bool UrgentBandwidthSupport; bool PrefetchModeSupported; // <brief Is the prefetch mode (bandwidth and latency) supported bool ImmediateFlipSupported; bool ImmediateFlipSupportedForPipe[DML2_MAX_PLANES]; + bool dcfclk_support; // Clock double Dcfclk; @@ -730,6 +879,9 @@ struct dml2_core_internal_mode_program { double Z8StutterEfficiency; unsigned int Z8NumberOfStutterBurstsPerFrame; double Z8StutterEfficiencyNotIncludingVBlank; + double LowPowerStutterEfficiency; + double LowPowerStutterEfficiencyNotIncludingVBlank; + unsigned int LowPowerNumberOfStutterBurstsPerFrame; double StutterPeriod; double Z8StutterEfficiencyBestCase; unsigned int Z8NumberOfStutterBurstsPerFrameBestCase; @@ -788,7 +940,7 @@ struct dml2_core_internal_mode_program { // RQ registers bool PTE_BUFFER_MODE[DML2_MAX_PLANES]; unsigned int BIGK_FRAGMENT_SIZE[DML2_MAX_PLANES]; - + double VActiveLatencyHidingUs[DML2_MAX_PLANES]; unsigned int SubViewportLinesNeededInMALL[DML2_MAX_PLANES]; bool is_using_mall_for_ss[DML2_MAX_PLANES]; @@ -869,6 +1021,8 @@ struct dml2_core_internal_SOCParametersList { double FCLKChangeLatency; double SRExitTime; double SREnterPlusExitTime; + double SRExitTimeLowPower; + double SREnterPlusExitTimeLowPower; double SRExitZ8Time; double SREnterPlusExitZ8Time; double USRRetrainingLatency; @@ -1001,10 +1155,10 @@ struct dml2_core_calcs_mode_programming_locals { double dummy_bw[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max]; double surface_dummy_bw[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max][DML2_MAX_PLANES]; double surface_dummy_bw0[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max][DML2_MAX_PLANES]; - unsigned int dummy_integer_array[2][DML2_MAX_PLANES]; + unsigned int dummy_integer_array[4][DML2_MAX_PLANES]; enum dml2_output_encoder_class dummy_output_encoder_array[DML2_MAX_PLANES]; double dummy_single_array[2][DML2_MAX_PLANES]; - unsigned int dummy_long_array[4][DML2_MAX_PLANES]; + unsigned int dummy_long_array[8][DML2_MAX_PLANES]; bool dummy_boolean_array[2][DML2_MAX_PLANES]; bool dummy_boolean[2]; double dummy_single[2]; @@ -1028,7 +1182,6 @@ struct dml2_core_calcs_mode_programming_locals { double dlg_vblank_start; double LSetup; double blank_lines_remaining; - double TotalWRBandwidth; double WRBandwidth; struct dml2_core_internal_DmlPipe myPipe; double PixelClockBackEndFactor; @@ -1153,6 +1306,7 @@ struct dml2_core_calcs_CalculateVMRowAndSwath_params { unsigned int HostVMMinPageSize; unsigned int DCCMetaBufferSizeBytes; bool mrq_present; + enum dml2_pstate_method pstate_switch_modes[DML2_MAX_PLANES]; // Output bool *PTEBufferSizeNotExceeded; @@ -1389,7 +1543,7 @@ struct dml2_core_shared_get_urgent_bandwidth_required_locals { double vm_row_bw; double flip_and_active_bw; double flip_and_prefetch_bw; - double flip_and_prefetch_bw_oto; + double flip_and_prefetch_bw_max; double active_and_excess_bw; }; @@ -1418,6 +1572,7 @@ struct dml2_core_shared_CalculateFlipSchedule_locals { struct dml2_core_shared_rq_dlg_get_dlg_reg_locals { unsigned int plane_idx; + unsigned int stream_idx; enum dml2_source_format_class source_format; const struct dml2_timing_cfg *timing; bool dual_plane; @@ -1625,6 +1780,9 @@ struct dml2_core_calcs_CalculateSwathAndDETConfiguration_params { double *BytePerPixDETC; unsigned int *DPPPerSurface; bool mrq_present; + unsigned int dummy[2][DML2_MAX_PLANES]; + unsigned int 
swath_width_luma_ub_single_dpp[DML2_MAX_PLANES]; + unsigned int swath_width_chroma_ub_single_dpp[DML2_MAX_PLANES]; // output unsigned int *req_per_swath_ub_l; @@ -1642,6 +1800,8 @@ struct dml2_core_calcs_CalculateSwathAndDETConfiguration_params { unsigned int *DETBufferSizeC; unsigned int *full_swath_bytes_l; unsigned int *full_swath_bytes_c; + unsigned int *full_swath_bytes_single_dpp_l; + unsigned int *full_swath_bytes_single_dpp_c; bool *UnboundedRequestEnabled; unsigned int *compbuf_reserved_space_64b; unsigned int *CompressedBufferSizeInkByte; @@ -1698,9 +1858,11 @@ struct dml2_core_calcs_CalculateStutterEfficiency_params { unsigned int CompbufReservedSpaceZs; bool hw_debug5; double SRExitTime; + double SRExitTimeLowPower; double SRExitZ8Time; bool SynchronizeTimings; double StutterEnterPlusExitWatermark; + double LowPowerStutterEnterPlusExitWatermark; double Z8StutterEnterPlusExitWatermark; bool ProgressiveToInterlaceUnitInOPP; double *MinTTUVBlank; @@ -1726,7 +1888,10 @@ struct dml2_core_calcs_CalculateStutterEfficiency_params { // output double *StutterEfficiencyNotIncludingVBlank; double *StutterEfficiency; + double *LowPowerStutterEfficiencyNotIncludingVBlank; + double *LowPowerStutterEfficiency; unsigned int *NumberOfStutterBurstsPerFrame; + unsigned int *LowPowerNumberOfStutterBurstsPerFrame; double *Z8StutterEfficiencyNotIncludingVBlank; double *Z8StutterEfficiency; unsigned int *Z8NumberOfStutterBurstsPerFrame; @@ -1801,7 +1966,7 @@ struct dml2_core_calcs_CalculatePrefetchSchedule_params { double *VRatioPrefetchC; double *RequiredPrefetchPixelDataBWLuma; double *RequiredPrefetchPixelDataBWChroma; - double *RequiredPrefetchBWOTO; + double *RequiredPrefetchBWMax; bool *NotEnoughTimeForDynamicMetadata; double *Tno_bw; double *Tno_bw_flip; @@ -2038,7 +2203,7 @@ struct dml2_core_calcs_calculate_peak_bandwidth_required_params { double *surface_read_bandwidth_c; double *prefetch_bandwidth_l; double *prefetch_bandwidth_c; - double *prefetch_bandwidth_oto; + double *prefetch_bandwidth_max; double *excess_vactive_fill_bw_l; double *excess_vactive_fill_bw_c; double *cursor_bw; diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_utils.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_utils.c index 7a220c0141c2..5f301befed16 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_utils.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_utils.c @@ -464,7 +464,7 @@ bool dml2_core_utils_get_segment_horizontal_contiguous(enum dml2_swizzle_mode sw bool dml2_core_utils_is_linear(enum dml2_swizzle_mode sw_mode) { - return (sw_mode == dml2_sw_linear || sw_mode == dml2_sw_linear_256b || sw_mode == dml2_linear_64elements); + return sw_mode == dml2_sw_linear; }; diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_dpmm/dml2_dpmm_dcn4.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_dpmm/dml2_dpmm_dcn4.c index f486b090bbfc..22969a533a7b 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_dpmm/dml2_dpmm_dcn4.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_dpmm/dml2_dpmm_dcn4.c @@ -389,9 +389,6 @@ static bool map_min_clocks_to_dpm(const struct dml2_core_mode_support_result *mo if (result) result = round_up_to_next_dpm(&display_cfg->min_clocks.dcn4x.dispclk_khz, &state_table->dispclk); - if (result) - result = round_up_to_next_dpm(&display_cfg->min_clocks.dcn4x.deepsleep_dcfclk_khz, &state_table->dcfclk); - for (i = 0; i < DML2_MAX_DCN_PIPES; i++) { if (result) result = 
round_up_to_next_dpm(&display_cfg->plane_programming[i].min_clocks.dcn4x.dppclk_khz, &state_table->dppclk); diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_dpmm/dml2_dpmm_factory.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_dpmm/dml2_dpmm_factory.c index 3861bc6c9621..dfd01440737d 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_dpmm/dml2_dpmm_factory.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_dpmm/dml2_dpmm_factory.c @@ -20,7 +20,7 @@ bool dml2_dpmm_create(enum dml2_project_id project_id, struct dml2_dpmm_instance { bool result = false; - if (out == 0) + if (!out) return false; memset(out, 0, sizeof(struct dml2_dpmm_instance)); diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_mcg/dml2_mcg_factory.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_mcg/dml2_mcg_factory.c index cd3fbc0591d8..c60b8fe90819 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_mcg/dml2_mcg_factory.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_mcg/dml2_mcg_factory.c @@ -15,7 +15,7 @@ bool dml2_mcg_create(enum dml2_project_id project_id, struct dml2_mcg_instance * { bool result = false; - if (out == 0) + if (!out) return false; memset(out, 0, sizeof(struct dml2_mcg_instance)); diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn3.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn3.c index e763c8e45da8..1b9579a32ff2 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn3.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn3.c @@ -48,18 +48,19 @@ static void set_reserved_time_on_all_planes_with_stream_index(struct display_con static void remove_duplicates(double *list_a, int *list_a_size) { - int cur_element = 0; - // For all elements b[i] in list_b[] - while (cur_element < *list_a_size - 1) { - if (list_a[cur_element] == list_a[cur_element + 1]) { - for (int j = cur_element + 1; j < *list_a_size - 1; j++) { - list_a[j] = list_a[j + 1]; - } - *list_a_size = *list_a_size - 1; - } else { - cur_element++; + int j = 0; + + if (*list_a_size == 0) + return; + + for (int i = 1; i < *list_a_size; i++) { + if (list_a[j] != list_a[i]) { + j++; + list_a[j] = list_a[i]; } } + + *list_a_size = j + 1; } static bool increase_mpc_combine_factor(unsigned int *mpc_combine_factor, unsigned int limit) diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_factory.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_factory.c index 7ed0242a4b33..55d2464365d0 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_factory.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_factory.c @@ -26,7 +26,7 @@ bool dml2_pmo_create(enum dml2_project_id project_id, struct dml2_pmo_instance * { bool result = false; - if (out == 0) + if (!out) return false; memset(out, 0, sizeof(struct dml2_pmo_instance)); diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/inc/dml2_debug.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/inc/dml2_debug.h index b226225103c3..611c80f4f1bf 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/inc/dml2_debug.h +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/inc/dml2_debug.h @@ -10,15 +10,74 @@ #define DML_LOG_LEVEL_DEFAULT DML_LOG_LEVEL_WARN #define DML_LOG_INTERNAL(fmt, ...) dm_output_to_console(fmt, ## __VA_ARGS__) -/* ASSERT with message output */ -#define DML_ASSERT_MSG(condition, fmt, ...) 
\ - do { \ - if (!(condition)) { \ - DML_LOG_ERROR("DML ASSERT hit in %s line %d\n", __func__, __LINE__); \ - DML_LOG_ERROR(fmt, ## __VA_ARGS__); \ - DML_ASSERT(condition); \ - } \ - } while (0) +/* private helper macros */ +#define _BOOL_FORMAT(field) "%s", field ? "true" : "false" +#define _UINT_FORMAT(field) "%u", field +#define _INT_FORMAT(field) "%d", field +#define _DOUBLE_FORMAT(field) "%lf", field +#define _ELEMENT_FUNC "function" +#define _ELEMENT_COMP_IF "component_interface" +#define _ELEMENT_TOP_IF "top_interface" +#define _LOG_ENTRY(element) do { \ + DML_LOG_INTERNAL("<"element" name=\""); \ + DML_LOG_INTERNAL(__func__); \ + DML_LOG_INTERNAL("\">\n"); \ +} while (0) +#define _LOG_EXIT(element) DML_LOG_INTERNAL("</"element">\n") +#define _LOG_SCALAR(field, format) do { \ + DML_LOG_INTERNAL(#field" = "format(field)); \ + DML_LOG_INTERNAL("\n"); \ +} while (0) +#define _LOG_ARRAY(field, size, format) do { \ + DML_LOG_INTERNAL(#field " = ["); \ + for (int _i = 0; _i < (int) size; _i++) { \ + DML_LOG_INTERNAL(format(field[_i])); \ + if (_i + 1 == (int) size) \ + DML_LOG_INTERNAL("]\n"); \ + else \ + DML_LOG_INTERNAL(", "); \ +}} while (0) +#define _LOG_2D_ARRAY(field, size0, size1, format) do { \ + DML_LOG_INTERNAL(#field" = ["); \ + for (int _i = 0; _i < (int) size0; _i++) { \ + DML_LOG_INTERNAL("\n\t["); \ + for (int _j = 0; _j < (int) size1; _j++) { \ + DML_LOG_INTERNAL(format(field[_i][_j])); \ + if (_j + 1 == (int) size1) \ + DML_LOG_INTERNAL("]"); \ + else \ + DML_LOG_INTERNAL(", "); \ + } \ + if (_i + 1 == (int) size0) \ + DML_LOG_INTERNAL("]\n"); \ + else \ + DML_LOG_INTERNAL(", "); \ + } \ +} while (0) +#define _LOG_3D_ARRAY(field, size0, size1, size2, format) do { \ + DML_LOG_INTERNAL(#field" = ["); \ + for (int _i = 0; _i < (int) size0; _i++) { \ + DML_LOG_INTERNAL("\n\t["); \ + for (int _j = 0; _j < (int) size1; _j++) { \ + DML_LOG_INTERNAL("["); \ + for (int _k = 0; _k < (int) size2; _k++) { \ + DML_LOG_INTERNAL(format(field[_i][_j][_k])); \ + if (_k + 1 == (int) size2) \ + DML_LOG_INTERNAL("]"); \ + else \ + DML_LOG_INTERNAL(", "); \ + } \ + if (_j + 1 == (int) size1) \ + DML_LOG_INTERNAL("]"); \ + else \ + DML_LOG_INTERNAL(", "); \ + } \ + if (_i + 1 == (int) size0) \ + DML_LOG_INTERNAL("]\n"); \ + else \ + DML_LOG_INTERNAL(", "); \ + } \ +} while (0) /* fatal errors for unrecoverable DML states until a full reset */ #define DML_LOG_LEVEL_FATAL 0 @@ -28,7 +87,7 @@ #define DML_LOG_LEVEL_WARN 2 /* high level tracing of DML interfaces */ #define DML_LOG_LEVEL_INFO 3 -/* detailed tracing of DML internal components */ +/* tracing of DML internal executions */ #define DML_LOG_LEVEL_DEBUG 4 /* detailed tracing of DML calculation procedure */ #define DML_LOG_LEVEL_VERBOSE 5 @@ -37,30 +96,94 @@ #define DML_LOG_LEVEL DML_LOG_LEVEL_DEFAULT #endif /* #ifndef DML_LOG_LEVEL */ +/* public macros for DML_LOG_LEVEL_FATAL and up */ #define DML_LOG_FATAL(fmt, ...) DML_LOG_INTERNAL("[DML FATAL] " fmt, ## __VA_ARGS__) + +/* public macros for DML_LOG_LEVEL_ERROR and up */ #if DML_LOG_LEVEL >= DML_LOG_LEVEL_ERROR #define DML_LOG_ERROR(fmt, ...) DML_LOG_INTERNAL("[DML ERROR] "fmt, ## __VA_ARGS__) +#define DML_ASSERT_MSG(condition, fmt, ...) \ + do { \ + if (!(condition)) { \ + DML_LOG_ERROR("ASSERT hit in %s line %d\n", __func__, __LINE__); \ + DML_LOG_ERROR(fmt, ## __VA_ARGS__); \ + DML_ASSERT(condition); \ + } \ + } while (0) #else #define DML_LOG_ERROR(fmt, ...) ((void)0) +#define DML_ASSERT_MSG(condition, fmt, ...) 
((void)0) #endif + +/* public macros for DML_LOG_LEVEL_WARN and up */ #if DML_LOG_LEVEL >= DML_LOG_LEVEL_WARN #define DML_LOG_WARN(fmt, ...) DML_LOG_INTERNAL("[DML WARN] "fmt, ## __VA_ARGS__) #else #define DML_LOG_WARN(fmt, ...) ((void)0) #endif + +/* public macros for DML_LOG_LEVEL_INFO and up */ #if DML_LOG_LEVEL >= DML_LOG_LEVEL_INFO #define DML_LOG_INFO(fmt, ...) DML_LOG_INTERNAL("[DML INFO] "fmt, ## __VA_ARGS__) +#define DML_LOG_TOP_IF_ENTER() _LOG_ENTRY(_ELEMENT_TOP_IF) +#define DML_LOG_TOP_IF_EXIT() _LOG_EXIT(_ELEMENT_TOP_IF) #else #define DML_LOG_INFO(fmt, ...) ((void)0) +#define DML_LOG_TOP_IF_ENTER() ((void)0) +#define DML_LOG_TOP_IF_EXIT() ((void)0) #endif + +/* public macros for DML_LOG_LEVEL_DEBUG and up */ #if DML_LOG_LEVEL >= DML_LOG_LEVEL_DEBUG -#define DML_LOG_DEBUG(fmt, ...) DML_LOG_INTERNAL("[DML DEBUG] "fmt, ## __VA_ARGS__) +#define DML_LOG_DEBUG(fmt, ...) DML_LOG_INTERNAL(fmt, ## __VA_ARGS__) +#define DML_LOG_COMP_IF_ENTER() _LOG_ENTRY(_ELEMENT_COMP_IF) +#define DML_LOG_COMP_IF_EXIT() _LOG_EXIT(_ELEMENT_COMP_IF) +#define DML_LOG_FUNC_ENTER() _LOG_ENTRY(_ELEMENT_FUNC) +#define DML_LOG_FUNC_EXIT() _LOG_EXIT(_ELEMENT_FUNC) +#define DML_LOG_DEBUG_BOOL(field) _LOG_SCALAR(field, _BOOL_FORMAT) +#define DML_LOG_DEBUG_UINT(field) _LOG_SCALAR(field, _UINT_FORMAT) +#define DML_LOG_DEBUG_INT(field) _LOG_SCALAR(field, _INT_FORMAT) +#define DML_LOG_DEBUG_DOUBLE(field) _LOG_SCALAR(field, _DOUBLE_FORMAT) +#define DML_LOG_DEBUG_ARRAY_BOOL(field, size) _LOG_ARRAY(field, size, _BOOL_FORMAT) +#define DML_LOG_DEBUG_ARRAY_UINT(field, size) _LOG_ARRAY(field, size, _UINT_FORMAT) +#define DML_LOG_DEBUG_ARRAY_INT(field, size) _LOG_ARRAY(field, size, _INT_FORMAT) +#define DML_LOG_DEBUG_ARRAY_DOUBLE(field, size) _LOG_ARRAY(field, size, _DOUBLE_FORMAT) +#define DML_LOG_DEBUG_2D_ARRAY_BOOL(field, size0, size1) _LOG_2D_ARRAY(field, size0, size1, _BOOL_FORMAT) +#define DML_LOG_DEBUG_2D_ARRAY_UINT(field, size0, size1) _LOG_2D_ARRAY(field, size0, size1, _UINT_FORMAT) +#define DML_LOG_DEBUG_2D_ARRAY_INT(field, size0, size1) _LOG_2D_ARRAY(field, size0, size1, _INT_FORMAT) +#define DML_LOG_DEBUG_2D_ARRAY_DOUBLE(field, size0, size1) _LOG_2D_ARRAY(field, size0, size1, _DOUBLE_FORMAT) +#define DML_LOG_DEBUG_3D_ARRAY_BOOL(field, size0, size1, size2) _LOG_3D_ARRAY(field, size0, size1, size2, _BOOL_FORMAT) +#define DML_LOG_DEBUG_3D_ARRAY_UINT(field, size0, size1, size2) _LOG_3D_ARRAY(field, size0, size1, size2, _UINT_FORMAT) +#define DML_LOG_DEBUG_3D_ARRAY_INT(field, size0, size1, size2) _LOG_3D_ARRAY(field, size0, size1, size2, _INT_FORMAT) +#define DML_LOG_DEBUG_3D_ARRAY_DOUBLE(field, size0, size1, size2) _LOG_3D_ARRAY(field, size0, size1, size2, _DOUBLE_FORMAT) #else #define DML_LOG_DEBUG(fmt, ...) 
((void)0) +#define DML_LOG_COMP_IF_ENTER() ((void)0) +#define DML_LOG_COMP_IF_EXIT() ((void)0) +#define DML_LOG_FUNC_ENTER() ((void)0) +#define DML_LOG_FUNC_EXIT() ((void)0) +#define DML_LOG_DEBUG_BOOL(field) ((void)0) +#define DML_LOG_DEBUG_UINT(field) ((void)0) +#define DML_LOG_DEBUG_INT(field) ((void)0) +#define DML_LOG_DEBUG_DOUBLE(field) ((void)0) +#define DML_LOG_DEBUG_ARRAY_BOOL(field, size) ((void)0) +#define DML_LOG_DEBUG_ARRAY_UINT(field, size) ((void)0) +#define DML_LOG_DEBUG_ARRAY_INT(field, size) ((void)0) +#define DML_LOG_DEBUG_ARRAY_DOUBLE(field, size) ((void)0) +#define DML_LOG_DEBUG_2D_ARRAY_BOOL(field, size0, size1) ((void)0) +#define DML_LOG_DEBUG_2D_ARRAY_UINT(field, size0, size1) ((void)0) +#define DML_LOG_DEBUG_2D_ARRAY_INT(field, size0, size1) ((void)0) +#define DML_LOG_DEBUG_2D_ARRAY_DOUBLE(field, size0, size1) ((void)0) +#define DML_LOG_DEBUG_3D_ARRAY_BOOL(field, size0, size1, size2) ((void)0) +#define DML_LOG_DEBUG_3D_ARRAY_UINT(field, size0, size1, size2) ((void)0) +#define DML_LOG_DEBUG_3D_ARRAY_INT(field, size0, size1, size2) ((void)0) +#define DML_LOG_DEBUG_3D_ARRAY_DOUBLE(field, size0, size1, size2) ((void)0) #endif + +/* public macros for DML_LOG_LEVEL_VERBOSE */ #if DML_LOG_LEVEL >= DML_LOG_LEVEL_VERBOSE -#define DML_LOG_VERBOSE(fmt, ...) DML_LOG_INTERNAL("[DML VERBOSE] "fmt, ## __VA_ARGS__) +#define DML_LOG_VERBOSE(fmt, ...) DML_LOG_INTERNAL(fmt, ## __VA_ARGS__) #else #define DML_LOG_VERBOSE(fmt, ...) ((void)0) -#endif +#endif /* #if DML_LOG_LEVEL >= DML_LOG_LEVEL_VERBOSE */ #endif /* __DML2_DEBUG_H__ */ diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/inc/dml2_internal_shared_types.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/inc/dml2_internal_shared_types.h index 00688b9f1df4..d52aa82283b3 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/inc/dml2_internal_shared_types.h +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/inc/dml2_internal_shared_types.h @@ -202,6 +202,8 @@ struct dml2_core_mode_support_result { } active; unsigned int dispclk_khz; + unsigned int dpprefclk_khz; + unsigned int dtbrefclk_khz; unsigned int dcfclk_deepsleep_khz; unsigned int socclk_khz; @@ -446,13 +448,17 @@ struct dml2_core_internal_state_intermediates { }; struct dml2_core_mode_support_locals { - struct dml2_core_calcs_mode_support_ex mode_support_ex_params; + union { + struct dml2_core_calcs_mode_support_ex mode_support_ex_params; + }; struct dml2_display_cfg svp_expanded_display_cfg; struct dml2_calculate_mcache_allocation_in_out calc_mcache_allocation_params; }; struct dml2_core_mode_programming_locals { - struct dml2_core_calcs_mode_programming_ex mode_programming_ex_params; + union { + struct dml2_core_calcs_mode_programming_ex mode_programming_ex_params; + }; struct dml2_display_cfg svp_expanded_display_cfg; }; diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml2_dc_resource_mgmt.c b/drivers/gpu/drm/amd/display/dc/dml2/dml2_dc_resource_mgmt.c index 5f1b49a50049..4cfe64aa8492 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml2_dc_resource_mgmt.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml2_dc_resource_mgmt.c @@ -473,7 +473,6 @@ static void sort_pipes_for_splitting(struct dc_plane_pipe_pool *pipes) { bool sorted, swapped; unsigned int cur_index; - unsigned int temp; int odm_slice_index; for (odm_slice_index = 0; odm_slice_index < pipes->num_pipes_assigned_to_plane_for_odm_combine; odm_slice_index++) { @@ -489,9 +488,8 @@ static void sort_pipes_for_splitting(struct dc_plane_pipe_pool *pipes) swapped = false; while (!sorted) { if 
(pipes->pipes_assigned_to_plane[odm_slice_index][cur_index] > pipes->pipes_assigned_to_plane[odm_slice_index][cur_index + 1]) { - temp = pipes->pipes_assigned_to_plane[odm_slice_index][cur_index]; - pipes->pipes_assigned_to_plane[odm_slice_index][cur_index] = pipes->pipes_assigned_to_plane[odm_slice_index][cur_index + 1]; - pipes->pipes_assigned_to_plane[odm_slice_index][cur_index + 1] = temp; + swap(pipes->pipes_assigned_to_plane[odm_slice_index][cur_index + 1], + pipes->pipes_assigned_to_plane[odm_slice_index][cur_index]); swapped = true; } diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml2_mall_phantom.c b/drivers/gpu/drm/amd/display/dc/dml2/dml2_mall_phantom.c index 6b3b8803e0ae..c59f825cfae9 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml2_mall_phantom.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml2_mall_phantom.c @@ -654,14 +654,14 @@ static void set_phantom_stream_timing(struct dml2_context *ctx, struct dc_state unsigned int svp_height, unsigned int svp_vstartup) { - unsigned int i, pipe_idx; + unsigned int i; double line_time, fp_and_sync_width_time; struct pipe_ctx *pipe; uint32_t phantom_vactive, phantom_bp, pstate_width_fw_delay_lines; static const double cvt_rb_vblank_max = ((double) 460 / (1000 * 1000)); // Find DML pipe index (pipe_idx) using dc_pipe_idx - for (i = 0, pipe_idx = 0; i < ctx->config.dcn_pipe_count; i++) { + for (i = 0; i < ctx->config.dcn_pipe_count; i++) { pipe = &state->res_ctx.pipe_ctx[i]; if (!pipe->stream) @@ -669,8 +669,6 @@ static void set_phantom_stream_timing(struct dml2_context *ctx, struct dc_state if (i == dc_pipe_idx) break; - - pipe_idx++; } // Calculate lines required for pstate allow width and FW processing delays @@ -868,7 +866,7 @@ bool dml2_svp_remove_all_phantom_pipes(struct dml2_context *ctx, struct dc_state /* Conditions for setting up phantom pipes for SubVP: * 1. Not force disable SubVP - * 2. Full update (i.e. !fast_validate) + * 2. Full update (i.e. DC_VALIDATE_MODE_AND_PROGRAMMING) * 3. Enough pipes are available to support SubVP (TODO: Which pipes will use VACTIVE / VBLANK / SUBVP?) * 4. Display configuration passes validation * 5. 
(Config doesn't support MCLK in VACTIVE/VBLANK || dc->debug.force_subvp_mclk_switch) diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c b/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c index 5de775fd8fce..3b866e876bf4 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c @@ -953,6 +953,7 @@ static void populate_dml_surface_cfg_from_plane_state(enum dml_project_id dml2_p out->SourcePixelFormat[location] = dml_420_10; break; case SURFACE_PIXEL_FORMAT_GRPH_ARGB16161616: + case SURFACE_PIXEL_FORMAT_GRPH_ABGR16161616: case SURFACE_PIXEL_FORMAT_GRPH_ARGB16161616F: case SURFACE_PIXEL_FORMAT_GRPH_ABGR16161616F: out->SourcePixelFormat[location] = dml_444_64; @@ -1188,22 +1189,6 @@ static unsigned int map_plane_to_dml_display_cfg(const struct dml2_context *dml2 return location; } -static void apply_legacy_svp_drr_settings(struct dml2_context *dml2, const struct dc_state *state, struct dml_display_cfg_st *dml_dispcfg) -{ - int i; - - if (state->bw_ctx.bw.dcn.clk.fw_based_mclk_switching) { - ASSERT(state->stream_count == 1); - dml_dispcfg->timing.DRRDisplay[0] = true; - } else if (state->bw_ctx.bw.dcn.legacy_svp_drr_stream_index_valid) { - - for (i = 0; i < dml_dispcfg->num_timings; i++) { - if (dml2->v20.scratch.dml_to_dc_pipe_mapping.disp_cfg_to_stream_id[i] == state->streams[state->bw_ctx.bw.dcn.legacy_svp_drr_stream_index]->stream_id) - dml_dispcfg->timing.DRRDisplay[i] = true; - } - } -} - static void dml2_populate_pipe_to_plane_index_mapping(struct dml2_context *dml2, struct dc_state *state) { unsigned int i; @@ -1436,9 +1421,6 @@ void map_dc_state_into_dml_display_cfg(struct dml2_context *dml2, struct dc_stat } } } - - if (!dml2->config.use_native_pstate_optimization) - apply_legacy_svp_drr_settings(dml2, context, dml_dispcfg); } void dml2_update_pipe_ctx_dchub_regs(struct _vcs_dpi_dml_display_rq_regs_st *rq_regs, diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml2_wrapper.c b/drivers/gpu/drm/amd/display/dc/dml2/dml2_wrapper.c index 525b7d04bf84..9deb03a18ccc 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml2_wrapper.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml2_wrapper.c @@ -24,8 +24,6 @@ * */ -#include <linux/vmalloc.h> - #include "display_mode_core.h" #include "dml2_internal_types.h" #include "dml2_utils.h" @@ -95,12 +93,17 @@ static void map_hw_resources(struct dml2_context *dml2, static unsigned int pack_and_call_dml_mode_support_ex(struct dml2_context *dml2, const struct dml_display_cfg_st *display_cfg, - struct dml_mode_support_info_st *evaluation_info) + struct dml_mode_support_info_st *evaluation_info, + enum dc_validate_mode validate_mode) { struct dml2_wrapper_scratch *s = &dml2->v20.scratch; s->mode_support_params.mode_lib = &dml2->v20.dml_core_ctx; s->mode_support_params.in_display_cfg = display_cfg; + if (validate_mode == DC_VALIDATE_MODE_ONLY) + s->mode_support_params.in_start_state_idx = dml2->v20.dml_core_ctx.states.num_states - 1; + else + s->mode_support_params.in_start_state_idx = 0; s->mode_support_params.out_evaluation_info = evaluation_info; memset(evaluation_info, 0, sizeof(struct dml_mode_support_info_st)); @@ -112,10 +115,8 @@ static unsigned int pack_and_call_dml_mode_support_ex(struct dml2_context *dml2, static bool optimize_configuration(struct dml2_context *dml2, struct dml2_wrapper_optimize_configuration_params *p) { int unused_dpps = p->ip_params->max_num_dpp; - int i, j; - int odms_needed, refresh_rate_hz, dpps_needed, 
subvp_height, pstate_width_fw_delay_lines, surface_count; - int subvp_timing_to_add, new_timing_index, subvp_surface_to_add, new_surface_index; - float frame_time_sec, max_frame_time_sec; + int i; + int odms_needed; int largest_blend_and_timing = 0; bool optimization_done = false; @@ -130,79 +131,6 @@ static bool optimize_configuration(struct dml2_context *dml2, struct dml2_wrappe if (p->new_display_config != p->cur_display_config) *p->new_display_config = *p->cur_display_config; - // Optimize P-State Support - if (dml2->config.use_native_pstate_optimization) { - if (p->cur_mode_support_info->DRAMClockChangeSupport[0] == dml_dram_clock_change_unsupported) { - // Find a display with < 120Hz refresh rate with maximal refresh rate that's not already subvp - subvp_timing_to_add = -1; - subvp_surface_to_add = -1; - max_frame_time_sec = 0; - surface_count = 0; - for (i = 0; i < (int) p->cur_display_config->num_timings; i++) { - refresh_rate_hz = (int)div_u64((unsigned long long) p->cur_display_config->timing.PixelClock[i] * 1000 * 1000, - (p->cur_display_config->timing.HTotal[i] * p->cur_display_config->timing.VTotal[i])); - if (refresh_rate_hz < 120) { - // Check its upstream surfaces to see if this one could be converted to subvp. - dpps_needed = 0; - for (j = 0; j < (int) p->cur_display_config->num_surfaces; j++) { - if (p->cur_display_config->plane.BlendingAndTiming[j] == i && - p->cur_display_config->plane.UseMALLForPStateChange[j] == dml_use_mall_pstate_change_disable) { - dpps_needed += p->cur_mode_support_info->DPPPerSurface[j]; - subvp_surface_to_add = j; - surface_count++; - } - } - - if (surface_count == 1 && dpps_needed > 0 && dpps_needed <= unused_dpps) { - frame_time_sec = (float)1 / refresh_rate_hz; - if (frame_time_sec > max_frame_time_sec) { - max_frame_time_sec = frame_time_sec; - subvp_timing_to_add = i; - } - } - } - } - if (subvp_timing_to_add >= 0) { - new_timing_index = p->new_display_config->num_timings++; - new_surface_index = p->new_display_config->num_surfaces++; - // Add a phantom pipe reflecting the main pipe's timing - dml2_util_copy_dml_timing(&p->new_display_config->timing, new_timing_index, subvp_timing_to_add); - - pstate_width_fw_delay_lines = (int)(((double)(p->config->svp_pstate.subvp_fw_processing_delay_us + - p->config->svp_pstate.subvp_pstate_allow_width_us) / 1000000) * - (p->new_display_config->timing.PixelClock[subvp_timing_to_add] * 1000 * 1000) / - (double)p->new_display_config->timing.HTotal[subvp_timing_to_add]); - - subvp_height = p->cur_mode_support_info->SubViewportLinesNeededInMALL[subvp_timing_to_add] + pstate_width_fw_delay_lines; - - p->new_display_config->timing.VActive[new_timing_index] = subvp_height; - p->new_display_config->timing.VTotal[new_timing_index] = subvp_height + - p->new_display_config->timing.VTotal[subvp_timing_to_add] - p->new_display_config->timing.VActive[subvp_timing_to_add]; - - p->new_display_config->output.OutputDisabled[new_timing_index] = true; - - p->new_display_config->plane.UseMALLForPStateChange[subvp_surface_to_add] = dml_use_mall_pstate_change_sub_viewport; - - dml2_util_copy_dml_plane(&p->new_display_config->plane, new_surface_index, subvp_surface_to_add); - dml2_util_copy_dml_surface(&p->new_display_config->surface, new_surface_index, subvp_surface_to_add); - - p->new_display_config->plane.ViewportHeight[new_surface_index] = subvp_height; - p->new_display_config->plane.ViewportHeightChroma[new_surface_index] = subvp_height; - p->new_display_config->plane.ViewportStationary[new_surface_index] = false; - - 
p->new_display_config->plane.UseMALLForStaticScreen[new_surface_index] = dml_use_mall_static_screen_disable; - p->new_display_config->plane.UseMALLForPStateChange[new_surface_index] = dml_use_mall_pstate_change_phantom_pipe; - - p->new_display_config->plane.NumberOfCursors[new_surface_index] = 0; - - p->new_policy->ImmediateFlipRequirement[new_surface_index] = dml_immediate_flip_not_required; - - p->new_display_config->plane.BlendingAndTiming[new_surface_index] = new_timing_index; - - optimization_done = true; - } - } - } // Optimize Clocks if (!optimization_done) { @@ -226,7 +154,8 @@ static bool optimize_configuration(struct dml2_context *dml2, struct dml2_wrappe return optimization_done; } -static int calculate_lowest_supported_state_for_temp_read(struct dml2_context *dml2, struct dc_state *display_state) +static int calculate_lowest_supported_state_for_temp_read(struct dml2_context *dml2, struct dc_state *display_state, + enum dc_validate_mode validate_mode) { struct dml2_calculate_lowest_supported_state_for_temp_read_scratch *s = &dml2->v20.scratch.dml2_calculate_lowest_supported_state_for_temp_read_scratch; struct dml2_wrapper_scratch *s_global = &dml2->v20.scratch; @@ -268,7 +197,8 @@ static int calculate_lowest_supported_state_for_temp_read(struct dml2_context *d dml2->v20.dml_core_ctx.states.state_array[j].dram_clock_change_latency_us = s_global->dummy_pstate_table[i].dummy_pstate_latency_us; } - dml_result = pack_and_call_dml_mode_support_ex(dml2, &s->cur_display_config, &s->evaluation_info); + dml_result = pack_and_call_dml_mode_support_ex(dml2, &s->cur_display_config, &s->evaluation_info, + validate_mode); if (dml_result && s->evaluation_info.DRAMClockChangeSupport[0] == dml_dram_clock_change_vactive) { map_hw_resources(dml2, &s->cur_display_config, &s->evaluation_info); @@ -333,7 +263,8 @@ static bool does_configuration_meet_sw_policies(struct dml2_context *ctx, const } static bool dml_mode_support_wrapper(struct dml2_context *dml2, - struct dc_state *display_state) + struct dc_state *display_state, + enum dc_validate_mode validate_mode) { struct dml2_wrapper_scratch *s = &dml2->v20.scratch; unsigned int result = 0, i; @@ -369,7 +300,8 @@ static bool dml_mode_support_wrapper(struct dml2_context *dml2, result = pack_and_call_dml_mode_support_ex(dml2, &s->cur_display_config, - &s->mode_support_info); + &s->mode_support_info, + validate_mode); if (result) result = does_configuration_meet_sw_policies(dml2, &s->cur_display_config, &s->mode_support_info); @@ -390,7 +322,8 @@ static bool dml_mode_support_wrapper(struct dml2_context *dml2, dml2->v20.dml_core_ctx.policy = s->new_policy; optimized_result = pack_and_call_dml_mode_support_ex(dml2, &s->new_display_config, - &s->mode_support_info); + &s->mode_support_info, + validate_mode); if (optimized_result) optimized_result = does_configuration_meet_sw_policies(dml2, &s->new_display_config, &s->mode_support_info); @@ -409,7 +342,8 @@ static bool dml_mode_support_wrapper(struct dml2_context *dml2, if (!optimized_result) { result = pack_and_call_dml_mode_support_ex(dml2, &s->cur_display_config, - &s->mode_support_info); + &s->mode_support_info, + validate_mode); } } @@ -419,118 +353,7 @@ static bool dml_mode_support_wrapper(struct dml2_context *dml2, return result; } -static int find_drr_eligible_stream(struct dc_state *display_state) -{ - int i; - - for (i = 0; i < display_state->stream_count; i++) { - if (dc_state_get_stream_subvp_type(display_state, display_state->streams[i]) == SUBVP_NONE - && 
display_state->streams[i]->ignore_msa_timing_param) { - // Use ignore_msa_timing_param flag to identify as DRR - return i; - } - } - - return -1; -} - -static bool optimize_pstate_with_svp_and_drr(struct dml2_context *dml2, struct dc_state *display_state) -{ - struct dml2_wrapper_scratch *s = &dml2->v20.scratch; - bool pstate_optimization_done = false; - bool pstate_optimization_success = false; - bool result = false; - int drr_display_index = 0, non_svp_streams = 0; - bool force_svp = dml2->config.svp_pstate.force_enable_subvp; - - display_state->bw_ctx.bw.dcn.clk.fw_based_mclk_switching = false; - display_state->bw_ctx.bw.dcn.legacy_svp_drr_stream_index_valid = false; - - result = dml_mode_support_wrapper(dml2, display_state); - - if (!result) { - pstate_optimization_done = true; - } else if (s->mode_support_info.DRAMClockChangeSupport[0] != dml_dram_clock_change_unsupported && !force_svp) { - pstate_optimization_success = true; - pstate_optimization_done = true; - } - - if (display_state->stream_count == 1 && dml2->config.callbacks.can_support_mclk_switch_using_fw_based_vblank_stretch(dml2->config.callbacks.dc, display_state)) { - display_state->bw_ctx.bw.dcn.clk.fw_based_mclk_switching = true; - - result = dml_mode_support_wrapper(dml2, display_state); - } else { - non_svp_streams = display_state->stream_count; - - while (!pstate_optimization_done) { - result = dml_mode_programming(&dml2->v20.dml_core_ctx, s->mode_support_params.out_lowest_state_idx, &s->cur_display_config, true); - - // Always try adding SVP first - if (result) - result = dml2_svp_add_phantom_pipe_to_dc_state(dml2, display_state, &s->mode_support_info); - else - pstate_optimization_done = true; - - - if (result) { - result = dml_mode_support_wrapper(dml2, display_state); - } else { - pstate_optimization_done = true; - } - - if (result) { - non_svp_streams--; - - if (s->mode_support_info.DRAMClockChangeSupport[0] != dml_dram_clock_change_unsupported) { - if (dml2_svp_validate_static_schedulability(dml2, display_state, s->mode_support_info.DRAMClockChangeSupport[0])) { - pstate_optimization_success = true; - pstate_optimization_done = true; - } else { - pstate_optimization_success = false; - pstate_optimization_done = false; - } - } else { - drr_display_index = find_drr_eligible_stream(display_state); - - // If there is only 1 remaining non SubVP pipe that is DRR, check static - // schedulability for SubVP + DRR. 
- if (non_svp_streams == 1 && drr_display_index >= 0) { - if (dml2_svp_drr_schedulable(dml2, display_state, &display_state->streams[drr_display_index]->timing)) { - display_state->bw_ctx.bw.dcn.legacy_svp_drr_stream_index_valid = true; - display_state->bw_ctx.bw.dcn.legacy_svp_drr_stream_index = drr_display_index; - result = dml_mode_support_wrapper(dml2, display_state); - } - - if (result && s->mode_support_info.DRAMClockChangeSupport[0] != dml_dram_clock_change_unsupported) { - pstate_optimization_success = true; - pstate_optimization_done = true; - } else { - pstate_optimization_success = false; - pstate_optimization_done = false; - } - } - - if (pstate_optimization_success) { - pstate_optimization_done = true; - } else { - pstate_optimization_done = false; - } - } - } - } - } - - if (!pstate_optimization_success) { - dml2_svp_remove_all_phantom_pipes(dml2, display_state); - display_state->bw_ctx.bw.dcn.clk.fw_based_mclk_switching = false; - display_state->bw_ctx.bw.dcn.legacy_svp_drr_stream_index_valid = false; - result = dml_mode_support_wrapper(dml2, display_state); - } - - return result; -} - -static bool call_dml_mode_support_and_programming(struct dc_state *context) +static bool call_dml_mode_support_and_programming(struct dc_state *context, enum dc_validate_mode validate_mode) { unsigned int result = 0; unsigned int min_state = 0; @@ -544,16 +367,13 @@ static bool call_dml_mode_support_and_programming(struct dc_state *context) struct dml2_wrapper_scratch *s = &dml2->v20.scratch; if (!context->streams[0]->sink->link->dc->caps.is_apu) { - min_state_for_g6_temp_read = calculate_lowest_supported_state_for_temp_read(dml2, context); + min_state_for_g6_temp_read = calculate_lowest_supported_state_for_temp_read(dml2, context, + validate_mode); ASSERT(min_state_for_g6_temp_read >= 0); } - if (!dml2->config.use_native_pstate_optimization) { - result = optimize_pstate_with_svp_and_drr(dml2, context); - } else { - result = dml_mode_support_wrapper(dml2, context); - } + result = dml_mode_support_wrapper(dml2, context, validate_mode); /* Upon trying to sett certain frequencies in FRL, min_state_for_g6_temp_read is reported as -1. This leads to an invalid value of min_state causing crashes later on. * Use the default logic for min_state only when min_state_for_g6_temp_read is a valid value. In other cases, use the value calculated by the DML directly. @@ -575,7 +395,8 @@ static bool call_dml_mode_support_and_programming(struct dc_state *context) return result; } -static bool dml2_validate_and_build_resource(const struct dc *in_dc, struct dc_state *context) +static bool dml2_validate_and_build_resource(const struct dc *in_dc, struct dc_state *context, + enum dc_validate_mode validate_mode) { struct dml2_context *dml2 = context->bw_ctx.dml2; struct dml2_wrapper_scratch *s = &dml2->v20.scratch; @@ -611,7 +432,7 @@ static bool dml2_validate_and_build_resource(const struct dc *in_dc, struct dc_s copy_dummy_pstate_table(s->dummy_pstate_table, in_dc->clk_mgr->bw_params->dummy_pstate_table, 4); - result = call_dml_mode_support_and_programming(context); + result = call_dml_mode_support_and_programming(context, validate_mode); /* Call map dc pipes to map the pipes based on the DML output. For correctly determining if recalculation * is required or not, the resource context needs to correctly reflect the number of active pipes. We would * only know the correct number if active pipes after dml2_map_dc_pipes is called. 
@@ -628,7 +449,7 @@ static bool dml2_validate_and_build_resource(const struct dc *in_dc, struct dc_s need_recalculation = dml2_verify_det_buffer_configuration(dml2, context, &dml2->det_helper_scratch); if (need_recalculation) { /* Engage the DML again if recalculation is required. */ - call_dml_mode_support_and_programming(context); + call_dml_mode_support_and_programming(context, validate_mode); if (!dml2->config.skip_hw_state_mapping) { dml2_map_dc_pipes(dml2, context, &s->cur_display_config, &s->dml_to_dc_pipe_mapping, in_dc->current_state); } @@ -684,7 +505,7 @@ static bool dml2_validate_and_build_resource(const struct dc *in_dc, struct dc_s return result; } -static bool dml2_validate_only(struct dc_state *context) +static bool dml2_validate_only(struct dc_state *context, enum dc_validate_mode validate_mode) { struct dml2_context *dml2; unsigned int result = 0; @@ -708,12 +529,13 @@ static bool dml2_validate_only(struct dc_state *context) result = pack_and_call_dml_mode_support_ex(dml2, &dml2->v20.scratch.cur_display_config, - &dml2->v20.scratch.mode_support_info); + &dml2->v20.scratch.mode_support_info, + validate_mode); if (result) result = does_configuration_meet_sw_policies(dml2, &dml2->v20.scratch.cur_display_config, &dml2->v20.scratch.mode_support_info); - return (result == 1) ? true : false; + return result == 1; } static void dml2_apply_debug_options(const struct dc *dc, struct dml2_context *dml2) @@ -723,7 +545,8 @@ static void dml2_apply_debug_options(const struct dc *dc, struct dml2_context *d } } -bool dml2_validate(const struct dc *in_dc, struct dc_state *context, struct dml2_context *dml2, bool fast_validate) +bool dml2_validate(const struct dc *in_dc, struct dc_state *context, struct dml2_context *dml2, + enum dc_validate_mode validate_mode) { bool out = false; @@ -733,17 +556,17 @@ bool dml2_validate(const struct dc *in_dc, struct dc_state *context, struct dml2 /* DML2.1 validation path */ if (dml2->architecture == dml2_architecture_21) { - out = dml21_validate(in_dc, context, dml2, fast_validate); + out = dml21_validate(in_dc, context, dml2, validate_mode); return out; } DC_FP_START(); - /* Use dml_validate_only for fast_validate path */ - if (fast_validate) - out = dml2_validate_only(context); + /* Use dml_validate_only for DC_VALIDATE_MODE_ONLY and DC_VALIDATE_MODE_AND_STATE_INDEX path */ + if (validate_mode != DC_VALIDATE_MODE_AND_PROGRAMMING) + out = dml2_validate_only(context, validate_mode); else - out = dml2_validate_and_build_resource(in_dc, context); + out = dml2_validate_and_build_resource(in_dc, context, validate_mode); DC_FP_END(); @@ -757,8 +580,8 @@ static inline struct dml2_context *dml2_allocate_memory(void) static void dml2_init(const struct dc *in_dc, const struct dml2_configuration_options *config, struct dml2_context **dml2) { - if ((in_dc->debug.using_dml21) && (in_dc->ctx->dce_version == DCN_VERSION_4_01)) { - dml21_reinit(in_dc, dml2, config); + if ((in_dc->debug.using_dml21) && (in_dc->ctx->dce_version >= DCN_VERSION_4_01)) { + dml21_reinit(in_dc, *dml2, config); return; } @@ -803,9 +626,7 @@ static void dml2_init(const struct dc *in_dc, const struct dml2_configuration_op bool dml2_create(const struct dc *in_dc, const struct dml2_configuration_options *config, struct dml2_context **dml2) { // TODO : Temporarily add DCN_VERSION_3_2 for N-1 validation. Remove DCN_VERSION_3_2 after N-1 validation phase is complete. 
- if ((in_dc->debug.using_dml21) - && (in_dc->ctx->dce_version == DCN_VERSION_4_01 - )) + if ((in_dc->debug.using_dml21) && (in_dc->ctx->dce_version >= DCN_VERSION_4_01)) return dml21_create(in_dc, dml2, config); // Allocate Mode Lib Ctx @@ -874,8 +695,8 @@ void dml2_reinit(const struct dc *in_dc, const struct dml2_configuration_options *config, struct dml2_context **dml2) { - if ((in_dc->debug.using_dml21) && (in_dc->ctx->dce_version == DCN_VERSION_4_01)) { - dml21_reinit(in_dc, dml2, config); + if ((in_dc->debug.using_dml21) && (in_dc->ctx->dce_version >= DCN_VERSION_4_01)) { + dml21_reinit(in_dc, *dml2, config); return; } diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml2_wrapper.h b/drivers/gpu/drm/amd/display/dc/dml2/dml2_wrapper.h index 5100f269368e..c384e141cebc 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml2_wrapper.h +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml2_wrapper.h @@ -240,7 +240,7 @@ struct dml2_configuration_options { bool use_clock_dc_limits; bool gpuvm_enable; bool force_tdlut_enable; - struct dml2_soc_bb *bb_from_dmub; + void *bb_from_dmub; }; /* @@ -272,7 +272,7 @@ void dml2_reinit(const struct dc *in_dc, * dml2_validate - Determines if a display configuration is supported or not. * @in_dc: dc. * @context: dc_state to be validated. - * @fast_validate: Fast validate will not populate context.res_ctx. + * @validate_mode: DC_VALIDATE_MODE_ONLY and DC_VALIDATE_MODE_AND_STATE_INDEX will not populate context.res_ctx. * * DML1.0 compatible interface for validation. * @@ -295,7 +295,7 @@ void dml2_reinit(const struct dc *in_dc, bool dml2_validate(const struct dc *in_dc, struct dc_state *context, struct dml2_context *dml2, - bool fast_validate); + enum dc_validate_mode validate_mode); /* * dml2_extract_dram_and_fclk_change_support - Extracts the FCLK and UCLK change support info. 
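Note on the interface change documented above: dml2_validate() now takes an enum dc_validate_mode in place of the old bool fast_validate. The snippet below is an illustrative sketch only, not driver code from this patch; the enum's declaration lives outside this diff (only the enumerator names used in the hunks above are known here), and validate_config() is a hypothetical caller showing how the old fast_validate=true path maps onto DC_VALIDATE_MODE_ONLY.

/* Hypothetical caller, for illustration only. Assumes the dc/dml2 headers
 * that declare enum dc_validate_mode and dml2_validate() are in scope. */
static bool validate_config(const struct dc *in_dc, struct dc_state *context,
			    struct dml2_context *dml2, bool fast)
{
	/* Old behaviour: fast_validate == true skipped populating res_ctx.
	 * New behaviour: DC_VALIDATE_MODE_ONLY requests the same lightweight
	 * mode-support check; DC_VALIDATE_MODE_AND_PROGRAMMING is the full
	 * validate-and-build-resource path. */
	enum dc_validate_mode mode = fast ? DC_VALIDATE_MODE_ONLY
					  : DC_VALIDATE_MODE_AND_PROGRAMMING;

	return dml2_validate(in_dc, context, dml2, mode);
}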
diff --git a/drivers/gpu/drm/amd/display/dc/dpp/dcn10/dcn10_dpp.c b/drivers/gpu/drm/amd/display/dc/dpp/dcn10/dcn10_dpp.c index 75fb77bca83b..01480a04f85e 100644 --- a/drivers/gpu/drm/amd/display/dc/dpp/dcn10/dcn10_dpp.c +++ b/drivers/gpu/drm/amd/display/dc/dpp/dcn10/dcn10_dpp.c @@ -520,6 +520,15 @@ void dpp1_dppclk_control( REG_UPDATE(DPP_CONTROL, DPP_CLOCK_ENABLE, 0); } +void dpp_force_disable_cursor(struct dpp *dpp_base) +{ + struct dcn10_dpp *dpp = TO_DCN10_DPP(dpp_base); + + /* Force disable cursor */ + REG_UPDATE(CURSOR0_CONTROL, CUR0_ENABLE, 0); + dpp_base->pos.cur0_ctl.bits.cur0_enable = 0; +} + static const struct dpp_funcs dcn10_dpp_funcs = { .dpp_read_state = dpp_read_state, .dpp_reset = dpp_reset, diff --git a/drivers/gpu/drm/amd/display/dc/dpp/dcn10/dcn10_dpp.h b/drivers/gpu/drm/amd/display/dc/dpp/dcn10/dcn10_dpp.h index c48139bed11f..f466182963f7 100644 --- a/drivers/gpu/drm/amd/display/dc/dpp/dcn10/dcn10_dpp.h +++ b/drivers/gpu/drm/amd/display/dc/dpp/dcn10/dcn10_dpp.h @@ -1525,4 +1525,6 @@ void dpp1_construct(struct dcn10_dpp *dpp1, void dpp1_cm_get_gamut_remap(struct dpp *dpp_base, struct dpp_grph_csc_adjustment *adjust); +void dpp_force_disable_cursor(struct dpp *dpp_base); + #endif diff --git a/drivers/gpu/drm/amd/display/dc/dpp/dcn30/dcn30_dpp.c b/drivers/gpu/drm/amd/display/dc/dpp/dcn30/dcn30_dpp.c index 2d70586cef40..4f569cd8a5d6 100644 --- a/drivers/gpu/drm/amd/display/dc/dpp/dcn30/dcn30_dpp.c +++ b/drivers/gpu/drm/amd/display/dc/dpp/dcn30/dcn30_dpp.c @@ -578,9 +578,6 @@ static void dpp3_power_on_blnd_lut( dpp_base->ctx->dc->optimized_required = true; dpp_base->deferred_reg_writes.bits.disable_blnd_lut = true; } - } else { - REG_SET(CM_MEM_PWR_CTRL, 0, - BLNDGAM_MEM_PWR_FORCE, power_on == true ? 0 : 1); } } @@ -1494,6 +1491,7 @@ static struct dpp_funcs dcn30_dpp_funcs = { .dpp_dppclk_control = dpp1_dppclk_control, .dpp_set_hdr_multiplier = dpp3_set_hdr_multiplier, .dpp_get_gamut_remap = dpp3_cm_get_gamut_remap, + .dpp_force_disable_cursor = dpp_force_disable_cursor, }; diff --git a/drivers/gpu/drm/amd/display/dc/dpp/dcn401/dcn401_dpp.c b/drivers/gpu/drm/amd/display/dc/dpp/dcn401/dcn401_dpp.c index 97bf26fa3573..36187f890d5d 100644 --- a/drivers/gpu/drm/amd/display/dc/dpp/dcn401/dcn401_dpp.c +++ b/drivers/gpu/drm/amd/display/dc/dpp/dcn401/dcn401_dpp.c @@ -231,7 +231,7 @@ static struct dpp_funcs dcn401_dpp_funcs = { .dpp_program_regamma_pwl = NULL, .dpp_set_pre_degam = dpp3_set_pre_degam, .dpp_program_input_lut = NULL, - .dpp_full_bypass = dpp401_full_bypass, + .dpp_full_bypass = NULL, .dpp_setup = dpp401_dpp_setup, .dpp_program_degamma_pwl = NULL, .dpp_program_cm_dealpha = dpp3_program_cm_dealpha, diff --git a/drivers/gpu/drm/amd/display/dc/dpp/dcn401/dcn401_dpp.h b/drivers/gpu/drm/amd/display/dc/dpp/dcn401/dcn401_dpp.h index ecaa976e1f52..5f6b431ec398 100644 --- a/drivers/gpu/drm/amd/display/dc/dpp/dcn401/dcn401_dpp.h +++ b/drivers/gpu/drm/amd/display/dc/dpp/dcn401/dcn401_dpp.h @@ -641,6 +641,7 @@ uint32_t ISHARP_DELTA_DATA; \ uint32_t ISHARP_DELTA_INDEX; \ uint32_t ISHARP_NLDELTA_SOFT_CLIP + struct dcn401_dpp_registers { DPP_REG_VARIABLE_LIST_DCN401; }; @@ -672,6 +673,16 @@ struct dcn401_dpp { struct pwl_params pwl_data; }; +enum dcn401_dscl_mode_sel { + DCN401_DSCL_MODE_SCALING_444_BYPASS = 0, + DCN401_DSCL_MODE_SCALING_444_RGB_ENABLE = 1, + DCN401_DSCL_MODE_SCALING_444_YCBCR_ENABLE = 2, + DCN401_DSCL_MODE_SCALING_420_YCBCR_ENABLE = 3, + DCN401_DSCL_MODE_SCALING_420_LUMA_BYPASS = 4, + DCN401_DSCL_MODE_SCALING_420_CHROMA_BYPASS = 5, + DCN401_DSCL_MODE_DSCL_BYPASS = 6 
+}; + bool dpp401_construct(struct dcn401_dpp *dpp401, struct dc_context *ctx, uint32_t inst, @@ -683,8 +694,6 @@ void dpp401_dscl_set_scaler_manual_scale( struct dpp *dpp_base, const struct scaler_data *scl_data); -void dpp401_full_bypass(struct dpp *dpp_base); - void dpp401_dpp_setup( struct dpp *dpp_base, enum surface_pixel_format format, diff --git a/drivers/gpu/drm/amd/display/dc/dpp/dcn401/dcn401_dpp_cm.c b/drivers/gpu/drm/amd/display/dc/dpp/dcn401/dcn401_dpp_cm.c index 712aff7e17f7..7aab77b58869 100644 --- a/drivers/gpu/drm/amd/display/dc/dpp/dcn401/dcn401_dpp_cm.c +++ b/drivers/gpu/drm/amd/display/dc/dpp/dcn401/dcn401_dpp_cm.c @@ -88,30 +88,6 @@ enum dscl_mode_sel { DSCL_MODE_DSCL_BYPASS = 6 }; -void dpp401_full_bypass(struct dpp *dpp_base) -{ - struct dcn401_dpp *dpp = TO_DCN401_DPP(dpp_base); - - /* Input pixel format: ARGB8888 */ - REG_SET(CNVC_SURFACE_PIXEL_FORMAT, 0, - CNVC_SURFACE_PIXEL_FORMAT, 0x8); - - /* Zero expansion */ - REG_SET_3(FORMAT_CONTROL, 0, - CNVC_BYPASS, 0, - FORMAT_CONTROL__ALPHA_EN, 0, - FORMAT_EXPANSION_MODE, 0); - - /* COLOR_KEYER_CONTROL.COLOR_KEYER_EN = 0 this should be default */ - if (dpp->tf_mask->CM_BYPASS_EN) - REG_SET(CM_CONTROL, 0, CM_BYPASS_EN, 1); - else - REG_SET(CM_CONTROL, 0, CM_BYPASS, 1); - - /* Setting degamma bypass for now */ - REG_SET(CM_DGAM_CONTROL, 0, CM_DGAM_LUT_MODE, 0); -} - void dpp401_set_cursor_attributes( struct dpp *dpp_base, struct dc_cursor_attributes *cursor_attributes) diff --git a/drivers/gpu/drm/amd/display/dc/dpp/dcn401/dcn401_dpp_dscl.c b/drivers/gpu/drm/amd/display/dc/dpp/dcn401/dcn401_dpp_dscl.c index 2f92e7d4981b..6df3419f825f 100644 --- a/drivers/gpu/drm/amd/display/dc/dpp/dcn401/dcn401_dpp_dscl.c +++ b/drivers/gpu/drm/amd/display/dc/dpp/dcn401/dcn401_dpp_dscl.c @@ -78,16 +78,6 @@ enum dscl_autocal_mode { AUTOCAL_MODE_AUTOREPLICATE = 3 }; -enum dscl_mode_sel { - DSCL_MODE_SCALING_444_BYPASS = 0, - DSCL_MODE_SCALING_444_RGB_ENABLE = 1, - DSCL_MODE_SCALING_444_YCBCR_ENABLE = 2, - DSCL_MODE_SCALING_420_YCBCR_ENABLE = 3, - DSCL_MODE_SCALING_420_LUMA_BYPASS = 4, - DSCL_MODE_SCALING_420_CHROMA_BYPASS = 5, - DSCL_MODE_DSCL_BYPASS = 6 -}; - static int dpp401_dscl_get_pixel_depth_val(enum lb_pixel_depth depth) { if (depth == LB_PIXEL_DEPTH_30BPP) @@ -122,7 +112,7 @@ static bool dpp401_dscl_is_420_format(enum pixel_format format) return false; } -static enum dscl_mode_sel dpp401_dscl_get_dscl_mode( +static enum dcn401_dscl_mode_sel dpp401_dscl_get_dscl_mode( struct dpp *dpp_base, const struct scaler_data *data, bool dbg_always_scale) @@ -132,7 +122,7 @@ static enum dscl_mode_sel dpp401_dscl_get_dscl_mode( if (dpp_base->caps->dscl_data_proc_format == DSCL_DATA_PRCESSING_FIXED_FORMAT) { /* DSCL is processing data in fixed format */ if (data->format == PIXEL_FORMAT_FP16) - return DSCL_MODE_DSCL_BYPASS; + return DCN401_DSCL_MODE_DSCL_BYPASS; } if (data->ratios.horz.value == one @@ -140,20 +130,20 @@ static enum dscl_mode_sel dpp401_dscl_get_dscl_mode( && data->ratios.horz_c.value == one && data->ratios.vert_c.value == one && !dbg_always_scale) - return DSCL_MODE_SCALING_444_BYPASS; + return DCN401_DSCL_MODE_SCALING_444_BYPASS; if (!dpp401_dscl_is_420_format(data->format)) { if (dpp401_dscl_is_video_format(data->format)) - return DSCL_MODE_SCALING_444_YCBCR_ENABLE; + return DCN401_DSCL_MODE_SCALING_444_YCBCR_ENABLE; else - return DSCL_MODE_SCALING_444_RGB_ENABLE; + return DCN401_DSCL_MODE_SCALING_444_RGB_ENABLE; } if (data->ratios.horz.value == one && data->ratios.vert.value == one) - return DSCL_MODE_SCALING_420_LUMA_BYPASS; + 
return DCN401_DSCL_MODE_SCALING_420_LUMA_BYPASS; if (data->ratios.horz_c.value == one && data->ratios.vert_c.value == one) - return DSCL_MODE_SCALING_420_CHROMA_BYPASS; + return DCN401_DSCL_MODE_SCALING_420_CHROMA_BYPASS; - return DSCL_MODE_SCALING_420_YCBCR_ENABLE; + return DCN401_DSCL_MODE_SCALING_420_YCBCR_ENABLE; } static void dpp401_power_on_dscl( @@ -1071,7 +1061,7 @@ void dpp401_dscl_set_scaler_manual_scale(struct dpp *dpp_base, uint32_t v_num_taps_c = scl_data->taps.v_taps_c - 1; uint32_t h_num_taps = scl_data->taps.h_taps - 1; uint32_t h_num_taps_c = scl_data->taps.h_taps_c - 1; - enum dscl_mode_sel dscl_mode = dpp401_dscl_get_dscl_mode( + enum dcn401_dscl_mode_sel dscl_mode = dpp401_dscl_get_dscl_mode( dpp_base, scl_data, dpp_base->ctx->dc->debug.always_scale); bool ycbcr = scl_data->format >= PIXEL_FORMAT_VIDEO_BEGIN && scl_data->format <= PIXEL_FORMAT_VIDEO_END; @@ -1102,7 +1092,7 @@ void dpp401_dscl_set_scaler_manual_scale(struct dpp *dpp_base, dpp->scl_data = *scl_data; if ((dpp->base.ctx->dc->config.use_spl) && (!dpp->base.ctx->dc->debug.disable_spl)) { - dscl_mode = (enum dscl_mode_sel) scl_data->dscl_prog_data.dscl_mode; + dscl_mode = (enum dcn401_dscl_mode_sel) scl_data->dscl_prog_data.dscl_mode; rect = (struct rect *)&scl_data->dscl_prog_data.recout; mpc_width = scl_data->dscl_prog_data.mpc_size.width; mpc_height = scl_data->dscl_prog_data.mpc_size.height; @@ -1112,7 +1102,7 @@ void dpp401_dscl_set_scaler_manual_scale(struct dpp *dpp_base, h_num_taps_c = scl_data->dscl_prog_data.taps.h_taps_c; } if (dpp_base->ctx->dc->debug.enable_mem_low_power.bits.dscl) { - if (dscl_mode != DSCL_MODE_DSCL_BYPASS) + if (dscl_mode != DCN401_DSCL_MODE_DSCL_BYPASS) dpp401_power_on_dscl(dpp_base, true); } @@ -1139,7 +1129,7 @@ void dpp401_dscl_set_scaler_manual_scale(struct dpp *dpp_base, /* SCL mode */ REG_UPDATE(SCL_MODE, DSCL_MODE, dscl_mode); - if (dscl_mode == DSCL_MODE_DSCL_BYPASS) { + if (dscl_mode == DCN401_DSCL_MODE_DSCL_BYPASS) { if (dpp_base->ctx->dc->debug.enable_mem_low_power.bits.dscl) dpp401_power_on_dscl(dpp_base, false); return; @@ -1149,7 +1139,7 @@ void dpp401_dscl_set_scaler_manual_scale(struct dpp *dpp_base, lb_config = dpp401_dscl_find_lb_memory_config(dpp, scl_data); dpp401_dscl_set_lb(dpp, &scl_data->lb_params, lb_config); - if (dscl_mode == DSCL_MODE_SCALING_444_BYPASS) { + if (dscl_mode == DCN401_DSCL_MODE_SCALING_444_BYPASS) { if (dpp->base.ctx->dc->config.prefer_easf) dpp401_dscl_disable_easf(dpp_base, scl_data); dpp401_dscl_program_isharp(dpp_base, scl_data, program_isharp_1dlut, &bs_coeffs_updated); diff --git a/drivers/gpu/drm/amd/display/dc/dsc/dc_dsc.c b/drivers/gpu/drm/amd/display/dc/dsc/dc_dsc.c index 11535922b5ff..e4144b244332 100644 --- a/drivers/gpu/drm/amd/display/dc/dsc/dc_dsc.c +++ b/drivers/gpu/drm/amd/display/dc/dsc/dc_dsc.c @@ -30,6 +30,9 @@ #include "rc_calc.h" #include "fixed31_32.h" +#include "clk_mgr.h" +#include "resource.h" + #define DC_LOGGER \ dsc->ctx->logger @@ -149,6 +152,11 @@ uint32_t dc_bandwidth_in_kbps_from_timing( } /* Forward Declerations */ +static unsigned int get_min_dsc_slice_count_for_odm( + const struct display_stream_compressor *dsc, + const struct dsc_enc_caps *dsc_enc_caps, + const struct dc_crtc_timing *timing); + static bool decide_dsc_bandwidth_range( const uint32_t min_bpp_x16, const uint32_t max_bpp_x16, @@ -183,6 +191,7 @@ static bool setup_dsc_config( const struct dc_crtc_timing *timing, const struct dc_dsc_config_options *options, const enum dc_link_encoding_format link_encoding, + int min_slice_count, struct 
dc_dsc_config *dsc_cfg); static bool dsc_buff_block_size_from_dpcd(int dpcd_buff_block_size, int *buff_block_size) @@ -442,7 +451,6 @@ bool dc_dsc_parse_dsc_dpcd(const struct dc *dc, return true; } - /* If DSC is possbile, get DSC bandwidth range based on [min_bpp, max_bpp] target bitrate range and * timing's pixel clock and uncompressed bandwidth. * If DSC is not possible, leave '*range' untouched. @@ -458,6 +466,7 @@ bool dc_dsc_compute_bandwidth_range( struct dc_dsc_bw_range *range) { bool is_dsc_possible = false; + unsigned int min_dsc_slice_count; struct dsc_enc_caps dsc_enc_caps; struct dsc_enc_caps dsc_common_caps; struct dc_dsc_config config = {0}; @@ -469,12 +478,14 @@ bool dc_dsc_compute_bandwidth_range( get_dsc_enc_caps(dsc, &dsc_enc_caps, timing->pix_clk_100hz); + min_dsc_slice_count = get_min_dsc_slice_count_for_odm(dsc, &dsc_enc_caps, timing); + is_dsc_possible = intersect_dsc_caps(dsc_sink_caps, &dsc_enc_caps, timing->pixel_encoding, &dsc_common_caps); if (is_dsc_possible) is_dsc_possible = setup_dsc_config(dsc_sink_caps, &dsc_enc_caps, 0, timing, - &options, link_encoding, &config); + &options, link_encoding, min_dsc_slice_count, &config); if (is_dsc_possible) is_dsc_possible = decide_dsc_bandwidth_range(min_bpp_x16, max_bpp_x16, @@ -525,20 +536,153 @@ void dc_dsc_dump_decoder_caps(const struct display_stream_compressor *dsc, DC_LOG_DSC("\tis_dp %d", dsc_sink_caps->is_dp); } + +static void build_dsc_enc_combined_slice_caps( + const struct dsc_enc_caps *single_dsc_enc_caps, + struct dsc_enc_caps *dsc_enc_caps, + unsigned int max_odm_combine_factor) +{ + /* 1-16 slice configurations, single DSC */ + dsc_enc_caps->slice_caps.raw |= single_dsc_enc_caps->slice_caps.raw; + + /* 2x DSC's */ + if (max_odm_combine_factor >= 2) { + /* 1 + 1 */ + dsc_enc_caps->slice_caps.bits.NUM_SLICES_2 |= single_dsc_enc_caps->slice_caps.bits.NUM_SLICES_1; + + /* 2 + 2 */ + dsc_enc_caps->slice_caps.bits.NUM_SLICES_4 |= single_dsc_enc_caps->slice_caps.bits.NUM_SLICES_2; + + /* 4 + 4 */ + dsc_enc_caps->slice_caps.bits.NUM_SLICES_8 |= single_dsc_enc_caps->slice_caps.bits.NUM_SLICES_4; + + /* 8 + 8 */ + dsc_enc_caps->slice_caps.bits.NUM_SLICES_16 |= single_dsc_enc_caps->slice_caps.bits.NUM_SLICES_8; + } + + /* 3x DSC's */ + if (max_odm_combine_factor >= 3) { + /* 4 + 4 + 4 */ + dsc_enc_caps->slice_caps.bits.NUM_SLICES_12 |= single_dsc_enc_caps->slice_caps.bits.NUM_SLICES_4; + } + + /* 4x DSC's */ + if (max_odm_combine_factor >= 4) { + /* 1 + 1 + 1 + 1 */ + dsc_enc_caps->slice_caps.bits.NUM_SLICES_4 |= single_dsc_enc_caps->slice_caps.bits.NUM_SLICES_1; + + /* 2 + 2 + 2 + 2 */ + dsc_enc_caps->slice_caps.bits.NUM_SLICES_8 |= single_dsc_enc_caps->slice_caps.bits.NUM_SLICES_2; + + /* 3 + 3 + 3 + 3 */ + dsc_enc_caps->slice_caps.bits.NUM_SLICES_12 |= single_dsc_enc_caps->slice_caps.bits.NUM_SLICES_3; + + /* 4 + 4 + 4 + 4 */ + dsc_enc_caps->slice_caps.bits.NUM_SLICES_16 |= single_dsc_enc_caps->slice_caps.bits.NUM_SLICES_4; + } +} + +static void build_dsc_enc_caps( + const struct display_stream_compressor *dsc, + struct dsc_enc_caps *dsc_enc_caps) +{ + unsigned int max_dscclk_khz; + unsigned int num_dsc; + unsigned int max_odm_combine_factor; + struct dsc_enc_caps single_dsc_enc_caps; + + struct dc *dc; + + if (!dsc || !dsc->ctx || !dsc->ctx->dc || !dsc->funcs->dsc_get_single_enc_caps) + return; + + dc = dsc->ctx->dc; + + if (!dc->clk_mgr || !dc->clk_mgr->funcs->get_max_clock_khz || !dc->res_pool || dc->debug.disable_dsc) + return; + + /* get max DSCCLK from clk_mgr */ + max_dscclk_khz = 
dc->clk_mgr->funcs->get_max_clock_khz(dc->clk_mgr, CLK_TYPE_DSCCLK); + + dsc->funcs->dsc_get_single_enc_caps(&single_dsc_enc_caps, max_dscclk_khz); + + /* global capabilities */ + dsc_enc_caps->dsc_version = single_dsc_enc_caps.dsc_version; + dsc_enc_caps->lb_bit_depth = single_dsc_enc_caps.lb_bit_depth; + dsc_enc_caps->is_block_pred_supported = single_dsc_enc_caps.is_block_pred_supported; + dsc_enc_caps->max_slice_width = single_dsc_enc_caps.max_slice_width; + dsc_enc_caps->bpp_increment_div = single_dsc_enc_caps.bpp_increment_div; + dsc_enc_caps->color_formats.raw = single_dsc_enc_caps.color_formats.raw; + dsc_enc_caps->color_depth.raw = single_dsc_enc_caps.color_depth.raw; + + /* expand per DSC capabilities to global */ + max_odm_combine_factor = dc->caps.max_odm_combine_factor; + num_dsc = dc->res_pool->res_cap->num_dsc; + max_odm_combine_factor = min(max_odm_combine_factor, num_dsc); + dsc_enc_caps->max_total_throughput_mps = + single_dsc_enc_caps.max_total_throughput_mps * + max_odm_combine_factor; + + /* check slice counts possible for with ODM combine */ + build_dsc_enc_combined_slice_caps(&single_dsc_enc_caps, dsc_enc_caps, max_odm_combine_factor); +} + +static inline uint32_t dsc_div_by_10_round_up(uint32_t value) +{ + return (value + 9) / 10; +} + +static unsigned int get_min_dsc_slice_count_for_odm( + const struct display_stream_compressor *dsc, + const struct dsc_enc_caps *dsc_enc_caps, + const struct dc_crtc_timing *timing) +{ + unsigned int max_dispclk_khz; + + /* get max pixel rate and combine caps */ + max_dispclk_khz = dsc_enc_caps->max_total_throughput_mps * 1000; + if (dsc && dsc->ctx->dc) { + if (dsc->ctx->dc->clk_mgr && + dsc->ctx->dc->clk_mgr->funcs->get_max_clock_khz) { + /* dispclk is available */ + max_dispclk_khz = dsc->ctx->dc->clk_mgr->funcs->get_max_clock_khz(dsc->ctx->dc->clk_mgr, CLK_TYPE_DISPCLK); + } + } + + /* validate parameters */ + if (max_dispclk_khz == 0 || dsc_enc_caps->max_slice_width == 0) + return 1; + + /* consider minimum odm slices required due to + * 1) display pipe throughput (dispclk) + * 2) max image width per slice + */ + return dc_fixpt_ceil(dc_fixpt_max( + dc_fixpt_div_int(dc_fixpt_from_int(dsc_div_by_10_round_up(timing->pix_clk_100hz)), + max_dispclk_khz), // throughput + dc_fixpt_div_int(dc_fixpt_from_int(timing->h_addressable + timing->h_border_left + timing->h_border_right), + dsc_enc_caps->max_slice_width))); // slice width +} + static void get_dsc_enc_caps( const struct display_stream_compressor *dsc, struct dsc_enc_caps *dsc_enc_caps, int pixel_clock_100Hz) { - // This is a static HW query, so we can use any DSC - memset(dsc_enc_caps, 0, sizeof(struct dsc_enc_caps)); - if (dsc) { - if (!dsc->ctx->dc->debug.disable_dsc) - dsc->funcs->dsc_get_enc_caps(dsc_enc_caps, pixel_clock_100Hz); - if (dsc->ctx->dc->debug.native422_support) - dsc_enc_caps->color_formats.bits.YCBCR_NATIVE_422 = 1; + + if (!dsc || !dsc->ctx || !dsc->ctx->dc || dsc->ctx->dc->debug.disable_dsc) + return; + + /* check if reported cap global or only for a single DCN DSC enc */ + if (dsc->funcs->dsc_get_enc_caps) { + dsc->funcs->dsc_get_enc_caps(dsc_enc_caps, pixel_clock_100Hz); + } else { + build_dsc_enc_caps(dsc, dsc_enc_caps); } + + if (dsc->ctx->dc->debug.native422_support) + dsc_enc_caps->color_formats.bits.YCBCR_NATIVE_422 = 1; } /* Returns 'false' if no intersection was found for at least one capability. 
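The new get_min_dsc_slice_count_for_odm() helper above derives a floor on the DSC slice count from two independent limits: the per-pipe pixel throughput (bounded by DISPCLK) and the maximum image width a single slice may cover. The standalone snippet below is a simplified illustration with made-up example numbers, using plain integer arithmetic in place of the driver's dc_fixpt helpers; it is not part of the patch.

#include <stdio.h>

/* Integer ceil(a / b); stands in for the fixed-point ceil(max(...)) above. */
static unsigned int div_round_up(unsigned long long a, unsigned long long b)
{
	return (unsigned int)((a + b - 1) / b);
}

int main(void)
{
	/* Example values only (roughly an 8K60 timing); not taken from the driver. */
	unsigned long long pix_clk_khz     = 2376000;	/* pix_clk_100hz / 10 */
	unsigned long long max_dispclk_khz = 1200000;	/* as reported by clk_mgr */
	unsigned int pic_width       = 7680;		/* h_addressable + borders */
	unsigned int max_slice_width = 5184;

	unsigned int by_throughput = div_round_up(pix_clk_khz, max_dispclk_khz);	/* = 2 */
	unsigned int by_width      = div_round_up(pic_width, max_slice_width);		/* = 2 */
	unsigned int min_slices    = by_throughput > by_width ? by_throughput : by_width;

	printf("minimum ODM slice count = %u\n", min_slices);
	return 0;
}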
@@ -621,11 +765,6 @@ static bool intersect_dsc_caps( return true; } -static inline uint32_t dsc_div_by_10_round_up(uint32_t value) -{ - return (value + 9) / 10; -} - static uint32_t compute_bpp_x16_from_target_bandwidth( const uint32_t bandwidth_in_kbps, const struct dc_crtc_timing *timing, @@ -910,11 +1049,11 @@ static bool setup_dsc_config( const struct dc_crtc_timing *timing, const struct dc_dsc_config_options *options, const enum dc_link_encoding_format link_encoding, + int min_slices_h, struct dc_dsc_config *dsc_cfg) { struct dsc_enc_caps dsc_common_caps; int max_slices_h = 0; - int min_slices_h = 0; int num_slices_h = 0; int pic_width; int slice_width; @@ -1018,12 +1157,14 @@ static bool setup_dsc_config( if (!is_dsc_possible) goto done; - min_slices_h = pic_width / dsc_common_caps.max_slice_width; - if (pic_width % dsc_common_caps.max_slice_width) - min_slices_h++; + /* increase miniumum slice count to meet sink slice width limitations */ + min_slices_h = dc_fixpt_ceil(dc_fixpt_max( + dc_fixpt_div_int(dc_fixpt_from_int(pic_width), dsc_common_caps.max_slice_width), // sink min + dc_fixpt_from_int(min_slices_h))); // source min min_slices_h = fit_num_slices_up(dsc_common_caps.slice_caps, min_slices_h); + /* increase minimum slice count to meet sink throughput limitations */ while (min_slices_h <= max_slices_h) { int pix_clk_per_slice_khz = dsc_div_by_10_round_up(timing->pix_clk_100hz) / min_slices_h; if (pix_clk_per_slice_khz <= sink_per_slice_throughput_mps * 1000) @@ -1032,14 +1173,12 @@ static bool setup_dsc_config( min_slices_h = inc_num_slices(dsc_common_caps.slice_caps, min_slices_h); } - is_dsc_possible = (min_slices_h <= max_slices_h); - - if (pic_width % min_slices_h != 0) - min_slices_h = 0; // DSC TODO: Maybe try increasing the number of slices first? - - if (min_slices_h == 0 && max_slices_h == 0) - is_dsc_possible = false; + /* increase minimum slice count to meet divisibility requirements */ + while (pic_width % min_slices_h != 0 && min_slices_h <= max_slices_h) { + min_slices_h = inc_num_slices(dsc_common_caps.slice_caps, min_slices_h); + } + is_dsc_possible = (min_slices_h <= max_slices_h) && max_slices_h != 0; if (!is_dsc_possible) goto done; @@ -1162,12 +1301,19 @@ bool dc_dsc_compute_config( { bool is_dsc_possible = false; struct dsc_enc_caps dsc_enc_caps; - + unsigned int min_dsc_slice_count; get_dsc_enc_caps(dsc, &dsc_enc_caps, timing->pix_clk_100hz); + + min_dsc_slice_count = get_min_dsc_slice_count_for_odm(dsc, &dsc_enc_caps, timing); + is_dsc_possible = setup_dsc_config(dsc_sink_caps, &dsc_enc_caps, target_bandwidth_kbps, - timing, options, link_encoding, dsc_cfg); + timing, + options, + link_encoding, + min_dsc_slice_count, + dsc_cfg); return is_dsc_possible; } diff --git a/drivers/gpu/drm/amd/display/dc/dsc/dcn20/dcn20_dsc.c b/drivers/gpu/drm/amd/display/dc/dsc/dcn20/dcn20_dsc.c index bd1b9aef6d5c..89f0d999bf35 100644 --- a/drivers/gpu/drm/amd/display/dc/dsc/dcn20/dcn20_dsc.c +++ b/drivers/gpu/drm/amd/display/dc/dsc/dcn20/dcn20_dsc.c @@ -406,9 +406,10 @@ bool dsc_prepare_config(const struct dsc_config *dsc_cfg, struct dsc_reg_values dsc_reg_vals->alternate_ich_encoding_en = dsc_reg_vals->pps.dsc_version_minor == 1 ? 0 : 1; dsc_reg_vals->ich_reset_at_eol = (dsc_cfg->is_odm || dsc_reg_vals->num_slices_h > 1) ? 
0xF : 0; + // Need to find the ceiling value for the slice width + dsc_reg_vals->pps.slice_width = (dsc_cfg->pic_width + dsc_cfg->dc_dsc_cfg.num_slices_h - 1) / dsc_cfg->dc_dsc_cfg.num_slices_h; // TODO: in addition to validating slice height (pic height must be divisible by slice height), // see what happens when the same condition doesn't apply for slice_width/pic_width. - dsc_reg_vals->pps.slice_width = dsc_cfg->pic_width / dsc_cfg->dc_dsc_cfg.num_slices_h; dsc_reg_vals->pps.slice_height = dsc_cfg->pic_height / dsc_cfg->dc_dsc_cfg.num_slices_v; ASSERT(dsc_reg_vals->pps.slice_height * dsc_cfg->dc_dsc_cfg.num_slices_v == dsc_cfg->pic_height); diff --git a/drivers/gpu/drm/amd/display/dc/dsc/dcn401/dcn401_dsc.c b/drivers/gpu/drm/amd/display/dc/dsc/dcn401/dcn401_dsc.c index 4222679fd4c9..7bd92ae8b13e 100644 --- a/drivers/gpu/drm/amd/display/dc/dsc/dcn401/dcn401_dsc.c +++ b/drivers/gpu/drm/amd/display/dc/dsc/dcn401/dcn401_dsc.c @@ -9,19 +9,14 @@ #include "dsc/dscc_types.h" #include "dsc/rc_calc.h" -#define MAX_THROUGHPUT_PER_DSC_100HZ 20000000 -#define MAX_DSC_UNIT_COMBINE 4 - static void dsc_write_to_registers(struct display_stream_compressor *dsc, const struct dsc_reg_values *reg_vals); /* Object I/F functions */ //static void dsc401_get_enc_caps(struct dsc_enc_caps *dsc_enc_caps, int pixel_clock_100Hz); //static bool dsc401_get_packed_pps(struct display_stream_compressor *dsc, const struct dsc_config *dsc_cfg, uint8_t *dsc_packed_pps); -static void dsc401_wait_disconnect_pending_clear(struct display_stream_compressor *dsc); -static void dsc401_get_enc_caps(struct dsc_enc_caps *dsc_enc_caps, int pixel_clock_100Hz); +static void dsc401_get_single_enc_caps(struct dsc_enc_caps *dsc_enc_caps, unsigned int max_dscclk_khz); static const struct dsc_funcs dcn401_dsc_funcs = { - .dsc_get_enc_caps = dsc401_get_enc_caps, .dsc_read_state = dsc401_read_state, .dsc_validate_stream = dsc401_validate_stream, .dsc_set_config = dsc401_set_config, @@ -30,6 +25,7 @@ static const struct dsc_funcs dcn401_dsc_funcs = { .dsc_disable = dsc401_disable, .dsc_disconnect = dsc401_disconnect, .dsc_wait_disconnect_pending_clear = dsc401_wait_disconnect_pending_clear, + .dsc_get_single_enc_caps = dsc401_get_single_enc_caps, }; /* Macro definitios for REG_SET macros*/ @@ -66,22 +62,14 @@ void dsc401_construct(struct dcn401_dsc *dsc, dsc->max_image_width = 5184; } -static void dsc401_get_enc_caps(struct dsc_enc_caps *dsc_enc_caps, int pixel_clock_100Hz) +static void dsc401_get_single_enc_caps(struct dsc_enc_caps *dsc_enc_caps, unsigned int max_dscclk_khz) { - int min_dsc_unit_required = (pixel_clock_100Hz + MAX_THROUGHPUT_PER_DSC_100HZ - 1) / MAX_THROUGHPUT_PER_DSC_100HZ; - dsc_enc_caps->dsc_version = 0x21; /* v1.2 - DP spec defined it in reverse order and we kept it */ - /* 1 slice is only supported with 1 DSC unit */ - dsc_enc_caps->slice_caps.bits.NUM_SLICES_1 = min_dsc_unit_required == 1 ? 1 : 0; - /* 2 slice is only supported with 1 or 2 DSC units */ - dsc_enc_caps->slice_caps.bits.NUM_SLICES_2 = (min_dsc_unit_required == 1 || min_dsc_unit_required == 2) ? 1 : 0; - /* 3 slice is only supported with 1 DSC unit */ - dsc_enc_caps->slice_caps.bits.NUM_SLICES_3 = min_dsc_unit_required == 1 ? 
1 : 0; + dsc_enc_caps->slice_caps.bits.NUM_SLICES_1 = 1; + dsc_enc_caps->slice_caps.bits.NUM_SLICES_2 = 1; + dsc_enc_caps->slice_caps.bits.NUM_SLICES_3 = 1; dsc_enc_caps->slice_caps.bits.NUM_SLICES_4 = 1; - dsc_enc_caps->slice_caps.bits.NUM_SLICES_8 = 1; - dsc_enc_caps->slice_caps.bits.NUM_SLICES_12 = 1; - dsc_enc_caps->slice_caps.bits.NUM_SLICES_16 = 1; dsc_enc_caps->lb_bit_depth = 13; dsc_enc_caps->is_block_pred_supported = true; @@ -95,7 +83,7 @@ static void dsc401_get_enc_caps(struct dsc_enc_caps *dsc_enc_caps, int pixel_clo dsc_enc_caps->color_depth.bits.COLOR_DEPTH_8_BPC = 1; dsc_enc_caps->color_depth.bits.COLOR_DEPTH_10_BPC = 1; dsc_enc_caps->color_depth.bits.COLOR_DEPTH_12_BPC = 1; - dsc_enc_caps->max_total_throughput_mps = MAX_THROUGHPUT_PER_DSC_100HZ * MAX_DSC_UNIT_COMBINE; + dsc_enc_caps->max_total_throughput_mps = max_dscclk_khz * 3 / 1000; dsc_enc_caps->max_slice_width = 5184; /* (including 64 overlap pixels for eDP MSO mode) */ dsc_enc_caps->bpp_increment_div = 16; /* 1/16th of a bit */ @@ -191,7 +179,7 @@ void dsc401_disable(struct display_stream_compressor *dsc) DSC_CLOCK_EN, 0); } -static void dsc401_wait_disconnect_pending_clear(struct display_stream_compressor *dsc) +void dsc401_wait_disconnect_pending_clear(struct display_stream_compressor *dsc) { struct dcn401_dsc *dsc401 = TO_DCN401_DSC(dsc); diff --git a/drivers/gpu/drm/amd/display/dc/dsc/dcn401/dcn401_dsc.h b/drivers/gpu/drm/amd/display/dc/dsc/dcn401/dcn401_dsc.h index e3ca70058e64..7acd57eb4f42 100644 --- a/drivers/gpu/drm/amd/display/dc/dsc/dcn401/dcn401_dsc.h +++ b/drivers/gpu/drm/amd/display/dc/dsc/dcn401/dcn401_dsc.h @@ -341,5 +341,6 @@ void dsc401_set_config(struct display_stream_compressor *dsc, const struct dsc_c void dsc401_enable(struct display_stream_compressor *dsc, int opp_pipe); void dsc401_disable(struct display_stream_compressor *dsc); void dsc401_disconnect(struct display_stream_compressor *dsc); +void dsc401_wait_disconnect_pending_clear(struct display_stream_compressor *dsc); #endif diff --git a/drivers/gpu/drm/amd/display/dc/dsc/dsc.h b/drivers/gpu/drm/amd/display/dc/dsc/dsc.h index 1ebce5426a58..b0bd1f9425b5 100644 --- a/drivers/gpu/drm/amd/display/dc/dsc/dsc.h +++ b/drivers/gpu/drm/amd/display/dc/dsc/dsc.h @@ -108,6 +108,7 @@ struct dsc_funcs { void (*dsc_disable)(struct display_stream_compressor *dsc); void (*dsc_disconnect)(struct display_stream_compressor *dsc); void (*dsc_wait_disconnect_pending_clear)(struct display_stream_compressor *dsc); + void (*dsc_get_single_enc_caps)(struct dsc_enc_caps *dsc_enc_caps, unsigned int max_dscclk_khz); }; #endif diff --git a/drivers/gpu/drm/amd/display/dc/hdcp/hdcp_msg.c b/drivers/gpu/drm/amd/display/dc/hdcp/hdcp_msg.c index 1313a7c5d87b..73a1e6a03719 100644 --- a/drivers/gpu/drm/amd/display/dc/hdcp/hdcp_msg.c +++ b/drivers/gpu/drm/amd/display/dc/hdcp/hdcp_msg.c @@ -28,7 +28,7 @@ #include "include/hdcp_msg_types.h" #include "include/signal_types.h" #include "core_types.h" -#include "link.h" +#include "link_service.h" #include "link_hwss.h" #include "link/protocols/link_dpcd.h" diff --git a/drivers/gpu/drm/amd/display/dc/hubbub/dcn30/dcn30_hubbub.c b/drivers/gpu/drm/amd/display/dc/hubbub/dcn30/dcn30_hubbub.c index d347bb06577a..e7e5f6d4778e 100644 --- a/drivers/gpu/drm/amd/display/dc/hubbub/dcn30/dcn30_hubbub.c +++ b/drivers/gpu/drm/amd/display/dc/hubbub/dcn30/dcn30_hubbub.c @@ -440,6 +440,35 @@ void hubbub3_init_watermarks(struct hubbub *hubbub) REG_WRITE(DCHUBBUB_ARB_ALLOW_DRAM_CLK_CHANGE_WATERMARK_D, reg); } +void hubbub3_get_det_sizes(struct hubbub 
*hubbub, uint32_t *curr_det_sizes, uint32_t *target_det_sizes) +{ + struct dcn20_hubbub *hubbub1 = TO_DCN20_HUBBUB(hubbub); + + REG_GET_2(DCHUBBUB_DET0_CTRL, DET0_SIZE_CURRENT, &curr_det_sizes[0], + DET0_SIZE, &target_det_sizes[0]); + + REG_GET_2(DCHUBBUB_DET1_CTRL, DET1_SIZE_CURRENT, &curr_det_sizes[1], + DET1_SIZE, &target_det_sizes[1]); + + REG_GET_2(DCHUBBUB_DET2_CTRL, DET2_SIZE_CURRENT, &curr_det_sizes[2], + DET2_SIZE, &target_det_sizes[2]); + + REG_GET_2(DCHUBBUB_DET3_CTRL, DET3_SIZE_CURRENT, &curr_det_sizes[3], + DET3_SIZE, &target_det_sizes[3]); + +} + +uint32_t hubbub3_compbuf_config_error(struct hubbub *hubbub) +{ + struct dcn20_hubbub *hubbub1 = TO_DCN20_HUBBUB(hubbub); + uint32_t compbuf_config_error = 0; + + REG_GET(DCHUBBUB_COMPBUF_CTRL, CONFIG_ERROR, + &compbuf_config_error); + + return compbuf_config_error; +} + static const struct hubbub_funcs hubbub30_funcs = { .update_dchub = hubbub2_update_dchub, .init_dchub_sys_ctx = hubbub3_init_dchub_sys_ctx, @@ -457,6 +486,8 @@ static const struct hubbub_funcs hubbub30_funcs = { .force_pstate_change_control = hubbub3_force_pstate_change_control, .init_watermarks = hubbub3_init_watermarks, .hubbub_read_state = hubbub2_read_state, + .get_det_sizes = hubbub3_get_det_sizes, + .compbuf_config_error = hubbub3_compbuf_config_error, }; void hubbub3_construct(struct dcn20_hubbub *hubbub3, diff --git a/drivers/gpu/drm/amd/display/dc/hubbub/dcn30/dcn30_hubbub.h b/drivers/gpu/drm/amd/display/dc/hubbub/dcn30/dcn30_hubbub.h index ca6233e8f1f4..49a469969d36 100644 --- a/drivers/gpu/drm/amd/display/dc/hubbub/dcn30/dcn30_hubbub.h +++ b/drivers/gpu/drm/amd/display/dc/hubbub/dcn30/dcn30_hubbub.h @@ -133,4 +133,10 @@ void hubbub3_force_pstate_change_control(struct hubbub *hubbub, void hubbub3_init_watermarks(struct hubbub *hubbub); +void hubbub3_get_det_sizes(struct hubbub *hubbub, + uint32_t *curr_det_sizes, + uint32_t *target_det_sizes); + +uint32_t hubbub3_compbuf_config_error(struct hubbub *hubbub); + #endif diff --git a/drivers/gpu/drm/amd/display/dc/hubbub/dcn31/dcn31_hubbub.c b/drivers/gpu/drm/amd/display/dc/hubbub/dcn31/dcn31_hubbub.c index b98505b240a7..cdb20251a154 100644 --- a/drivers/gpu/drm/amd/display/dc/hubbub/dcn31/dcn31_hubbub.c +++ b/drivers/gpu/drm/amd/display/dc/hubbub/dcn31/dcn31_hubbub.c @@ -1071,6 +1071,8 @@ static const struct hubbub_funcs hubbub31_funcs = { .program_compbuf_size = dcn31_program_compbuf_size, .init_crb = dcn31_init_crb, .hubbub_read_state = hubbub2_read_state, + .get_det_sizes = hubbub3_get_det_sizes, + .compbuf_config_error = hubbub3_compbuf_config_error, }; void hubbub31_construct(struct dcn20_hubbub *hubbub31, diff --git a/drivers/gpu/drm/amd/display/dc/hubbub/dcn32/dcn32_hubbub.c b/drivers/gpu/drm/amd/display/dc/hubbub/dcn32/dcn32_hubbub.c index 32a6be543105..4d4ca6d77bbd 100644 --- a/drivers/gpu/drm/amd/display/dc/hubbub/dcn32/dcn32_hubbub.c +++ b/drivers/gpu/drm/amd/display/dc/hubbub/dcn32/dcn32_hubbub.c @@ -28,6 +28,7 @@ #include "dcn32_hubbub.h" #include "dm_services.h" #include "reg_helper.h" +#include "dal_asic_id.h" #define CTX \ @@ -72,6 +73,14 @@ static void dcn32_init_crb(struct hubbub *hubbub) REG_UPDATE(DCHUBBUB_DEBUG_CTRL_0, DET_DEPTH, 0x47F); } +static void hubbub32_set_sdp_control(struct hubbub *hubbub, bool dc_control) +{ + struct dcn20_hubbub *hubbub2 = TO_DCN20_HUBBUB(hubbub); + + REG_UPDATE(DCHUBBUB_SDPIF_CFG0, + SDPIF_PORT_CONTROL, dc_control); +} + void hubbub32_set_request_limit(struct hubbub *hubbub, int memory_channel_count, int words_per_channel) { struct dcn20_hubbub *hubbub2 = 
TO_DCN20_HUBBUB(hubbub); @@ -754,8 +763,18 @@ static bool hubbub32_program_watermarks( unsigned int refclk_mhz, bool safe_to_lower) { + struct dc *dc = hubbub->ctx->dc; bool wm_pending = false; + if (!safe_to_lower && dc->debug.disable_stutter_for_wm_program && + (ASICREV_IS_GC_11_0_0(dc->ctx->asic_id.hw_internal_rev) || + ASICREV_IS_GC_11_0_3(dc->ctx->asic_id.hw_internal_rev))) { + /* before raising watermarks, SDP control give to DF, stutter must be disabled */ + wm_pending = true; + hubbub32_set_sdp_control(hubbub, false); + hubbub1_allow_self_refresh_control(hubbub, false); + } + if (hubbub32_program_urgent_watermarks(hubbub, watermarks, refclk_mhz, safe_to_lower)) wm_pending = true; @@ -786,10 +805,20 @@ static bool hubbub32_program_watermarks( REG_UPDATE(DCHUBBUB_ARB_DF_REQ_OUTSTAND, DCHUBBUB_ARB_MIN_REQ_OUTSTAND, 0x1FF);*/ - if (safe_to_lower || hubbub->ctx->dc->debug.disable_stutter) - hubbub1_allow_self_refresh_control(hubbub, !hubbub->ctx->dc->debug.disable_stutter); + if (safe_to_lower) { + /* after lowering watermarks, stutter setting is restored, SDP control given to DC */ + hubbub1_allow_self_refresh_control(hubbub, !dc->debug.disable_stutter); + + if (dc->debug.disable_stutter_for_wm_program && + (ASICREV_IS_GC_11_0_0(dc->ctx->asic_id.hw_internal_rev) || + ASICREV_IS_GC_11_0_3(dc->ctx->asic_id.hw_internal_rev))) { + hubbub32_set_sdp_control(hubbub, true); + } + } else if (dc->debug.disable_stutter) { + hubbub1_allow_self_refresh_control(hubbub, !dc->debug.disable_stutter); + } - hubbub32_force_usr_retraining_allow(hubbub, hubbub->ctx->dc->debug.force_usr_allow); + hubbub32_force_usr_retraining_allow(hubbub, dc->debug.force_usr_allow); return wm_pending; } @@ -974,8 +1003,7 @@ void hubbub32_init(struct hubbub *hubbub) ignore the "df_pre_cstate_req" from the SDP port control. 
only the DCN will determine when to connect the SDP port */ - REG_UPDATE(DCHUBBUB_SDPIF_CFG0, - SDPIF_PORT_CONTROL, 1); + hubbub32_set_sdp_control(hubbub, true); /*Set SDP's max outstanding request to 512 must set the register back to 0 (max outstanding = 256) in zero frame buffer mode*/ REG_UPDATE(DCHUBBUB_SDPIF_CFG1, @@ -1009,6 +1037,8 @@ static const struct hubbub_funcs hubbub32_funcs = { .force_usr_retraining_allow = hubbub32_force_usr_retraining_allow, .set_request_limit = hubbub32_set_request_limit, .get_mall_en = hubbub32_get_mall_en, + .get_det_sizes = hubbub3_get_det_sizes, + .compbuf_config_error = hubbub3_compbuf_config_error, }; void hubbub32_construct(struct dcn20_hubbub *hubbub2, diff --git a/drivers/gpu/drm/amd/display/dc/hubbub/dcn35/dcn35_hubbub.c b/drivers/gpu/drm/amd/display/dc/hubbub/dcn35/dcn35_hubbub.c index 6d41953011f5..a443722a8632 100644 --- a/drivers/gpu/drm/amd/display/dc/hubbub/dcn35/dcn35_hubbub.c +++ b/drivers/gpu/drm/amd/display/dc/hubbub/dcn35/dcn35_hubbub.c @@ -589,6 +589,8 @@ static const struct hubbub_funcs hubbub35_funcs = { .hubbub_read_state = hubbub2_read_state, .force_usr_retraining_allow = hubbub32_force_usr_retraining_allow, .dchubbub_init = hubbub35_init, + .get_det_sizes = hubbub3_get_det_sizes, + .compbuf_config_error = hubbub3_compbuf_config_error, }; void hubbub35_construct(struct dcn20_hubbub *hubbub2, diff --git a/drivers/gpu/drm/amd/display/dc/hubbub/dcn401/dcn401_hubbub.c b/drivers/gpu/drm/amd/display/dc/hubbub/dcn401/dcn401_hubbub.c index 92fab471b183..a36273a52880 100644 --- a/drivers/gpu/drm/amd/display/dc/hubbub/dcn401/dcn401_hubbub.c +++ b/drivers/gpu/drm/amd/display/dc/hubbub/dcn401/dcn401_hubbub.c @@ -1247,6 +1247,8 @@ static const struct hubbub_funcs hubbub4_01_funcs = { .program_compbuf_segments = dcn401_program_compbuf_segments, .wait_for_det_update = dcn401_wait_for_det_update, .program_arbiter = dcn401_program_arbiter, + .get_det_sizes = hubbub3_get_det_sizes, + .compbuf_config_error = hubbub3_compbuf_config_error, }; void hubbub401_construct(struct dcn20_hubbub *hubbub2, diff --git a/drivers/gpu/drm/amd/display/dc/hubp/dcn10/dcn10_hubp.h b/drivers/gpu/drm/amd/display/dc/hubp/dcn10/dcn10_hubp.h index c7765e6f09e6..cf2eb9793008 100644 --- a/drivers/gpu/drm/amd/display/dc/hubp/dcn10/dcn10_hubp.h +++ b/drivers/gpu/drm/amd/display/dc/hubp/dcn10/dcn10_hubp.h @@ -104,7 +104,8 @@ SRI(DCN_SURF1_TTU_CNTL1, HUBPREQ, id),\ SRI(DCN_CUR0_TTU_CNTL0, HUBPREQ, id),\ SRI(DCN_CUR0_TTU_CNTL1, HUBPREQ, id),\ - SRI(HUBP_CLK_CNTL, HUBP, id) + SRI(HUBP_CLK_CNTL, HUBP, id),\ + SRI(HUBPRET_READ_LINE_VALUE, HUBPRET, id) /* Register address initialization macro for ASICs with VM */ #define HUBP_REG_LIST_DCN_VM(id)\ @@ -249,7 +250,8 @@ uint32_t CURSOR_POSITION; \ uint32_t CURSOR_HOT_SPOT; \ uint32_t CURSOR_DST_OFFSET; \ - uint32_t HUBP_CLK_CNTL + uint32_t HUBP_CLK_CNTL; \ + uint32_t HUBPRET_READ_LINE_VALUE #define HUBP_SF(reg_name, field_name, post_fix)\ .field_name = reg_name ## __ ## field_name ## post_fix @@ -622,6 +624,8 @@ type DCN_VM_SYSTEM_APERTURE_DEFAULT_SYSTEM;\ type DCN_VM_SYSTEM_APERTURE_DEFAULT_ADDR_MSB;\ type DCN_VM_SYSTEM_APERTURE_DEFAULT_ADDR_LSB;\ + type PIPE_READ_LINE;\ + type HUBP_SEG_ALLOC_ERR_STATUS;\ /* todo: get these from GVM instead of reading registers ourselves */\ type PAGE_DIRECTORY_ENTRY_HI32;\ type PAGE_DIRECTORY_ENTRY_LO32;\ @@ -666,10 +670,30 @@ struct dcn_mi_mask { DCN_HUBP_REG_FIELD_LIST(uint32_t); }; +struct dcn_fl_regs_st { + uint32_t lut_enable; + uint32_t lut_done; + uint32_t lut_addr_mode; + uint32_t lut_width; + 
uint32_t lut_mpc_width; + uint32_t lut_tmz; + uint32_t lut_crossbar_sel_r; + uint32_t lut_crossbar_sel_g; + uint32_t lut_crossbar_sel_b; + uint32_t lut_addr_hi; + uint32_t lut_addr_lo; + uint32_t refcyc_3dlut_group; + uint32_t lut_fl_bias; + uint32_t lut_fl_scale; + uint32_t lut_fl_mode; + uint32_t lut_fl_format; +}; + struct dcn_hubp_state { struct _vcs_dpi_display_dlg_regs_st dlg_attr; struct _vcs_dpi_display_ttu_regs_st ttu_attr; struct _vcs_dpi_display_rq_regs_st rq_regs; + struct dcn_fl_regs_st fl_regs; uint32_t pixel_format; uint32_t inuse_addr_hi; uint32_t inuse_addr_lo; diff --git a/drivers/gpu/drm/amd/display/dc/hubp/dcn20/dcn20_hubp.h b/drivers/gpu/drm/amd/display/dc/hubp/dcn20/dcn20_hubp.h index 62369be070ea..f325db555102 100644 --- a/drivers/gpu/drm/amd/display/dc/hubp/dcn20/dcn20_hubp.h +++ b/drivers/gpu/drm/amd/display/dc/hubp/dcn20/dcn20_hubp.h @@ -264,6 +264,7 @@ type HUBP_3DLUT_DONE;\ type HUBP_3DLUT_ADDRESSING_MODE;\ type HUBP_3DLUT_WIDTH;\ + type HUBP_3DLUT_MPC_WIDTH;\ type HUBP_3DLUT_TMZ;\ type HUBP_3DLUT_CROSSBAR_SELECT_Y_G;\ type HUBP_3DLUT_CROSSBAR_SELECT_CB_B;\ diff --git a/drivers/gpu/drm/amd/display/dc/hubp/dcn30/dcn30_hubp.c b/drivers/gpu/drm/amd/display/dc/hubp/dcn30/dcn30_hubp.c index 0da70b50e86d..556214b2227d 100644 --- a/drivers/gpu/drm/amd/display/dc/hubp/dcn30/dcn30_hubp.c +++ b/drivers/gpu/drm/amd/display/dc/hubp/dcn30/dcn30_hubp.c @@ -505,6 +505,30 @@ void hubp3_init(struct hubp *hubp) hubp_reset(hubp); } +uint32_t hubp3_get_current_read_line(struct hubp *hubp) +{ + uint32_t read_line = 0; + struct dcn20_hubp *hubp2 = TO_DCN20_HUBP(hubp); + + REG_GET(HUBPRET_READ_LINE_VALUE, + PIPE_READ_LINE, + &read_line); + + return read_line; +} + +unsigned int hubp3_get_underflow_status(struct hubp *hubp) +{ + uint32_t hubp_underflow = 0; + struct dcn20_hubp *hubp2 = TO_DCN20_HUBP(hubp); + + REG_GET(DCHUBP_CNTL, + HUBP_UNDERFLOW_STATUS, + &hubp_underflow); + + return hubp_underflow; +} + static struct hubp_funcs dcn30_hubp_funcs = { .hubp_enable_tripleBuffer = hubp2_enable_triplebuffer, .hubp_is_triplebuffer_enabled = hubp2_is_triplebuffer_enabled, @@ -534,6 +558,8 @@ static struct hubp_funcs dcn30_hubp_funcs = { .hubp_soft_reset = hubp1_soft_reset, .hubp_set_flip_int = hubp1_set_flip_int, .hubp_clear_tiling = hubp3_clear_tiling, + .hubp_get_underflow_status = hubp3_get_underflow_status, + .hubp_get_current_read_line = hubp3_get_current_read_line, }; bool hubp3_construct( diff --git a/drivers/gpu/drm/amd/display/dc/hubp/dcn30/dcn30_hubp.h b/drivers/gpu/drm/amd/display/dc/hubp/dcn30/dcn30_hubp.h index b7d7adf0b58c..842f4eb72cc8 100644 --- a/drivers/gpu/drm/amd/display/dc/hubp/dcn30/dcn30_hubp.h +++ b/drivers/gpu/drm/amd/display/dc/hubp/dcn30/dcn30_hubp.h @@ -243,7 +243,8 @@ HUBP_SF(HUBPREQ0_FLIP_PARAMETERS_6, REFCYC_PER_META_CHUNK_FLIP_C, mask_sh),\ HUBP_SF(HUBPREQ0_VBLANK_PARAMETERS_5, REFCYC_PER_VM_GROUP_VBLANK, mask_sh),\ HUBP_SF(HUBPREQ0_VBLANK_PARAMETERS_6, REFCYC_PER_VM_REQ_VBLANK, mask_sh),\ - HUBP_SF(HUBP0_DCHUBP_REQ_SIZE_CONFIG, VM_GROUP_SIZE, mask_sh) + HUBP_SF(HUBP0_DCHUBP_REQ_SIZE_CONFIG, VM_GROUP_SIZE, mask_sh),\ + HUBP_SF(HUBPRET0_HUBPRET_READ_LINE_VALUE, PIPE_READ_LINE, mask_sh) bool hubp3_construct( struct dcn20_hubp *hubp2, @@ -299,6 +300,11 @@ void hubp3_init(struct hubp *hubp); void hubp3_clear_tiling(struct hubp *hubp); +uint32_t hubp3_get_current_read_line(struct hubp *hubp); + +uint32_t hubp3_get_underflow_status(struct hubp *hubp); + + #endif /* __DC_HUBP_DCN30_H__ */ diff --git a/drivers/gpu/drm/amd/display/dc/hubp/dcn31/dcn31_hubp.c 
b/drivers/gpu/drm/amd/display/dc/hubp/dcn31/dcn31_hubp.c index 7fd582a8a4ba..47101847c2b7 100644 --- a/drivers/gpu/drm/amd/display/dc/hubp/dcn31/dcn31_hubp.c +++ b/drivers/gpu/drm/amd/display/dc/hubp/dcn31/dcn31_hubp.c @@ -68,6 +68,18 @@ void hubp31_program_extended_blank_value( hubp31_program_extended_blank(hubp, min_dst_y_next_start_optimized); } +uint32_t hubp31_get_det_config_error(struct hubp *hubp) +{ + uint32_t config_error = 0; + struct dcn20_hubp *hubp2 = TO_DCN20_HUBP(hubp); + + REG_GET(DCHUBP_CNTL, + HUBP_SEG_ALLOC_ERR_STATUS, + &config_error); + + return config_error; +} + static struct hubp_funcs dcn31_hubp_funcs = { .hubp_enable_tripleBuffer = hubp2_enable_triplebuffer, .hubp_is_triplebuffer_enabled = hubp2_is_triplebuffer_enabled, @@ -98,6 +110,9 @@ static struct hubp_funcs dcn31_hubp_funcs = { .hubp_in_blank = hubp1_in_blank, .program_extended_blank = hubp31_program_extended_blank, .hubp_clear_tiling = hubp3_clear_tiling, + .hubp_get_underflow_status = hubp3_get_underflow_status, + .hubp_get_current_read_line = hubp3_get_current_read_line, + .hubp_get_det_config_error = hubp31_get_det_config_error, }; bool hubp31_construct( diff --git a/drivers/gpu/drm/amd/display/dc/hubp/dcn31/dcn31_hubp.h b/drivers/gpu/drm/amd/display/dc/hubp/dcn31/dcn31_hubp.h index d688db79b750..5952c4671507 100644 --- a/drivers/gpu/drm/amd/display/dc/hubp/dcn31/dcn31_hubp.h +++ b/drivers/gpu/drm/amd/display/dc/hubp/dcn31/dcn31_hubp.h @@ -228,7 +228,9 @@ HUBP_SF(HUBPREQ0_FLIP_PARAMETERS_6, REFCYC_PER_META_CHUNK_FLIP_C, mask_sh),\ HUBP_SF(HUBPREQ0_VBLANK_PARAMETERS_5, REFCYC_PER_VM_GROUP_VBLANK, mask_sh),\ HUBP_SF(HUBPREQ0_VBLANK_PARAMETERS_6, REFCYC_PER_VM_REQ_VBLANK, mask_sh),\ - HUBP_SF(HUBP0_DCHUBP_REQ_SIZE_CONFIG, VM_GROUP_SIZE, mask_sh) + HUBP_SF(HUBP0_DCHUBP_REQ_SIZE_CONFIG, VM_GROUP_SIZE, mask_sh),\ + HUBP_SF(HUBPRET0_HUBPRET_READ_LINE_VALUE, PIPE_READ_LINE, mask_sh),\ + HUBP_SF(HUBP0_DCHUBP_CNTL, HUBP_SEG_ALLOC_ERR_STATUS, mask_sh) bool hubp31_construct( @@ -246,4 +248,6 @@ void hubp31_set_unbounded_requesting(struct hubp *hubp, bool enable); void hubp31_program_extended_blank_value( struct hubp *hubp, unsigned int min_dst_y_next_start_optimized); +uint32_t hubp31_get_det_config_error(struct hubp *hubp); + #endif /* __DC_HUBP_DCN31_H__ */ diff --git a/drivers/gpu/drm/amd/display/dc/hubp/dcn32/dcn32_hubp.c b/drivers/gpu/drm/amd/display/dc/hubp/dcn32/dcn32_hubp.c index f3a21c623f44..a5f23bb2a76a 100644 --- a/drivers/gpu/drm/amd/display/dc/hubp/dcn32/dcn32_hubp.c +++ b/drivers/gpu/drm/amd/display/dc/hubp/dcn32/dcn32_hubp.c @@ -206,6 +206,9 @@ static struct hubp_funcs dcn32_hubp_funcs = { .hubp_update_mall_sel = hubp32_update_mall_sel, .hubp_prepare_subvp_buffering = hubp32_prepare_subvp_buffering, .hubp_clear_tiling = hubp3_clear_tiling, + .hubp_get_underflow_status = hubp3_get_underflow_status, + .hubp_get_current_read_line = hubp3_get_current_read_line, + .hubp_get_det_config_error = hubp31_get_det_config_error, }; bool hubp32_construct( diff --git a/drivers/gpu/drm/amd/display/dc/hubp/dcn35/dcn35_hubp.c b/drivers/gpu/drm/amd/display/dc/hubp/dcn35/dcn35_hubp.c index 6d060ba12da8..b140808f21af 100644 --- a/drivers/gpu/drm/amd/display/dc/hubp/dcn35/dcn35_hubp.c +++ b/drivers/gpu/drm/amd/display/dc/hubp/dcn35/dcn35_hubp.c @@ -218,6 +218,9 @@ static struct hubp_funcs dcn35_hubp_funcs = { .hubp_in_blank = hubp1_in_blank, .program_extended_blank = hubp31_program_extended_blank_value, .hubp_clear_tiling = hubp3_clear_tiling, + .hubp_get_underflow_status = hubp3_get_underflow_status, + 
.hubp_get_current_read_line = hubp3_get_current_read_line, + .hubp_get_det_config_error = hubp31_get_det_config_error, }; bool hubp35_construct( diff --git a/drivers/gpu/drm/amd/display/dc/hubp/dcn401/dcn401_hubp.c b/drivers/gpu/drm/amd/display/dc/hubp/dcn401/dcn401_hubp.c index baed31611477..0fcbc6a35be6 100644 --- a/drivers/gpu/drm/amd/display/dc/hubp/dcn401/dcn401_hubp.c +++ b/drivers/gpu/drm/amd/display/dc/hubp/dcn401/dcn401_hubp.c @@ -86,11 +86,11 @@ void hubp401_program_3dlut_fl_width(struct hubp *hubp, enum hubp_3dlut_fl_width REG_UPDATE(HUBP_3DLUT_CONTROL, HUBP_3DLUT_WIDTH, width); } -void hubp401_program_3dlut_fl_tmz_protected(struct hubp *hubp, bool protection_enabled) +void hubp401_program_3dlut_fl_tmz_protected(struct hubp *hubp, uint8_t protection_bits) { struct dcn20_hubp *hubp2 = TO_DCN20_HUBP(hubp); - REG_UPDATE(HUBP_3DLUT_CONTROL, HUBP_3DLUT_TMZ, protection_enabled ? 1 : 0); + REG_UPDATE(HUBP_3DLUT_CONTROL, HUBP_3DLUT_TMZ, protection_bits); } void hubp401_program_3dlut_fl_crossbar(struct hubp *hubp, @@ -127,6 +127,43 @@ void hubp401_program_3dlut_fl_format(struct hubp *hubp, enum hubp_3dlut_fl_forma REG_UPDATE(_3DLUT_FL_CONFIG, HUBP0_3DLUT_FL_FORMAT, format); } +void hubp401_program_3dlut_fl_config( + struct hubp *hubp, + struct hubp_fl_3dlut_config *cfg) +{ + struct dcn20_hubp *hubp2 = TO_DCN20_HUBP(hubp); + + uint32_t mpc_width = {(cfg->width == 17) ? 0 : 1}; + uint32_t width = {cfg->width}; + + if (cfg->layout == DC_CM2_GPU_MEM_LAYOUT_1D_PACKED_LINEAR) + width = (cfg->width == 17) ? 4916 : 35940; + + REG_UPDATE_2(_3DLUT_FL_CONFIG, + HUBP0_3DLUT_FL_MODE, cfg->mode, + HUBP0_3DLUT_FL_FORMAT, cfg->format); + + REG_UPDATE_2(_3DLUT_FL_BIAS_SCALE, + HUBP0_3DLUT_FL_BIAS, cfg->bias, + HUBP0_3DLUT_FL_SCALE, cfg->scale); + + REG_UPDATE(HUBP_3DLUT_ADDRESS_HIGH, + HUBP_3DLUT_ADDRESS_HIGH, cfg->address.lut3d.addr.high_part); + REG_UPDATE(HUBP_3DLUT_ADDRESS_LOW, + HUBP_3DLUT_ADDRESS_LOW, cfg->address.lut3d.addr.low_part); + + //cross bar + REG_UPDATE_8(HUBP_3DLUT_CONTROL, + HUBP_3DLUT_MPC_WIDTH, mpc_width, + HUBP_3DLUT_WIDTH, width, + HUBP_3DLUT_CROSSBAR_SELECT_CR_R, cfg->crossbar_bit_slice_cr_r, + HUBP_3DLUT_CROSSBAR_SELECT_Y_G, cfg->crossbar_bit_slice_y_g, + HUBP_3DLUT_CROSSBAR_SELECT_CB_B, cfg->crossbar_bit_slice_cb_b, + HUBP_3DLUT_ADDRESSING_MODE, cfg->addr_mode, + HUBP_3DLUT_TMZ, cfg->protection_bits, + HUBP_3DLUT_ENABLE, cfg->enabled ? 
1 : 0); +} + void hubp401_update_mall_sel(struct hubp *hubp, uint32_t mall_sel, bool c_cursor) { struct dcn20_hubp *hubp2 = TO_DCN20_HUBP(hubp); @@ -1033,6 +1070,10 @@ static struct hubp_funcs dcn401_hubp_funcs = { .hubp_program_3dlut_fl_crossbar = hubp401_program_3dlut_fl_crossbar, .hubp_get_3dlut_fl_done = hubp401_get_3dlut_fl_done, .hubp_clear_tiling = hubp401_clear_tiling, + .hubp_program_3dlut_fl_config = hubp401_program_3dlut_fl_config, + .hubp_get_underflow_status = hubp3_get_underflow_status, + .hubp_get_current_read_line = hubp3_get_current_read_line, + .hubp_get_det_config_error = hubp31_get_det_config_error, }; bool hubp401_construct( diff --git a/drivers/gpu/drm/amd/display/dc/hubp/dcn401/dcn401_hubp.h b/drivers/gpu/drm/amd/display/dc/hubp/dcn401/dcn401_hubp.h index 6e1d4c90ddd4..fdabbeec8ffa 100644 --- a/drivers/gpu/drm/amd/display/dc/hubp/dcn401/dcn401_hubp.h +++ b/drivers/gpu/drm/amd/display/dc/hubp/dcn401/dcn401_hubp.h @@ -252,7 +252,9 @@ HUBP_SF(HUBP0_DCHUBP_MCACHEID_CONFIG, MCACHEID_MALL_PREF_1H_P0, mask_sh),\ HUBP_SF(HUBP0_DCHUBP_MCACHEID_CONFIG, MCACHEID_MALL_PREF_2H_P0, mask_sh),\ HUBP_SF(HUBP0_DCHUBP_MCACHEID_CONFIG, MCACHEID_MALL_PREF_1H_P1, mask_sh),\ - HUBP_SF(HUBP0_DCHUBP_MCACHEID_CONFIG, MCACHEID_MALL_PREF_2H_P1, mask_sh) + HUBP_SF(HUBP0_DCHUBP_MCACHEID_CONFIG, MCACHEID_MALL_PREF_2H_P1, mask_sh),\ + HUBP_SF(HUBPRET0_HUBPRET_READ_LINE_VALUE, PIPE_READ_LINE, mask_sh),\ + HUBP_SF(HUBP0_DCHUBP_CNTL, HUBP_SEG_ALLOC_ERR_STATUS, mask_sh) void hubp401_update_mall_sel(struct hubp *hubp, uint32_t mall_sel, bool c_cursor); @@ -333,7 +335,7 @@ void hubp401_program_3dlut_fl_crossbar(struct hubp *hubp, enum hubp_3dlut_fl_crossbar_bit_slice bit_slice_cb_b, enum hubp_3dlut_fl_crossbar_bit_slice bit_slice_cr_r); -void hubp401_program_3dlut_fl_tmz_protected(struct hubp *hubp, bool protection_enabled); +void hubp401_program_3dlut_fl_tmz_protected(struct hubp *hubp, uint8_t protection_bits); void hubp401_program_3dlut_fl_width(struct hubp *hubp, enum hubp_3dlut_fl_width width); @@ -349,6 +351,10 @@ void hubp401_program_3dlut_fl_format(struct hubp *hubp, enum hubp_3dlut_fl_forma void hubp401_program_3dlut_fl_mode(struct hubp *hubp, enum hubp_3dlut_fl_mode mode); +void hubp401_program_3dlut_fl_config( + struct hubp *hubp, + struct hubp_fl_3dlut_config *cfg); + void hubp401_clear_tiling(struct hubp *hubp); void hubp401_vready_at_or_After_vsync(struct hubp *hubp, diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c index e8730cc40edb..24184b4eb352 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c @@ -48,7 +48,7 @@ #include "link_encoder.h" #include "link_enc_cfg.h" #include "link_hwss.h" -#include "link.h" +#include "link_service.h" #include "dccg.h" #include "clock_source.h" #include "clk_mgr.h" @@ -671,6 +671,7 @@ void dce110_enable_stream(struct pipe_ctx *pipe_ctx) uint32_t early_control = 0; struct timing_generator *tg = pipe_ctx->stream_res.tg; + link_hwss->setup_stream_attribute(pipe_ctx); link_hwss->setup_stream_encoder(pipe_ctx); dc->hwss.update_info_frame(pipe_ctx); @@ -1186,8 +1187,10 @@ void dce110_disable_stream(struct pipe_ctx *pipe_ctx) if (dccg) { dccg->funcs->disable_symclk32_se(dccg, dp_hpo_inst); dccg->funcs->set_dpstreamclk(dccg, REFCLK, tg->inst, dp_hpo_inst); - if (dccg && dccg->funcs->set_dtbclk_dto) - dccg->funcs->set_dtbclk_dto(dccg, &dto_params); + if (!(dc->ctx->dce_version >= DCN_VERSION_3_5)) { + if 
(dccg && dccg->funcs->set_dtbclk_dto) + dccg->funcs->set_dtbclk_dto(dccg, &dto_params); + } } } else if (dccg && dccg->funcs->disable_symclk_se) { dccg->funcs->disable_symclk_se(dccg, stream_enc->stream_enc_inst, @@ -1225,7 +1228,7 @@ void dce110_blank_stream(struct pipe_ctx *pipe_ctx) return; if (link->local_sink && link->local_sink->sink_signal == SIGNAL_TYPE_EDP) { - if (!link->skip_implict_edp_power_control) + if (!link->skip_implict_edp_power_control && hws) hws->funcs.edp_backlight_control(link, false); link->dc->hwss.set_abm_immediate_disable(pipe_ctx); } @@ -1267,7 +1270,7 @@ void dce110_set_avmute(struct pipe_ctx *pipe_ctx, bool enable) pipe_ctx->stream_res.stream_enc->funcs->set_avmute(pipe_ctx->stream_res.stream_enc, enable); } -static enum audio_dto_source translate_to_dto_source(enum controller_id crtc_id) +enum audio_dto_source translate_to_dto_source(enum controller_id crtc_id) { switch (crtc_id) { case CONTROLLER_ID_D0: @@ -1287,7 +1290,7 @@ static enum audio_dto_source translate_to_dto_source(enum controller_id crtc_id) } } -static void populate_audio_dp_link_info( +void populate_audio_dp_link_info( const struct pipe_ctx *pipe_ctx, struct audio_dp_link_info *dp_link_info) { @@ -1379,7 +1382,7 @@ static void populate_audio_dp_link_info( } } -static void build_audio_output( +void build_audio_output( struct dc_state *state, const struct pipe_ctx *pipe_ctx, struct audio_output *audio_output) @@ -1684,6 +1687,19 @@ enum dc_status dce110_apply_single_controller_ctx_to_hw( if (dc_is_dp_signal(pipe_ctx->stream->signal)) dc->link_srv->dp_trace_source_sequence(link, DPCD_SOURCE_SEQ_AFTER_CONNECT_DIG_FE_OTG); + /* Temporary workaround to perform DSC programming ahead of stream enablement + * for smartmux/SPRS + * TODO: Remove SmartMux/SPRS checks once movement of DSC programming is generalized + */ + if (pipe_ctx->stream->timing.flags.DSC) { + if ((pipe_ctx->stream->signal == SIGNAL_TYPE_EDP && + ((link->dc->config.smart_mux_version && link->dc->is_switch_in_progress_dest) + || link->is_dds || link->skip_implict_edp_power_control)) && + (dc_is_dp_signal(pipe_ctx->stream->signal) || + dc_is_virtual_signal(pipe_ctx->stream->signal))) + dc->link_srv->set_dsc_enable(pipe_ctx, true); + } + if (!stream->dpms_off) dc->link_srv->set_dpms_on(context, pipe_ctx); @@ -1909,10 +1925,8 @@ void dce110_enable_accelerated_mode(struct dc *dc, struct dc_state *context) get_edp_streams(context, edp_streams, &edp_stream_num); - // Check fastboot support, disable on DCE8 because of blank screens - if (edp_num && edp_stream_num && dc->ctx->dce_version != DCE_VERSION_8_0 && - dc->ctx->dce_version != DCE_VERSION_8_1 && - dc->ctx->dce_version != DCE_VERSION_8_3) { + /* Check fastboot support, disable on DCE 6-8 because of blank screens */ + if (edp_num && edp_stream_num && dc->ctx->dce_version < DCE_VERSION_10_0) { for (i = 0; i < edp_num; i++) { edp_link = edp_links[i]; if (edp_link != edp_streams[0]->link) @@ -1925,6 +1939,13 @@ void dce110_enable_accelerated_mode(struct dc *dc, struct dc_state *context) can_apply_edp_fast_boot = dc_validate_boot_timing(dc, edp_stream->sink, &edp_stream->timing); + + // For Mux-platform, the default value is false. + // Disable fast boot during mux switching. + // The flag would be clean after switching done. 
+ if (dc->is_switch_in_progress_dest && edp_link->is_dds) + can_apply_edp_fast_boot = false; + edp_stream->apply_edp_fast_boot_optimization = can_apply_edp_fast_boot; if (can_apply_edp_fast_boot) { DC_LOG_EVENT_LINK_TRAINING("eDP fast boot Enable\n"); @@ -1968,6 +1989,10 @@ void dce110_enable_accelerated_mode(struct dc *dc, struct dc_state *context) if (edp_with_sink_num) edp_link_with_sink = edp_links_with_sink[0]; + // During a mux switch, powering down the HW blocks and then enabling + // the link via a DPCD SET_POWER write causes a brief flash + keep_edp_vdd_on |= dc->is_switch_in_progress_dest; + if (!can_apply_edp_fast_boot && !can_apply_seamless_boot) { if (edp_link_with_sink && !keep_edp_vdd_on) { /*turn off backlight before DP_blank and encoder powered down*/ @@ -2228,7 +2253,7 @@ static bool should_enable_fbc(struct dc *dc, /* * Enable FBC */ -static void enable_fbc( +void enable_fbc( struct dc *dc, struct dc_state *context) { diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.h b/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.h index 06789ac3a224..9c032e449481 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.h +++ b/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.h @@ -110,5 +110,16 @@ void dce110_enable_dp_link_output( enum signal_type signal, enum clock_source_id clock_source, const struct dc_link_settings *link_settings); +void build_audio_output( + struct dc_state *state, + const struct pipe_ctx *pipe_ctx, + struct audio_output *audio_output); +enum audio_dto_source translate_to_dto_source(enum controller_id crtc_id); +void populate_audio_dp_link_info( + const struct pipe_ctx *pipe_ctx, + struct audio_dp_link_info *dp_link_info); +void enable_fbc( + struct dc *dc, + struct dc_state *context); #endif /* __DC_HWSS_DCE110_H__ */ diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c index f9ee55998b6b..e9fe97f0c4ea 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c @@ -55,7 +55,7 @@ #include "dce/dmub_hw_lock_mgr.h" #include "dc_trace.h" #include "dce/dmub_outbox.h" -#include "link.h" +#include "link_service.h" #include "dc_state_priv.h" #define DC_LOGGER \ @@ -327,6 +327,46 @@ static void dcn10_log_hubp_states(struct dc *dc, void *log_ctx) } } + DTN_INFO("\n=======HUBP FL======\n"); + static const char * const pLabels[] = { + "inst", "Enabled ", "Done ", "adr_mode ", "width ", "mpc_width ", + "tmz", "xbar_sel_R", "xbar_sel_G", "xbar_sel_B", "adr_hi ", + "adr_low", "REFCYC", "Bias", "Scale", "Mode", + "Format", "prefetch"}; + + for (i = 0; i < pool->pipe_count; i++) { + struct dcn_hubp_state *s = &(TO_DCN10_HUBP(pool->hubps[i])->state); + struct dcn_fl_regs_st *fl_regs = &s->fl_regs; + struct _vcs_dpi_display_dlg_regs_st *dlg_regs = &s->dlg_attr; + + if (!s->blank_en) { + uint32_t values[] = { + pool->hubps[i]->inst, + fl_regs->lut_enable, + fl_regs->lut_done, + fl_regs->lut_addr_mode, + fl_regs->lut_width, + fl_regs->lut_mpc_width, + fl_regs->lut_tmz, + fl_regs->lut_crossbar_sel_r, + fl_regs->lut_crossbar_sel_g, + fl_regs->lut_crossbar_sel_b, + fl_regs->lut_addr_hi, + fl_regs->lut_addr_lo, + fl_regs->refcyc_3dlut_group, + fl_regs->lut_fl_bias, + fl_regs->lut_fl_scale, + fl_regs->lut_fl_mode, + fl_regs->lut_fl_format, + dlg_regs->dst_y_prefetch}; + + int num_elements = 18; + + for (int j = 0; j < num_elements; j++) + DTN_INFO("%s \t %8xh\n", pLabels[j], values[j]); + } + } + 
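(The new HUBP FL dump above keeps pLabels and the per-pipe values array in lockstep and simply walks them in parallel; num_elements is kept right next to the array definitions so the two stay the same length. A minimal, self-contained sketch of that pattern follows — the names and values are illustrative only, not taken from the driver.)

    #include <stdio.h>
    #include <stdint.h>

    /* Print label/value pairs the way the HUBP FL dump does:
     * both arrays must have the same length and ordering. */
    static void dump_fields(const char * const labels[],
                            const uint32_t values[], int count)
    {
            int i;

            for (i = 0; i < count; i++)
                    printf("%s \t %8xh\n", labels[i], (unsigned)values[i]);
    }

    int main(void)
    {
            static const char * const labels[] = { "inst", "Enabled ", "tmz" };
            static const uint32_t values[] = { 0, 1, 0 };

            dump_fields(labels, values, 3);
            return 0;
    }

(If the two arrays drift apart in length or order, the dump silently prints mismatched pairs, which is why the element count sits beside the array definitions in the hunk above.)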
DTN_INFO("\n=========RQ========\n"); DTN_INFO("HUBP: drq_exp_m prq_exp_m mrq_exp_m crq_exp_m plane1_ba L:chunk_s min_chu_s meta_ch_s" " min_m_c_s dpte_gr_s mpte_gr_s swath_hei pte_row_h C:chunk_s min_chu_s meta_ch_s" @@ -511,6 +551,60 @@ static void dcn10_log_color_state(struct dc *dc, dc->caps.color.mpc.num_3dluts, dc->caps.color.mpc.ogam_ram, dc->caps.color.mpc.ocsc); + DTN_INFO("===== MPC RMCM 3DLUT =====\n"); + static const char * const pLabels[] = { + "MPCC", "SIZE", "MODE", "MODE_CUR", "RD_SEL", + "30BIT_EN", "WR_EN_MASK", "RAM_SEL", "OUT_NORM_FACTOR", "FL_SEL", + "OUT_OFFSET", "OUT_SCALE", "FL_DONE", "SOFT_UNDERFLOW", "HARD_UNDERFLOW", + "MEM_PWR_ST", "FORCE", "DIS", "MODE"}; + + for (i = 0; i < pool->mpcc_count; i++) { + struct mpcc_state s = {0}; + + pool->mpc->funcs->read_mpcc_state(pool->mpc, i, &s); + if (s.opp_id != 0xf) { + uint32_t values[] = { + i, + s.rmcm_regs.rmcm_3dlut_size, + s.rmcm_regs.rmcm_3dlut_mode, + s.rmcm_regs.rmcm_3dlut_mode_cur, + s.rmcm_regs.rmcm_3dlut_read_sel, + s.rmcm_regs.rmcm_3dlut_30bit_en, + s.rmcm_regs.rmcm_3dlut_wr_en_mask, + s.rmcm_regs.rmcm_3dlut_ram_sel, + s.rmcm_regs.rmcm_3dlut_out_norm_factor, + s.rmcm_regs.rmcm_3dlut_fl_sel, + s.rmcm_regs.rmcm_3dlut_out_offset_r, + s.rmcm_regs.rmcm_3dlut_out_scale_r, + s.rmcm_regs.rmcm_3dlut_fl_done, + s.rmcm_regs.rmcm_3dlut_fl_soft_underflow, + s.rmcm_regs.rmcm_3dlut_fl_hard_underflow, + s.rmcm_regs.rmcm_3dlut_mem_pwr_state, + s.rmcm_regs.rmcm_3dlut_mem_pwr_force, + s.rmcm_regs.rmcm_3dlut_mem_pwr_dis, + s.rmcm_regs.rmcm_3dlut_mem_pwr_mode}; + + int num_elements = 19; + + for (int j = 0; j < num_elements; j++) + DTN_INFO("%s \t %8xh\n", pLabels[j], values[j]); + } + } + DTN_INFO("\n"); + DTN_INFO("===== MPC RMCM Shaper =====\n"); + DTN_INFO("MPCC: CNTL LUT_MODE MODE_CUR WR_EN_MASK WR_SEL OFFSET SCALE START_B START_SEG_B END_B END_BASE_B MEM_PWR_ST FORCE DIS MODE\n"); + for (i = 0; i < pool->mpcc_count; i++) { + struct mpcc_state s = {0}; + + pool->mpc->funcs->read_mpcc_state(pool->mpc, i, &s); + if (s.opp_id != 0xf) + DTN_INFO("[%2d]: %4xh %4xh %6xh %4x %4x %4x %4x %4x %4xh %4xh %6xh %4x %4x %4x %4x\n", + i, s.rmcm_regs.rmcm_cntl, s.rmcm_regs.rmcm_shaper_lut_mode, s.rmcm_regs.rmcm_shaper_mode_cur, + s.rmcm_regs.rmcm_shaper_lut_write_en_mask, s.rmcm_regs.rmcm_shaper_lut_write_sel, s.rmcm_regs.rmcm_shaper_offset_b, + s.rmcm_regs.rmcm_shaper_scale_b, s.rmcm_regs.rmcm_shaper_rama_exp_region_start_b, s.rmcm_regs.rmcm_shaper_rama_exp_region_start_seg_b, + s.rmcm_regs.rmcm_shaper_rama_exp_region_end_b, s.rmcm_regs.rmcm_shaper_rama_exp_region_end_base_b, s.rmcm_regs.rmcm_shaper_mem_pwr_state, + s.rmcm_regs.rmcm_shaper_mem_pwr_force, s.rmcm_regs.rmcm_shaper_mem_pwr_dis, s.rmcm_regs.rmcm_shaper_mem_pwr_mode); + } } void dcn10_log_hw_state(struct dc *dc, @@ -3253,7 +3347,7 @@ void dcn10_prepare_bandwidth( context, false); - dc->wm_optimized_required = hubbub->funcs->program_watermarks(hubbub, + dc->optimized_required = hubbub->funcs->program_watermarks(hubbub, &context->bw_ctx.bw.dcn.watermarks, dc->res_pool->ref_clocks.dchub_ref_clock_inKhz / 1000, true); @@ -3569,6 +3663,8 @@ void dcn10_set_cursor_position(struct pipe_ctx *pipe_ctx) int y_plane = pipe_ctx->plane_state->dst_rect.y; int x_pos = pos_cpy.x; int y_pos = pos_cpy.y; + int clip_x = pipe_ctx->plane_state->clip_rect.x; + int clip_width = pipe_ctx->plane_state->clip_rect.width; if ((pipe_ctx->top_pipe != NULL) || (pipe_ctx->bottom_pipe != NULL)) { if ((pipe_ctx->plane_state->src_rect.width != pipe_ctx->plane_res.scl_data.viewport.width) || @@ -3587,7 +3683,7 @@ 
void dcn10_set_cursor_position(struct pipe_ctx *pipe_ctx) */ /** - * Translate cursor from stream space to plane space. + * Translate cursor and clip offset from stream space to plane space. * * If the cursor is scaled then we need to scale the position * to be in the approximately correct place. We can't do anything @@ -3604,6 +3700,10 @@ void dcn10_set_cursor_position(struct pipe_ctx *pipe_ctx) pipe_ctx->plane_state->dst_rect.width; y_pos = (y_pos - y_plane) * pipe_ctx->plane_state->src_rect.height / pipe_ctx->plane_state->dst_rect.height; + clip_x = (clip_x - x_plane) * pipe_ctx->plane_state->src_rect.width / + pipe_ctx->plane_state->dst_rect.width; + clip_width = clip_width * pipe_ctx->plane_state->src_rect.width / + pipe_ctx->plane_state->dst_rect.width; } /** @@ -3650,30 +3750,18 @@ void dcn10_set_cursor_position(struct pipe_ctx *pipe_ctx) if (param.rotation == ROTATION_ANGLE_0) { - int viewport_width = - pipe_ctx->plane_res.scl_data.viewport.width; - int viewport_x = - pipe_ctx->plane_res.scl_data.viewport.x; if (param.mirror) { - if (pipe_split_on || odm_combine_on) { - if (pos_cpy.x >= viewport_width + viewport_x) { - pos_cpy.x = 2 * viewport_width - - pos_cpy.x + 2 * viewport_x; - } else { - uint32_t temp_x = pos_cpy.x; - - pos_cpy.x = 2 * viewport_x - pos_cpy.x; - if (temp_x >= viewport_x + - (int)hubp->curs_attr.width || pos_cpy.x - <= (int)hubp->curs_attr.width + - pipe_ctx->plane_state->src_rect.x) { - pos_cpy.x = 2 * viewport_width - temp_x; - } - } - } else { - pos_cpy.x = viewport_width - pos_cpy.x + 2 * viewport_x; - } + /* + * The plane is split into multiple viewports. + * The combination of all viewports span the + * entirety of the clip rect. + * + * For no pipe_split, viewport_width is represents + * the full width of the clip_rect, so we can just + * mirror it. + */ + pos_cpy.x = clip_width - pos_cpy.x + 2 * clip_x; } } // Swap axis and mirror horizontally @@ -3743,30 +3831,17 @@ void dcn10_set_cursor_position(struct pipe_ctx *pipe_ctx) } // Mirror horizontally and vertically else if (param.rotation == ROTATION_ANGLE_180) { - int viewport_width = - pipe_ctx->plane_res.scl_data.viewport.width; - int viewport_x = - pipe_ctx->plane_res.scl_data.viewport.x; - if (!param.mirror) { - if (pipe_split_on || odm_combine_on) { - if (pos_cpy.x >= viewport_width + viewport_x) { - pos_cpy.x = 2 * viewport_width - - pos_cpy.x + 2 * viewport_x; - } else { - uint32_t temp_x = pos_cpy.x; - - pos_cpy.x = 2 * viewport_x - pos_cpy.x; - if (temp_x >= viewport_x + - (int)hubp->curs_attr.width || pos_cpy.x - <= (int)hubp->curs_attr.width + - pipe_ctx->plane_state->src_rect.x) { - pos_cpy.x = temp_x + viewport_width; - } - } - } else { - pos_cpy.x = viewport_width - pos_cpy.x + 2 * viewport_x; - } + /* + * The plane is split into multiple viewports. + * The combination of all viewports span the + * entirety of the clip rect. + * + * For no pipe_split, viewport_width is represents + * the full width of the clip_rect, so we can just + * mirror it. 
+ */ + pos_cpy.x = clip_width - pos_cpy.x + 2 * clip_x; } /** diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c index c277df12c817..9477c9f9e196 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c @@ -54,7 +54,7 @@ #include "dpcd_defs.h" #include "inc/link_enc_cfg.h" #include "link_hwss.h" -#include "link.h" +#include "link_service.h" #include "dc_state_priv.h" #define DC_LOGGER \ @@ -283,14 +283,13 @@ void dcn20_setup_gsl_group_as_lock( } /* at this point we want to program whether it's to enable or disable */ - if (pipe_ctx->stream_res.tg->funcs->set_gsl != NULL && - pipe_ctx->stream_res.tg->funcs->set_gsl_source_select != NULL) { + if (pipe_ctx->stream_res.tg->funcs->set_gsl != NULL) { pipe_ctx->stream_res.tg->funcs->set_gsl( pipe_ctx->stream_res.tg, &gsl); - - pipe_ctx->stream_res.tg->funcs->set_gsl_source_select( - pipe_ctx->stream_res.tg, group_idx, enable ? 4 : 0); + if (pipe_ctx->stream_res.tg->funcs->set_gsl_source_select != NULL) + pipe_ctx->stream_res.tg->funcs->set_gsl_source_select( + pipe_ctx->stream_res.tg, group_idx, enable ? 4 : 0); } else BREAK_TO_DEBUGGER(); } @@ -956,7 +955,7 @@ enum dc_status dcn20_enable_stream_timing( return DC_ERROR_UNEXPECTED; } - hws->funcs.wait_for_blank_complete(pipe_ctx->stream_res.opp); + udelay(stream->timing.v_total * (stream->timing.h_total * 10000u / stream->timing.pix_clk_100hz)); params.vertical_total_min = stream->adjust.v_total_min; params.vertical_total_max = stream->adjust.v_total_max; @@ -1971,14 +1970,6 @@ static void dcn20_program_pipe( pipe_ctx->plane_state->update_flags.bits.hdr_mult)) hws->funcs.set_hdr_multiplier(pipe_ctx); - if (hws->funcs.populate_mcm_luts) { - if (pipe_ctx->plane_state) { - hws->funcs.populate_mcm_luts(dc, pipe_ctx, pipe_ctx->plane_state->mcm_luts, - pipe_ctx->plane_state->lut_bank_a); - pipe_ctx->plane_state->lut_bank_a = !pipe_ctx->plane_state->lut_bank_a; - } - } - if (pipe_ctx->plane_state && (pipe_ctx->plane_state->update_flags.bits.in_transfer_func_change || pipe_ctx->plane_state->update_flags.bits.gamma_change || @@ -1991,10 +1982,8 @@ static void dcn20_program_pipe( * updating on slave planes */ if (pipe_ctx->update_flags.bits.enable || - pipe_ctx->update_flags.bits.plane_changed || - pipe_ctx->stream->update_flags.bits.out_tf || - (pipe_ctx->plane_state && - pipe_ctx->plane_state->update_flags.bits.output_tf_change)) + pipe_ctx->update_flags.bits.plane_changed || + pipe_ctx->stream->update_flags.bits.out_tf) hws->funcs.set_output_transfer_func(dc, pipe_ctx, pipe_ctx->stream); /* If the pipe has been enabled or has a different opp, we @@ -2399,10 +2388,10 @@ void dcn20_prepare_bandwidth( } /* program dchubbub watermarks: - * For assigning wm_optimized_required, use |= operator since we don't want + * For assigning optimized_required, use |= operator since we don't want * to clear the value if the optimize has not happened yet */ - dc->wm_optimized_required |= hubbub->funcs->program_watermarks(hubbub, + dc->optimized_required |= hubbub->funcs->program_watermarks(hubbub, &context->bw_ctx.bw.dcn.watermarks, dc->res_pool->ref_clocks.dchub_ref_clock_inKhz / 1000, false); @@ -2415,10 +2404,10 @@ void dcn20_prepare_bandwidth( if (hubbub->funcs->program_compbuf_size) { if (context->bw_ctx.dml.ip.min_comp_buffer_size_kbytes) { compbuf_size_kb = context->bw_ctx.dml.ip.min_comp_buffer_size_kbytes; - dc->wm_optimized_required |= (compbuf_size_kb != 
dc->current_state->bw_ctx.dml.ip.min_comp_buffer_size_kbytes); + dc->optimized_required |= (compbuf_size_kb != dc->current_state->bw_ctx.dml.ip.min_comp_buffer_size_kbytes); } else { compbuf_size_kb = context->bw_ctx.bw.dcn.compbuf_size_kb; - dc->wm_optimized_required |= (compbuf_size_kb != dc->current_state->bw_ctx.bw.dcn.compbuf_size_kb); + dc->optimized_required |= (compbuf_size_kb != dc->current_state->bw_ctx.bw.dcn.compbuf_size_kb); } hubbub->funcs->program_compbuf_size(hubbub, compbuf_size_kb, false); @@ -2492,7 +2481,7 @@ bool dcn20_update_bandwidth( struct dce_hwseq *hws = dc->hwseq; /* recalculate DML parameters */ - if (dc->res_pool->funcs->validate_bandwidth(dc, context, false) != DC_OK) + if (dc->res_pool->funcs->validate_bandwidth(dc, context, DC_VALIDATE_MODE_AND_PROGRAMMING) != DC_OK) return false; /* apply updated bandwidth parameters */ @@ -2816,6 +2805,8 @@ void dcn20_reset_back_end_for_pipe( { struct dc_link *link = pipe_ctx->stream->link; const struct link_hwss *link_hwss = get_link_hwss(link, &pipe_ctx->link_res); + struct dccg *dccg = dc->res_pool->dccg; + struct dtbclk_dto_params dto_params = {0}; DC_LOGGER_INIT(dc->ctx->logger); if (pipe_ctx->stream_res.stream_enc == NULL) { @@ -2876,6 +2867,13 @@ void dcn20_reset_back_end_for_pipe( &pipe_ctx->link_res, pipe_ctx->stream->signal); link->phy_state.symclk_state = SYMCLK_OFF_TX_OFF; } + if (dc->link_srv->dp_is_128b_132b_signal(pipe_ctx) && dccg + && dc->ctx->dce_version >= DCN_VERSION_3_5) { + dto_params.otg_inst = pipe_ctx->stream_res.tg->inst; + dto_params.timing = &pipe_ctx->stream->timing; + if (dccg && dccg->funcs->set_dtbclk_dto) + dccg->funcs->set_dtbclk_dto(dccg, &dto_params); + } } /* @@ -3054,6 +3052,8 @@ void dcn20_enable_stream(struct pipe_ctx *pipe_ctx) link_enc->transmitter - TRANSMITTER_UNIPHY_A); } + link_hwss->setup_stream_attribute(pipe_ctx); + if (dc->res_pool->dccg->funcs->set_pixel_rate_div) dc->res_pool->dccg->funcs->set_pixel_rate_div( dc->res_pool->dccg, @@ -3129,7 +3129,8 @@ void dcn20_fpga_init_hw(struct dc *dc) res_pool->dccg->funcs->dccg_init(res_pool->dccg); //Enable ability to power gate / don't force power on permanently - hws->funcs.enable_power_gating_plane(hws, true); + if (hws->funcs.enable_power_gating_plane) + hws->funcs.enable_power_gating_plane(hws, true); // Specific to FPGA dccg and registers REG_WRITE(RBBMIF_TIMEOUT_DIS, 0xFFFFFFFF); diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn21/dcn21_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn21/dcn21_hwseq.c index 61efb15572ff..e2269211553c 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn21/dcn21_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn21/dcn21_hwseq.c @@ -35,7 +35,7 @@ #include "hw/clk_mgr.h" #include "dc_dmub_srv.h" #include "abm.h" -#include "link.h" +#include "link_service.h" #define DC_LOGGER_INIT(logger) diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn30/dcn30_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn30/dcn30_hwseq.c index 37a239219dfe..e47ed5571dfd 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn30/dcn30_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn30/dcn30_hwseq.c @@ -50,7 +50,7 @@ #include "dpcd_defs.h" #include "dcn20/dcn20_hwseq.h" #include "dcn30/dcn30_resource.h" -#include "link.h" +#include "link_service.h" #include "dc_state_priv.h" @@ -1228,3 +1228,51 @@ void dcn30_wait_for_all_pending_updates(const struct pipe_ctx *pipe_ctx) } } } + +void dcn30_get_underflow_debug_data(const struct dc *dc, + struct timing_generator *tg, + struct dc_underflow_debug_data *out_data) +{ + 
struct hubbub *hubbub = dc->res_pool->hubbub; + + if (tg) { + uint32_t v_blank_start = 0, v_blank_end = 0; + + out_data->otg_inst = tg->inst; + + tg->funcs->get_scanoutpos(tg, + &v_blank_start, + &v_blank_end, + &out_data->h_position, + &out_data->v_position); + + out_data->otg_frame_count = tg->funcs->get_frame_count(tg); + + out_data->otg_underflow = tg->funcs->is_optc_underflow_occurred(tg); + } + + for (int i = 0; i < MAX_PIPES; i++) { + struct hubp *hubp = dc->res_pool->hubps[i]; + + if (hubp) { + if (hubp->funcs->hubp_get_underflow_status) + out_data->hubps[i].hubp_underflow = hubp->funcs->hubp_get_underflow_status(hubp); + + if (hubp->funcs->hubp_in_blank) + out_data->hubps[i].hubp_in_blank = hubp->funcs->hubp_in_blank(hubp); + + if (hubp->funcs->hubp_get_current_read_line) + out_data->hubps[i].hubp_readline = hubp->funcs->hubp_get_current_read_line(hubp); + + if (hubp->funcs->hubp_get_det_config_error) + out_data->hubps[i].det_config_error = hubp->funcs->hubp_get_det_config_error(hubp); + } + } + + if (hubbub->funcs->get_det_sizes) + hubbub->funcs->get_det_sizes(hubbub, out_data->curr_det_sizes, out_data->target_det_sizes); + + if (hubbub->funcs->compbuf_config_error) + out_data->compbuf_config_error = hubbub->funcs->compbuf_config_error(hubbub); + +} diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn30/dcn30_hwseq.h b/drivers/gpu/drm/amd/display/dc/hwss/dcn30/dcn30_hwseq.h index 4b90b781c4f2..40afbbfb5b9c 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn30/dcn30_hwseq.h +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn30/dcn30_hwseq.h @@ -29,6 +29,7 @@ #include "hw_sequencer_private.h" struct dc; +struct dc_underflow_debug_data; void dcn30_init_hw(struct dc *dc); void dcn30_program_all_writeback_pipes_in_tree( @@ -98,4 +99,8 @@ void dcn30_prepare_bandwidth(struct dc *dc, void dcn30_wait_for_all_pending_updates(const struct pipe_ctx *pipe_ctx); +void dcn30_get_underflow_debug_data(const struct dc *dc, + struct timing_generator *tg, + struct dc_underflow_debug_data *out_data); + #endif /* __DC_HWSS_DCN30_H__ */ diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn30/dcn30_init.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn30/dcn30_init.c index 2ac5d54d1626..d7ff55669bac 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn30/dcn30_init.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn30/dcn30_init.c @@ -110,6 +110,7 @@ static const struct hw_sequencer_funcs dcn30_funcs = { .update_visual_confirm_color = dcn10_update_visual_confirm_color, .is_abm_supported = dcn21_is_abm_supported, .wait_for_all_pending_updates = dcn30_wait_for_all_pending_updates, + .get_underflow_debug_data = dcn30_get_underflow_debug_data, }; static const struct hwseq_private_funcs dcn30_private_funcs = { diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn31/dcn31_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn31/dcn31_hwseq.c index 5ba3999991b0..b822f2dffff0 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn31/dcn31_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn31/dcn31_hwseq.c @@ -45,7 +45,7 @@ #include "link_hwss.h" #include "dpcd_defs.h" #include "dce/dmub_outbox.h" -#include "link.h" +#include "link_service.h" #include "dcn10/dcn10_hwseq.h" #include "dcn21/dcn21_hwseq.h" #include "inc/link_enc_cfg.h" @@ -562,6 +562,19 @@ static void dcn31_reset_back_end_for_pipe( else if (pipe_ctx->stream_res.audio) dc->hwss.disable_audio_stream(pipe_ctx); + /* Temporary workaround to perform DSC programming ahead of pipe reset + * for smartmux/SPRS + * TODO: Remove SmartMux/SPRS checks once movement of DSC programming 
is generalized + */ + if (pipe_ctx->stream->timing.flags.DSC) { + if ((pipe_ctx->stream->signal == SIGNAL_TYPE_EDP && + ((link->dc->config.smart_mux_version && link->dc->is_switch_in_progress_dest) + || link->is_dds || link->skip_implict_edp_power_control)) && + (dc_is_dp_signal(pipe_ctx->stream->signal) || + dc_is_virtual_signal(pipe_ctx->stream->signal))) + dc->link_srv->set_dsc_enable(pipe_ctx, false); + } + /* free acquired resources */ if (pipe_ctx->stream_res.audio) { /*disable az_endpoint*/ diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn31/dcn31_init.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn31/dcn31_init.c index 556f4fe57eda..5a6a459da224 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn31/dcn31_init.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn31/dcn31_init.c @@ -112,6 +112,7 @@ static const struct hw_sequencer_funcs dcn31_funcs = { .exit_optimized_pwr_state = dcn21_exit_optimized_pwr_state, .update_visual_confirm_color = dcn10_update_visual_confirm_color, .setup_hpo_hw_control = dcn31_setup_hpo_hw_control, + .get_underflow_debug_data = dcn30_get_underflow_debug_data, }; static const struct hwseq_private_funcs dcn31_private_funcs = { diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn314/dcn314_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn314/dcn314_hwseq.c index e68f21fd5f0f..f925f669f2a4 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn314/dcn314_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn314/dcn314_hwseq.c @@ -46,7 +46,7 @@ #include "link_hwss.h" #include "dpcd_defs.h" #include "dce/dmub_outbox.h" -#include "link.h" +#include "link_service.h" #include "dcn10/dcn10_hwseq.h" #include "inc/link_enc_cfg.h" #include "dcn30/dcn30_vpg.h" @@ -528,3 +528,75 @@ void dcn314_disable_link_output(struct dc_link *link, apply_symclk_on_tx_off_wa(link); } + +/** + * dcn314_dpp_pg_control - DPP power gate control. + * + * @hws: dce_hwseq reference. + * @dpp_inst: DPP instance reference. + * @power_on: true if we want to enable power gate, false otherwise. + * + * Enable or disable power gate in the specific DPP instance. + * If power gating is disabled, will force disable cursor in the DPP instance. + */ +void dcn314_dpp_pg_control( + struct dce_hwseq *hws, + unsigned int dpp_inst, + bool power_on) +{ + uint32_t power_gate = power_on ? 0 : 1; + uint32_t pwr_status = power_on ? 
0 : 2; + + + if (hws->ctx->dc->debug.disable_dpp_power_gate) { + /* Workaround for DCN314 with disabled power gating */ + if (!power_on) { + + /* Force disable cursor if power gating is disabled */ + struct dpp *dpp = hws->ctx->dc->res_pool->dpps[dpp_inst]; + if (dpp && dpp->funcs->dpp_force_disable_cursor) + dpp->funcs->dpp_force_disable_cursor(dpp); + } + return; + } + if (REG(DOMAIN1_PG_CONFIG) == 0) + return; + + switch (dpp_inst) { + case 0: /* DPP0 */ + REG_UPDATE(DOMAIN1_PG_CONFIG, + DOMAIN1_POWER_GATE, power_gate); + + REG_WAIT(DOMAIN1_PG_STATUS, + DOMAIN1_PGFSM_PWR_STATUS, pwr_status, + 1, 1000); + break; + case 1: /* DPP1 */ + REG_UPDATE(DOMAIN3_PG_CONFIG, + DOMAIN3_POWER_GATE, power_gate); + + REG_WAIT(DOMAIN3_PG_STATUS, + DOMAIN3_PGFSM_PWR_STATUS, pwr_status, + 1, 1000); + break; + case 2: /* DPP2 */ + REG_UPDATE(DOMAIN5_PG_CONFIG, + DOMAIN5_POWER_GATE, power_gate); + + REG_WAIT(DOMAIN5_PG_STATUS, + DOMAIN5_PGFSM_PWR_STATUS, pwr_status, + 1, 1000); + break; + case 3: /* DPP3 */ + REG_UPDATE(DOMAIN7_PG_CONFIG, + DOMAIN7_POWER_GATE, power_gate); + + REG_WAIT(DOMAIN7_PG_STATUS, + DOMAIN7_PGFSM_PWR_STATUS, pwr_status, + 1, 1000); + break; + default: + BREAK_TO_DEBUGGER(); + break; + } +} diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn314/dcn314_hwseq.h b/drivers/gpu/drm/amd/display/dc/hwss/dcn314/dcn314_hwseq.h index 2305ad282f21..6c072d0274ea 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn314/dcn314_hwseq.h +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn314/dcn314_hwseq.h @@ -47,4 +47,6 @@ void dcn314_dpp_root_clock_control(struct dce_hwseq *hws, unsigned int dpp_inst, void dcn314_disable_link_output(struct dc_link *link, const struct link_resource *link_res, enum signal_type signal); +void dcn314_dpp_pg_control(struct dce_hwseq *hws, unsigned int dpp_inst, bool power_on); + #endif /* __DC_HWSS_DCN314_H__ */ diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn314/dcn314_init.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn314/dcn314_init.c index f5112742edf9..79faab1125d4 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn314/dcn314_init.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn314/dcn314_init.c @@ -115,6 +115,7 @@ static const struct hw_sequencer_funcs dcn314_funcs = { .update_visual_confirm_color = dcn10_update_visual_confirm_color, .calculate_pix_rate_divider = dcn314_calculate_pix_rate_divider, .setup_hpo_hw_control = dcn31_setup_hpo_hw_control, + .get_underflow_debug_data = dcn30_get_underflow_debug_data, }; static const struct hwseq_private_funcs dcn314_private_funcs = { @@ -141,6 +142,7 @@ static const struct hwseq_private_funcs dcn314_private_funcs = { .enable_power_gating_plane = dcn314_enable_power_gating_plane, .dpp_root_clock_control = dcn314_dpp_root_clock_control, .hubp_pg_control = dcn31_hubp_pg_control, + .dpp_pg_control = dcn314_dpp_pg_control, .program_all_writeback_pipes_in_tree = dcn30_program_all_writeback_pipes_in_tree, .update_odm = dcn314_update_odm, .dsc_pg_control = dcn314_dsc_pg_control, diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c index a0b05b9ef660..f39292952702 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c @@ -49,7 +49,7 @@ #include "dcn20/dcn20_optc.h" #include "dce/dmub_hw_lock_mgr.h" #include "dcn32/dcn32_resource.h" -#include "link.h" +#include "link_service.h" #include "../dcn20/dcn20_hwseq.h" #include "dc_state_priv.h" @@ -1052,7 +1052,7 @@ void 
dcn32_update_dsc_on_stream(struct pipe_ctx *pipe_ctx, bool enable) } /* Enable DSC hw block */ - dsc_cfg.pic_width = (stream->timing.h_addressable + pipe_ctx->hblank_borrow + + dsc_cfg.pic_width = (stream->timing.h_addressable + pipe_ctx->dsc_padding_params.dsc_hactive_padding + stream->timing.h_border_left + stream->timing.h_border_right) / opp_cnt; dsc_cfg.pic_height = stream->timing.v_addressable + stream->timing.v_border_top + stream->timing.v_border_bottom; dsc_cfg.pixel_encoding = stream->timing.pixel_encoding; @@ -1063,15 +1063,17 @@ void dcn32_update_dsc_on_stream(struct pipe_ctx *pipe_ctx, bool enable) dsc_cfg.dc_dsc_cfg.num_slices_h /= opp_cnt; if (should_use_dto_dscclk) - dccg->funcs->set_dto_dscclk(dccg, dsc->inst); + dccg->funcs->set_dto_dscclk(dccg, dsc->inst, dsc_cfg.dc_dsc_cfg.num_slices_h); dsc->funcs->dsc_set_config(dsc, &dsc_cfg, &dsc_optc_cfg); dsc->funcs->dsc_enable(dsc, pipe_ctx->stream_res.opp->inst); for (odm_pipe = pipe_ctx->next_odm_pipe; odm_pipe; odm_pipe = odm_pipe->next_odm_pipe) { struct display_stream_compressor *odm_dsc = odm_pipe->stream_res.dsc; ASSERT(odm_dsc); + if (!odm_dsc) + continue; if (should_use_dto_dscclk) - dccg->funcs->set_dto_dscclk(dccg, odm_dsc->inst); + dccg->funcs->set_dto_dscclk(dccg, odm_dsc->inst, dsc_cfg.dc_dsc_cfg.num_slices_h); odm_dsc->funcs->dsc_set_config(odm_dsc, &dsc_cfg, &dsc_optc_cfg); odm_dsc->funcs->dsc_enable(odm_dsc, odm_pipe->stream_res.opp->inst); } diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_init.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_init.c index b971356d30b1..c19ef075c882 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_init.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_init.c @@ -121,6 +121,7 @@ static const struct hw_sequencer_funcs dcn32_funcs = { .calculate_pix_rate_divider = dcn32_calculate_pix_rate_divider, .program_outstanding_updates = dcn32_program_outstanding_updates, .wait_for_all_pending_updates = dcn30_wait_for_all_pending_updates, + .get_underflow_debug_data = dcn30_get_underflow_debug_data, }; static const struct hwseq_private_funcs dcn32_private_funcs = { diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c index c814d957305a..05011061822c 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c @@ -46,7 +46,7 @@ #include "link_hwss.h" #include "dpcd_defs.h" #include "dce/dmub_outbox.h" -#include "link.h" +#include "link_service.h" #include "dcn10/dcn10_hwseq.h" #include "inc/link_enc_cfg.h" #include "dcn30/dcn30_vpg.h" @@ -113,6 +113,14 @@ static void enable_memory_low_power(struct dc *dc) } #endif +static void print_pg_status(struct dc *dc, const char *debug_func, const char *debug_log) +{ + if (dc->debug.enable_pg_cntl_debug_logs && dc->res_pool->pg_cntl) { + if (dc->res_pool->pg_cntl->funcs->print_pg_status) + dc->res_pool->pg_cntl->funcs->print_pg_status(dc->res_pool->pg_cntl, debug_func, debug_log); + } +} + void dcn35_set_dmu_fgcg(struct dce_hwseq *hws, bool enable) { REG_UPDATE_3(DMU_CLK_CNTL, @@ -137,6 +145,8 @@ void dcn35_init_hw(struct dc *dc) uint32_t user_level = MAX_BACKLIGHT_LEVEL; int i; + print_pg_status(dc, __func__, ": start"); + if (dc->clk_mgr && dc->clk_mgr->funcs->init_clocks) dc->clk_mgr->funcs->init_clocks(dc->clk_mgr); @@ -200,10 +210,7 @@ void dcn35_init_hw(struct dc *dc) /* we want to turn off all dp displays before doing detection */ dc->link_srv->blank_all_dp_displays(dc); 
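(Aside on the print_pg_status() helper introduced above: like the hubp_get_* hooks consumed by dcn30_get_underflow_debug_data() earlier in this series, it only fires when a debug flag is set and the optional function pointer is populated. A self-contained sketch of that guard pattern, using hypothetical names rather than the driver's types:)

    #include <stdbool.h>
    #include <stddef.h>

    struct example_funcs {
            /* optional hook; may be NULL on hardware that lacks it */
            void (*print_status)(const char *who, const char *what);
    };

    struct example_ctx {
            bool debug_logs_enabled;
            const struct example_funcs *funcs;
    };

    /* Call the optional hook only when the debug flag is set and the
     * hook is actually implemented. */
    static void example_print_status(const struct example_ctx *ctx,
                                     const char *who, const char *what)
    {
            if (ctx->debug_logs_enabled && ctx->funcs && ctx->funcs->print_status)
                    ctx->funcs->print_status(who, what);
    }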
-/* - if (hws->funcs.enable_power_gating_plane) - hws->funcs.enable_power_gating_plane(dc->hwseq, true); -*/ + if (res_pool->hubbub && res_pool->hubbub->funcs->dchubbub_init) res_pool->hubbub->funcs->dchubbub_init(dc->res_pool->hubbub); /* If taking control over from VBIOS, we may want to optimize our first @@ -236,6 +243,8 @@ void dcn35_init_hw(struct dc *dc) } hws->funcs.init_pipes(dc, dc->current_state); + print_pg_status(dc, __func__, ": after init_pipes"); + if (dc->res_pool->hubbub->funcs->allow_self_refresh_control && !dc->res_pool->hubbub->ctx->dc->debug.disable_stutter) dc->res_pool->hubbub->funcs->allow_self_refresh_control(dc->res_pool->hubbub, @@ -312,6 +321,7 @@ void dcn35_init_hw(struct dc *dc) if (dc->res_pool->pg_cntl->funcs->init_pg_status) dc->res_pool->pg_cntl->funcs->init_pg_status(dc->res_pool->pg_cntl); } + print_pg_status(dc, __func__, ": after init_pg_status"); } static void update_dsc_on_stream(struct pipe_ctx *pipe_ctx, bool enable) @@ -500,97 +510,6 @@ void dcn35_physymclk_root_clock_control(struct dce_hwseq *hws, unsigned int phy_ } } -void dcn35_dsc_pg_control( - struct dce_hwseq *hws, - unsigned int dsc_inst, - bool power_on) -{ - uint32_t power_gate = power_on ? 0 : 1; - uint32_t pwr_status = power_on ? 0 : 2; - uint32_t org_ip_request_cntl = 0; - - if (hws->ctx->dc->debug.disable_dsc_power_gate) - return; - if (hws->ctx->dc->debug.ignore_pg) - return; - REG_GET(DC_IP_REQUEST_CNTL, IP_REQUEST_EN, &org_ip_request_cntl); - if (org_ip_request_cntl == 0) - REG_SET(DC_IP_REQUEST_CNTL, 0, IP_REQUEST_EN, 1); - - switch (dsc_inst) { - case 0: /* DSC0 */ - REG_UPDATE(DOMAIN16_PG_CONFIG, - DOMAIN_POWER_GATE, power_gate); - - REG_WAIT(DOMAIN16_PG_STATUS, - DOMAIN_PGFSM_PWR_STATUS, pwr_status, - 1, 1000); - break; - case 1: /* DSC1 */ - REG_UPDATE(DOMAIN17_PG_CONFIG, - DOMAIN_POWER_GATE, power_gate); - - REG_WAIT(DOMAIN17_PG_STATUS, - DOMAIN_PGFSM_PWR_STATUS, pwr_status, - 1, 1000); - break; - case 2: /* DSC2 */ - REG_UPDATE(DOMAIN18_PG_CONFIG, - DOMAIN_POWER_GATE, power_gate); - - REG_WAIT(DOMAIN18_PG_STATUS, - DOMAIN_PGFSM_PWR_STATUS, pwr_status, - 1, 1000); - break; - case 3: /* DSC3 */ - REG_UPDATE(DOMAIN19_PG_CONFIG, - DOMAIN_POWER_GATE, power_gate); - - REG_WAIT(DOMAIN19_PG_STATUS, - DOMAIN_PGFSM_PWR_STATUS, pwr_status, - 1, 1000); - break; - default: - BREAK_TO_DEBUGGER(); - break; - } - - if (org_ip_request_cntl == 0) - REG_SET(DC_IP_REQUEST_CNTL, 0, IP_REQUEST_EN, 0); -} - -void dcn35_enable_power_gating_plane(struct dce_hwseq *hws, bool enable) -{ - bool force_on = true; /* disable power gating */ - uint32_t org_ip_request_cntl = 0; - - if (hws->ctx->dc->debug.disable_hubp_power_gate) - return; - if (hws->ctx->dc->debug.ignore_pg) - return; - REG_GET(DC_IP_REQUEST_CNTL, IP_REQUEST_EN, &org_ip_request_cntl); - if (org_ip_request_cntl == 0) - REG_SET(DC_IP_REQUEST_CNTL, 0, IP_REQUEST_EN, 1); - /* DCHUBP0/1/2/3/4/5 */ - REG_UPDATE(DOMAIN0_PG_CONFIG, DOMAIN_POWER_FORCEON, force_on); - REG_UPDATE(DOMAIN2_PG_CONFIG, DOMAIN_POWER_FORCEON, force_on); - /* DPP0/1/2/3/4/5 */ - REG_UPDATE(DOMAIN1_PG_CONFIG, DOMAIN_POWER_FORCEON, force_on); - REG_UPDATE(DOMAIN3_PG_CONFIG, DOMAIN_POWER_FORCEON, force_on); - - force_on = true; /* disable power gating */ - if (enable && !hws->ctx->dc->debug.disable_dsc_power_gate) - force_on = false; - - /* DCS0/1/2/3/4 */ - REG_UPDATE(DOMAIN16_PG_CONFIG, DOMAIN_POWER_FORCEON, force_on); - REG_UPDATE(DOMAIN17_PG_CONFIG, DOMAIN_POWER_FORCEON, force_on); - REG_UPDATE(DOMAIN18_PG_CONFIG, DOMAIN_POWER_FORCEON, force_on); - 
REG_UPDATE(DOMAIN19_PG_CONFIG, DOMAIN_POWER_FORCEON, force_on); - - -} - /* In headless boot cases, DIG may be turned * on which causes HW/SW discrepancies. * To avoid this, power down hardware on boot @@ -1047,6 +966,15 @@ void dcn35_calc_blocks_to_gate(struct dc *dc, struct dc_state *context, if (dc->caps.sequential_ono) { update_state->pg_pipe_res_update[PG_HUBP][pipe_ctx->stream_res.dsc->inst] = false; update_state->pg_pipe_res_update[PG_DPP][pipe_ctx->stream_res.dsc->inst] = false; + + /* All HUBP/DPP instances must be powered if the DSC inst != HUBP inst */ + if (!pipe_ctx->top_pipe && pipe_ctx->plane_res.hubp && + pipe_ctx->plane_res.hubp->inst != pipe_ctx->stream_res.dsc->inst) { + for (j = 0; j < dc->res_pool->pipe_count; ++j) { + update_state->pg_pipe_res_update[PG_HUBP][j] = false; + update_state->pg_pipe_res_update[PG_DPP][j] = false; + } + } } } @@ -1193,6 +1121,25 @@ void dcn35_calc_blocks_to_ungate(struct dc *dc, struct dc_state *context, update_state->pg_pipe_res_update[PG_HDMISTREAM][0] = true; if (dc->caps.sequential_ono) { + for (i = 0; i < dc->res_pool->pipe_count; i++) { + struct pipe_ctx *new_pipe = &context->res_ctx.pipe_ctx[i]; + + if (new_pipe->stream_res.dsc && !new_pipe->top_pipe && + update_state->pg_pipe_res_update[PG_DSC][new_pipe->stream_res.dsc->inst]) { + update_state->pg_pipe_res_update[PG_HUBP][new_pipe->stream_res.dsc->inst] = true; + update_state->pg_pipe_res_update[PG_DPP][new_pipe->stream_res.dsc->inst] = true; + + /* All HUBP/DPP instances must be powered if the DSC inst != HUBP inst */ + if (new_pipe->plane_res.hubp && + new_pipe->plane_res.hubp->inst != new_pipe->stream_res.dsc->inst) { + for (j = 0; j < dc->res_pool->pipe_count; ++j) { + update_state->pg_pipe_res_update[PG_HUBP][j] = true; + update_state->pg_pipe_res_update[PG_DPP][j] = true; + } + } + } + } + for (i = dc->res_pool->pipe_count - 1; i >= 0; i--) { if (update_state->pg_pipe_res_update[PG_HUBP][i] && update_state->pg_pipe_res_update[PG_DPP][i]) { @@ -1425,6 +1372,8 @@ void dcn35_prepare_bandwidth( } dcn20_prepare_bandwidth(dc, context); + + print_pg_status(dc, __func__, ": after rcg and power up"); } void dcn35_optimize_bandwidth( @@ -1433,6 +1382,8 @@ void dcn35_optimize_bandwidth( { struct pg_block_update pg_update_state; + print_pg_status(dc, __func__, ": before rcg and power up"); + dcn20_optimize_bandwidth(dc, context); if (dc->hwss.calc_blocks_to_gate) { @@ -1444,6 +1395,8 @@ void dcn35_optimize_bandwidth( if (dc->hwss.root_clock_control) dc->hwss.root_clock_control(dc, &pg_update_state, false); } + + print_pg_status(dc, __func__, ": after rcg and power up"); } void dcn35_set_drr(struct pipe_ctx **pipe_ctx, diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_init.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_init.c index a3ccf805bd16..f2f16a0bdb4f 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_init.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_init.c @@ -115,7 +115,6 @@ static const struct hw_sequencer_funcs dcn35_funcs = { .exit_optimized_pwr_state = dcn21_exit_optimized_pwr_state, .update_visual_confirm_color = dcn10_update_visual_confirm_color, .apply_idle_power_optimizations = dcn35_apply_idle_power_optimizations, - .update_dsc_pg = dcn32_update_dsc_pg, .calc_blocks_to_gate = dcn35_calc_blocks_to_gate, .calc_blocks_to_ungate = dcn35_calc_blocks_to_ungate, .hw_block_power_up = dcn35_hw_block_power_up, @@ -128,6 +127,7 @@ static const struct hw_sequencer_funcs dcn35_funcs = { .enable_plane = dcn20_enable_plane, .update_dchubp_dpp = 
dcn20_update_dchubp_dpp, .post_unlock_reset_opp = dcn20_post_unlock_reset_opp, + .get_underflow_debug_data = dcn30_get_underflow_debug_data, }; static const struct hwseq_private_funcs dcn35_private_funcs = { @@ -150,7 +150,6 @@ static const struct hwseq_private_funcs dcn35_private_funcs = { .plane_atomic_disable = dcn35_plane_atomic_disable, //.plane_atomic_disable = dcn20_plane_atomic_disable,/*todo*/ //.hubp_pg_control = dcn35_hubp_pg_control, - .enable_power_gating_plane = dcn35_enable_power_gating_plane, .dpp_root_clock_control = dcn35_dpp_root_clock_control, .dpstream_root_clock_control = dcn35_dpstream_root_clock_control, .physymclk_root_clock_control = dcn35_physymclk_root_clock_control, @@ -165,7 +164,6 @@ static const struct hwseq_private_funcs dcn35_private_funcs = { .calculate_dccg_k1_k2_values = dcn32_calculate_dccg_k1_k2_values, .resync_fifo_dccg_dio = dcn314_resync_fifo_dccg_dio, .is_dp_dig_pixel_rate_div_policy = dcn35_is_dp_dig_pixel_rate_div_policy, - .dsc_pg_control = dcn35_dsc_pg_control, .dsc_pg_status = dcn32_dsc_pg_status, .enable_plane = dcn35_enable_plane, .wait_for_pipe_update_if_needed = dcn10_wait_for_pipe_update_if_needed, diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn351/dcn351_init.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn351/dcn351_init.c index 58f2be2a326b..09e60158f0b5 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn351/dcn351_init.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn351/dcn351_init.c @@ -114,7 +114,6 @@ static const struct hw_sequencer_funcs dcn351_funcs = { .exit_optimized_pwr_state = dcn21_exit_optimized_pwr_state, .update_visual_confirm_color = dcn10_update_visual_confirm_color, .apply_idle_power_optimizations = dcn35_apply_idle_power_optimizations, - .update_dsc_pg = dcn32_update_dsc_pg, .calc_blocks_to_gate = dcn351_calc_blocks_to_gate, .calc_blocks_to_ungate = dcn351_calc_blocks_to_ungate, .hw_block_power_up = dcn351_hw_block_power_up, @@ -123,6 +122,7 @@ static const struct hw_sequencer_funcs dcn351_funcs = { .set_long_vtotal = dcn35_set_long_vblank, .calculate_pix_rate_divider = dcn32_calculate_pix_rate_divider, .setup_hpo_hw_control = dcn35_setup_hpo_hw_control, + .get_underflow_debug_data = dcn30_get_underflow_debug_data, }; static const struct hwseq_private_funcs dcn351_private_funcs = { @@ -145,7 +145,6 @@ static const struct hwseq_private_funcs dcn351_private_funcs = { .plane_atomic_disable = dcn35_plane_atomic_disable, //.plane_atomic_disable = dcn20_plane_atomic_disable,/*todo*/ //.hubp_pg_control = dcn35_hubp_pg_control, - .enable_power_gating_plane = dcn35_enable_power_gating_plane, .dpp_root_clock_control = dcn35_dpp_root_clock_control, .dpstream_root_clock_control = dcn35_dpstream_root_clock_control, .physymclk_root_clock_control = dcn35_physymclk_root_clock_control, @@ -159,7 +158,6 @@ static const struct hwseq_private_funcs dcn351_private_funcs = { .setup_hpo_hw_control = dcn35_setup_hpo_hw_control, .calculate_dccg_k1_k2_values = dcn32_calculate_dccg_k1_k2_values, .is_dp_dig_pixel_rate_div_policy = dcn35_is_dp_dig_pixel_rate_div_policy, - .dsc_pg_control = dcn35_dsc_pg_control, .dsc_pg_status = dcn32_dsc_pg_status, .enable_plane = dcn35_enable_plane, .wait_for_pipe_update_if_needed = dcn10_wait_for_pipe_update_if_needed, diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c index c4177a9a662f..ce3d0b45fb4c 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c +++ 
b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c @@ -2,6 +2,8 @@ // // Copyright 2024 Advanced Micro Devices, Inc. + +#include "os_types.h" #include "dm_services.h" #include "basics/dc_common.h" #include "dm_helpers.h" @@ -23,7 +25,7 @@ #include "dpcd_defs.h" #include "clk_mgr.h" #include "dsc.h" -#include "link.h" +#include "link_service.h" #include "dce/dmub_hw_lock_mgr.h" #include "dcn10/dcn10_cm_common.h" @@ -49,7 +51,7 @@ #define FN(reg_name, field_name) \ hws->shifts->field_name, hws->masks->field_name -static void dcn401_initialize_min_clocks(struct dc *dc) +void dcn401_initialize_min_clocks(struct dc *dc) { struct dc_clocks *clocks = &dc->current_state->bw_ctx.bw.dcn.clk; @@ -143,13 +145,8 @@ void dcn401_init_hw(struct dc *dc) dc->clk_mgr->funcs->init_clocks(dc->clk_mgr); // mark dcmode limits present if any clock has distinct AC and DC values from SMU - dc->caps.dcmode_power_limits_present = - (dc->clk_mgr->bw_params->clk_table.num_entries_per_clk.num_dcfclk_levels && dc->clk_mgr->bw_params->dc_mode_limit.dcfclk_mhz) || - (dc->clk_mgr->bw_params->clk_table.num_entries_per_clk.num_dispclk_levels && dc->clk_mgr->bw_params->dc_mode_limit.dispclk_mhz) || - (dc->clk_mgr->bw_params->clk_table.num_entries_per_clk.num_dtbclk_levels && dc->clk_mgr->bw_params->dc_mode_limit.dtbclk_mhz) || - (dc->clk_mgr->bw_params->clk_table.num_entries_per_clk.num_fclk_levels && dc->clk_mgr->bw_params->dc_mode_limit.fclk_mhz) || - (dc->clk_mgr->bw_params->clk_table.num_entries_per_clk.num_memclk_levels && dc->clk_mgr->bw_params->dc_mode_limit.memclk_mhz) || - (dc->clk_mgr->bw_params->clk_table.num_entries_per_clk.num_socclk_levels && dc->clk_mgr->bw_params->dc_mode_limit.socclk_mhz); + dc->caps.dcmode_power_limits_present = dc->clk_mgr->funcs->is_dc_mode_present && + dc->clk_mgr->funcs->is_dc_mode_present(dc->clk_mgr); } // Initialize the dccg @@ -203,6 +200,9 @@ void dcn401_init_hw(struct dc *dc) */ struct dc_link *link = dc->links[i]; + if (link->ep_type != DISPLAY_ENDPOINT_PHY) + continue; + link->link_enc->funcs->hw_init(link->link_enc); /* Check for enabled DIG to identify enabled display */ @@ -396,249 +396,6 @@ static void dcn401_get_mcm_lut_xable_from_pipe_ctx(struct dc *dc, struct pipe_ct } } -static void dcn401_set_mcm_location_post_blend(struct dc *dc, struct pipe_ctx *pipe_ctx, bool bPostBlend) -{ - struct mpc *mpc = dc->res_pool->mpc; - int mpcc_id = pipe_ctx->plane_res.hubp->inst; - - if (!pipe_ctx->plane_state) - return; - - mpc->funcs->set_movable_cm_location(mpc, MPCC_MOVABLE_CM_LOCATION_BEFORE, mpcc_id); - pipe_ctx->plane_state->mcm_location = (bPostBlend) ? 
- MPCC_MOVABLE_CM_LOCATION_AFTER : - MPCC_MOVABLE_CM_LOCATION_BEFORE; -} - -static void dc_get_lut_mode( - enum dc_cm2_gpu_mem_layout layout, - enum hubp_3dlut_fl_mode *mode, - enum hubp_3dlut_fl_addressing_mode *addr_mode) -{ - switch (layout) { - case DC_CM2_GPU_MEM_LAYOUT_3D_SWIZZLE_LINEAR_RGB: - *mode = hubp_3dlut_fl_mode_native_1; - *addr_mode = hubp_3dlut_fl_addressing_mode_sw_linear; - break; - case DC_CM2_GPU_MEM_LAYOUT_3D_SWIZZLE_LINEAR_BGR: - *mode = hubp_3dlut_fl_mode_native_2; - *addr_mode = hubp_3dlut_fl_addressing_mode_sw_linear; - break; - case DC_CM2_GPU_MEM_LAYOUT_1D_PACKED_LINEAR: - *mode = hubp_3dlut_fl_mode_transform; - *addr_mode = hubp_3dlut_fl_addressing_mode_simple_linear; - break; - default: - *mode = hubp_3dlut_fl_mode_disable; - *addr_mode = hubp_3dlut_fl_addressing_mode_sw_linear; - break; - } -} - -static void dc_get_lut_format( - enum dc_cm2_gpu_mem_format dc_format, - enum hubp_3dlut_fl_format *format) -{ - switch (dc_format) { - case DC_CM2_GPU_MEM_FORMAT_16161616_UNORM_12MSB: - *format = hubp_3dlut_fl_format_unorm_12msb_bitslice; - break; - case DC_CM2_GPU_MEM_FORMAT_16161616_UNORM_12LSB: - *format = hubp_3dlut_fl_format_unorm_12lsb_bitslice; - break; - case DC_CM2_GPU_MEM_FORMAT_16161616_FLOAT_FP1_5_10: - *format = hubp_3dlut_fl_format_float_fp1_5_10; - break; - } -} - -static void dc_get_lut_xbar( - enum dc_cm2_gpu_mem_pixel_component_order order, - enum hubp_3dlut_fl_crossbar_bit_slice *cr_r, - enum hubp_3dlut_fl_crossbar_bit_slice *y_g, - enum hubp_3dlut_fl_crossbar_bit_slice *cb_b) -{ - switch (order) { - case DC_CM2_GPU_MEM_PIXEL_COMPONENT_ORDER_RGBA: - *cr_r = hubp_3dlut_fl_crossbar_bit_slice_32_47; - *y_g = hubp_3dlut_fl_crossbar_bit_slice_16_31; - *cb_b = hubp_3dlut_fl_crossbar_bit_slice_0_15; - break; - case DC_CM2_GPU_MEM_PIXEL_COMPONENT_ORDER_BGRA: - *cr_r = hubp_3dlut_fl_crossbar_bit_slice_0_15; - *y_g = hubp_3dlut_fl_crossbar_bit_slice_16_31; - *cb_b = hubp_3dlut_fl_crossbar_bit_slice_32_47; - break; - } -} - -static void dc_get_lut_width( - enum dc_cm2_gpu_mem_size size, - enum hubp_3dlut_fl_width *width) -{ - switch (size) { - case DC_CM2_GPU_MEM_SIZE_333333: - *width = hubp_3dlut_fl_width_33; - break; - case DC_CM2_GPU_MEM_SIZE_171717: - *width = hubp_3dlut_fl_width_17; - break; - case DC_CM2_GPU_MEM_SIZE_TRANSFORMED: - *width = hubp_3dlut_fl_width_transformed; - break; - } -} -static bool dc_is_rmcm_3dlut_supported(struct hubp *hubp, struct mpc *mpc) -{ - if (mpc->funcs->rmcm.update_3dlut_fast_load_select && - mpc->funcs->rmcm.program_lut_read_write_control && - hubp->funcs->hubp_program_3dlut_fl_addr && - mpc->funcs->rmcm.program_bit_depth && - hubp->funcs->hubp_program_3dlut_fl_mode && - hubp->funcs->hubp_program_3dlut_fl_addressing_mode && - hubp->funcs->hubp_program_3dlut_fl_format && - hubp->funcs->hubp_update_3dlut_fl_bias_scale && - mpc->funcs->rmcm.program_bias_scale && - hubp->funcs->hubp_program_3dlut_fl_crossbar && - hubp->funcs->hubp_program_3dlut_fl_width && - mpc->funcs->rmcm.update_3dlut_fast_load_select && - mpc->funcs->rmcm.populate_lut && - mpc->funcs->rmcm.program_lut_mode && - hubp->funcs->hubp_enable_3dlut_fl && - mpc->funcs->rmcm.enable_3dlut_fl) - return true; - - return false; -} - -bool dcn401_program_rmcm_luts( - struct hubp *hubp, - struct pipe_ctx *pipe_ctx, - enum dc_cm2_transfer_func_source lut3d_src, - struct dc_cm2_func_luts *mcm_luts, - struct mpc *mpc, - bool lut_bank_a, - int mpcc_id) -{ - struct dpp *dpp_base = pipe_ctx->plane_res.dpp; - union mcm_lut_params m_lut_params; - enum MCM_LUT_XABLE 
shaper_xable, lut3d_xable = MCM_LUT_DISABLE, lut1d_xable; - enum hubp_3dlut_fl_mode mode; - enum hubp_3dlut_fl_addressing_mode addr_mode; - enum hubp_3dlut_fl_format format = 0; - enum hubp_3dlut_fl_crossbar_bit_slice crossbar_bit_slice_y_g = 0; - enum hubp_3dlut_fl_crossbar_bit_slice crossbar_bit_slice_cb_b = 0; - enum hubp_3dlut_fl_crossbar_bit_slice crossbar_bit_slice_cr_r = 0; - enum hubp_3dlut_fl_width width = 0; - struct dc *dc = hubp->ctx->dc; - - bool bypass_rmcm_3dlut = false; - bool bypass_rmcm_shaper = false; - - dcn401_get_mcm_lut_xable_from_pipe_ctx(dc, pipe_ctx, &shaper_xable, &lut3d_xable, &lut1d_xable); - - /* 3DLUT */ - switch (lut3d_src) { - case DC_CM2_TRANSFER_FUNC_SOURCE_SYSMEM: - memset(&m_lut_params, 0, sizeof(m_lut_params)); - // Don't know what to do in this case. - //case DC_CM2_TRANSFER_FUNC_SOURCE_SYSMEM: - break; - case DC_CM2_TRANSFER_FUNC_SOURCE_VIDMEM: - dc_get_lut_width(mcm_luts->lut3d_data.gpu_mem_params.size, &width); - if (!dc_is_rmcm_3dlut_supported(hubp, mpc) || - !mpc->funcs->rmcm.is_config_supported(width)) - return false; - - //0. disable fl on mpc - mpc->funcs->update_3dlut_fast_load_select(mpc, mpcc_id, 0xF); - - //1. power down the block - mpc->funcs->rmcm.power_on_shaper_3dlut(mpc, mpcc_id, false); - - //2. program RMCM - //2a. 3dlut reg programming - mpc->funcs->rmcm.program_lut_read_write_control(mpc, MCM_LUT_3DLUT, lut_bank_a, - (!bypass_rmcm_3dlut) && lut3d_xable != MCM_LUT_DISABLE, mpcc_id); - - hubp->funcs->hubp_program_3dlut_fl_addr(hubp, - mcm_luts->lut3d_data.gpu_mem_params.addr); - - mpc->funcs->rmcm.program_bit_depth(mpc, - mcm_luts->lut3d_data.gpu_mem_params.bit_depth, mpcc_id); - - // setting native or transformed mode, - dc_get_lut_mode(mcm_luts->lut3d_data.gpu_mem_params.layout, &mode, &addr_mode); - - //these program the mcm 3dlut - hubp->funcs->hubp_program_3dlut_fl_mode(hubp, mode); - - hubp->funcs->hubp_program_3dlut_fl_addressing_mode(hubp, addr_mode); - - //seems to be only for the MCM - dc_get_lut_format(mcm_luts->lut3d_data.gpu_mem_params.format_params.format, &format); - hubp->funcs->hubp_program_3dlut_fl_format(hubp, format); - - mpc->funcs->rmcm.program_bias_scale(mpc, - mcm_luts->lut3d_data.gpu_mem_params.format_params.float_params.bias, - mcm_luts->lut3d_data.gpu_mem_params.format_params.float_params.scale, - mpcc_id); - hubp->funcs->hubp_update_3dlut_fl_bias_scale(hubp, - mcm_luts->lut3d_data.gpu_mem_params.format_params.float_params.bias, - mcm_luts->lut3d_data.gpu_mem_params.format_params.float_params.scale); - - dc_get_lut_xbar( - mcm_luts->lut3d_data.gpu_mem_params.component_order, - &crossbar_bit_slice_cr_r, - &crossbar_bit_slice_y_g, - &crossbar_bit_slice_cb_b); - - hubp->funcs->hubp_program_3dlut_fl_crossbar(hubp, - crossbar_bit_slice_cr_r, - crossbar_bit_slice_y_g, - crossbar_bit_slice_cb_b); - - mpc->funcs->rmcm.program_3dlut_size(mpc, width, mpcc_id); - - mpc->funcs->update_3dlut_fast_load_select(mpc, mpcc_id, hubp->inst); - - //2b. 
shaper reg programming - memset(&m_lut_params, 0, sizeof(m_lut_params)); - - if (mcm_luts->shaper->type == TF_TYPE_HWPWL) { - m_lut_params.pwl = &mcm_luts->shaper->pwl; - } else if (mcm_luts->shaper->type == TF_TYPE_DISTRIBUTED_POINTS) { - ASSERT(false); - cm_helper_translate_curve_to_hw_format( - dc->ctx, - mcm_luts->shaper, - &dpp_base->regamma_params, true); - m_lut_params.pwl = &dpp_base->regamma_params; - } - if (m_lut_params.pwl) { - mpc->funcs->rmcm.populate_lut(mpc, m_lut_params, lut_bank_a, mpcc_id); - mpc->funcs->rmcm.program_lut_mode(mpc, !bypass_rmcm_shaper, lut_bank_a, mpcc_id); - } else { - //RMCM 3dlut won't work without its shaper - return false; - } - - //3. Select the hubp connected to this RMCM - hubp->funcs->hubp_enable_3dlut_fl(hubp, true); - mpc->funcs->rmcm.enable_3dlut_fl(mpc, true, mpcc_id); - - //4. power on the block - if (m_lut_params.pwl) - mpc->funcs->rmcm.power_on_shaper_3dlut(mpc, mpcc_id, true); - - break; - default: - return false; - } - - return true; -} - void dcn401_populate_mcm_luts(struct dc *dc, struct pipe_ctx *pipe_ctx, struct dc_cm2_func_luts mcm_luts, @@ -664,25 +421,6 @@ void dcn401_populate_mcm_luts(struct dc *dc, dcn401_get_mcm_lut_xable_from_pipe_ctx(dc, pipe_ctx, &shaper_xable, &lut3d_xable, &lut1d_xable); - //MCM - setting its location (Before/After) blender - //set to post blend (true) - dcn401_set_mcm_location_post_blend( - dc, - pipe_ctx, - mcm_luts.lut3d_data.mpc_mcm_post_blend); - - //RMCM - 3dLUT+Shaper - if (mcm_luts.lut3d_data.rmcm_3dlut_enable) { - dcn401_program_rmcm_luts( - hubp, - pipe_ctx, - lut3d_src, - &mcm_luts, - mpc, - lut_bank_a, - mpcc_id); - } - /* 1D LUT */ if (mcm_luts.lut1d_func) { memset(&m_lut_params, 0, sizeof(m_lut_params)); @@ -740,15 +478,15 @@ void dcn401_populate_mcm_luts(struct dc *dc, break; case DC_CM2_TRANSFER_FUNC_SOURCE_VIDMEM: switch (mcm_luts.lut3d_data.gpu_mem_params.size) { - case DC_CM2_GPU_MEM_SIZE_333333: - width = hubp_3dlut_fl_width_33; - break; case DC_CM2_GPU_MEM_SIZE_171717: width = hubp_3dlut_fl_width_17; break; case DC_CM2_GPU_MEM_SIZE_TRANSFORMED: width = hubp_3dlut_fl_width_transformed; break; + default: + //TODO: handle default case + break; } //check for support @@ -817,11 +555,14 @@ void dcn401_populate_mcm_luts(struct dc *dc, //navi 4x has a bug and r and blue are swapped and need to be worked around here in //TODO: need to make a method for get_xbar per asic OR do the workaround in program_crossbar for 4x - dc_get_lut_xbar( - mcm_luts.lut3d_data.gpu_mem_params.component_order, - &crossbar_bit_slice_cr_r, - &crossbar_bit_slice_y_g, - &crossbar_bit_slice_cb_b); + switch (mcm_luts.lut3d_data.gpu_mem_params.component_order) { + case DC_CM2_GPU_MEM_PIXEL_COMPONENT_ORDER_RGBA: + default: + crossbar_bit_slice_cr_r = hubp_3dlut_fl_crossbar_bit_slice_0_15; + crossbar_bit_slice_y_g = hubp_3dlut_fl_crossbar_bit_slice_16_31; + crossbar_bit_slice_cb_b = hubp_3dlut_fl_crossbar_bit_slice_32_47; + break; + } if (hubp->funcs->hubp_program_3dlut_fl_crossbar) hubp->funcs->hubp_program_3dlut_fl_crossbar(hubp, @@ -1072,9 +813,12 @@ enum dc_status dcn401_enable_stream_timing( if (dc->hwseq->funcs.PLAT_58856_wa && (!dc_is_dp_signal(stream->signal))) dc->hwseq->funcs.PLAT_58856_wa(context, pipe_ctx); - /* if we are borrowing from hblank, h_addressable needs to be adjusted */ - if (dc->debug.enable_hblank_borrow) - patched_crtc_timing.h_addressable = patched_crtc_timing.h_addressable + pipe_ctx->hblank_borrow; + /* if we are padding, h_addressable needs to be adjusted */ + if 
(dc->debug.enable_hblank_borrow) { + patched_crtc_timing.h_addressable = patched_crtc_timing.h_addressable + pipe_ctx->dsc_padding_params.dsc_hactive_padding; + patched_crtc_timing.h_total = patched_crtc_timing.h_total + pipe_ctx->dsc_padding_params.dsc_htotal_padding; + patched_crtc_timing.pix_clk_100hz = pipe_ctx->dsc_padding_params.dsc_pix_clk_100hz; + } pipe_ctx->stream_res.tg->funcs->program_timing( pipe_ctx->stream_res.tg, @@ -1227,6 +971,8 @@ void dcn401_enable_stream(struct pipe_ctx *pipe_ctx) } } + link_hwss->setup_stream_attribute(pipe_ctx); + if (dc->res_pool->dccg->funcs->set_pixel_rate_div) { dc->res_pool->dccg->funcs->set_pixel_rate_div( dc->res_pool->dccg, @@ -1640,22 +1386,22 @@ void dcn401_prepare_bandwidth(struct dc *dc, false); /* program dchubbub watermarks: - * For assigning wm_optimized_required, use |= operator since we don't want + * For assigning optimized_required, use |= operator since we don't want * to clear the value if the optimize has not happened yet */ - dc->wm_optimized_required |= hubbub->funcs->program_watermarks(hubbub, + dc->optimized_required |= hubbub->funcs->program_watermarks(hubbub, &context->bw_ctx.bw.dcn.watermarks, dc->res_pool->ref_clocks.dchub_ref_clock_inKhz / 1000, false); /* update timeout thresholds */ if (hubbub->funcs->program_arbiter) { - dc->wm_optimized_required |= hubbub->funcs->program_arbiter(hubbub, &context->bw_ctx.bw.dcn.arb_regs, false); + dc->optimized_required |= hubbub->funcs->program_arbiter(hubbub, &context->bw_ctx.bw.dcn.arb_regs, false); } /* decrease compbuf size */ if (hubbub->funcs->program_compbuf_segments) { compbuf_size = context->bw_ctx.bw.dcn.arb_regs.compbuf_size; - dc->wm_optimized_required |= (compbuf_size != dc->current_state->bw_ctx.bw.dcn.arb_regs.compbuf_size); + dc->optimized_required |= (compbuf_size != dc->current_state->bw_ctx.bw.dcn.arb_regs.compbuf_size); hubbub->funcs->program_compbuf_segments(hubbub, compbuf_size, false); } @@ -1881,20 +1627,28 @@ void dcn401_unblank_stream(struct pipe_ctx *pipe_ctx, void dcn401_hardware_release(struct dc *dc) { - dc_dmub_srv_fams2_update_config(dc, dc->current_state, false); - - /* If pstate unsupported, or still supported - * by firmware, force it supported by dcn - */ - if (dc->current_state) { - if ((!dc->clk_mgr->clks.p_state_change_support || - dc->current_state->bw_ctx.bw.dcn.fams2_global_config.features.bits.enable) && - dc->res_pool->hubbub->funcs->force_pstate_change_control) - dc->res_pool->hubbub->funcs->force_pstate_change_control( - dc->res_pool->hubbub, true, true); - - dc->current_state->bw_ctx.bw.dcn.clk.p_state_change_support = true; - dc->clk_mgr->funcs->update_clocks(dc->clk_mgr, dc->current_state, true); + if (!dc->debug.disable_force_pstate_allow_on_hw_release) { + dc_dmub_srv_fams2_update_config(dc, dc->current_state, false); + + /* If pstate unsupported, or still supported + * by firmware, force it supported by dcn + */ + if (dc->current_state) { + if ((!dc->clk_mgr->clks.p_state_change_support || + dc->current_state->bw_ctx.bw.dcn.fams2_global_config.features.bits.enable) && + dc->res_pool->hubbub->funcs->force_pstate_change_control) + dc->res_pool->hubbub->funcs->force_pstate_change_control( + dc->res_pool->hubbub, true, true); + + dc->current_state->bw_ctx.bw.dcn.clk.p_state_change_support = true; + dc->clk_mgr->funcs->update_clocks(dc->clk_mgr, dc->current_state, true); + } + } else { + if (dc->current_state) { + dc->clk_mgr->clks.p_state_change_support = false; + dc->clk_mgr->funcs->update_clocks(dc->clk_mgr, dc->current_state, true); 
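[Editor's note — not part of the patch] The timing hunk above replaces the single hblank-borrow adjustment with the three dsc_padding_params fields (dsc_hactive_padding, dsc_htotal_padding, dsc_pix_clk_100hz). The refresh rate is preserved only if the pixel clock grows in step with the padded h_total; the helper below is a minimal illustrative sketch of that relationship under that assumption — the function name and the derivation are the editor's, not the driver's.

    /* sketch: scale a 100 Hz-unit pixel clock so the padded h_total keeps
     * the original refresh rate; 64-bit intermediate avoids overflow
     */
    static uint32_t padded_pix_clk_100hz(uint32_t pix_clk_100hz,
                                         uint32_t h_total,
                                         uint32_t dsc_htotal_padding)
    {
            return (uint32_t)(((uint64_t)pix_clk_100hz *
                               (h_total + dsc_htotal_padding)) / h_total);
    }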
+ } + dc_dmub_srv_fams2_update_config(dc, dc->current_state, false); } } @@ -2269,14 +2023,6 @@ void dcn401_program_pipe( pipe_ctx->plane_state->update_flags.bits.hdr_mult)) hws->funcs.set_hdr_multiplier(pipe_ctx); - if (hws->funcs.populate_mcm_luts) { - if (pipe_ctx->plane_state) { - hws->funcs.populate_mcm_luts(dc, pipe_ctx, pipe_ctx->plane_state->mcm_luts, - pipe_ctx->plane_state->lut_bank_a); - pipe_ctx->plane_state->lut_bank_a = !pipe_ctx->plane_state->lut_bank_a; - } - } - if (pipe_ctx->plane_state && (pipe_ctx->plane_state->update_flags.bits.in_transfer_func_change || pipe_ctx->plane_state->update_flags.bits.gamma_change || @@ -2289,10 +2035,8 @@ void dcn401_program_pipe( * updating on slave planes */ if (pipe_ctx->update_flags.bits.enable || - pipe_ctx->update_flags.bits.plane_changed || - pipe_ctx->stream->update_flags.bits.out_tf || - (pipe_ctx->plane_state && - pipe_ctx->plane_state->update_flags.bits.output_tf_change)) + pipe_ctx->update_flags.bits.plane_changed || + pipe_ctx->stream->update_flags.bits.out_tf) hws->funcs.set_output_transfer_func(dc, pipe_ctx, pipe_ctx->stream); /* If the pipe has been enabled or has a different opp, we @@ -2651,7 +2395,7 @@ bool dcn401_update_bandwidth( struct dce_hwseq *hws = dc->hwseq; /* recalculate DML parameters */ - if (dc->res_pool->funcs->validate_bandwidth(dc, context, false) != DC_OK) + if (dc->res_pool->funcs->validate_bandwidth(dc, context, DC_VALIDATE_MODE_AND_PROGRAMMING) != DC_OK) return false; /* apply updated bandwidth parameters */ @@ -2902,10 +2646,12 @@ void dcn401_plane_atomic_power_down(struct dc *dc, DC_LOGGER_INIT(dc->ctx->logger); - REG_GET(DC_IP_REQUEST_CNTL, IP_REQUEST_EN, &org_ip_request_cntl); - if (org_ip_request_cntl == 0) - REG_SET(DC_IP_REQUEST_CNTL, 0, - IP_REQUEST_EN, 1); + if (REG(DC_IP_REQUEST_CNTL)) { + REG_GET(DC_IP_REQUEST_CNTL, IP_REQUEST_EN, &org_ip_request_cntl); + if (org_ip_request_cntl == 0) + REG_SET(DC_IP_REQUEST_CNTL, 0, + IP_REQUEST_EN, 1); + } if (hws->funcs.dpp_pg_control) hws->funcs.dpp_pg_control(hws, dpp->inst, false); @@ -2916,7 +2662,7 @@ void dcn401_plane_atomic_power_down(struct dc *dc, hubp->funcs->hubp_reset(hubp); dpp->funcs->dpp_reset(dpp); - if (org_ip_request_cntl == 0) + if (org_ip_request_cntl == 0 && REG(DC_IP_REQUEST_CNTL)) REG_SET(DC_IP_REQUEST_CNTL, 0, IP_REQUEST_EN, 0); diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.h b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.h index ce65b4f6c672..2621b7725267 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.h +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.h @@ -109,12 +109,5 @@ void dcn401_detect_pipe_changes( void dcn401_plane_atomic_power_down(struct dc *dc, struct dpp *dpp, struct hubp *hubp); -bool dcn401_program_rmcm_luts( - struct hubp *hubp, - struct pipe_ctx *pipe_ctx, - enum dc_cm2_transfer_func_source lut3d_src, - struct dc_cm2_func_luts *mcm_luts, - struct mpc *mpc, - bool lut_bank_a, - int mpcc_id); +void dcn401_initialize_min_clocks(struct dc *dc); #endif /* __DC_HWSS_DCN401_H__ */ diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_init.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_init.c index fe7aceb2f510..d6e11b7e4fce 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_init.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_init.c @@ -104,6 +104,7 @@ static const struct hw_sequencer_funcs dcn401_funcs = { .enable_plane = dcn20_enable_plane, .update_dchubp_dpp = dcn20_update_dchubp_dpp, .post_unlock_reset_opp = 
dcn20_post_unlock_reset_opp, + .get_underflow_debug_data = dcn30_get_underflow_debug_data, }; static const struct hwseq_private_funcs dcn401_private_funcs = { diff --git a/drivers/gpu/drm/amd/display/dc/hwss/hw_sequencer.h b/drivers/gpu/drm/amd/display/dc/hwss/hw_sequencer.h index 3a0795045bc6..1723bbcf2c46 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/hw_sequencer.h +++ b/drivers/gpu/drm/amd/display/dc/hwss/hw_sequencer.h @@ -47,6 +47,7 @@ struct link_resource; struct dc_dmub_cmd; struct pg_block_update; struct drr_params; +struct dc_underflow_debug_data; struct subvp_pipe_control_lock_fast_params { struct dc *dc; @@ -475,6 +476,9 @@ struct hw_sequencer_funcs { struct dc_state *context); void (*post_unlock_reset_opp)(struct dc *dc, struct pipe_ctx *opp_head); + void (*get_underflow_debug_data)(const struct dc *dc, + struct timing_generator *tg, + struct dc_underflow_debug_data *out_data); }; void color_space_to_black_color( @@ -502,6 +506,9 @@ void get_hdr_visual_confirm_color( void get_mpctree_visual_confirm_color( struct pipe_ctx *pipe_ctx, struct tg_color *color); +void get_smartmux_visual_confirm_color( + struct dc *dc, + struct tg_color *color); void get_vabc_visual_confirm_color( struct pipe_ctx *pipe_ctx, struct tg_color *color); diff --git a/drivers/gpu/drm/amd/display/dc/inc/core_status.h b/drivers/gpu/drm/amd/display/dc/inc/core_status.h index f3696143590c..82085d9c3f40 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/core_status.h +++ b/drivers/gpu/drm/amd/display/dc/inc/core_status.h @@ -59,6 +59,7 @@ enum dc_status { DC_FAIL_DP_PAYLOAD_ALLOCATION = 27, DC_FAIL_DP_LINK_BANDWIDTH = 28, DC_FAIL_HW_CURSOR_SUPPORT = 29, + DC_FAIL_DP_TUNNEL_BW_VALIDATE = 30, DC_ERROR_UNEXPECTED = -1 }; diff --git a/drivers/gpu/drm/amd/display/dc/inc/core_types.h b/drivers/gpu/drm/amd/display/dc/inc/core_types.h index 0cf349cafb3e..d11893f8c916 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/core_types.h +++ b/drivers/gpu/drm/amd/display/dc/inc/core_types.h @@ -67,6 +67,8 @@ struct resource_context; struct clk_bw_params; struct dc_mcache_params; +#define MAX_RMCM_INST 2 + struct resource_funcs { enum engine_id (*get_preferred_eng_id_dpia)(unsigned int dpia_index); void (*destroy)(struct resource_pool **pool); @@ -82,7 +84,7 @@ struct resource_funcs { enum dc_status (*validate_bandwidth)( struct dc *dc, struct dc_state *context, - bool fast_validate); + enum dc_validate_mode validate_mode); void (*calculate_wm_and_dlg)( struct dc *dc, struct dc_state *context, display_e2e_pipe_params_st *pipes, @@ -107,7 +109,7 @@ struct resource_funcs { struct dc *dc, struct dc_state *context, display_e2e_pipe_params_st *pipes, - bool fast_validate); + enum dc_validate_mode validate_mode); /* * Algorithm for assigning available link encoders to links. 
@@ -223,6 +225,11 @@ struct resource_funcs { const struct dc_stream_state *stream); bool (*program_mcache_pipe_config)(struct dc_state *context, const struct dc_mcache_params *mcache_params); + enum dc_status (*update_dc_state_for_encoder_switch)(struct dc_link *link, + struct dc_link_settings *link_setting, + uint8_t pipe_count, + struct pipe_ctx *pipes, + struct audio_output *audio_output); }; struct audio_support{ @@ -281,6 +288,7 @@ struct resource_pool { struct hpo_dp_link_encoder *hpo_dp_link_enc[MAX_HPO_DP2_LINK_ENCODERS]; struct dc_3dlut *mpc_lut[MAX_PIPES]; struct dc_transfer_func *mpc_shaper[MAX_PIPES]; + struct dc_rmcm_3dlut rmcm_3dlut[MAX_RMCM_INST]; struct { unsigned int xtalin_clock_inKhz; @@ -425,7 +433,14 @@ enum p_state_switch_method { P_STATE_V_ACTIVE, P_STATE_SUB_VP, P_STATE_DRR_SUB_VP, - P_STATE_V_BLANK_SUB_VP + P_STATE_V_BLANK_SUB_VP, +}; + +struct dsc_padding_params { + /* pixels borrowed from hblank to hactive */ + uint8_t dsc_hactive_padding; + uint32_t dsc_htotal_padding; + uint32_t dsc_pix_clk_100hz; }; struct pipe_ctx { @@ -485,8 +500,7 @@ struct pipe_ctx { /* subvp_index: only valid if the pipe is a SUBVP_MAIN*/ uint8_t subvp_index; struct pixel_rate_divider pixel_rate_divider; - /* pixels borrowed from hblank to hactive */ - uint8_t hblank_borrow; + struct dsc_padding_params dsc_padding_params; /* next vupdate */ uint32_t next_vupdate; uint32_t wait_frame_count; @@ -556,7 +570,10 @@ struct dcn_bw_output { struct dml2_mcache_surface_allocation mcache_allocations[DML2_MAX_PLANES]; struct dmub_cmd_fams2_global_config fams2_global_config; union dmub_cmd_fams2_config fams2_stream_base_params[DML2_MAX_PLANES]; - union dmub_cmd_fams2_config fams2_stream_sub_params[DML2_MAX_PLANES]; + union { + union dmub_cmd_fams2_config fams2_stream_sub_params[DML2_MAX_PLANES]; + union dmub_fams2_stream_static_sub_state_v2 fams2_stream_sub_params_v2[DML2_MAX_PLANES]; + }; struct dml2_display_arb_regs arb_regs; }; @@ -672,6 +689,7 @@ struct replay_context { /* Controller Id used for Dig Fe source select */ enum controller_id controllerId; unsigned int line_time_in_ns; + bool os_request_force_ffu; }; enum dc_replay_enable { diff --git a/drivers/gpu/drm/amd/display/dc/inc/dcn_calcs.h b/drivers/gpu/drm/amd/display/dc/inc/dcn_calcs.h index d19a595c2be4..134091d5842d 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/dcn_calcs.h +++ b/drivers/gpu/drm/amd/display/dc/inc/dcn_calcs.h @@ -622,7 +622,7 @@ extern const struct dcn_ip_params dcn10_ip_defaults; bool dcn_validate_bandwidth( struct dc *dc, struct dc_state *context, - bool fast_validate); + enum dc_validate_mode validate_mode); void dcn_get_soc_clks( struct dc *dc, diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/clk_mgr.h b/drivers/gpu/drm/amd/display/dc/inc/hw/clk_mgr.h index c14d64687a3d..2c9a4a12bd8a 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/clk_mgr.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/clk_mgr.h @@ -100,6 +100,17 @@ struct dcn301_clk_internal { #define MAX_NUM_DPM_LVL 8 #define WM_SET_COUNT 4 +enum clk_type { + CLK_TYPE_DCFCLK, + CLK_TYPE_FCLK, + CLK_TYPE_MCLK, + CLK_TYPE_SOCCLK, + CLK_TYPE_DTBCLK, + CLK_TYPE_DISPCLK, + CLK_TYPE_DPPCLK, + CLK_TYPE_DSCCLK, + CLK_TYPE_COUNT +}; struct clk_limit_table_entry { unsigned int voltage; /* milivolts withh 2 fractional bits */ @@ -324,6 +335,11 @@ struct clk_mgr_funcs { int (*get_dispclk_from_dentist)(struct clk_mgr *clk_mgr_base); + bool (*is_dc_mode_present)(struct clk_mgr *clk_mgr); + + uint32_t (*set_smartmux_switch)(struct clk_mgr *clk_mgr, uint32_t pins_to_set); + + 
unsigned int (*get_max_clock_khz)(struct clk_mgr *clk_mgr_base, enum clk_type clk_type); }; struct clk_mgr { diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/dccg.h b/drivers/gpu/drm/amd/display/dc/inc/hw/dccg.h index e94e9ba60f55..61c4d2a7db1c 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/dccg.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/dccg.h @@ -211,7 +211,7 @@ struct dccg_funcs { struct dccg *dccg, enum streamclk_source src, uint32_t otg_inst); - void (*set_dto_dscclk)(struct dccg *dccg, uint32_t dsc_inst); + void (*set_dto_dscclk)(struct dccg *dccg, uint32_t dsc_inst, uint32_t num_slices_h); void (*set_ref_dscclk)(struct dccg *dccg, uint32_t dsc_inst); void (*dccg_root_gate_disable_control)(struct dccg *dccg, uint32_t pipe_idx, uint32_t disable_clock_gating); }; diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/dchubbub.h b/drivers/gpu/drm/amd/display/dc/inc/hw/dchubbub.h index 52b745667ef7..843a18287c83 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/dchubbub.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/dchubbub.h @@ -137,6 +137,19 @@ struct dcn_hubbub_state { uint32_t dram_state_cntl; }; +struct hubbub_system_latencies { + uint32_t max_latency_ns; + uint32_t avg_latency_ns; + uint32_t min_latency_ns; +}; + +struct hubbub_urgent_latency_params { + uint32_t refclk_mhz; + uint32_t t_win_ns; + uint32_t bandwidth_mbps; + uint32_t bw_factor_x1000; +}; + struct hubbub_funcs { void (*update_dchub)( struct hubbub *hubbub, @@ -229,6 +242,17 @@ struct hubbub_funcs { void (*program_compbuf_segments)(struct hubbub *hubbub, unsigned compbuf_size_seg, bool safe_to_increase); void (*wait_for_det_update)(struct hubbub *hubbub, int hubp_inst); bool (*program_arbiter)(struct hubbub *hubbub, struct dml2_display_arb_regs *arb_regs, bool safe_to_lower); + void (*get_det_sizes)(struct hubbub *hubbub, uint32_t *curr_det_sizes, uint32_t *target_det_sizes); + uint32_t (*compbuf_config_error)(struct hubbub *hubbub); + struct hubbub_perfmon_funcs{ + void (*start_system_latency_measurement)(struct hubbub *hubbub); + void (*get_system_latency_result)(struct hubbub *hubbub, uint32_t refclk_mhz, struct hubbub_system_latencies *latencies); + void (*start_in_order_bandwidth_measurement)(struct hubbub *hubbub); + void (*get_in_order_bandwidth_result)(struct hubbub *hubbub, uint32_t refclk_mhz, uint32_t *bandwidth_mbps); + void (*start_urgent_ramp_latency_measurement)(struct hubbub *hubbub, const struct hubbub_urgent_latency_params *params); + void (*get_urgent_ramp_latency_result)(struct hubbub *hubbub, uint32_t refclk_mhz, uint32_t *latency_ns); + void (*reset)(struct hubbub *hubbub); + } perfmon; }; struct hubbub { diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/dpp.h b/drivers/gpu/drm/amd/display/dc/inc/hw/dpp.h index 0c5675d1c593..1b7c085dc2cc 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/dpp.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/dpp.h @@ -349,6 +349,9 @@ struct dpp_funcs { struct dpp *dpp_base, enum dc_color_space color_space, struct dc_csc_transform cursor_csc_color_matrix); + + void (*dpp_force_disable_cursor)(struct dpp *dpp_base); + }; diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/hubp.h b/drivers/gpu/drm/amd/display/dc/inc/hw/hubp.h index b610beb075d5..2b874d2cc61c 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/hubp.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/hubp.h @@ -89,7 +89,7 @@ enum hubp_3dlut_fl_addressing_mode { enum hubp_3dlut_fl_width { hubp_3dlut_fl_width_17 = 17, hubp_3dlut_fl_width_33 = 33, - hubp_3dlut_fl_width_transformed = 4916 + 
hubp_3dlut_fl_width_transformed = 4916, //mpc default }; enum hubp_3dlut_fl_crossbar_bit_slice { @@ -99,6 +99,22 @@ enum hubp_3dlut_fl_crossbar_bit_slice { hubp_3dlut_fl_crossbar_bit_slice_48_63 = 3 }; +struct hubp_fl_3dlut_config { + bool enabled; + enum hubp_3dlut_fl_width width; + enum hubp_3dlut_fl_mode mode; + enum hubp_3dlut_fl_format format; + uint16_t bias; + uint16_t scale; + struct dc_plane_address address; + enum hubp_3dlut_fl_addressing_mode addr_mode; + enum dc_cm2_gpu_mem_layout layout; + uint8_t protection_bits; + enum hubp_3dlut_fl_crossbar_bit_slice crossbar_bit_slice_y_g; + enum hubp_3dlut_fl_crossbar_bit_slice crossbar_bit_slice_cb_b; + enum hubp_3dlut_fl_crossbar_bit_slice crossbar_bit_slice_cr_r; +}; + struct hubp { const struct hubp_funcs *funcs; struct dc_context *ctx; @@ -282,13 +298,16 @@ struct hubp_funcs { void (*hubp_enable_3dlut_fl)(struct hubp *hubp, bool enable); void (*hubp_program_3dlut_fl_addressing_mode)(struct hubp *hubp, enum hubp_3dlut_fl_addressing_mode addr_mode); void (*hubp_program_3dlut_fl_width)(struct hubp *hubp, enum hubp_3dlut_fl_width width); - void (*hubp_program_3dlut_fl_tmz_protected)(struct hubp *hubp, bool protection_enabled); + void (*hubp_program_3dlut_fl_tmz_protected)(struct hubp *hubp, uint8_t protection_bits); void (*hubp_program_3dlut_fl_crossbar)(struct hubp *hubp, enum hubp_3dlut_fl_crossbar_bit_slice bit_slice_y_g, enum hubp_3dlut_fl_crossbar_bit_slice bit_slice_cb_b, enum hubp_3dlut_fl_crossbar_bit_slice bit_slice_cr_r); int (*hubp_get_3dlut_fl_done)(struct hubp *hubp); + void (*hubp_program_3dlut_fl_config)(struct hubp *hubp, struct hubp_fl_3dlut_config *cfg); void (*hubp_clear_tiling)(struct hubp *hubp); + uint32_t (*hubp_get_current_read_line)(struct hubp *hubp); + uint32_t (*hubp_get_det_config_error)(struct hubp *hubp); }; #endif diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/hw_shared.h b/drivers/gpu/drm/amd/display/dc/inc/hw/hw_shared.h index 41c76ba9ba56..62a39204fe0b 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/hw_shared.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/hw_shared.h @@ -44,7 +44,13 @@ */ #define MAX_PIPES 6 #define MAX_PHANTOM_PIPES (MAX_PIPES / 2) -#define MAX_LINKS (MAX_PIPES * 2 +2) + +#define MAX_DPIA 6 +#define MAX_CONNECTOR 6 +#define MAX_VIRTUAL_LINKS 4 + +#define MAX_LINKS (MAX_DPIA + MAX_CONNECTOR + MAX_VIRTUAL_LINKS) + #define MAX_DIG_LINK_ENCODERS 7 #define MAX_DWB_PIPES 1 #define MAX_HPO_DP2_ENCODERS 4 diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/mpc.h b/drivers/gpu/drm/amd/display/dc/inc/hw/mpc.h index 6e303b81bfb0..22960ee03dee 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/mpc.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/mpc.h @@ -115,6 +115,16 @@ enum MCM_LUT_ID { MCM_LUT_SHAPER }; +struct mpc_fl_3dlut_config { + bool enabled; + uint16_t width; + bool select_lut_bank_a; + uint16_t bit_depth; + int hubp_index; + uint16_t bias; + uint16_t scale; +}; + union mcm_lut_params { const struct pwl_params *pwl; const struct tetrahedral_params *lut3d; @@ -190,6 +200,42 @@ struct mpc_grph_gamut_adjustment { enum mpcc_gamut_remap_id mpcc_gamut_remap_block_id; }; +struct mpc_rmcm_regs { + uint32_t rmcm_3dlut_mem_pwr_state; + uint32_t rmcm_3dlut_mem_pwr_force; + uint32_t rmcm_3dlut_mem_pwr_dis; + uint32_t rmcm_3dlut_mem_pwr_mode; + uint32_t rmcm_3dlut_size; + uint32_t rmcm_3dlut_mode; + uint32_t rmcm_3dlut_mode_cur; + uint32_t rmcm_3dlut_read_sel; + uint32_t rmcm_3dlut_30bit_en; + uint32_t rmcm_3dlut_wr_en_mask; + uint32_t rmcm_3dlut_ram_sel; + uint32_t rmcm_3dlut_out_norm_factor; + 
uint32_t rmcm_3dlut_fl_sel; + uint32_t rmcm_3dlut_out_offset_r; + uint32_t rmcm_3dlut_out_scale_r; + uint32_t rmcm_3dlut_fl_done; + uint32_t rmcm_3dlut_fl_soft_underflow; + uint32_t rmcm_3dlut_fl_hard_underflow; + uint32_t rmcm_cntl; + uint32_t rmcm_shaper_mem_pwr_state; + uint32_t rmcm_shaper_mem_pwr_force; + uint32_t rmcm_shaper_mem_pwr_dis; + uint32_t rmcm_shaper_mem_pwr_mode; + uint32_t rmcm_shaper_lut_mode; + uint32_t rmcm_shaper_mode_cur; + uint32_t rmcm_shaper_lut_write_en_mask; + uint32_t rmcm_shaper_lut_write_sel; + uint32_t rmcm_shaper_offset_b; + uint32_t rmcm_shaper_scale_b; + uint32_t rmcm_shaper_rama_exp_region_start_b; + uint32_t rmcm_shaper_rama_exp_region_start_seg_b; + uint32_t rmcm_shaper_rama_exp_region_end_b; + uint32_t rmcm_shaper_rama_exp_region_end_base_b; +}; + struct mpcc_sm_cfg { bool enable; /* 0-single plane,2-row subsampling,4-column subsampling,6-checkboard subsampling */ @@ -301,6 +347,7 @@ struct mpcc_state { uint32_t rgam_mode; uint32_t rgam_lut; struct mpc_grph_gamut_adjustment gamut_remap; + struct mpc_rmcm_regs rmcm_regs; }; /** @@ -1022,22 +1069,12 @@ struct mpc_funcs { */ void (*program_lut_mode)(struct mpc *mpc, const enum MCM_LUT_ID id, const enum MCM_LUT_XABLE xable, bool lut_bank_a, int mpcc_id); - /** - * @program_3dlut_size: - * - * Program 3D LUT size. - * - * Parameters: - * - [in/out] mpc - MPC context. - * - [in] is_17x17x17 - is 3dlut 17x17x17 - * - [in] mpcc_id - * - * Return: - * - * void - */ - void (*program_3dlut_size)(struct mpc *mpc, bool is_17x17x17, int mpcc_id); + /** + * @mcm: + * + * MPC MCM new HW sequential programming functions + */ struct { void (*program_3dlut_size)(struct mpc *mpc, uint32_t width, int mpcc_id); void (*program_bias_scale)(struct mpc *mpc, uint16_t bias, uint16_t scale, int mpcc_id); @@ -1050,7 +1087,13 @@ struct mpc_funcs { bool lut_bank_a, int mpcc_id); } mcm; + /** + * @rmcm: + * + * MPC RMCM new HW sequential programming functions + */ struct { + void (*fl_3dlut_configure)(struct mpc *mpc, struct mpc_fl_3dlut_config *cfg, int mpcc_id); void (*enable_3dlut_fl)(struct mpc *mpc, bool enable, int mpcc_id); void (*update_3dlut_fast_load_select)(struct mpc *mpc, int mpcc_id, int hubp_idx); void (*program_lut_read_write_control)(struct mpc *mpc, const enum MCM_LUT_ID id, diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/pg_cntl.h b/drivers/gpu/drm/amd/display/dc/inc/hw/pg_cntl.h index 00ea3864dd4d..227e3f8d7e5f 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/pg_cntl.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/pg_cntl.h @@ -46,7 +46,10 @@ struct pg_cntl_funcs { void (*opp_pg_control)(struct pg_cntl *pg_cntl, unsigned int opp_inst, bool power_on); void (*optc_pg_control)(struct pg_cntl *pg_cntl, unsigned int optc_inst, bool power_on); void (*dwb_pg_control)(struct pg_cntl *pg_cntl, bool power_on); + void (*mem_pg_control)(struct pg_cntl *pg_cntl, bool power_on); + void (*dio_pg_control)(struct pg_cntl *pg_cntl, bool power_on); void (*init_pg_status)(struct pg_cntl *pg_cntl); + void (*print_pg_status)(struct pg_cntl *pg_cntl, const char *debug_func, const char *debug_log); }; #endif //__DC_PG_CNTL_H__ diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/stream_encoder.h b/drivers/gpu/drm/amd/display/dc/inc/hw/stream_encoder.h index fe7f3137f228..27f950ae45ee 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/stream_encoder.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/stream_encoder.h @@ -117,6 +117,7 @@ struct stream_encoder { uint32_t stream_enc_inst; struct vpg *vpg; struct afmt *afmt; + struct apg *apg; }; 
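[Editor's note — not part of the patch] The mpc_funcs changes above group the fast-load 3D LUT programming behind the new rmcm sub-struct, with mpc_fl_3dlut_config carrying the parameters that the removed dcn401_program_rmcm_luts path passed through individual program_* calls. A minimal usage sketch, assuming hypothetical local names (example_rmcm_fl_3dlut, fl_cfg) and placeholder width/bit_depth/bias/scale values, guarding the optional hook as the rest of DC does:

    /* sketch: configure the RMCM fast-load 3D LUT through the new hook */
    static void example_rmcm_fl_3dlut(struct mpc *mpc, struct hubp *hubp,
                                      bool lut_bank_a, int mpcc_id)
    {
            struct mpc_fl_3dlut_config fl_cfg = {
                    .enabled = true,
                    .width = 17,               /* 17x17x17 LUT, example only */
                    .select_lut_bank_a = lut_bank_a,
                    .bit_depth = 12,           /* example only */
                    .hubp_index = hubp->inst,
                    .bias = 0,
                    .scale = 0,
            };

            if (mpc->funcs->rmcm.fl_3dlut_configure)
                    mpc->funcs->rmcm.fl_3dlut_configure(mpc, &fl_cfg, mpcc_id);
    }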
struct enc_state { diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/timing_generator.h b/drivers/gpu/drm/amd/display/dc/inc/hw/timing_generator.h index 267ace4eef8a..f2de2cf23859 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/timing_generator.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/timing_generator.h @@ -374,6 +374,7 @@ struct timing_generator_funcs { void (*wait_drr_doublebuffer_pending_clear)(struct timing_generator *tg); void (*set_long_vtotal)(struct timing_generator *optc, const struct long_vtotal_params *params); void (*wait_odm_doublebuffer_pending_clear)(struct timing_generator *tg); + void (*wait_otg_disable)(struct timing_generator *optc); bool (*get_optc_double_buffer_pending)(struct timing_generator *tg); bool (*get_otg_double_buffer_pending)(struct timing_generator *tg); bool (*get_pipe_update_pending)(struct timing_generator *tg); diff --git a/drivers/gpu/drm/amd/display/dc/inc/link.h b/drivers/gpu/drm/amd/display/dc/inc/link_service.h index 7d16351bba99..1e34e84160aa 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/link.h +++ b/drivers/gpu/drm/amd/display/dc/inc/link_service.h @@ -42,8 +42,8 @@ * dc_link_exports.c or other dc files implement dc.h * * DC to Link: - * dc_link_exports.c or other dc files include link.h - * link_factory.c implements link.h + * dc_link_exports.c or other dc files include link_service.h + * link_factory.c implements link_service.h * * Link sub-component to Link sub-component: * link_factory.c includes --> link_xxx.h @@ -73,7 +73,7 @@ * 2. Implement your function in the suitable link_xxx.c file. * 3. Assign the function to link_service in link_factory.c * 4. NEVER include link_xxx.h headers outside link component. - * 5. NEVER include link.h on DM side. + * 5. NEVER include link_service.h on DM side. 
*/ #include "core_types.h" @@ -144,9 +144,9 @@ struct link_service { uint32_t (*dp_link_bandwidth_kbps)( const struct dc_link *link, const struct dc_link_settings *link_settings); - bool (*validate_dpia_bandwidth)( - const struct dc_stream_state *stream, - const unsigned int num_streams); + enum dc_status (*validate_dp_tunnel_bandwidth)( + const struct dc *dc, + const struct dc_state *new_ctx); uint32_t (*dp_required_hblank_size_bytes)( const struct dc_link *link, @@ -218,7 +218,10 @@ struct link_service { bool (*dp_overwrite_extended_receiver_cap)(struct dc_link *link); enum lttpr_mode (*dp_decide_lttpr_mode)(struct dc_link *link, struct dc_link_settings *link_setting); - + uint8_t (*dp_get_lttpr_count)(struct dc_link *link); + void (*edp_get_alpm_support)(struct dc_link *link, + bool *auxless_support, + bool *auxwake_support); /*************************** DP DPIA/PHY ******************************/ void (*dpia_handle_usb4_bandwidth_allocation_for_link)( diff --git a/drivers/gpu/drm/amd/display/dc/inc/resource.h b/drivers/gpu/drm/amd/display/dc/inc/resource.h index a890f581f4e8..4e26a16a8743 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/resource.h +++ b/drivers/gpu/drm/amd/display/dc/inc/resource.h @@ -45,6 +45,7 @@ enum dce_version resource_parse_asic_id( struct resource_caps { int num_timing_generator; int num_opp; + int num_dpp; int num_video_plane; int num_audio; int num_stream_encoder; diff --git a/drivers/gpu/drm/amd/display/dc/inc/soc_and_ip_translator.h b/drivers/gpu/drm/amd/display/dc/inc/soc_and_ip_translator.h new file mode 100644 index 000000000000..23daf98b8aa8 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/inc/soc_and_ip_translator.h @@ -0,0 +1,24 @@ +// SPDX-License-Identifier: MIT +// +// Copyright 2025 Advanced Micro Devices, Inc. 
+ +#ifndef __SOC_AND_IP_TRANSLATOR_H__ +#define __SOC_AND_IP_TRANSLATOR_H__ + +#include "dc.h" +#include "dml_top_soc_parameter_types.h" + +struct soc_and_ip_translator_funcs { + void (*get_soc_bb)(struct dml2_soc_bb *soc_bb, const struct dc *dc, const struct dml2_configuration_options *config); + void (*get_ip_caps)(struct dml2_ip_capabilities *dml_ip_caps); +}; + +struct soc_and_ip_translator { + const struct soc_and_ip_translator_funcs *translator_funcs; +}; + +struct soc_and_ip_translator *dc_create_soc_and_ip_translator(enum dce_version dc_version); +void dc_destroy_soc_and_ip_translator(struct soc_and_ip_translator **soc_and_ip_translator); + + +#endif // __SOC_AND_IP_TRANSLATOR_H__ diff --git a/drivers/gpu/drm/amd/display/dc/link/accessories/link_dp_cts.c b/drivers/gpu/drm/amd/display/dc/link/accessories/link_dp_cts.c index 96febabf464a..2676ae9f6fe8 100644 --- a/drivers/gpu/drm/amd/display/dc/link/accessories/link_dp_cts.c +++ b/drivers/gpu/drm/amd/display/dc/link/accessories/link_dp_cts.c @@ -34,6 +34,7 @@ #include "dm_helpers.h" #include "dc_dmub_srv.h" #include "dce/dmub_hw_lock_mgr.h" +#include "clk_mgr.h" #define DC_LOGGER \ link->ctx->logger @@ -67,10 +68,20 @@ static void dp_retrain_link_dp_test(struct dc_link *link, { struct pipe_ctx *pipes[MAX_PIPES]; struct dc_state *state = link->dc->current_state; + struct dc_stream_update stream_update = { 0 }; + bool dpms_off = false; + bool needs_divider_update = false; bool was_hpo_acquired = resource_is_hpo_acquired(link->dc->current_state); bool is_hpo_acquired; uint8_t count; int i; + struct audio_output audio_output[MAX_PIPES]; + struct dc_stream_state *streams_on_link[MAX_PIPES]; + int num_streams_on_link = 0; + struct dc *dc = (struct dc *)link->dc; + + needs_divider_update = (link->dc->link_srv->dp_get_encoding_format(link_setting) != + link->dc->link_srv->dp_get_encoding_format((const struct dc_link_settings *) &link->cur_link_settings)); udelay(100); @@ -83,16 +94,66 @@ static void dp_retrain_link_dp_test(struct dc_link *link, link->dc, state, pipes[i]); + + // Disable OTG and re-enable after updating clocks + pipes[i]->stream_res.tg->funcs->disable_crtc(pipes[i]->stream_res.tg); } - if (link->dc->hwss.setup_hpo_hw_control) { - is_hpo_acquired = resource_is_hpo_acquired(state); - if (was_hpo_acquired != is_hpo_acquired) - link->dc->hwss.setup_hpo_hw_control(link->dc->hwseq, is_hpo_acquired); + if (needs_divider_update && link->dc->res_pool->funcs->update_dc_state_for_encoder_switch) { + link->dc->res_pool->funcs->update_dc_state_for_encoder_switch(link, + link_setting, count, + *pipes, &audio_output[0]); + for (i = 0; i < count; i++) { + pipes[i]->clock_source->funcs->program_pix_clk( + pipes[i]->clock_source, + &pipes[i]->stream_res.pix_clk_params, + link->dc->link_srv->dp_get_encoding_format(&pipes[i]->link_config.dp_link_settings), + &pipes[i]->pll_settings); + + if (pipes[i]->stream_res.audio != NULL) { + const struct link_hwss *link_hwss = get_link_hwss( + link, &pipes[i]->link_res); + + link_hwss->setup_audio_output(pipes[i], &audio_output[i], + pipes[i]->stream_res.audio->inst); + + pipes[i]->stream_res.audio->funcs->az_configure( + pipes[i]->stream_res.audio, + pipes[i]->stream->signal, + &audio_output[i].crtc_info, + &pipes[i]->stream->audio_info, + &audio_output[i].dp_link_info); + + if (link->dc->config.disable_hbr_audio_dp2 && + pipes[i]->stream_res.audio->funcs->az_disable_hbr_audio && + link->dc->link_srv->dp_is_128b_132b_signal(pipes[i])) + 
pipes[i]->stream_res.audio->funcs->az_disable_hbr_audio(pipes[i]->stream_res.audio); + } + } } - for (i = count-1; i >= 0; i--) - link_set_dpms_on(state, pipes[i]); + // Toggle on HPO I/O if necessary + is_hpo_acquired = resource_is_hpo_acquired(state); + if (was_hpo_acquired != is_hpo_acquired && link->dc->hwss.setup_hpo_hw_control) + link->dc->hwss.setup_hpo_hw_control(link->dc->hwseq, is_hpo_acquired); + + for (i = 0; i < count; i++) + pipes[i]->stream_res.tg->funcs->enable_crtc(pipes[i]->stream_res.tg); + + // Set DPMS on with stream update + // Cache all streams on current link since dc_update_planes_and_stream might kill current_state + for (i = 0; i < MAX_PIPES; i++) { + if (state->streams[i] && state->streams[i]->link && state->streams[i]->link == link) + streams_on_link[num_streams_on_link++] = state->streams[i]; + } + + for (i = 0; i < num_streams_on_link; i++) { + if (streams_on_link[i] && streams_on_link[i]->link && streams_on_link[i]->link == link) { + stream_update.stream = streams_on_link[i]; + stream_update.dpms_off = &dpms_off; + dc_update_planes_and_stream(dc, NULL, 0, streams_on_link[i], &stream_update); + } + } } static void dp_test_send_link_training(struct dc_link *link) diff --git a/drivers/gpu/drm/amd/display/dc/link/accessories/link_dp_cts.h b/drivers/gpu/drm/amd/display/dc/link/accessories/link_dp_cts.h index eae23ea7f6ec..033650cdb811 100644 --- a/drivers/gpu/drm/amd/display/dc/link/accessories/link_dp_cts.h +++ b/drivers/gpu/drm/amd/display/dc/link/accessories/link_dp_cts.h @@ -24,7 +24,7 @@ */ #ifndef __LINK_DP_CTS_H__ #define __LINK_DP_CTS_H__ -#include "link.h" +#include "link_service.h" void dp_handle_automated_test(struct dc_link *link); bool dp_set_test_pattern( struct dc_link *link, diff --git a/drivers/gpu/drm/amd/display/dc/link/accessories/link_dp_trace.h b/drivers/gpu/drm/amd/display/dc/link/accessories/link_dp_trace.h index ab437a0c9101..9ff4a6c46a2b 100644 --- a/drivers/gpu/drm/amd/display/dc/link/accessories/link_dp_trace.h +++ b/drivers/gpu/drm/amd/display/dc/link/accessories/link_dp_trace.h @@ -24,7 +24,7 @@ */ #ifndef __LINK_DP_TRACE_H__ #define __LINK_DP_TRACE_H__ -#include "link.h" +#include "link_service.h" void dp_trace_init(struct dc_link *link); void dp_trace_reset(struct dc_link *link); diff --git a/drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_dio.c b/drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_dio.c index b68bcc9fca0a..892907991f91 100644 --- a/drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_dio.c +++ b/drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_dio.c @@ -138,8 +138,7 @@ void setup_dio_stream_attribute(struct pipe_ctx *pipe_ctx) stream_encoder->funcs->dvi_set_stream_attribute( stream_encoder, &stream->timing, - (stream->signal == SIGNAL_TYPE_DVI_DUAL_LINK) ? 
- true : false); + stream->signal == SIGNAL_TYPE_DVI_DUAL_LINK); else if (dc_is_lvds_signal(stream->signal)) stream_encoder->funcs->lvds_set_stream_attribute( stream_encoder, diff --git a/drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_dio.h b/drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_dio.h index 45f0e091fcb0..4a25210a344f 100644 --- a/drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_dio.h +++ b/drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_dio.h @@ -27,7 +27,7 @@ #define __LINK_HWSS_DIO_H__ #include "link_hwss.h" -#include "link.h" +#include "link_service.h" const struct link_hwss *get_dio_link_hwss(void); bool can_use_dio_link_hwss(const struct dc_link *link, diff --git a/drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_dio_fixed_vs_pe_retimer.h b/drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_dio_fixed_vs_pe_retimer.h index 9ac08a332540..cf578a8662a4 100644 --- a/drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_dio_fixed_vs_pe_retimer.h +++ b/drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_dio_fixed_vs_pe_retimer.h @@ -25,7 +25,7 @@ #ifndef __LINK_HWSS_DIO_FIXED_VS_PE_RETIMER_H__ #define __LINK_HWSS_DIO_FIXED_VS_PE_RETIMER_H__ -#include "link.h" +#include "link_service.h" uint32_t dp_dio_fixed_vs_pe_retimer_get_lttpr_write_address(struct dc_link *link); uint8_t dp_dio_fixed_vs_pe_retimer_lane_cfg_to_hw_cfg(struct dc_link *link); diff --git a/drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_hpo_dp.h b/drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_hpo_dp.h index 1d3ed8ca83b5..7c9005bc2587 100644 --- a/drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_hpo_dp.h +++ b/drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_hpo_dp.h @@ -26,7 +26,7 @@ #define __LINK_HWSS_HPO_DP_H__ #include "link_hwss.h" -#include "link.h" +#include "link_service.h" void set_hpo_dp_throttled_vcp_size(struct pipe_ctx *pipe_ctx, struct fixed31_32 throttled_vcp_size); diff --git a/drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_hpo_fixed_vs_pe_retimer_dp.c b/drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_hpo_fixed_vs_pe_retimer_dp.c index 116ff37126e7..55c5148de800 100644 --- a/drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_hpo_fixed_vs_pe_retimer_dp.c +++ b/drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_hpo_fixed_vs_pe_retimer_dp.c @@ -74,7 +74,7 @@ static void dp_hpo_fixed_vs_pe_retimer_set_tx_ffe(struct dc_link *link, static void dp_hpo_fixed_vs_pe_retimer_program_override_test_pattern(struct dc_link *link, struct encoder_set_dp_phy_pattern_param *tp_params) { - uint8_t clk_src = 0x4C; + uint8_t clk_src = 0xC4; uint8_t pattern = 0x4F; /* SQ128 */ const uint8_t vendor_lttpr_write_data_pg0[4] = {0x1, 0x11, 0x0, 0x0}; diff --git a/drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_hpo_fixed_vs_pe_retimer_dp.h b/drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_hpo_fixed_vs_pe_retimer_dp.h index 82301187bc7c..8bf36827ecfb 100644 --- a/drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_hpo_fixed_vs_pe_retimer_dp.h +++ b/drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_hpo_fixed_vs_pe_retimer_dp.h @@ -25,7 +25,7 @@ #ifndef __LINK_HWSS_HPO_FIXED_VS_PE_RETIMER_DP_H__ #define __LINK_HWSS_HPO_FIXED_VS_PE_RETIMER_DP_H__ -#include "link.h" +#include "link_service.h" bool requires_fixed_vs_pe_retimer_hpo_link_hwss(const struct dc_link *link); const struct link_hwss *get_hpo_fixed_vs_pe_retimer_dp_link_hwss(void); diff --git a/drivers/gpu/drm/amd/display/dc/link/link_detection.c b/drivers/gpu/drm/amd/display/dc/link/link_detection.c index 9655e6fa53a4..85303167a553 100644 
--- a/drivers/gpu/drm/amd/display/dc/link/link_detection.c +++ b/drivers/gpu/drm/amd/display/dc/link/link_detection.c @@ -593,8 +593,9 @@ static bool detect_dp(struct dc_link *link, if (sink_caps->transaction_type == DDC_TRANSACTION_TYPE_I2C_OVER_AUX) { sink_caps->signal = SIGNAL_TYPE_DISPLAY_PORT; - if (!detect_dp_sink_caps(link)) + if (!detect_dp_sink_caps(link)) { return false; + } if (is_dp_branch_device(link)) /* DP SST branch */ @@ -655,7 +656,7 @@ static bool wait_for_entering_dp_alt_mode(struct dc_link *link) return true; is_in_alt_mode = link->link_enc->funcs->is_in_alt_mode(link->link_enc); - DC_LOG_DC("DP Alt mode state on HPD: %d\n", is_in_alt_mode); + DC_LOG_DC("DP Alt mode state on HPD: %d Link=%d\n", is_in_alt_mode, link->link_index); if (is_in_alt_mode) return true; @@ -1139,6 +1140,10 @@ static bool detect_link_and_local_sink(struct dc_link *link, if (sink->sink_signal == SIGNAL_TYPE_HDMI_TYPE_A && !sink->edid_caps.edid_hdmi) sink->sink_signal = SIGNAL_TYPE_DVI_SINGLE_LINK; + else if (dc_is_dvi_signal(sink->sink_signal) && + aud_support->hdmi_audio_native && + sink->edid_caps.edid_hdmi) + sink->sink_signal = SIGNAL_TYPE_HDMI_TYPE_A; if (link->local_sink && dc_is_dp_signal(sink_caps.signal)) dp_trace_init(link); diff --git a/drivers/gpu/drm/amd/display/dc/link/link_detection.h b/drivers/gpu/drm/amd/display/dc/link/link_detection.h index 7da05078721e..1ab29476060b 100644 --- a/drivers/gpu/drm/amd/display/dc/link/link_detection.h +++ b/drivers/gpu/drm/amd/display/dc/link/link_detection.h @@ -25,7 +25,7 @@ #ifndef __DC_LINK_DETECTION_H__ #define __DC_LINK_DETECTION_H__ -#include "link.h" +#include "link_service.h" bool link_detect(struct dc_link *link, enum dc_detect_reason reason); bool link_detect_connection_type(struct dc_link *link, enum dc_connection_type *type); diff --git a/drivers/gpu/drm/amd/display/dc/link/link_dpms.c b/drivers/gpu/drm/amd/display/dc/link/link_dpms.c index 273a3be6d593..83419e1a9036 100644 --- a/drivers/gpu/drm/amd/display/dc/link/link_dpms.c +++ b/drivers/gpu/drm/amd/display/dc/link/link_dpms.c @@ -140,7 +140,8 @@ void link_blank_dp_stream(struct dc_link *link, bool hw_init) } } - if ((!link->wa_flags.dp_keep_receiver_powered) || hw_init) + if (((!dc->is_switch_in_progress_dest) && ((!link->wa_flags.dp_keep_receiver_powered) || hw_init)) && + (link->type != dc_connection_none)) dpcd_write_rx_power_ctrl(link, false); } } @@ -831,7 +832,7 @@ void link_set_dsc_on_stream(struct pipe_ctx *pipe_ctx, bool enable) enum optc_dsc_mode optc_dsc_mode; /* Enable DSC hw block */ - dsc_cfg.pic_width = (stream->timing.h_addressable + pipe_ctx->hblank_borrow + + dsc_cfg.pic_width = (stream->timing.h_addressable + pipe_ctx->dsc_padding_params.dsc_hactive_padding + stream->timing.h_border_left + stream->timing.h_border_right) / opp_cnt; dsc_cfg.pic_height = stream->timing.v_addressable + stream->timing.v_border_top + stream->timing.v_border_bottom; dsc_cfg.pixel_encoding = stream->timing.pixel_encoding; @@ -842,14 +843,14 @@ void link_set_dsc_on_stream(struct pipe_ctx *pipe_ctx, bool enable) dsc_cfg.dc_dsc_cfg.num_slices_h /= opp_cnt; if (should_use_dto_dscclk) - dccg->funcs->set_dto_dscclk(dccg, dsc->inst); + dccg->funcs->set_dto_dscclk(dccg, dsc->inst, dsc_cfg.dc_dsc_cfg.num_slices_h); dsc->funcs->dsc_set_config(dsc, &dsc_cfg, &dsc_optc_cfg); dsc->funcs->dsc_enable(dsc, pipe_ctx->stream_res.opp->inst); for (odm_pipe = pipe_ctx->next_odm_pipe; odm_pipe; odm_pipe = odm_pipe->next_odm_pipe) { struct display_stream_compressor *odm_dsc = odm_pipe->stream_res.dsc; if 
(should_use_dto_dscclk) - dccg->funcs->set_dto_dscclk(dccg, odm_dsc->inst); + dccg->funcs->set_dto_dscclk(dccg, odm_dsc->inst, dsc_cfg.dc_dsc_cfg.num_slices_h); odm_dsc->funcs->dsc_set_config(odm_dsc, &dsc_cfg, &dsc_optc_cfg); odm_dsc->funcs->dsc_enable(odm_dsc, odm_pipe->stream_res.opp->inst); } @@ -2296,8 +2297,7 @@ static bool allocate_usb4_bandwidth_for_stream(struct dc_stream_state *stream, i link->dpia_bw_alloc_config.remote_sink_req_bw[sink_index] = bw; } - /* get dp overhead for dp tunneling */ - link->dpia_bw_alloc_config.dp_overhead = link_dp_dpia_get_dp_overhead_in_dp_tunneling(link); + link->dpia_bw_alloc_config.dp_overhead = link_dpia_get_dp_overhead(link); req_bw += link->dpia_bw_alloc_config.dp_overhead; link_dp_dpia_allocate_usb4_bandwidth_for_stream(link, req_bw); @@ -2358,9 +2358,9 @@ void link_set_dpms_off(struct pipe_ctx *pipe_ctx) if (pipe_ctx->stream->sink) { if (pipe_ctx->stream->sink->sink_signal != SIGNAL_TYPE_VIRTUAL && pipe_ctx->stream->sink->sink_signal != SIGNAL_TYPE_NONE) { - DC_LOG_DC("%s pipe_ctx dispname=%s signal=%x\n", __func__, + DC_LOG_DC("%s pipe_ctx dispname=%s signal=%x link=%d\n", __func__, pipe_ctx->stream->sink->edid_caps.display_name, - pipe_ctx->stream->signal); + pipe_ctx->stream->signal, link->link_index); } } @@ -2458,7 +2458,6 @@ void link_set_dpms_on( struct link_encoder *link_enc = pipe_ctx->link_res.dio_link_enc; enum otg_out_mux_dest otg_out_dest = OUT_MUX_DIO; struct vpg *vpg = pipe_ctx->stream_res.stream_enc->vpg; - const struct link_hwss *link_hwss = get_link_hwss(link, &pipe_ctx->link_res); bool apply_edp_fast_boot_optimization = pipe_ctx->stream->apply_edp_fast_boot_optimization; @@ -2474,9 +2473,10 @@ void link_set_dpms_on( if (pipe_ctx->stream->sink) { if (pipe_ctx->stream->sink->sink_signal != SIGNAL_TYPE_VIRTUAL && pipe_ctx->stream->sink->sink_signal != SIGNAL_TYPE_NONE) { - DC_LOG_DC("%s pipe_ctx dispname=%s signal=%x\n", __func__, + DC_LOG_DC("%s pipe_ctx dispname=%s signal=%x link=%d\n", __func__, pipe_ctx->stream->sink->edid_caps.display_name, - pipe_ctx->stream->signal); + pipe_ctx->stream->signal, + link->link_index); } } @@ -2502,8 +2502,6 @@ void link_set_dpms_on( pipe_ctx->stream_res.tg->funcs->set_out_mux(pipe_ctx->stream_res.tg, otg_out_dest); } - link_hwss->setup_stream_attribute(pipe_ctx); - pipe_ctx->stream->apply_edp_fast_boot_optimization = false; // Enable VPG before building infoframe @@ -2537,6 +2535,14 @@ void link_set_dpms_on( !pipe_ctx->next_odm_pipe) { pipe_ctx->stream->dpms_off = false; update_psp_stream_config(pipe_ctx, false); + + if (link->is_dds) { + uint32_t post_oui_delay = 30; // 30ms + + dpcd_set_source_specific_data(link); + msleep(post_oui_delay); + } + return; } @@ -2629,6 +2635,15 @@ void link_set_dpms_on( dp_is_128b_132b_signal(pipe_ctx)) update_sst_payload(pipe_ctx, true); + /* Corruption was observed on systems with display mux when stream gets + * enabled after the mux switch. Having a small delay between link + * training and stream unblank resolves the corruption issue. + * This is workaround. 
+ */ + if (pipe_ctx->stream->signal == SIGNAL_TYPE_EDP && + link->is_display_mux_present) + msleep(20); + dc->hwss.unblank_stream(pipe_ctx, &pipe_ctx->stream->link->cur_link_settings); diff --git a/drivers/gpu/drm/amd/display/dc/link/link_dpms.h b/drivers/gpu/drm/amd/display/dc/link/link_dpms.h index 9398f9c1666a..bd6fc63064a3 100644 --- a/drivers/gpu/drm/amd/display/dc/link/link_dpms.h +++ b/drivers/gpu/drm/amd/display/dc/link/link_dpms.h @@ -26,7 +26,7 @@ #ifndef __DC_LINK_DPMS_H__ #define __DC_LINK_DPMS_H__ -#include "link.h" +#include "link_service.h" void link_set_dpms_on( struct dc_state *state, struct pipe_ctx *pipe_ctx); diff --git a/drivers/gpu/drm/amd/display/dc/link/link_factory.c b/drivers/gpu/drm/amd/display/dc/link/link_factory.c index 1a04f4b74585..31a73867cd4c 100644 --- a/drivers/gpu/drm/amd/display/dc/link/link_factory.c +++ b/drivers/gpu/drm/amd/display/dc/link/link_factory.c @@ -100,7 +100,7 @@ static void construct_link_service_validation(struct link_service *link_srv) { link_srv->validate_mode_timing = link_validate_mode_timing; link_srv->dp_link_bandwidth_kbps = dp_link_bandwidth_kbps; - link_srv->validate_dpia_bandwidth = link_validate_dpia_bandwidth; + link_srv->validate_dp_tunnel_bandwidth = link_validate_dp_tunnel_bandwidth; link_srv->dp_required_hblank_size_bytes = dp_required_hblank_size_bytes; } @@ -165,6 +165,8 @@ static void construct_link_service_dp_capability(struct link_service *link_srv) link_srv->dp_overwrite_extended_receiver_cap = dp_overwrite_extended_receiver_cap; link_srv->dp_decide_lttpr_mode = dp_decide_lttpr_mode; + link_srv->dp_get_lttpr_count = dp_get_lttpr_count; + link_srv->edp_get_alpm_support = edp_get_alpm_support; } /* link dp phy/dpia implements basic dp phy/dpia functionality such as @@ -539,10 +541,16 @@ static bool construct_phy(struct dc_link *link, break; case CONNECTOR_ID_EDP: + // If smartmux is supported, only create the link on the primary eDP. + // Dual eDP is not supported with smartmux. 
+ if (!(!link->dc->config.smart_mux_version || dc_ctx->dc_edp_id_count == 0)) + goto create_fail; + link->connector_signal = SIGNAL_TYPE_EDP; if (link->hpd_gpio) { - if (!link->dc->config.allow_edp_hotplug_detection) + if (!link->dc->config.allow_edp_hotplug_detection + && !is_smartmux_suported(link)) link->irq_source_hpd = DC_IRQ_SOURCE_INVALID; switch (link->dc->config.allow_edp_hotplug_detection) { diff --git a/drivers/gpu/drm/amd/display/dc/link/link_factory.h b/drivers/gpu/drm/amd/display/dc/link/link_factory.h index e96220d48d03..aad36ca1a31c 100644 --- a/drivers/gpu/drm/amd/display/dc/link/link_factory.h +++ b/drivers/gpu/drm/amd/display/dc/link/link_factory.h @@ -24,7 +24,7 @@ */ #ifndef __LINK_FACTORY_H__ #define __LINK_FACTORY_H__ -#include "link.h" +#include "link_service.h" struct dc_link *link_create(const struct link_init_data *init_params); void link_destroy(struct dc_link **link); diff --git a/drivers/gpu/drm/amd/display/dc/link/link_resource.h b/drivers/gpu/drm/amd/display/dc/link/link_resource.h index 1907bda3cb6e..f7aa3bc3a93a 100644 --- a/drivers/gpu/drm/amd/display/dc/link/link_resource.h +++ b/drivers/gpu/drm/amd/display/dc/link/link_resource.h @@ -24,7 +24,7 @@ */ #ifndef __LINK_RESOURCE_H__ #define __LINK_RESOURCE_H__ -#include "link.h" +#include "link_service.h" void link_get_cur_res_map(const struct dc *dc, uint32_t *map); void link_restore_res_map(const struct dc *dc, uint32_t *map); void link_get_cur_link_res(const struct dc_link *link, diff --git a/drivers/gpu/drm/amd/display/dc/link/link_validation.c b/drivers/gpu/drm/amd/display/dc/link/link_validation.c index 29606fda029d..acdc162de535 100644 --- a/drivers/gpu/drm/amd/display/dc/link/link_validation.c +++ b/drivers/gpu/drm/amd/display/dc/link/link_validation.c @@ -86,6 +86,10 @@ static bool dp_active_dongle_validate_timing( if (!dongle_caps->is_dp_hdmi_ycbcr420_pass_through) return false; break; + case PIXEL_ENCODING_UNDEFINED: + /* These color depths are currently not supported */ + ASSERT(false); + break; default: /* Invalid Pixel Encoding*/ return false; @@ -104,6 +108,10 @@ static bool dp_active_dongle_validate_timing( if (dongle_caps->dp_hdmi_max_bpc < 12) return false; break; + case COLOR_DEPTH_UNDEFINED: + /* These color depths are currently not supported */ + ASSERT(false); + break; case COLOR_DEPTH_141414: case COLOR_DEPTH_161616: default: @@ -255,6 +263,14 @@ uint32_t dp_link_bandwidth_kbps( return link_rate_per_lane_kbps * link_settings->lane_count / 10000 * total_data_bw_efficiency_x10000; } +static uint32_t dp_get_timing_bandwidth_kbps( + const struct dc_crtc_timing *timing, + const struct dc_link *link) +{ + return dc_bandwidth_in_kbps_from_timing(timing, + dc_link_get_highest_encoding_format(link)); +} + static bool dp_validate_mode_timing( struct dc_link *link, const struct dc_crtc_timing *timing) @@ -351,63 +367,83 @@ enum dc_status link_validate_mode_timing( return DC_OK; } +static const struct dc_tunnel_settings *get_dp_tunnel_settings(const struct dc_state *context, + const struct dc_stream_state *stream) +{ + int i; + const struct dc_tunnel_settings *dp_tunnel_settings = NULL; + + for (i = 0; i < MAX_PIPES; i++) { + if (context->res_ctx.pipe_ctx[i].stream && (context->res_ctx.pipe_ctx[i].stream == stream)) { + dp_tunnel_settings = &context->res_ctx.pipe_ctx[i].link_config.dp_tunnel_settings; + break; + } + } + + return dp_tunnel_settings; +} + /* - * This function calculates the bandwidth required for the stream timing - * and aggregates the stream bandwidth for the respective dpia link 
+ * Calculates the DP tunneling bandwidth required for the stream timing + * and aggregates the stream bandwidth for the respective DP tunneling link * - * @stream: pointer to the dc_stream_state struct instance - * @num_streams: number of streams to be validated - * - * return: true if validation is succeeded + * return: dc_status */ -bool link_validate_dpia_bandwidth(const struct dc_stream_state *stream, const unsigned int num_streams) +enum dc_status link_validate_dp_tunnel_bandwidth(const struct dc *dc, const struct dc_state *new_ctx) { - int bw_needed[MAX_DPIA_NUM] = {0}; - struct dc_link *dpia_link[MAX_DPIA_NUM] = {0}; - int num_dpias = 0; - - for (unsigned int i = 0; i < num_streams; ++i) { - if (stream[i].signal == SIGNAL_TYPE_DISPLAY_PORT) { - /* new dpia sst stream, check whether it exceeds max dpia */ - if (num_dpias >= MAX_DPIA_NUM) - return false; + struct dc_validation_dpia_set dpia_link_sets[MAX_DPIA_NUM] = { 0 }; + uint8_t link_count = 0; + enum dc_status result = DC_OK; - dpia_link[num_dpias] = stream[i].link; - bw_needed[num_dpias] = dc_bandwidth_in_kbps_from_timing(&stream[i].timing, - dc_link_get_highest_encoding_format(dpia_link[num_dpias])); - num_dpias++; - } else if (stream[i].signal == SIGNAL_TYPE_DISPLAY_PORT_MST) { - uint8_t j = 0; - /* check whether its a known dpia link */ - for (; j < num_dpias; ++j) { - if (dpia_link[j] == stream[i].link) - break; - } + // Iterate through streams in the new context + for (uint8_t i = 0; (i < MAX_PIPES && i < new_ctx->stream_count); i++) { + const struct dc_stream_state *stream = new_ctx->streams[i]; + const struct dc_link *link; + const struct dc_tunnel_settings *dp_tunnel_settings; + uint32_t timing_bw; + + if (stream == NULL) + continue; + + link = stream->link; + + if (!(link && (stream->signal == SIGNAL_TYPE_DISPLAY_PORT + || stream->signal == SIGNAL_TYPE_DISPLAY_PORT_MST))) + continue; + + if ((link->ep_type == DISPLAY_ENDPOINT_USB4_DPIA) && (link->hpd_status == false)) + continue; - if (j == num_dpias) { - /* new dpia mst stream, check whether it exceeds max dpia */ - if (num_dpias >= MAX_DPIA_NUM) - return false; - else { - dpia_link[j] = stream[i].link; - num_dpias++; - } + dp_tunnel_settings = get_dp_tunnel_settings(new_ctx, stream); + + if ((dp_tunnel_settings == NULL) || (dp_tunnel_settings->should_use_dp_bw_allocation == false)) + continue; + + timing_bw = dp_get_timing_bandwidth_kbps(&stream->timing, link); + + // Find an existing entry for this 'link' in 'dpia_link_sets' + for (uint8_t j = 0; j < MAX_DPIA_NUM; j++) { + bool is_new_slot = false; + + if (dpia_link_sets[j].link == NULL) { + is_new_slot = true; + link_count++; + dpia_link_sets[j].required_bw = 0; + dpia_link_sets[j].link = link; } - bw_needed[j] += dc_bandwidth_in_kbps_from_timing(&stream[i].timing, - dc_link_get_highest_encoding_format(dpia_link[j])); + if (is_new_slot || (dpia_link_sets[j].link == link)) { + dpia_link_sets[j].tunnel_settings = dp_tunnel_settings; + dpia_link_sets[j].required_bw += timing_bw; + break; + } } } - /* Include dp overheads */ - for (uint8_t i = 0; i < num_dpias; ++i) { - int dp_overhead = 0; - - dp_overhead = link_dp_dpia_get_dp_overhead_in_dp_tunneling(dpia_link[i]); - bw_needed[i] += dp_overhead; - } + if (link_count && link_dpia_validate_dp_tunnel_bandwidth(dpia_link_sets, link_count) == false) + result = DC_FAIL_DP_TUNNEL_BW_VALIDATE; - return dpia_validate_usb4_bw(dpia_link, bw_needed, num_dpias); + return result; } struct dp_audio_layout_config { diff --git a/drivers/gpu/drm/amd/display/dc/link/link_validation.h 
b/drivers/gpu/drm/amd/display/dc/link/link_validation.h index bf398c49c3e8..595774e76453 100644 --- a/drivers/gpu/drm/amd/display/dc/link/link_validation.h +++ b/drivers/gpu/drm/amd/display/dc/link/link_validation.h @@ -24,15 +24,15 @@ */ #ifndef __LINK_VALIDATION_H__ #define __LINK_VALIDATION_H__ -#include "link.h" +#include "link_service.h" enum dc_status link_validate_mode_timing( const struct dc_stream_state *stream, struct dc_link *link, const struct dc_crtc_timing *timing); -bool link_validate_dpia_bandwidth( - const struct dc_stream_state *stream, - const unsigned int num_streams); +enum dc_status link_validate_dp_tunnel_bandwidth( + const struct dc *dc, + const struct dc_state *new_ctx); uint32_t dp_link_bandwidth_kbps( const struct dc_link *link, const struct dc_link_settings *link_settings); diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_ddc.h b/drivers/gpu/drm/amd/display/dc/link/protocols/link_ddc.h index a3e25e55bed6..d3e6f01a6a90 100644 --- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_ddc.h +++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_ddc.h @@ -26,7 +26,7 @@ #ifndef __DAL_DDC_SERVICE_H__ #define __DAL_DDC_SERVICE_H__ -#include "link.h" +#include "link_service.h" #define AUX_POWER_UP_WA_DELAY 500 #define I2C_OVER_AUX_DEFER_WA_DELAY 70 diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_capability.c b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_capability.c index a5127c2d47ef..b12c11bd6a14 100644 --- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_capability.c +++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_capability.c @@ -385,9 +385,15 @@ bool dp_is_128b_132b_signal(struct pipe_ctx *pipe_ctx) bool dp_is_lttpr_present(struct dc_link *link) { /* Some sink devices report invalid LTTPR revision, so don't validate against that cap */ - return (dp_parse_lttpr_repeater_count(link->dpcd_caps.lttpr_caps.phy_repeater_cnt) != 0 && + uint32_t lttpr_count = dp_parse_lttpr_repeater_count(link->dpcd_caps.lttpr_caps.phy_repeater_cnt); + bool is_lttpr_present = (lttpr_count > 0 && link->dpcd_caps.lttpr_caps.max_lane_count > 0 && link->dpcd_caps.lttpr_caps.max_lane_count <= 4); + + if (lttpr_count > 0 && !is_lttpr_present) + DC_LOG_ERROR("LTTPR count is nonzero but invalid lane count reported. 
Assuming no LTTPR present.\n"); + + return is_lttpr_present; } /* in DP compliance test, DPR-120 may have @@ -1382,6 +1388,21 @@ void dpcd_set_source_specific_data(struct dc_link *link) struct dpcd_amd_signature amd_signature = {0}; struct dpcd_amd_device_id amd_device_id = {0}; + if (link->is_dds) { + uint8_t dpcd_dp_edp_backlight_mode = 0; + + /* + * Write 0 to bits 0:1 for dp_edp_backlight_mode_set register + * if platform is DDS + */ + core_link_read_dpcd(link, DP_EDP_BACKLIGHT_MODE_SET_REGISTER, + &dpcd_dp_edp_backlight_mode, sizeof(uint8_t)); + dpcd_dp_edp_backlight_mode &= ~0x3; + + core_link_write_dpcd(link, DP_EDP_BACKLIGHT_MODE_SET_REGISTER, + &dpcd_dp_edp_backlight_mode, sizeof(uint8_t)); + } + amd_device_id.device_id_byte1 = (uint8_t)(link->ctx->asic_id.chip_id); amd_device_id.device_id_byte2 = @@ -1504,8 +1525,8 @@ bool read_is_mst_supported(struct dc_link *link) return false; } - rev.raw = 0; - cap.raw = 0; + rev.raw = 0; + cap.raw = 0; st = core_link_read_dpcd(link, DP_DPCD_REV, &rev.raw, sizeof(rev)); @@ -1537,6 +1558,10 @@ static bool dpcd_read_sink_ext_caps(struct dc_link *link) return false; link->dpcd_sink_ext_caps.raw = dpcd_data; + if (link->is_dds && !link->dpcd_sink_ext_caps.bits.oled) { + link->dpcd_sink_ext_caps.raw = 0; + return false; + } if (core_link_read_dpcd(link, DP_EDP_GENERAL_CAP_2, &edp_general_cap2, 1) != DC_OK) return false; @@ -1551,6 +1576,8 @@ enum dc_status dp_retrieve_lttpr_cap(struct dc_link *link) uint8_t lttpr_dpcd_data[10] = {0}; enum dc_status status; bool is_lttpr_present; + uint32_t lttpr_count; + uint32_t closest_lttpr_offset; /* Logic to determine LTTPR support*/ bool vbios_lttpr_interop = link->dc->caps.vbios_lttpr_aware; @@ -1602,20 +1629,22 @@ enum dc_status dp_retrieve_lttpr_cap(struct dc_link *link) lttpr_dpcd_data[DP_LTTPR_ALPM_CAPABILITIES - DP_LT_TUNABLE_PHY_REPEATER_FIELD_DATA_STRUCTURE_REV]; + lttpr_count = dp_parse_lttpr_repeater_count(link->dpcd_caps.lttpr_caps.phy_repeater_cnt); + /* If this chip cap is set, at least one retimer must exist in the chain * Override count to 1 if we receive a known bad count (0 or an invalid value) */ if (((link->chip_caps & AMD_EXT_DISPLAY_PATH_CAPS__EXT_CHIP_MASK) == AMD_EXT_DISPLAY_PATH_CAPS__DP_FIXED_VS_EN) && - (dp_parse_lttpr_repeater_count(link->dpcd_caps.lttpr_caps.phy_repeater_cnt) == 0)) { + lttpr_count == 0) { /* If you see this message consistently, either the host platform has FIXED_VS flag * incorrectly configured or the sink device is returning an invalid count. */ DC_LOG_ERROR("lttpr_caps phy_repeater_cnt is 0x%x, forcing it to 0x80.", link->dpcd_caps.lttpr_caps.phy_repeater_cnt); link->dpcd_caps.lttpr_caps.phy_repeater_cnt = 0x80; + lttpr_count = 1; DC_LOG_DC("lttpr_caps forced phy_repeater_cnt = %d\n", link->dpcd_caps.lttpr_caps.phy_repeater_cnt); } - /* Attempt to train in LTTPR transparent mode if repeater count exceeds 8. 
*/ is_lttpr_present = dp_is_lttpr_present(link); DC_LOG_DC("is_lttpr_present = %d\n", is_lttpr_present); @@ -1623,11 +1652,25 @@ enum dc_status dp_retrieve_lttpr_cap(struct dc_link *link) if (is_lttpr_present) { CONN_DATA_DETECT(link, lttpr_dpcd_data, sizeof(lttpr_dpcd_data), "LTTPR Caps: "); - core_link_read_dpcd(link, DP_LTTPR_IEEE_OUI, link->dpcd_caps.lttpr_caps.lttpr_ieee_oui, sizeof(link->dpcd_caps.lttpr_caps.lttpr_ieee_oui)); - CONN_DATA_DETECT(link, link->dpcd_caps.lttpr_caps.lttpr_ieee_oui, sizeof(link->dpcd_caps.lttpr_caps.lttpr_ieee_oui), "LTTPR IEEE OUI: "); + // Identify closest LTTPR to determine if workarounds required for known embedded LTTPR + closest_lttpr_offset = dp_get_closest_lttpr_offset(lttpr_count); - core_link_read_dpcd(link, DP_LTTPR_DEVICE_ID, link->dpcd_caps.lttpr_caps.lttpr_device_id, sizeof(link->dpcd_caps.lttpr_caps.lttpr_device_id)); - CONN_DATA_DETECT(link, link->dpcd_caps.lttpr_caps.lttpr_device_id, sizeof(link->dpcd_caps.lttpr_caps.lttpr_device_id), "LTTPR Device ID: "); + core_link_read_dpcd(link, (DP_LTTPR_IEEE_OUI + closest_lttpr_offset), + link->dpcd_caps.lttpr_caps.lttpr_ieee_oui, sizeof(link->dpcd_caps.lttpr_caps.lttpr_ieee_oui)); + core_link_read_dpcd(link, (DP_LTTPR_DEVICE_ID + closest_lttpr_offset), + link->dpcd_caps.lttpr_caps.lttpr_device_id, sizeof(link->dpcd_caps.lttpr_caps.lttpr_device_id)); + + if (lttpr_count > 1) { + CONN_DATA_DETECT(link, link->dpcd_caps.lttpr_caps.lttpr_ieee_oui, sizeof(link->dpcd_caps.lttpr_caps.lttpr_ieee_oui), + "Closest LTTPR To Host's IEEE OUI: "); + CONN_DATA_DETECT(link, link->dpcd_caps.lttpr_caps.lttpr_device_id, sizeof(link->dpcd_caps.lttpr_caps.lttpr_device_id), + "Closest LTTPR To Host's LTTPR Device ID: "); + } else { + CONN_DATA_DETECT(link, link->dpcd_caps.lttpr_caps.lttpr_ieee_oui, sizeof(link->dpcd_caps.lttpr_caps.lttpr_ieee_oui), + "LTTPR IEEE OUI: "); + CONN_DATA_DETECT(link, link->dpcd_caps.lttpr_caps.lttpr_device_id, sizeof(link->dpcd_caps.lttpr_caps.lttpr_device_id), + "LTTPR Device ID: "); + } } return status; @@ -2082,13 +2125,13 @@ void detect_edp_sink_caps(struct dc_link *link) &backlight_adj_cap, sizeof(backlight_adj_cap)); link->dpcd_caps.dynamic_backlight_capable_edp = - (backlight_adj_cap & DP_EDP_DYNAMIC_BACKLIGHT_CAP) ? true:false; + (backlight_adj_cap & DP_EDP_DYNAMIC_BACKLIGHT_CAP) ? true : false; core_link_read_dpcd(link, DP_EDP_GENERAL_CAP_1, &general_edp_cap, sizeof(general_edp_cap)); link->dpcd_caps.set_power_state_capable_edp = - (general_edp_cap & DP_EDP_SET_POWER_CAP) ? true:false; + (general_edp_cap & DP_EDP_SET_POWER_CAP) ? 
true : false; set_default_brightness_aux(link); @@ -2152,6 +2195,12 @@ void detect_edp_sink_caps(struct dc_link *link) DP_EDP_MSO_LINK_CAPABILITIES, (uint8_t *)&link->dpcd_caps.mso_cap_sst_links_supported, sizeof(link->dpcd_caps.mso_cap_sst_links_supported)); + /* + * Read eDP general capability 2 + */ + core_link_read_dpcd(link, DP_EDP_GENERAL_CAP_2, + (uint8_t *)&link->dpcd_caps.dp_edp_general_cap_2, + sizeof(link->dpcd_caps.dp_edp_general_cap_2)); } bool dp_get_max_link_enc_cap(const struct dc_link *link, struct dc_link_settings *max_link_enc_cap) @@ -2463,3 +2512,40 @@ bool dp_is_sink_present(struct dc_link *link) return present; } + +uint8_t dp_get_lttpr_count(struct dc_link *link) +{ + if (dp_is_lttpr_present(link)) + return dp_parse_lttpr_repeater_count(link->dpcd_caps.lttpr_caps.phy_repeater_cnt); + + return 0; +} + +void edp_get_alpm_support(struct dc_link *link, + bool *auxless_support, + bool *auxwake_support) +{ + bool lttpr_present = dp_is_lttpr_present(link); + + if (auxless_support == NULL || auxwake_support == NULL) + return; + + *auxless_support = false; + *auxwake_support = false; + + if (!dc_is_embedded_signal(link->connector_signal)) + return; + + if (link->dpcd_caps.alpm_caps.bits.AUX_LESS_ALPM_CAP) { + if (lttpr_present) { + if (link->dpcd_caps.lttpr_caps.alpm.bits.AUX_LESS_ALPM_SUPPORTED) + *auxless_support = true; + } else + *auxless_support = true; + } + + if (link->dpcd_caps.alpm_caps.bits.AUX_WAKE_ALPM_CAP) { + if (!lttpr_present) + *auxwake_support = true; + } +} diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_capability.h b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_capability.h index 940b147cc5d4..6e17f72a752f 100644 --- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_capability.h +++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_capability.h @@ -26,7 +26,7 @@ #ifndef __DC_LINK_DP_CAPABILITY_H__ #define __DC_LINK_DP_CAPABILITY_H__ -#include "link.h" +#include "link_service.h" bool detect_dp_sink_caps(struct dc_link *link); @@ -108,4 +108,10 @@ uint32_t link_bw_kbps_from_raw_frl_link_rate_data(uint8_t bw); bool dp_overwrite_extended_receiver_cap(struct dc_link *link); +uint8_t dp_get_lttpr_count(struct dc_link *link); + +void edp_get_alpm_support(struct dc_link *link, + bool *auxless_support, + bool *auxwake_support); + #endif /* __DC_LINK_DP_CAPABILITY_H__ */ diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia.c b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia.c index 22bfdced64ab..9b2f1a7da1d1 100644 --- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia.c +++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia.c @@ -75,12 +75,15 @@ enum dc_status dpcd_get_tunneling_device_data(struct dc_link *link) if (link->dpcd_caps.usb4_dp_tun_info.dp_tun_cap.bits.dpia_bw_alloc) { status = core_link_read_dpcd(link, USB4_DRIVER_BW_CAPABILITY, - dpcd_dp_tun_data, 1); + dpcd_dp_tun_data, 2); if (status != DC_OK) goto err; - link->dpcd_caps.usb4_dp_tun_info.driver_bw_cap.raw = dpcd_dp_tun_data[0]; + link->dpcd_caps.usb4_dp_tun_info.driver_bw_cap.raw = + dpcd_dp_tun_data[USB4_DRIVER_BW_CAPABILITY - USB4_DRIVER_BW_CAPABILITY]; + link->dpcd_caps.usb4_dp_tun_info.dpia_tunnel_info.raw = + dpcd_dp_tun_data[DP_IN_ADAPTER_TUNNEL_INFO - USB4_DRIVER_BW_CAPABILITY]; } DC_LOG_DEBUG("%s: Link[%d] DP tunneling support (RouterId=%d AdapterId=%d) " @@ -155,8 +158,14 @@ void link_decide_dp_tunnel_settings(struct dc_stream_state *stream, 
link->dpcd_caps.usb4_dp_tun_info.dp_tun_cap.bits.dp_tunneling; if (link->dpcd_caps.usb4_dp_tun_info.dp_tun_cap.bits.dpia_bw_alloc - && link->dpcd_caps.usb4_dp_tun_info.driver_bw_cap.bits.driver_bw_alloc_support) + && link->dpcd_caps.usb4_dp_tun_info.driver_bw_cap.bits.driver_bw_alloc_support) { dp_tunnel_setting->should_use_dp_bw_allocation = true; + dp_tunnel_setting->cm_id = link->dpcd_caps.usb4_dp_tun_info.usb4_driver_id & 0x0F; + dp_tunnel_setting->group_id = link->dpcd_caps.usb4_dp_tun_info.dpia_tunnel_info.bits.group_id; + dp_tunnel_setting->estimated_bw = link->dpia_bw_alloc_config.estimated_bw; + dp_tunnel_setting->allocated_bw = link->dpia_bw_alloc_config.allocated_bw; + dp_tunnel_setting->bw_granularity = link->dpia_bw_alloc_config.bw_granularity; + } } } diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia.h b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia.h index a61edfc9ca7a..7cd03fa4892b 100644 --- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia.h +++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia.h @@ -27,7 +27,7 @@ #ifndef __DC_LINK_DPIA_H__ #define __DC_LINK_DPIA_H__ -#include "link.h" +#include "link_service.h" /* Read tunneling device capability from DPCD and update link capability * accordingly. diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia_bw.c b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia_bw.c index 3af7564a84f1..8a3c18ae97a7 100644 --- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia_bw.c +++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia_bw.c @@ -35,6 +35,8 @@ #define Kbps_TO_Gbps (1000 * 1000) +#define MST_TIME_SLOT_COUNT 64 + // ------------------------------------------------------------------ // PRIVATE FUNCTIONS // ------------------------------------------------------------------ @@ -46,8 +48,7 @@ */ static bool link_dp_is_bw_alloc_available(struct dc_link *link) { - return (link && link->hpd_status - && link->dpcd_caps.usb4_dp_tun_info.dp_tun_cap.bits.dp_tunneling + return (link && link->dpcd_caps.usb4_dp_tun_info.dp_tun_cap.bits.dp_tunneling && link->dpcd_caps.usb4_dp_tun_info.dp_tun_cap.bits.dpia_bw_alloc && link->dpcd_caps.usb4_dp_tun_info.driver_bw_cap.bits.driver_bw_alloc_support); } @@ -160,78 +161,6 @@ static void retrieve_usb4_dp_bw_allocation_info(struct dc_link *link) link->dpia_bw_alloc_config.nrd_max_lane_count); } -static uint8_t get_lowest_dpia_index(struct dc_link *link) -{ - const struct dc *dc_struct = link->dc; - uint8_t idx = 0xFF; - int i; - - for (i = 0; i < MAX_LINKS; ++i) { - - if (!dc_struct->links[i] || - dc_struct->links[i]->ep_type != DISPLAY_ENDPOINT_USB4_DPIA) - continue; - - if (idx > dc_struct->links[i]->link_index) { - idx = dc_struct->links[i]->link_index; - break; - } - } - - return idx; -} - -/* - * Get the maximum dp tunnel banwidth of host router - * - * @dc: pointer to the dc struct instance - * @hr_index: host router index - * - * return: host router maximum dp tunnel bandwidth - */ -static int get_host_router_total_dp_tunnel_bw(const struct dc *dc, uint8_t hr_index) -{ - uint8_t lowest_dpia_index = get_lowest_dpia_index(dc->links[0]); - uint8_t hr_index_temp = 0; - struct dc_link *link_dpia_primary, *link_dpia_secondary; - int total_bw = 0; - - for (uint8_t i = 0; i < MAX_LINKS - 1; ++i) { - - if (!dc->links[i] || dc->links[i]->ep_type != DISPLAY_ENDPOINT_USB4_DPIA) - continue; - - hr_index_temp = (dc->links[i]->link_index - lowest_dpia_index) / 2; - - if (hr_index_temp == hr_index) { - 
link_dpia_primary = dc->links[i]; - link_dpia_secondary = dc->links[i + 1]; - - /** - * If BW allocation enabled on both DPIAs, then - * HR BW = Estimated(dpia_primary) + Allocated(dpia_secondary) - * otherwise HR BW = Estimated(bw alloc enabled dpia) - */ - if ((link_dpia_primary->hpd_status && - link_dpia_primary->dpia_bw_alloc_config.bw_alloc_enabled) && - (link_dpia_secondary->hpd_status && - link_dpia_secondary->dpia_bw_alloc_config.bw_alloc_enabled)) { - total_bw += link_dpia_primary->dpia_bw_alloc_config.estimated_bw + - link_dpia_secondary->dpia_bw_alloc_config.allocated_bw; - } else if (link_dpia_primary->hpd_status && - link_dpia_primary->dpia_bw_alloc_config.bw_alloc_enabled) { - total_bw = link_dpia_primary->dpia_bw_alloc_config.estimated_bw; - } else if (link_dpia_secondary->hpd_status && - link_dpia_secondary->dpia_bw_alloc_config.bw_alloc_enabled) { - total_bw += link_dpia_secondary->dpia_bw_alloc_config.estimated_bw; - } - break; - } - } - - return total_bw; -} - /* * Cleanup function for when the dpia is unplugged to reset struct * and perform any required clean up @@ -251,32 +180,40 @@ static void dpia_bw_alloc_unplug(struct dc_link *link) static void link_dpia_send_bw_alloc_request(struct dc_link *link, int req_bw) { - uint8_t requested_bw; - uint32_t temp; + uint8_t request_reg_val; + uint32_t temp, request_bw; - /* Error check whether request bw greater than allocated */ - if (req_bw > link->dpia_bw_alloc_config.estimated_bw) { - DC_LOG_ERROR("%s: Request BW greater than estimated BW for link(%d)\n", - __func__, link->link_index); - req_bw = link->dpia_bw_alloc_config.estimated_bw; + if (link->dpia_bw_alloc_config.bw_granularity == 0) { + DC_LOG_ERROR("%s: Link[%d]: bw_granularity is zero!", __func__, link->link_index); + return; } temp = req_bw * link->dpia_bw_alloc_config.bw_granularity; - requested_bw = temp / Kbps_TO_Gbps; + request_reg_val = temp / Kbps_TO_Gbps; /* Always make sure to add more to account for floating points */ if (temp % Kbps_TO_Gbps) - ++requested_bw; + ++request_reg_val; - /* Error check whether requested and allocated are equal */ - req_bw = requested_bw * (Kbps_TO_Gbps / link->dpia_bw_alloc_config.bw_granularity); - if (req_bw && (req_bw == link->dpia_bw_alloc_config.allocated_bw)) { - DC_LOG_ERROR("%s: Request BW equals to allocated BW for link(%d)\n", - __func__, link->link_index); + request_bw = request_reg_val * (Kbps_TO_Gbps / link->dpia_bw_alloc_config.bw_granularity); + + if (request_bw > link->dpia_bw_alloc_config.estimated_bw) { + DC_LOG_ERROR("%s: Link[%d]: Request BW (%d --> %d) > Estimated BW (%d)... 
Set to Estimated BW!", + __func__, link->link_index, + req_bw, request_bw, link->dpia_bw_alloc_config.estimated_bw); + req_bw = link->dpia_bw_alloc_config.estimated_bw; + + temp = req_bw * link->dpia_bw_alloc_config.bw_granularity; + request_reg_val = temp / Kbps_TO_Gbps; + if (temp % Kbps_TO_Gbps) + ++request_reg_val; } + link->dpia_bw_alloc_config.allocated_bw = request_bw; + DC_LOG_DC("%s: Link[%d]: Request BW: %d", __func__, link->link_index, request_bw); + core_link_write_dpcd(link, REQUESTED_BW, - &requested_bw, + &request_reg_val, sizeof(uint8_t)); } @@ -288,33 +225,40 @@ bool link_dpia_enable_usb4_dp_bw_alloc_mode(struct dc_link *link) bool ret = false; uint8_t val; - if (link->hpd_status) { - val = DPTX_BW_ALLOC_MODE_ENABLE | DPTX_BW_ALLOC_UNMASK_IRQ; + if (link->dc->debug.dpia_debug.bits.enable_bw_allocation_mode == false) { + DC_LOG_DEBUG("%s: link[%d] DPTX BW allocation mode disabled", __func__, link->link_index); + return false; + } - if (core_link_write_dpcd(link, DPTX_BW_ALLOCATION_MODE_CONTROL, &val, sizeof(uint8_t)) == DC_OK) { - DC_LOG_DEBUG("%s: link[%d] DPTX BW allocation mode enabled", __func__, link->link_index); + val = DPTX_BW_ALLOC_MODE_ENABLE | DPTX_BW_ALLOC_UNMASK_IRQ; - retrieve_usb4_dp_bw_allocation_info(link); + if (core_link_write_dpcd(link, DPTX_BW_ALLOCATION_MODE_CONTROL, &val, sizeof(uint8_t)) == DC_OK) { + DC_LOG_DEBUG("%s: link[%d] DPTX BW allocation mode enabled", __func__, link->link_index); - if (link->dpia_bw_alloc_config.nrd_max_link_rate && link->dpia_bw_alloc_config.nrd_max_lane_count) { - link->reported_link_cap.link_rate = link->dpia_bw_alloc_config.nrd_max_link_rate; - link->reported_link_cap.lane_count = link->dpia_bw_alloc_config.nrd_max_lane_count; - } + retrieve_usb4_dp_bw_allocation_info(link); - link->dpia_bw_alloc_config.bw_alloc_enabled = true; - ret = true; + if ( + link->dpia_bw_alloc_config.nrd_max_link_rate + && link->dpia_bw_alloc_config.nrd_max_lane_count) { + link->reported_link_cap.link_rate = link->dpia_bw_alloc_config.nrd_max_link_rate; + link->reported_link_cap.lane_count = link->dpia_bw_alloc_config.nrd_max_lane_count; + } + + link->dpia_bw_alloc_config.bw_alloc_enabled = true; + ret = true; + if (link->dc->debug.dpia_debug.bits.enable_usb4_bw_zero_alloc_patch) { /* - * During DP tunnel creation, CM preallocates BW and reduces estimated BW of other - * DPIA. CM release preallocation only when allocation is complete. Do zero alloc - * to make the CM to release preallocation and update estimated BW correctly for - * all DPIAs per host router + * During DP tunnel creation, the CM preallocates BW + * and reduces the estimated BW of other DPIAs. + * The CM releases the preallocation only when the allocation is complete. + * Perform a zero allocation to make the CM release the preallocation + * and correctly update the estimated BW for all DPIAs per host router. 
*/ - // TODO: Zero allocation can be removed once the MSFT CM fix has been released link_dp_dpia_allocate_usb4_bandwidth_for_stream(link, 0); - } else - DC_LOG_DEBUG("%s: link[%d] failed to enable DPTX BW allocation mode", __func__, link->link_index); - } + } + } else + DC_LOG_DEBUG("%s: link[%d] failed to enable DPTX BW allocation mode", __func__, link->link_index); return ret; } @@ -329,19 +273,17 @@ bool link_dpia_enable_usb4_dp_bw_alloc_mode(struct dc_link *link) */ void link_dp_dpia_handle_bw_alloc_status(struct dc_link *link, uint8_t status) { + link->dpia_bw_alloc_config.estimated_bw = get_estimated_bw(link); + if (status & DP_TUNNELING_BW_REQUEST_SUCCEEDED) { DC_LOG_DEBUG("%s: BW Allocation request succeeded on link(%d)", __func__, link->link_index); } else if (status & DP_TUNNELING_BW_REQUEST_FAILED) { - link->dpia_bw_alloc_config.estimated_bw = get_estimated_bw(link); - DC_LOG_DEBUG("%s: BW Allocation request failed on link(%d) allocated/estimated BW=%d", __func__, link->link_index, link->dpia_bw_alloc_config.estimated_bw); link_dpia_send_bw_alloc_request(link, link->dpia_bw_alloc_config.estimated_bw); } else if (status & DP_TUNNELING_ESTIMATED_BW_CHANGED) { - link->dpia_bw_alloc_config.estimated_bw = get_estimated_bw(link); - DC_LOG_DEBUG("%s: Estimated BW changed on link(%d) new estimated BW=%d", __func__, link->link_index, link->dpia_bw_alloc_config.estimated_bw); } @@ -359,24 +301,25 @@ void dpia_handle_usb4_bandwidth_allocation_for_link(struct dc_link *link, int pe { if (link && link->dpcd_caps.usb4_dp_tun_info.dp_tun_cap.bits.dp_tunneling && link->dpia_bw_alloc_config.bw_alloc_enabled) { - //1. Hot Plug - if (link->hpd_status && peak_bw > 0) { + if (peak_bw > 0) { // If DP over USB4 then we need to check BW allocation link->dpia_bw_alloc_config.link_max_bw = peak_bw; link_dpia_send_bw_alloc_request(link, peak_bw); - } - //2. 
Cold Unplug - else if (!link->hpd_status) + } else dpia_bw_alloc_unplug(link); } } void link_dp_dpia_allocate_usb4_bandwidth_for_stream(struct dc_link *link, int req_bw) { - DC_LOG_DEBUG("%s: ENTER: link(%d), hpd_status(%d), current allocated_bw(%d), req_bw(%d)\n", + link->dpia_bw_alloc_config.estimated_bw = get_estimated_bw(link); + + DC_LOG_DEBUG("%s: ENTER: link[%d] hpd(%d) Allocated_BW: %d Estimated_BW: %d Req_BW: %d", __func__, link->link_index, link->hpd_status, - link->dpia_bw_alloc_config.allocated_bw, req_bw); + link->dpia_bw_alloc_config.allocated_bw, + link->dpia_bw_alloc_config.estimated_bw, + req_bw); if (link_dp_is_bw_alloc_available(link)) link_dpia_send_bw_alloc_request(link, req_bw); @@ -384,73 +327,116 @@ void link_dp_dpia_allocate_usb4_bandwidth_for_stream(struct dc_link *link, int r DC_LOG_DEBUG("%s: BW Allocation mode not available", __func__); } -bool dpia_validate_usb4_bw(struct dc_link **link, int *bw_needed_per_dpia, const unsigned int num_dpias) +uint32_t link_dpia_get_dp_overhead(const struct dc_link *link) { - bool ret = true; - int bw_needed_per_hr[MAX_HR_NUM] = { 0, 0 }, host_router_total_dp_bw = 0; - uint8_t lowest_dpia_index, i, hr_index; + uint32_t link_dp_overhead = 0; - if (!num_dpias || num_dpias > MAX_DPIA_NUM) - return ret; + if ((link->type == dc_connection_mst_branch) && + !link->dpcd_caps.channel_coding_cap.bits.DP_128b_132b_SUPPORTED) { + /* For 8b/10b encoding: MTP is 64 time slots long, slot 0 is used for MTPH + * MST overhead is 1/64 of link bandwidth (excluding any overhead) + */ + const struct dc_link_settings *link_cap = dc_link_get_link_cap(link); - lowest_dpia_index = get_lowest_dpia_index(link[0]); + if (link_cap) { + uint32_t link_bw_in_kbps = (uint32_t)link_cap->link_rate * + (uint32_t)link_cap->lane_count * + LINK_RATE_REF_FREQ_IN_KHZ * 8; + link_dp_overhead = (link_bw_in_kbps / MST_TIME_SLOT_COUNT) + + ((link_bw_in_kbps % MST_TIME_SLOT_COUNT) ? 1 : 0); + } + } - /* get total Host Router BW with granularity for the given modes */ - for (i = 0; i < num_dpias; ++i) { - int granularity_Gbps = 0; - int bw_granularity = 0; + return link_dp_overhead; +} - if (!link[i]->dpia_bw_alloc_config.bw_alloc_enabled) - continue; +/* + * Aggregates the DPIA bandwidth usage for the respective USB4 Router. + * And then validate if the required bandwidth is within the router's capacity. + * + * @dc_validation_dpia_set: pointer to the dc_validation_dpia_set + * @count: number of DPIA validation sets + * + * return: true if validation is succeeded + */ +bool link_dpia_validate_dp_tunnel_bandwidth(const struct dc_validation_dpia_set *dpia_link_sets, uint8_t count) +{ + uint32_t granularity_Gbps; + const struct dc_link *link; + uint32_t link_bw_granularity; + uint32_t link_required_bw; + struct usb4_router_validation_set router_sets[MAX_HOST_ROUTERS_NUM] = { 0 }; + uint8_t i; + bool is_success = true; + uint8_t router_count = 0; + + if ((dpia_link_sets == NULL) || (count == 0)) + return is_success; + + // Iterate through each DP tunneling link (DPIA). + // Aggregate its bandwidth requirements onto the respective USB4 router. 
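+	// For MST links the MTPH overhead is added first, then each link's required
+	// BW is rounded up to its DPIA allocation granularity before being charged
+	// to the router, i.e.
+	//   granularity_Gbps    = Kbps_TO_Gbps / bw_granularity
+	//   link_bw_granularity = DIV_ROUND_UP(link_required_bw, granularity_Gbps) * granularity_Gbps
+	// so a partially used granule still counts as fully consumed.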
+ for (i = 0; i < count; i++) { + link = dpia_link_sets[i].link; + link_required_bw = dpia_link_sets[i].required_bw; + const struct dc_tunnel_settings *dp_tunnel_settings = dpia_link_sets[i].tunnel_settings; + + if ((link == NULL) || (dp_tunnel_settings == NULL) || dp_tunnel_settings->bw_granularity == 0) + break; - if (link[i]->link_index < lowest_dpia_index) - continue; + if (link->type == dc_connection_mst_branch) + link_required_bw += link_dpia_get_dp_overhead(link); - granularity_Gbps = (Kbps_TO_Gbps / link[i]->dpia_bw_alloc_config.bw_granularity); - bw_granularity = (bw_needed_per_dpia[i] / granularity_Gbps) * granularity_Gbps + - ((bw_needed_per_dpia[i] % granularity_Gbps) ? granularity_Gbps : 0); + granularity_Gbps = (Kbps_TO_Gbps / dp_tunnel_settings->bw_granularity); + link_bw_granularity = (link_required_bw / granularity_Gbps) * granularity_Gbps + + ((link_required_bw % granularity_Gbps) ? granularity_Gbps : 0); - hr_index = (link[i]->link_index - lowest_dpia_index) / 2; - bw_needed_per_hr[hr_index] += bw_granularity; - } + // Find or add the USB4 router associated with the current DPIA link + for (uint8_t j = 0; j < MAX_HOST_ROUTERS_NUM; j++) { + if (router_sets[j].is_valid == false) { + router_sets[j].is_valid = true; + router_sets[j].cm_id = dp_tunnel_settings->cm_id; + router_count++; + } + + if (router_sets[j].cm_id == dp_tunnel_settings->cm_id) { + uint32_t remaining_bw = + dp_tunnel_settings->estimated_bw - dp_tunnel_settings->allocated_bw; + + router_sets[j].allocated_bw += dp_tunnel_settings->allocated_bw; + + if (remaining_bw > router_sets[j].remaining_bw) + router_sets[j].remaining_bw = remaining_bw; - /* validate against each Host Router max BW */ - for (hr_index = 0; hr_index < MAX_HR_NUM; ++hr_index) { - if (bw_needed_per_hr[hr_index]) { - host_router_total_dp_bw = get_host_router_total_dp_tunnel_bw(link[0]->dc, hr_index); - if (bw_needed_per_hr[hr_index] > host_router_total_dp_bw) { - ret = false; + // Get the max estimated BW within the same CM_ID + if (dp_tunnel_settings->estimated_bw > router_sets[j].estimated_bw) + router_sets[j].estimated_bw = dp_tunnel_settings->estimated_bw; + + router_sets[j].required_bw += link_bw_granularity; + router_sets[j].dpia_count++; break; } } } - return ret; -} + // Validate bandwidth for each unique router found. + for (i = 0; i < router_count; i++) { + uint32_t total_bw = 0; -int link_dp_dpia_get_dp_overhead_in_dp_tunneling(struct dc_link *link) -{ - int dp_overhead = 0, link_mst_overhead = 0; + if (router_sets[i].is_valid == false) + break; - if (!link_dp_is_bw_alloc_available(link)) - return dp_overhead; + // Determine the total available bandwidth for the current router based on aggregated data + if ((router_sets[i].dpia_count == 1) || (router_sets[i].allocated_bw == 0)) + total_bw = router_sets[i].estimated_bw; + else + total_bw = router_sets[i].allocated_bw + router_sets[i].remaining_bw; - /* if its mst link, add MTPH overhead */ - if ((link->type == dc_connection_mst_branch) && - !link->dpcd_caps.channel_coding_cap.bits.DP_128b_132b_SUPPORTED) { - /* For 8b/10b encoding: MTP is 64 time slots long, slot 0 is used for MTPH - * MST overhead is 1/64 of link bandwidth (excluding any overhead) - */ - const struct dc_link_settings *link_cap = - dc_link_get_link_cap(link); - uint32_t link_bw_in_kbps = (uint32_t)link_cap->link_rate * - (uint32_t)link_cap->lane_count * - LINK_RATE_REF_FREQ_IN_KHZ * 8; - link_mst_overhead = (link_bw_in_kbps / 64) + ((link_bw_in_kbps % 64) ? 
1 : 0); + if (router_sets[i].required_bw > total_bw) { + is_success = false; + break; + } } - /* add all the overheads */ - dp_overhead = link_mst_overhead; - - return dp_overhead; + return is_success; } + diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia_bw.h b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia_bw.h index 801965b5f9a4..30cd8e2b9d35 100644 --- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia_bw.h +++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia_bw.h @@ -26,12 +26,8 @@ #ifndef DC_INC_LINK_DP_DPIA_BW_H_ #define DC_INC_LINK_DP_DPIA_BW_H_ -#include "link.h" +#include "link_service.h" -/* Number of Host Routers per motherboard is 2 */ -#define MAX_HR_NUM 2 -/* Number of DPIA per host router is 2 */ -#define MAX_DPIA_NUM (MAX_HR_NUM * 2) /* * Host Router BW type @@ -42,6 +38,16 @@ enum bw_type { HOST_ROUTER_BW_INVALID, }; +struct usb4_router_validation_set { + bool is_valid; + uint8_t cm_id; + uint8_t dpia_count; + uint32_t required_bw; + uint32_t allocated_bw; + uint32_t estimated_bw; + uint32_t remaining_bw; +}; + /* * Enable USB4 DP BW allocation mode * @@ -74,25 +80,13 @@ void link_dp_dpia_allocate_usb4_bandwidth_for_stream(struct dc_link *link, int r void dpia_handle_usb4_bandwidth_allocation_for_link(struct dc_link *link, int peak_bw); /* - * Handle the validation of total BW here and confirm that the bw used by each - * DPIA doesn't exceed available BW for each host router (HR) - * - * @link[]: array of link pointer to all possible DPIA links - * @bw_needed[]: bw needed for each DPIA link based on timing - * @num_dpias: Number of DPIAs for the above 2 arrays. Should always be <= MAX_DPIA_NUM - * - * return: TRUE if bw used by DPIAs doesn't exceed available BW else return FALSE - */ -bool dpia_validate_usb4_bw(struct dc_link **link, int *bw_needed, const unsigned int num_dpias); - -/* * Obtain all the DP overheads in dp tunneling for the dpia link * * @link: pointer to the dc_link struct instance * * return: DP overheads in DP tunneling */ -int link_dp_dpia_get_dp_overhead_in_dp_tunneling(struct dc_link *link); +uint32_t link_dpia_get_dp_overhead(const struct dc_link *link); /* * Handle DP BW allocation status register @@ -104,4 +98,15 @@ int link_dp_dpia_get_dp_overhead_in_dp_tunneling(struct dc_link *link); */ void link_dp_dpia_handle_bw_alloc_status(struct dc_link *link, uint8_t status); +/* + * Aggregates the DPIA bandwidth usage for the respective USB4 Router. 
+ * + * @dc_validation_dpia_set: pointer to the dc_validation_dpia_set + * @count: number of DPIA validation sets + * + * return: true if validation is succeeded + */ +bool link_dpia_validate_dp_tunnel_bandwidth(const struct dc_validation_dpia_set *dpia_link_sets, uint8_t count); + #endif /* DC_INC_LINK_DP_DPIA_BW_H_ */ + diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_irq_handler.h b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_irq_handler.h index ac33730fedd4..87516fb3b45a 100644 --- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_irq_handler.h +++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_irq_handler.h @@ -26,7 +26,7 @@ #ifndef __DC_LINK_DP_IRQ_HANDLER_H__ #define __DC_LINK_DP_IRQ_HANDLER_H__ -#include "link.h" +#include "link_service.h" bool dp_parse_link_loss_status( struct dc_link *link, union hpd_irq_data *hpd_irq_dpcd_data); diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_phy.h b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_phy.h index ab1c1f8f1f8b..58e154494582 100644 --- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_phy.h +++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_phy.h @@ -26,7 +26,7 @@ #ifndef __DC_LINK_DP_PHY_H__ #define __DC_LINK_DP_PHY_H__ -#include "link.h" +#include "link_service.h" void dp_enable_link_phy( struct dc_link *link, const struct link_resource *link_res, diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training.c b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training.c index 2dc1a660e504..08e2b572e0ff 100644 --- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training.c +++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training.c @@ -1018,7 +1018,12 @@ static enum link_training_result dpcd_exit_training_mode(struct dc_link *link, e { enum dc_status status; uint8_t sink_status = 0; - uint8_t i; + uint32_t i; + uint8_t lttpr_count = dp_parse_lttpr_repeater_count(link->dpcd_caps.lttpr_caps.phy_repeater_cnt); + uint32_t intra_hop_disable_time_ms = (lttpr_count > 0 ? 
lttpr_count * 300 : 10); + + // Each hop could theoretically take over 256ms (max 128b/132b AUX RD INTERVAL) + // To be safe, allow 300ms per LTTPR and 10ms for no LTTPR case /* clear training pattern set */ status = dpcd_set_training_pattern(link, DP_TRAINING_PATTERN_VIDEOIDLE); @@ -1028,7 +1033,7 @@ static enum link_training_result dpcd_exit_training_mode(struct dc_link *link, e if (encoding == DP_128b_132b_ENCODING) { /* poll for intra-hop disable */ - for (i = 0; i < 10; i++) { + for (i = 0; i < intra_hop_disable_time_ms; i++) { if ((core_link_read_dpcd(link, DP_SINK_STATUS, &sink_status, 1) == DC_OK) && (sink_status & DP_INTRA_HOP_AUX_REPLY_INDICATION) == 0) break; @@ -1724,6 +1729,15 @@ bool perform_link_training_with_retries( break; } + if (link->ep_type == DISPLAY_ENDPOINT_USB4_DPIA && + stream->signal == SIGNAL_TYPE_DISPLAY_PORT_MST && + !link->dc->config.enable_dpia_pre_training) { + if (j == (attempts - 1)) + do_fallback = true; + else + do_fallback = false; + } + if (j == (attempts - 1)) { DC_LOG_WARNING( "%s: Link(%d) training attempt %u of %d failed @ rate(%d) x lane(%d) @ spread = %x : fail reason:(%d)\n", diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training.h b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training.h index 574b083e0936..ce52de22ab7a 100644 --- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training.h +++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training.h @@ -26,7 +26,7 @@ #ifndef __DC_LINK_DP_TRAINING_H__ #define __DC_LINK_DP_TRAINING_H__ -#include "link.h" +#include "link_service.h" bool perform_link_training_with_retries( const struct dc_link_settings *link_setting, diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dpcd.h b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dpcd.h index 08d787a1e451..c2717c678c72 100644 --- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dpcd.h +++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dpcd.h @@ -25,7 +25,7 @@ #ifndef __LINK_DPCD_H__ #define __LINK_DPCD_H__ -#include "link.h" +#include "link_service.h" #include "dpcd_defs.h" enum dc_status core_link_read_dpcd( diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.c b/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.c index da74c2b5854f..5e806edbb9f6 100644 --- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.c +++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.c @@ -161,6 +161,9 @@ bool edp_set_backlight_level_nits(struct dc_link *link, link->connector_signal != SIGNAL_TYPE_DISPLAY_PORT)) return false; + if (link->is_dds && !link->dpcd_caps.panel_luminance_control) + return true; + // use internal backlight control if dmub capabilities are not present if (link->backlight_control_type == BACKLIGHT_CONTROL_VESA_AUX && !link->dc->caps.dmub_caps.aux_backlight_support) { @@ -173,6 +176,15 @@ bool edp_set_backlight_level_nits(struct dc_link *link, target_luminance = (struct target_luminance_value *)&backlight_millinits; + //make sure we disable AMD ABC first. 
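+	// Presumably this keeps only one backlight control path active: the
+	// AMD-specific ABC enable is cleared before the VESA AUX luminance
+	// registers are programmed.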
+ core_link_read_dpcd(link, DP_SOURCE_BACKLIGHT_CONTROL, + &backlight_enable, sizeof(uint8_t)); + if (backlight_enable) { + backlight_enable = 0; + core_link_write_dpcd(link, DP_SOURCE_BACKLIGHT_CONTROL, + &backlight_enable, 1); + } + core_link_read_dpcd(link, DP_EDP_BACKLIGHT_MODE_SET_REGISTER, &backlight_enable, sizeof(uint8_t)); @@ -193,10 +205,22 @@ bool edp_set_backlight_level_nits(struct dc_link *link, *(uint16_t *)&dpcd_backlight_set.backlight_transition_time_ms = (uint16_t)transition_time_in_ms; uint8_t backlight_control = isHDR ? 1 : 0; + uint8_t backlight_enable = 0; + // OLEDs have no PWM, they can only use AUX if (link->dpcd_sink_ext_caps.bits.oled == 1) backlight_control = 1; + //make sure we disable VESA ABC first. + core_link_read_dpcd(link, DP_EDP_BACKLIGHT_MODE_SET_REGISTER, + &backlight_enable, sizeof(uint8_t)); + + if (backlight_enable & DP_EDP_PANEL_LUMINANCE_CONTROL_ENABLE) { + backlight_enable &= ~DP_EDP_PANEL_LUMINANCE_CONTROL_ENABLE; + core_link_write_dpcd(link, DP_EDP_BACKLIGHT_MODE_SET_REGISTER, + &backlight_enable, sizeof(backlight_enable)); + } + if (core_link_write_dpcd(link, DP_SOURCE_BACKLIGHT_LEVEL, (uint8_t *)(&dpcd_backlight_set), sizeof(dpcd_backlight_set)) != DC_OK) @@ -222,6 +246,8 @@ bool edp_get_backlight_level_nits(struct dc_link *link, link->connector_signal != SIGNAL_TYPE_DISPLAY_PORT)) return false; + if (link->is_dds) + return false; if (!core_link_read_dpcd(link, DP_SOURCE_BACKLIGHT_CURRENT_PEAK, dpcd_backlight_get.raw, sizeof(union dpcd_source_backlight_get))) @@ -248,6 +274,8 @@ bool edp_backlight_enable_aux(struct dc_link *link, bool enable) link->connector_signal != SIGNAL_TYPE_DISPLAY_PORT)) return false; + if (link->is_dds) + return true; if (core_link_write_dpcd(link, DP_SOURCE_BACKLIGHT_ENABLE, &backlight_enable, 1) != DC_OK) return false; @@ -675,6 +703,20 @@ bool edp_setup_psr(struct dc_link *link, if (!link) return false; + /* This is a workaround: some vendors require the source to + * read the PSR cap; otherwise, the vendor's PSR feature will + * fall back to its default behavior, causing a misconfiguration + * of this feature. + */ + if (link->panel_config.psr.read_psrcap_again) { + dm_helpers_dp_read_dpcd( + link->ctx, + link, + DP_PSR_SUPPORT, + &link->dpcd_caps.psr_info.psr_version, + sizeof(link->dpcd_caps.psr_info.psr_version)); + } + //Clear PSR cfg memset(&psr_configuration, 0, sizeof(psr_configuration)); dm_helpers_dp_write_dpcd( @@ -842,6 +884,8 @@ bool edp_setup_psr(struct dc_link *link, psr_context->dsc_slice_height = psr_config->dsc_slice_height; + psr_context->os_request_force_ffu = psr_config->os_request_force_ffu; + if (psr) { link->psr_settings.psr_feature_enabled = psr->funcs->psr_copy_settings(psr, link, psr_context, panel_inst); @@ -1001,6 +1045,8 @@ bool edp_setup_replay(struct dc_link *link, const struct dc_stream_state *stream replay_context.line_time_in_ns = lineTimeInNs; + replay_context.os_request_force_ffu = link->replay_settings.config.os_request_force_ffu; + link->replay_settings.replay_feature_enabled = replay->funcs->replay_copy_settings(replay, link, &replay_context, panel_inst); if (link->replay_settings.replay_feature_enabled) { @@ -1014,7 +1060,13 @@ bool edp_setup_replay(struct dc_link *link, const struct dc_stream_state *stream (uint8_t *)&(replay_config.raw), sizeof(uint8_t)); memset(&alpm_config, 0, sizeof(alpm_config)); - alpm_config.bits.ENABLE = 1; + alpm_config.bits.ENABLE = link->replay_settings.config.alpm_mode != DC_ALPM_UNSUPPORTED ? 
1 : 0; + + if (link->replay_settings.config.alpm_mode == DC_ALPM_AUXLESS) { + alpm_config.bits.ALPM_MODE_SEL = 1; + alpm_config.bits.ACDS_PERIOD_DURATION = 0; + } + dm_helpers_dp_write_dpcd( link->ctx, link, @@ -1173,6 +1225,16 @@ int edp_get_target_backlight_pwm(const struct dc_link *link) return (int) abm->funcs->get_target_backlight(abm); } +bool is_smartmux_suported(struct dc_link *link) +{ + if (link->dc->caps.is_apu) + return false; + if (!link->dc->config.smart_mux_version) + return false; + + return true; +} + static void edp_set_assr_enable(const struct dc *pDC, struct dc_link *link, struct link_resource *link_res, bool enable) { diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.h b/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.h index bcfa6ac5d4e7..62a6344e613e 100644 --- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.h +++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.h @@ -25,11 +25,12 @@ #ifndef __DC_LINK_EDP_PANEL_CONTROL_H__ #define __DC_LINK_EDP_PANEL_CONTROL_H__ -#include "link.h" +#include "link_service.h" enum dp_panel_mode dp_get_panel_mode(struct dc_link *link); void dp_set_panel_mode(struct dc_link *link, enum dp_panel_mode panel_mode); bool set_default_brightness_aux(struct dc_link *link); +bool is_smartmux_suported(struct dc_link *link); void edp_panel_backlight_power_on(struct dc_link *link, bool wait_for_hpd); int edp_get_backlight_level(const struct dc_link *link); bool edp_get_backlight_level_nits(struct dc_link *link, diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_hpd.h b/drivers/gpu/drm/amd/display/dc/link/protocols/link_hpd.h index 4fb526b264f9..af529328ba17 100644 --- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_hpd.h +++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_hpd.h @@ -26,7 +26,7 @@ #ifndef __DC_LINK_HPD_H__ #define __DC_LINK_HPD_H__ -#include "link.h" +#include "link_service.h" enum hpd_source_id get_hpd_line(struct dc_link *link); /* diff --git a/drivers/gpu/drm/amd/display/dc/mmhubbub/dcn20/dcn20_mmhubbub.c b/drivers/gpu/drm/amd/display/dc/mmhubbub/dcn20/dcn20_mmhubbub.c index 259a98e4ee2c..2a422e223bf2 100644 --- a/drivers/gpu/drm/amd/display/dc/mmhubbub/dcn20/dcn20_mmhubbub.c +++ b/drivers/gpu/drm/amd/display/dc/mmhubbub/dcn20/dcn20_mmhubbub.c @@ -284,7 +284,7 @@ void mcifwb2_dump_frame(struct mcif_wb *mcif_wb, REG_UPDATE(MCIF_WB_BUFMGR_SW_CONTROL, MCIF_WB_BUFMGR_SW_LOCK, 0xf); - memcpy(dest_luma_buffer, luma_buffer, mcif_params->luma_pitch * dest_height); + memcpy(dest_luma_buffer, luma_buffer, (size_t)mcif_params->luma_pitch * dest_height); memcpy(dest_chroma_buffer, chroma_buffer, mcif_params->chroma_pitch * dest_height / 2); REG_UPDATE(MCIF_WB_BUFMGR_SW_CONTROL, MCIF_WB_BUFMGR_SW_LOCK, 0x0); diff --git a/drivers/gpu/drm/amd/display/dc/mpc/Makefile b/drivers/gpu/drm/amd/display/dc/mpc/Makefile index 1e2e66508192..5402c3529f5e 100644 --- a/drivers/gpu/drm/amd/display/dc/mpc/Makefile +++ b/drivers/gpu/drm/amd/display/dc/mpc/Makefile @@ -68,5 +68,5 @@ MPC_DCN401 = dcn401_mpc.o AMD_DAL_MPC_DCN401 = $(addprefix $(AMDDALPATH)/dc/mpc/dcn401/,$(MPC_DCN401)) AMD_DISPLAY_FILES += $(AMD_DAL_MPC_DCN401) -endif +endif diff --git a/drivers/gpu/drm/amd/display/dc/mpc/dcn32/dcn32_mpc.c b/drivers/gpu/drm/amd/display/dc/mpc/dcn32/dcn32_mpc.c index b4cea2b8cb2a..6f0e017a8ae2 100644 --- a/drivers/gpu/drm/amd/display/dc/mpc/dcn32/dcn32_mpc.c +++ b/drivers/gpu/drm/amd/display/dc/mpc/dcn32/dcn32_mpc.c @@ -30,7 +30,6 @@ #include 
"basics/conversion.h" #include "dcn10/dcn10_cm_common.h" #include "dc.h" -#include "dcn401/dcn401_mpc.h" #define REG(reg)\ mpc30->mpc_regs->reg @@ -879,7 +878,7 @@ void mpc32_set3dlut_ram10( } -static void mpc32_set_3dlut_mode( +void mpc32_set_3dlut_mode( struct mpc *mpc, enum dc_lut_mode mode, bool is_color_channel_12bits, @@ -1022,8 +1021,6 @@ static const struct mpc_funcs dcn32_mpc_funcs = { .power_on_mpc_mem_pwr = mpc3_power_on_ogam_lut, .get_mpc_out_mux = mpc1_get_mpc_out_mux, .set_bg_color = mpc1_set_bg_color, - .set_movable_cm_location = mpc401_set_movable_cm_location, - .populate_lut = mpc401_populate_lut, }; diff --git a/drivers/gpu/drm/amd/display/dc/mpc/dcn32/dcn32_mpc.h b/drivers/gpu/drm/amd/display/dc/mpc/dcn32/dcn32_mpc.h index 9622518826c9..8c9b20bcca85 100644 --- a/drivers/gpu/drm/amd/display/dc/mpc/dcn32/dcn32_mpc.h +++ b/drivers/gpu/drm/amd/display/dc/mpc/dcn32/dcn32_mpc.h @@ -391,4 +391,12 @@ void mpc32_select_3dlut_ram( enum dc_lut_mode mode, bool is_color_channel_12bits, uint32_t mpcc_id); + +void mpc32_set_3dlut_mode( + struct mpc *mpc, + enum dc_lut_mode mode, + bool is_color_channel_12bits, + bool is_lut_size17x17x17, + uint32_t mpcc_id); + #endif //__DC_MPCC_DCN32_H__ diff --git a/drivers/gpu/drm/amd/display/dc/mpc/dcn401/dcn401_mpc.c b/drivers/gpu/drm/amd/display/dc/mpc/dcn401/dcn401_mpc.c index 98cf0cbd59ba..e1a0308dee57 100644 --- a/drivers/gpu/drm/amd/display/dc/mpc/dcn401/dcn401_mpc.c +++ b/drivers/gpu/drm/amd/display/dc/mpc/dcn401/dcn401_mpc.c @@ -287,14 +287,7 @@ void mpc401_program_lut_read_write_control(struct mpc *mpc, const enum MCM_LUT_I } } -void mpc401_program_3dlut_size(struct mpc *mpc, bool is_17x17x17, int mpcc_id) -{ - struct dcn401_mpc *mpc401 = TO_DCN401_MPC(mpc); - - REG_UPDATE(MPCC_MCM_3DLUT_MODE[mpcc_id], MPCC_MCM_3DLUT_SIZE, is_17x17x17 ? 
0 : 1); -} - -static void program_gamut_remap( +void mpc_program_gamut_remap( struct mpc *mpc, unsigned int mpcc_id, const uint16_t *regval, @@ -426,7 +419,7 @@ void mpc401_set_gamut_remap( if (adjust->gamut_adjust_type != GRAPHICS_GAMUT_ADJUST_TYPE_SW) { /* Bypass / Disable if type is bypass or hw */ - program_gamut_remap(mpc, mpcc_id, NULL, + mpc_program_gamut_remap(mpc, mpcc_id, NULL, adjust->mpcc_gamut_remap_block_id, MPCC_GAMUT_REMAP_MODE_SELECT_0); } else { struct fixed31_32 arr_matrix[12]; @@ -460,12 +453,12 @@ void mpc401_set_gamut_remap( else mode_select = MPCC_GAMUT_REMAP_MODE_SELECT_2; - program_gamut_remap(mpc, mpcc_id, arr_reg_val, + mpc_program_gamut_remap(mpc, mpcc_id, arr_reg_val, adjust->mpcc_gamut_remap_block_id, mode_select); } } -static void read_gamut_remap(struct mpc *mpc, +void mpc_read_gamut_remap(struct mpc *mpc, int mpcc_id, uint16_t *regval, enum mpcc_gamut_remap_id gamut_remap_block_id, @@ -561,9 +554,9 @@ void mpc401_get_gamut_remap(struct mpc *mpc, struct mpc_grph_gamut_adjustment *adjust) { uint16_t arr_reg_val[12] = {0}; - uint32_t mode_select; + uint32_t mode_select = MPCC_GAMUT_REMAP_MODE_SELECT_0; - read_gamut_remap(mpc, mpcc_id, arr_reg_val, adjust->mpcc_gamut_remap_block_id, &mode_select); + mpc_read_gamut_remap(mpc, mpcc_id, arr_reg_val, adjust->mpcc_gamut_remap_block_id, &mode_select); if (mode_select == MPCC_GAMUT_REMAP_MODE_SELECT_0) { adjust->gamut_adjust_type = GRAPHICS_GAMUT_ADJUST_TYPE_BYPASS; @@ -611,7 +604,6 @@ static const struct mpc_funcs dcn401_mpc_funcs = { .populate_lut = mpc401_populate_lut, .program_lut_read_write_control = mpc401_program_lut_read_write_control, .program_lut_mode = mpc401_program_lut_mode, - .program_3dlut_size = mpc401_program_3dlut_size, }; diff --git a/drivers/gpu/drm/amd/display/dc/mpc/dcn401/dcn401_mpc.h b/drivers/gpu/drm/amd/display/dc/mpc/dcn401/dcn401_mpc.h index 8e35ebc603a9..fdc42f8ab3ff 100644 --- a/drivers/gpu/drm/amd/display/dc/mpc/dcn401/dcn401_mpc.h +++ b/drivers/gpu/drm/amd/display/dc/mpc/dcn401/dcn401_mpc.h @@ -221,11 +221,6 @@ void mpc401_program_lut_read_write_control( bool lut_bank_a, int mpcc_id); -void mpc401_program_3dlut_size( - struct mpc *mpc, - bool is_17x17x17, - int mpcc_id); - void mpc401_set_gamut_remap( struct mpc *mpc, int mpcc_id, @@ -241,6 +236,19 @@ void mpc401_update_3dlut_fast_load_select( int mpcc_id, int hubp_idx); +void mpc_program_gamut_remap( + struct mpc *mpc, + unsigned int mpcc_id, + const uint16_t *regval, + enum mpcc_gamut_remap_id gamut_remap_block_id, + enum mpcc_gamut_remap_mode_select mode_select); + +void mpc_read_gamut_remap(struct mpc *mpc, + int mpcc_id, + uint16_t *regval, + enum mpcc_gamut_remap_id gamut_remap_block_id, + uint32_t *mode_select); + void mpc401_update_3dlut_fast_load_select( struct mpc *mpc, int mpcc_id, diff --git a/drivers/gpu/drm/amd/display/dc/optc/dcn32/dcn32_optc.h b/drivers/gpu/drm/amd/display/dc/optc/dcn32/dcn32_optc.h index d159e3ed3bb3..ead92ad78a23 100644 --- a/drivers/gpu/drm/amd/display/dc/optc/dcn32/dcn32_optc.h +++ b/drivers/gpu/drm/amd/display/dc/optc/dcn32/dcn32_optc.h @@ -62,6 +62,7 @@ SF(OTG0_OTG_CONTROL, OTG_DISABLE_POINT_CNTL, mask_sh),\ SF(OTG0_OTG_CONTROL, OTG_FIELD_NUMBER_CNTL, mask_sh),\ SF(OTG0_OTG_CONTROL, OTG_OUT_MUX, mask_sh),\ + SF(OTG0_OTG_CONTROL, OTG_CURRENT_MASTER_EN_STATE, mask_sh),\ SF(OTG0_OTG_STEREO_CONTROL, OTG_STEREO_EN, mask_sh),\ SF(OTG0_OTG_STEREO_CONTROL, OTG_STEREO_SYNC_OUTPUT_LINE_NUM, mask_sh),\ SF(OTG0_OTG_STEREO_CONTROL, OTG_STEREO_SYNC_OUTPUT_POLARITY, mask_sh),\ diff --git 
a/drivers/gpu/drm/amd/display/dc/optc/dcn35/dcn35_optc.c b/drivers/gpu/drm/amd/display/dc/optc/dcn35/dcn35_optc.c index 72bff94cb57d..52d5ea98c86b 100644 --- a/drivers/gpu/drm/amd/display/dc/optc/dcn35/dcn35_optc.c +++ b/drivers/gpu/drm/amd/display/dc/optc/dcn35/dcn35_optc.c @@ -162,6 +162,8 @@ static bool optc35_disable_crtc(struct timing_generator *optc) REG_WAIT(OTG_CLOCK_CONTROL, OTG_BUSY, 0, 1, 100000); + REG_WAIT(OTG_CONTROL, OTG_CURRENT_MASTER_EN_STATE, 0, 1, 100000); + optc1_clear_optc_underflow(optc); return true; @@ -428,6 +430,21 @@ static void optc35_set_long_vtotal( } } +static void optc35_wait_otg_disable(struct timing_generator *optc) +{ + struct optc *optc1; + uint32_t is_master_en; + + if (!optc || !optc->ctx) + return; + + optc1 = DCN10TG_FROM_TG(optc); + + REG_GET(OTG_CONTROL, OTG_MASTER_EN, &is_master_en); + if (!is_master_en) + REG_WAIT(OTG_CLOCK_CONTROL, OTG_CURRENT_MASTER_EN_STATE, 0, 1, 100000); +} + static const struct timing_generator_funcs dcn35_tg_funcs = { .validate_timing = optc1_validate_timing, .program_timing = optc1_program_timing, @@ -479,6 +496,7 @@ static const struct timing_generator_funcs dcn35_tg_funcs = { .set_odm_bypass = optc32_set_odm_bypass, .set_odm_combine = optc35_set_odm_combine, .get_optc_source = optc2_get_optc_source, + .wait_otg_disable = optc35_wait_otg_disable, .set_h_timing_div_manual_mode = optc32_set_h_timing_div_manual_mode, .set_out_mux = optc3_set_out_mux, .set_drr_trigger_window = optc3_set_drr_trigger_window, diff --git a/drivers/gpu/drm/amd/display/dc/optc/dcn401/dcn401_optc.c b/drivers/gpu/drm/amd/display/dc/optc/dcn401/dcn401_optc.c index ff79c38287df..5af13706e601 100644 --- a/drivers/gpu/drm/amd/display/dc/optc/dcn401/dcn401_optc.c +++ b/drivers/gpu/drm/amd/display/dc/optc/dcn401/dcn401_optc.c @@ -226,6 +226,11 @@ bool optc401_disable_crtc(struct timing_generator *optc) REG_UPDATE(CONTROL, VTG0_ENABLE, 0); + // wait until CRTC_CURRENT_MASTER_EN_STATE == 0 + REG_WAIT(OTG_CONTROL, + OTG_CURRENT_MASTER_EN_STATE, + 0, 10, 15000); + /* CRTC disabled, so disable clock. */ REG_WAIT(OTG_CLOCK_CONTROL, OTG_BUSY, 0, diff --git a/drivers/gpu/drm/amd/display/dc/os_types.h b/drivers/gpu/drm/amd/display/dc/os_types.h index f2ba76c1e0c0..782316348941 100644 --- a/drivers/gpu/drm/amd/display/dc/os_types.h +++ b/drivers/gpu/drm/amd/display/dc/os_types.h @@ -31,6 +31,7 @@ #include <linux/kgdb.h> #include <linux/delay.h> #include <linux/mm.h> +#include <linux/vmalloc.h> #include <asm/byteorder.h> diff --git a/drivers/gpu/drm/amd/display/dc/pg/dcn35/dcn35_pg_cntl.c b/drivers/gpu/drm/amd/display/dc/pg/dcn35/dcn35_pg_cntl.c index af21c0a27f86..72bd43f9bbe2 100644 --- a/drivers/gpu/drm/amd/display/dc/pg/dcn35/dcn35_pg_cntl.c +++ b/drivers/gpu/drm/amd/display/dc/pg/dcn35/dcn35_pg_cntl.c @@ -79,16 +79,12 @@ void pg_cntl35_dsc_pg_control(struct pg_cntl *pg_cntl, unsigned int dsc_inst, bo uint32_t power_gate = power_on ? 0 : 1; uint32_t pwr_status = power_on ? 
0 : 2; uint32_t org_ip_request_cntl = 0; - bool block_enabled; - - /*need to enable dscclk regardless DSC_PG*/ - if (pg_cntl->ctx->dc->res_pool->dccg->funcs->enable_dsc && power_on) - pg_cntl->ctx->dc->res_pool->dccg->funcs->enable_dsc( - pg_cntl->ctx->dc->res_pool->dccg, dsc_inst); + bool block_enabled = false; + bool skip_pg = pg_cntl->ctx->dc->debug.ignore_pg || + pg_cntl->ctx->dc->debug.disable_dsc_power_gate || + pg_cntl->ctx->dc->idle_optimizations_allowed; - if (pg_cntl->ctx->dc->debug.ignore_pg || - pg_cntl->ctx->dc->debug.disable_dsc_power_gate || - pg_cntl->ctx->dc->idle_optimizations_allowed) + if (skip_pg && !power_on) return; block_enabled = pg_cntl35_dsc_pg_status(pg_cntl, dsc_inst); @@ -111,7 +107,7 @@ void pg_cntl35_dsc_pg_control(struct pg_cntl *pg_cntl, unsigned int dsc_inst, bo REG_WAIT(DOMAIN16_PG_STATUS, DOMAIN_PGFSM_PWR_STATUS, pwr_status, - 1, 1000); + 1, 10000); break; case 1: /* DSC1 */ REG_UPDATE(DOMAIN17_PG_CONFIG, @@ -119,7 +115,7 @@ void pg_cntl35_dsc_pg_control(struct pg_cntl *pg_cntl, unsigned int dsc_inst, bo REG_WAIT(DOMAIN17_PG_STATUS, DOMAIN_PGFSM_PWR_STATUS, pwr_status, - 1, 1000); + 1, 10000); break; case 2: /* DSC2 */ REG_UPDATE(DOMAIN18_PG_CONFIG, @@ -127,7 +123,7 @@ void pg_cntl35_dsc_pg_control(struct pg_cntl *pg_cntl, unsigned int dsc_inst, bo REG_WAIT(DOMAIN18_PG_STATUS, DOMAIN_PGFSM_PWR_STATUS, pwr_status, - 1, 1000); + 1, 10000); break; case 3: /* DSC3 */ REG_UPDATE(DOMAIN19_PG_CONFIG, @@ -135,7 +131,7 @@ void pg_cntl35_dsc_pg_control(struct pg_cntl *pg_cntl, unsigned int dsc_inst, bo REG_WAIT(DOMAIN19_PG_STATUS, DOMAIN_PGFSM_PWR_STATUS, pwr_status, - 1, 1000); + 1, 10000); break; default: BREAK_TO_DEBUGGER(); @@ -144,12 +140,6 @@ void pg_cntl35_dsc_pg_control(struct pg_cntl *pg_cntl, unsigned int dsc_inst, bo if (dsc_inst < MAX_PIPES) pg_cntl->pg_pipe_res_enable[PG_DSC][dsc_inst] = power_on; - - if (pg_cntl->ctx->dc->res_pool->dccg->funcs->disable_dsc && !power_on) { - /*this is to disable dscclk*/ - pg_cntl->ctx->dc->res_pool->dccg->funcs->disable_dsc( - pg_cntl->ctx->dc->res_pool->dccg, dsc_inst); - } } static bool pg_cntl35_hubp_dpp_pg_status(struct pg_cntl *pg_cntl, unsigned int hubp_dpp_inst) @@ -189,11 +179,12 @@ void pg_cntl35_hubp_dpp_pg_control(struct pg_cntl *pg_cntl, unsigned int hubp_dp uint32_t pwr_status = power_on ? 
0 : 2; uint32_t org_ip_request_cntl; bool block_enabled; + bool skip_pg = pg_cntl->ctx->dc->debug.ignore_pg || + pg_cntl->ctx->dc->debug.disable_hubp_power_gate || + pg_cntl->ctx->dc->debug.disable_dpp_power_gate || + pg_cntl->ctx->dc->idle_optimizations_allowed; - if (pg_cntl->ctx->dc->debug.ignore_pg || - pg_cntl->ctx->dc->debug.disable_hubp_power_gate || - pg_cntl->ctx->dc->debug.disable_dpp_power_gate || - pg_cntl->ctx->dc->idle_optimizations_allowed) + if (skip_pg && !power_on) return; block_enabled = pg_cntl35_hubp_dpp_pg_status(pg_cntl, hubp_dpp_inst); @@ -213,22 +204,22 @@ void pg_cntl35_hubp_dpp_pg_control(struct pg_cntl *pg_cntl, unsigned int hubp_dp case 0: /* DPP0 & HUBP0 */ REG_UPDATE(DOMAIN0_PG_CONFIG, DOMAIN_POWER_GATE, power_gate); - REG_WAIT(DOMAIN0_PG_STATUS, DOMAIN_PGFSM_PWR_STATUS, pwr_status, 1, 1000); + REG_WAIT(DOMAIN0_PG_STATUS, DOMAIN_PGFSM_PWR_STATUS, pwr_status, 1, 10000); break; case 1: /* DPP1 & HUBP1 */ REG_UPDATE(DOMAIN1_PG_CONFIG, DOMAIN_POWER_GATE, power_gate); - REG_WAIT(DOMAIN1_PG_STATUS, DOMAIN_PGFSM_PWR_STATUS, pwr_status, 1, 1000); + REG_WAIT(DOMAIN1_PG_STATUS, DOMAIN_PGFSM_PWR_STATUS, pwr_status, 1, 10000); break; case 2: /* DPP2 & HUBP2 */ REG_UPDATE(DOMAIN2_PG_CONFIG, DOMAIN_POWER_GATE, power_gate); - REG_WAIT(DOMAIN2_PG_STATUS, DOMAIN_PGFSM_PWR_STATUS, pwr_status, 1, 1000); + REG_WAIT(DOMAIN2_PG_STATUS, DOMAIN_PGFSM_PWR_STATUS, pwr_status, 1, 10000); break; case 3: /* DPP3 & HUBP3 */ REG_UPDATE(DOMAIN3_PG_CONFIG, DOMAIN_POWER_GATE, power_gate); - REG_WAIT(DOMAIN3_PG_STATUS, DOMAIN_PGFSM_PWR_STATUS, pwr_status, 1, 1000); + REG_WAIT(DOMAIN3_PG_STATUS, DOMAIN_PGFSM_PWR_STATUS, pwr_status, 1, 10000); break; default: BREAK_TO_DEBUGGER(); @@ -501,6 +492,36 @@ void pg_cntl35_init_pg_status(struct pg_cntl *pg_cntl) pg_cntl->pg_res_enable[PG_DWB] = block_enabled; } +static void pg_cntl35_print_pg_status(struct pg_cntl *pg_cntl, const char *debug_func, const char *debug_log) +{ + int i = 0; + bool block_enabled = false; + + DC_LOG_DEBUG("%s: %s", debug_func, debug_log); + + DC_LOG_DEBUG("PG_CNTL status:\n"); + + block_enabled = pg_cntl35_io_clk_status(pg_cntl); + DC_LOG_DEBUG("ONO0=%d (DCCG, DIO, DCIO)\n", block_enabled ? 1 : 0); + + block_enabled = pg_cntl35_mem_status(pg_cntl); + DC_LOG_DEBUG("ONO1=%d (DCHUBBUB, DCHVM, DCHUBBUBMEM)\n", block_enabled ? 1 : 0); + + block_enabled = pg_cntl35_plane_otg_status(pg_cntl); + DC_LOG_DEBUG("ONO2=%d (MPC, OPP, OPTC, DWB)\n", block_enabled ? 1 : 0); + + block_enabled = pg_cntl35_hpo_pg_status(pg_cntl); + DC_LOG_DEBUG("ONO3=%d (HPO)\n", block_enabled ? 1 : 0); + + for (i = 0; i < pg_cntl->ctx->dc->res_pool->pipe_count; i++) { + block_enabled = pg_cntl35_hubp_dpp_pg_status(pg_cntl, i); + DC_LOG_DEBUG("ONO%d=%d (DCHUBP%d, DPP%d)\n", 4 + i * 2, block_enabled ? 1 : 0, i, i); + + block_enabled = pg_cntl35_dsc_pg_status(pg_cntl, i); + DC_LOG_DEBUG("ONO%d=%d (DSC%d)\n", 5 + i * 2, block_enabled ? 
1 : 0, i); + } +} + static const struct pg_cntl_funcs pg_cntl35_funcs = { .init_pg_status = pg_cntl35_init_pg_status, .dsc_pg_control = pg_cntl35_dsc_pg_control, @@ -511,7 +532,8 @@ static const struct pg_cntl_funcs pg_cntl35_funcs = { .mpcc_pg_control = pg_cntl35_mpcc_pg_control, .opp_pg_control = pg_cntl35_opp_pg_control, .optc_pg_control = pg_cntl35_optc_pg_control, - .dwb_pg_control = pg_cntl35_dwb_pg_control + .dwb_pg_control = pg_cntl35_dwb_pg_control, + .print_pg_status = pg_cntl35_print_pg_status }; struct pg_cntl *pg_cntl35_create( diff --git a/drivers/gpu/drm/amd/display/dc/resource/dce100/dce100_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dce100/dce100_resource.c index 84f73fdb0f95..c4b4dc3ad8c9 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dce100/dce100_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dce100/dce100_resource.c @@ -29,6 +29,7 @@ #include "stream_encoder.h" #include "resource.h" +#include "clk_mgr.h" #include "include/irq_service_interface.h" #include "virtual/virtual_stream_encoder.h" #include "dce110/dce110_resource.h" @@ -836,17 +837,24 @@ static enum dc_status build_mapped_resource( return DC_OK; } -static enum dc_status dce100_validate_bandwidth( +enum dc_status dce100_validate_bandwidth( struct dc *dc, struct dc_state *context, - bool fast_validate) + enum dc_validate_mode validate_mode) { int i; bool at_least_one_pipe = false; + struct dc_stream_state *stream = NULL; + const uint32_t max_pix_clk_khz = max(dc->clk_mgr->clks.max_supported_dispclk_khz, 400000); for (i = 0; i < dc->res_pool->pipe_count; i++) { - if (context->res_ctx.pipe_ctx[i].stream) + stream = context->res_ctx.pipe_ctx[i].stream; + if (stream) { at_least_one_pipe = true; + + if (stream->timing.pix_clk_100hz >= max_pix_clk_khz * 10) + return DC_FAIL_BANDWIDTH_VALIDATE; + } } if (at_least_one_pipe) { @@ -854,7 +862,16 @@ static enum dc_status dce100_validate_bandwidth( context->bw_ctx.bw.dce.dispclk_khz = 681000; context->bw_ctx.bw.dce.yclk_khz = 250000 * MEMORY_TYPE_MULTIPLIER_CZ; } else { - context->bw_ctx.bw.dce.dispclk_khz = 0; + /* On DCE 6.0 and 6.4 the PLL0 is both the display engine clock and + * the DP clock, and shouldn't be turned off. Just select the display + * clock value from its low power mode. 
+ */ + if (dc->ctx->dce_version == DCE_VERSION_6_0 || + dc->ctx->dce_version == DCE_VERSION_6_4) + context->bw_ctx.bw.dce.dispclk_khz = 352000; + else + context->bw_ctx.bw.dce.dispclk_khz = 0; + context->bw_ctx.bw.dce.yclk_khz = 0; } @@ -881,7 +898,7 @@ static bool dce100_validate_surface_sets( return true; } -static enum dc_status dce100_validate_global( +enum dc_status dce100_validate_global( struct dc *dc, struct dc_state *context) { diff --git a/drivers/gpu/drm/amd/display/dc/resource/dce100/dce100_resource.h b/drivers/gpu/drm/amd/display/dc/resource/dce100/dce100_resource.h index fecab7c560f5..dd150a4b4610 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dce100/dce100_resource.h +++ b/drivers/gpu/drm/amd/display/dc/resource/dce100/dce100_resource.h @@ -41,6 +41,15 @@ struct resource_pool *dce100_create_resource_pool( enum dc_status dce100_validate_plane(const struct dc_plane_state *plane_state, struct dc_caps *caps); +enum dc_status dce100_validate_global( + struct dc *dc, + struct dc_state *context); + +enum dc_status dce100_validate_bandwidth( + struct dc *dc, + struct dc_state *context, + enum dc_validate_mode validate_mode); + enum dc_status dce100_add_stream_to_ctx( struct dc *dc, struct dc_state *new_ctx, diff --git a/drivers/gpu/drm/amd/display/dc/resource/dce110/dce110_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dce110/dce110_resource.c index f3d5baac11bf..cccde5a6f3cd 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dce110/dce110_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dce110/dce110_resource.c @@ -963,7 +963,7 @@ static enum dc_status build_mapped_resource( static enum dc_status dce110_validate_bandwidth( struct dc *dc, struct dc_state *context, - bool fast_validate) + enum dc_validate_mode validate_mode) { bool result = false; diff --git a/drivers/gpu/drm/amd/display/dc/resource/dce112/dce112_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dce112/dce112_resource.c index 4225cae68c10..869a8e515fc0 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dce112/dce112_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dce112/dce112_resource.c @@ -886,7 +886,7 @@ static enum dc_status build_mapped_resource( enum dc_status dce112_validate_bandwidth( struct dc *dc, struct dc_state *context, - bool fast_validate) + enum dc_validate_mode validate_mode) { bool result = false; @@ -1111,12 +1111,12 @@ static void bw_calcs_data_update_from_pplib(struct dc *dc) &clks); dc->bw_vbios->low_yclk = bw_frc_to_fixed( - clks.clocks_in_khz[0] * memory_type_multiplier, 1000); + (int64_t)clks.clocks_in_khz[0] * memory_type_multiplier, 1000); dc->bw_vbios->mid_yclk = bw_frc_to_fixed( - clks.clocks_in_khz[clks.num_levels>>1] * memory_type_multiplier, + (int64_t)clks.clocks_in_khz[clks.num_levels>>1] * memory_type_multiplier, 1000); dc->bw_vbios->high_yclk = bw_frc_to_fixed( - clks.clocks_in_khz[clks.num_levels-1] * memory_type_multiplier, + (int64_t)clks.clocks_in_khz[clks.num_levels-1] * memory_type_multiplier, 1000); return; @@ -1152,12 +1152,12 @@ static void bw_calcs_data_update_from_pplib(struct dc *dc) * YCLK = UMACLK*m_memoryTypeMultiplier */ dc->bw_vbios->low_yclk = bw_frc_to_fixed( - mem_clks.data[0].clocks_in_khz * memory_type_multiplier, 1000); + (int64_t)mem_clks.data[0].clocks_in_khz * memory_type_multiplier, 1000); dc->bw_vbios->mid_yclk = bw_frc_to_fixed( - mem_clks.data[mem_clks.num_levels>>1].clocks_in_khz * memory_type_multiplier, + (int64_t)mem_clks.data[mem_clks.num_levels>>1].clocks_in_khz * memory_type_multiplier, 1000); 
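/*
 * Illustrative aside, not part of the patch above: the (int64_t) casts are
 * added because the clocks_in_khz values and the memory type multiplier are
 * 32-bit quantities, so the multiplication would otherwise be performed in
 * 32-bit arithmetic and could wrap before bw_frc_to_fixed() receives it.
 * A minimal sketch of the pattern (helper name and types are hypothetical):
 */
#include <stdint.h>

static inline int64_t yclk_product(uint32_t clocks_in_khz,
				   uint32_t memory_type_multiplier)
{
	/*
	 * Widening one operand first makes the multiply happen in 64 bits;
	 * without the cast the product would be computed in 32 bits and only
	 * the already-wrapped result would be converted to int64_t.
	 */
	return (int64_t)clocks_in_khz * memory_type_multiplier;
}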
dc->bw_vbios->high_yclk = bw_frc_to_fixed( - mem_clks.data[mem_clks.num_levels-1].clocks_in_khz * memory_type_multiplier, + (int64_t)mem_clks.data[mem_clks.num_levels-1].clocks_in_khz * memory_type_multiplier, 1000); /* Now notify PPLib/SMU about which Watermarks sets they should select diff --git a/drivers/gpu/drm/amd/display/dc/resource/dce112/dce112_resource.h b/drivers/gpu/drm/amd/display/dc/resource/dce112/dce112_resource.h index 6221d749246d..3efc4c55d2d2 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dce112/dce112_resource.h +++ b/drivers/gpu/drm/amd/display/dc/resource/dce112/dce112_resource.h @@ -45,7 +45,7 @@ enum dc_status dce112_validate_with_context( enum dc_status dce112_validate_bandwidth( struct dc *dc, struct dc_state *context, - bool fast_validate); + enum dc_validate_mode validate_mode); enum dc_status dce112_add_stream_to_ctx( struct dc *dc, diff --git a/drivers/gpu/drm/amd/display/dc/resource/dce120/dce120_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dce120/dce120_resource.c index eb1e158d3436..540e04ec1e2d 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dce120/dce120_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dce120/dce120_resource.c @@ -67,7 +67,7 @@ #include "reg_helper.h" #include "dce100/dce100_resource.h" -#include "link.h" +#include "link_service.h" #ifndef mmDP0_DP_DPHY_INTERNAL_CTRL #define mmDP0_DP_DPHY_INTERNAL_CTRL 0x210f @@ -990,12 +990,12 @@ static void bw_calcs_data_update_from_pplib(struct dc *dc) memory_type_multiplier = MEMORY_TYPE_HBM; dc->bw_vbios->low_yclk = bw_frc_to_fixed( - mem_clks.data[0].clocks_in_khz * memory_type_multiplier, 1000); + (int64_t)mem_clks.data[0].clocks_in_khz * memory_type_multiplier, 1000); dc->bw_vbios->mid_yclk = bw_frc_to_fixed( - mem_clks.data[mem_clks.num_levels>>1].clocks_in_khz * memory_type_multiplier, + (int64_t)mem_clks.data[mem_clks.num_levels>>1].clocks_in_khz * memory_type_multiplier, 1000); dc->bw_vbios->high_yclk = bw_frc_to_fixed( - mem_clks.data[mem_clks.num_levels-1].clocks_in_khz * memory_type_multiplier, + (int64_t)mem_clks.data[mem_clks.num_levels-1].clocks_in_khz * memory_type_multiplier, 1000); /* Now notify PPLib/SMU about which Watermarks sets they should select diff --git a/drivers/gpu/drm/amd/display/dc/resource/dce60/dce60_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dce60/dce60_resource.c index d9ffdded5ce1..b75be6ad64f6 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dce60/dce60_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dce60/dce60_resource.c @@ -34,6 +34,7 @@ #include "stream_encoder.h" #include "resource.h" +#include "clk_mgr.h" #include "include/irq_service_interface.h" #include "irq/dce60/irq_service_dce60.h" #include "dce110/dce110_timing_generator.h" @@ -373,7 +374,7 @@ static const struct resource_caps res_cap = { .num_timing_generator = 6, .num_audio = 6, .num_stream_encoder = 6, - .num_pll = 2, + .num_pll = 3, .num_ddc = 6, }; @@ -389,7 +390,7 @@ static const struct resource_caps res_cap_64 = { .num_timing_generator = 2, .num_audio = 2, .num_stream_encoder = 2, - .num_pll = 2, + .num_pll = 3, .num_ddc = 2, }; @@ -403,13 +404,13 @@ static const struct dc_plane_cap plane_cap = { }, .max_upscale_factor = { - .argb8888 = 16000, + .argb8888 = 1, .nv12 = 1, .fp16 = 1 }, .max_downscale_factor = { - .argb8888 = 250, + .argb8888 = 1, .nv12 = 1, .fp16 = 1 } @@ -863,61 +864,6 @@ static void dce60_resource_destruct(struct dce110_resource_pool *pool) } } -static enum dc_status dce60_validate_bandwidth( - struct dc *dc, - struct dc_state 
*context, - bool fast_validate) -{ - int i; - bool at_least_one_pipe = false; - - for (i = 0; i < dc->res_pool->pipe_count; i++) { - if (context->res_ctx.pipe_ctx[i].stream) - at_least_one_pipe = true; - } - - if (at_least_one_pipe) { - /* TODO implement when needed but for now hardcode max value*/ - context->bw_ctx.bw.dce.dispclk_khz = 681000; - context->bw_ctx.bw.dce.yclk_khz = 250000 * MEMORY_TYPE_MULTIPLIER_CZ; - } else { - context->bw_ctx.bw.dce.dispclk_khz = 0; - context->bw_ctx.bw.dce.yclk_khz = 0; - } - - return DC_OK; -} - -static bool dce60_validate_surface_sets( - struct dc_state *context) -{ - int i; - - for (i = 0; i < context->stream_count; i++) { - if (context->stream_status[i].plane_count == 0) - continue; - - if (context->stream_status[i].plane_count > 1) - return false; - - if (context->stream_status[i].plane_states[0]->format - >= SURFACE_PIXEL_FORMAT_VIDEO_BEGIN) - return false; - } - - return true; -} - -static enum dc_status dce60_validate_global( - struct dc *dc, - struct dc_state *context) -{ - if (!dce60_validate_surface_sets(context)) - return DC_FAIL_SURFACE_VALIDATE; - - return DC_OK; -} - static void dce60_destroy_resource_pool(struct resource_pool **pool) { struct dce110_resource_pool *dce110_pool = TO_DCE110_RES_POOL(*pool); @@ -931,10 +877,10 @@ static const struct resource_funcs dce60_res_pool_funcs = { .destroy = dce60_destroy_resource_pool, .link_enc_create = dce60_link_encoder_create, .panel_cntl_create = dce60_panel_cntl_create, - .validate_bandwidth = dce60_validate_bandwidth, + .validate_bandwidth = dce100_validate_bandwidth, .validate_plane = dce100_validate_plane, .add_stream_to_ctx = dce100_add_stream_to_ctx, - .validate_global = dce60_validate_global, + .validate_global = dce100_validate_global, .find_first_free_match_stream_enc_for_link = dce100_find_first_free_match_stream_enc_for_link }; @@ -973,21 +919,24 @@ static bool dce60_construct( if (bp->fw_info_valid && bp->fw_info.external_clock_source_frequency_for_dp != 0) { pool->base.dp_clock_source = - dce60_clock_source_create(ctx, bp, CLOCK_SOURCE_ID_EXTERNAL, NULL, true); + dce60_clock_source_create(ctx, bp, CLOCK_SOURCE_ID_EXTERNAL, NULL, true); + /* DCE 6.0 and 6.4: PLL0 can only be used with DP. Don't initialize it here. 
*/ pool->base.clock_sources[0] = - dce60_clock_source_create(ctx, bp, CLOCK_SOURCE_ID_PLL0, &clk_src_regs[0], false); + dce60_clock_source_create(ctx, bp, CLOCK_SOURCE_ID_PLL1, &clk_src_regs[1], false); pool->base.clock_sources[1] = - dce60_clock_source_create(ctx, bp, CLOCK_SOURCE_ID_PLL1, &clk_src_regs[1], false); + dce60_clock_source_create(ctx, bp, CLOCK_SOURCE_ID_PLL2, &clk_src_regs[2], false); pool->base.clk_src_count = 2; } else { pool->base.dp_clock_source = - dce60_clock_source_create(ctx, bp, CLOCK_SOURCE_ID_PLL0, &clk_src_regs[0], true); + dce60_clock_source_create(ctx, bp, CLOCK_SOURCE_ID_PLL0, &clk_src_regs[0], true); pool->base.clock_sources[0] = - dce60_clock_source_create(ctx, bp, CLOCK_SOURCE_ID_PLL1, &clk_src_regs[1], false); - pool->base.clk_src_count = 1; + dce60_clock_source_create(ctx, bp, CLOCK_SOURCE_ID_PLL1, &clk_src_regs[1], false); + pool->base.clock_sources[1] = + dce60_clock_source_create(ctx, bp, CLOCK_SOURCE_ID_PLL2, &clk_src_regs[2], false); + pool->base.clk_src_count = 2; } if (pool->base.dp_clock_source == NULL) { @@ -1365,21 +1314,24 @@ static bool dce64_construct( if (bp->fw_info_valid && bp->fw_info.external_clock_source_frequency_for_dp != 0) { pool->base.dp_clock_source = - dce60_clock_source_create(ctx, bp, CLOCK_SOURCE_ID_EXTERNAL, NULL, true); + dce60_clock_source_create(ctx, bp, CLOCK_SOURCE_ID_EXTERNAL, NULL, true); + /* DCE 6.0 and 6.4: PLL0 can only be used with DP. Don't initialize it here. */ pool->base.clock_sources[0] = - dce60_clock_source_create(ctx, bp, CLOCK_SOURCE_ID_PLL1, &clk_src_regs[0], false); + dce60_clock_source_create(ctx, bp, CLOCK_SOURCE_ID_PLL1, &clk_src_regs[1], false); pool->base.clock_sources[1] = - dce60_clock_source_create(ctx, bp, CLOCK_SOURCE_ID_PLL2, &clk_src_regs[1], false); + dce60_clock_source_create(ctx, bp, CLOCK_SOURCE_ID_PLL2, &clk_src_regs[2], false); pool->base.clk_src_count = 2; } else { pool->base.dp_clock_source = - dce60_clock_source_create(ctx, bp, CLOCK_SOURCE_ID_PLL1, &clk_src_regs[0], true); + dce60_clock_source_create(ctx, bp, CLOCK_SOURCE_ID_PLL0, &clk_src_regs[0], true); pool->base.clock_sources[0] = - dce60_clock_source_create(ctx, bp, CLOCK_SOURCE_ID_PLL2, &clk_src_regs[1], false); - pool->base.clk_src_count = 1; + dce60_clock_source_create(ctx, bp, CLOCK_SOURCE_ID_PLL1, &clk_src_regs[1], false); + pool->base.clock_sources[1] = + dce60_clock_source_create(ctx, bp, CLOCK_SOURCE_ID_PLL2, &clk_src_regs[2], false); + pool->base.clk_src_count = 2; } if (pool->base.dp_clock_source == NULL) { diff --git a/drivers/gpu/drm/amd/display/dc/resource/dce80/dce80_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dce80/dce80_resource.c index bd5811f97531..5b7769745202 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dce80/dce80_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dce80/dce80_resource.c @@ -32,6 +32,7 @@ #include "stream_encoder.h" #include "resource.h" +#include "clk_mgr.h" #include "include/irq_service_interface.h" #include "irq/dce80/irq_service_dce80.h" #include "dce110/dce110_timing_generator.h" @@ -869,61 +870,6 @@ static void dce80_resource_destruct(struct dce110_resource_pool *pool) } } -static enum dc_status dce80_validate_bandwidth( - struct dc *dc, - struct dc_state *context, - bool fast_validate) -{ - int i; - bool at_least_one_pipe = false; - - for (i = 0; i < dc->res_pool->pipe_count; i++) { - if (context->res_ctx.pipe_ctx[i].stream) - at_least_one_pipe = true; - } - - if (at_least_one_pipe) { - /* TODO implement when needed but for now hardcode max value*/ - 
context->bw_ctx.bw.dce.dispclk_khz = 681000; - context->bw_ctx.bw.dce.yclk_khz = 250000 * MEMORY_TYPE_MULTIPLIER_CZ; - } else { - context->bw_ctx.bw.dce.dispclk_khz = 0; - context->bw_ctx.bw.dce.yclk_khz = 0; - } - - return DC_OK; -} - -static bool dce80_validate_surface_sets( - struct dc_state *context) -{ - int i; - - for (i = 0; i < context->stream_count; i++) { - if (context->stream_status[i].plane_count == 0) - continue; - - if (context->stream_status[i].plane_count > 1) - return false; - - if (context->stream_status[i].plane_states[0]->format - >= SURFACE_PIXEL_FORMAT_VIDEO_BEGIN) - return false; - } - - return true; -} - -static enum dc_status dce80_validate_global( - struct dc *dc, - struct dc_state *context) -{ - if (!dce80_validate_surface_sets(context)) - return DC_FAIL_SURFACE_VALIDATE; - - return DC_OK; -} - static void dce80_destroy_resource_pool(struct resource_pool **pool) { struct dce110_resource_pool *dce110_pool = TO_DCE110_RES_POOL(*pool); @@ -937,10 +883,10 @@ static const struct resource_funcs dce80_res_pool_funcs = { .destroy = dce80_destroy_resource_pool, .link_enc_create = dce80_link_encoder_create, .panel_cntl_create = dce80_panel_cntl_create, - .validate_bandwidth = dce80_validate_bandwidth, + .validate_bandwidth = dce100_validate_bandwidth, .validate_plane = dce100_validate_plane, .add_stream_to_ctx = dce100_add_stream_to_ctx, - .validate_global = dce80_validate_global, + .validate_global = dce100_validate_global, .find_first_free_match_stream_enc_for_link = dce100_find_first_free_match_stream_enc_for_link }; diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn10/dcn10_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn10/dcn10_resource.c index be4ade0853e9..652c05c35494 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn10/dcn10_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn10/dcn10_resource.c @@ -1129,12 +1129,12 @@ static void dcn10_destroy_resource_pool(struct resource_pool **pool) static enum dc_status dcn10_validate_bandwidth( struct dc *dc, struct dc_state *context, - bool fast_validate) + enum dc_validate_mode validate_mode) { bool voltage_supported; DC_FP_START(); - voltage_supported = dcn_validate_bandwidth(dc, context, fast_validate); + voltage_supported = dcn_validate_bandwidth(dc, context, validate_mode); DC_FP_END(); return voltage_supported ? 
DC_OK : DC_FAIL_BANDWIDTH_VALIDATE; diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn20/dcn20_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn20/dcn20_resource.c index 3405be07f5e3..84b38d2d6967 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn20/dcn20_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn20/dcn20_resource.c @@ -85,7 +85,7 @@ #include "vm_helper.h" #include "link_enc_cfg.h" -#include "link.h" +#include "link_service.h" #define DC_LOGGER_INIT(logger) @@ -2007,7 +2007,7 @@ bool dcn20_fast_validate_bw( int *pipe_cnt_out, int *pipe_split_from, int *vlevel_out, - bool fast_validate) + enum dc_validate_mode validate_mode) { bool out = false; int split[MAX_PIPES] = { 0 }; @@ -2021,7 +2021,7 @@ bool dcn20_fast_validate_bw( dcn20_merge_pipes_for_validate(dc, context); DC_FP_START(); - pipe_cnt = dc->res_pool->funcs->populate_dml_pipes(dc, context, pipes, fast_validate); + pipe_cnt = dc->res_pool->funcs->populate_dml_pipes(dc, context, pipes, validate_mode); DC_FP_END(); *pipe_cnt_out = pipe_cnt; @@ -2125,7 +2125,7 @@ validate_out: } enum dc_status dcn20_validate_bandwidth(struct dc *dc, struct dc_state *context, - bool fast_validate) + enum dc_validate_mode validate_mode) { bool voltage_supported; display_e2e_pipe_params_st *pipes; @@ -2135,7 +2135,7 @@ enum dc_status dcn20_validate_bandwidth(struct dc *dc, struct dc_state *context, return DC_FAIL_BANDWIDTH_VALIDATE; DC_FP_START(); - voltage_supported = dcn20_validate_bandwidth_fp(dc, context, fast_validate, pipes); + voltage_supported = dcn20_validate_bandwidth_fp(dc, context, validate_mode, pipes); DC_FP_END(); kfree(pipes); @@ -2736,6 +2736,8 @@ static bool dcn20_resource_construct( for (i = 0; i < dc->caps.max_planes; ++i) dc->caps.planes[i] = plane_cap; + dc->caps.max_odm_combine_factor = 2; + dc->cap_funcs = cap_funcs; if (dc->ctx->dc_bios->fw_info.oem_i2c_present) { diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn20/dcn20_resource.h b/drivers/gpu/drm/amd/display/dc/resource/dcn20/dcn20_resource.h index c0e062c7407d..e997d35a8b86 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn20/dcn20_resource.h +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn20/dcn20_resource.h @@ -119,7 +119,7 @@ void dcn20_set_mcif_arb_params( struct dc_state *context, display_e2e_pipe_params_st *pipes, int pipe_cnt); -enum dc_status dcn20_validate_bandwidth(struct dc *dc, struct dc_state *context, bool fast_validate); +enum dc_status dcn20_validate_bandwidth(struct dc *dc, struct dc_state *context, enum dc_validate_mode validate_mode); void dcn20_merge_pipes_for_validate( struct dc *dc, struct dc_state *context); @@ -158,7 +158,7 @@ bool dcn20_fast_validate_bw( int *pipe_cnt_out, int *pipe_split_from, int *vlevel_out, - bool fast_validate); + enum dc_validate_mode validate_mode); enum dc_status dcn20_build_mapped_resource(const struct dc *dc, struct dc_state *context, struct dc_stream_state *stream); enum dc_status dcn20_add_stream_to_ctx(struct dc *dc, struct dc_state *new_ctx, struct dc_stream_state *dc_stream); diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn201/dcn201_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn201/dcn201_resource.c index 43fa2cb117f3..e4a1338d21e0 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn201/dcn201_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn201/dcn201_resource.c @@ -1285,6 +1285,8 @@ static bool dcn201_resource_construct( for (i = 0; i < dc->caps.max_planes; ++i) dc->caps.planes[i] = plane_cap; + dc->caps.max_odm_combine_factor = 2; + 
dc->cap_funcs = cap_funcs; return true; diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn21/dcn21_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn21/dcn21_resource.c index 9ab01b65b177..918742a42ded 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn21/dcn21_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn21/dcn21_resource.c @@ -769,7 +769,7 @@ bool dcn21_fast_validate_bw(struct dc *dc, int *pipe_cnt_out, int *pipe_split_from, int *vlevel_out, - bool fast_validate) + enum dc_validate_mode validate_mode) { bool out = false; int split[MAX_PIPES] = { 0 }; @@ -783,7 +783,7 @@ bool dcn21_fast_validate_bw(struct dc *dc, dcn20_merge_pipes_for_validate(dc, context); DC_FP_START(); - pipe_cnt = dc->res_pool->funcs->populate_dml_pipes(dc, context, pipes, fast_validate); + pipe_cnt = dc->res_pool->funcs->populate_dml_pipes(dc, context, pipes, validate_mode); DC_FP_END(); *pipe_cnt_out = pipe_cnt; @@ -924,7 +924,7 @@ validate_out: * dcn20_validate_bandwidth in dcn20_resource.c. */ static enum dc_status dcn21_validate_bandwidth(struct dc *dc, struct dc_state *context, - bool fast_validate) + enum dc_validate_mode validate_mode) { bool voltage_supported; display_e2e_pipe_params_st *pipes; @@ -934,7 +934,7 @@ static enum dc_status dcn21_validate_bandwidth(struct dc *dc, struct dc_state *c return DC_FAIL_BANDWIDTH_VALIDATE; DC_FP_START(); - voltage_supported = dcn21_validate_bandwidth_fp(dc, context, fast_validate, pipes); + voltage_supported = dcn21_validate_bandwidth_fp(dc, context, validate_mode, pipes); DC_FP_END(); kfree(pipes); @@ -1684,6 +1684,8 @@ static bool dcn21_resource_construct( for (i = 0; i < dc->caps.max_planes; ++i) dc->caps.planes[i] = plane_cap; + dc->caps.max_odm_combine_factor = 2; + dc->cap_funcs = cap_funcs; return true; diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn21/dcn21_resource.h b/drivers/gpu/drm/amd/display/dc/resource/dcn21/dcn21_resource.h index f7ecc002c2f7..a017fd9854d1 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn21/dcn21_resource.h +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn21/dcn21_resource.h @@ -51,6 +51,6 @@ bool dcn21_fast_validate_bw( int *pipe_cnt_out, int *pipe_split_from, int *vlevel_out, - bool fast_validate); + enum dc_validate_mode validate_mode); #endif /* _DCN21_RESOURCE_H_ */ diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn30/dcn30_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn30/dcn30_resource.c index f631ae34e320..ff63f59ff928 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn30/dcn30_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn30/dcn30_resource.c @@ -60,7 +60,7 @@ #include "dml/display_mode_vba.h" #include "dcn30/dcn30_dccg.h" #include "dcn10/dcn10_resource.h" -#include "link.h" +#include "link_service.h" #include "dce/dce_panel_cntl.h" #include "dcn30/dcn30_dwb.h" @@ -1319,13 +1319,13 @@ static struct clock_source *dcn30_clock_source_create( int dcn30_populate_dml_pipes_from_context( struct dc *dc, struct dc_state *context, display_e2e_pipe_params_st *pipes, - bool fast_validate) + enum dc_validate_mode validate_mode) { int i, pipe_cnt; struct resource_context *res_ctx = &context->res_ctx; DC_FP_START(); - dcn20_populate_dml_pipes_from_context(dc, context, pipes, fast_validate); + dcn20_populate_dml_pipes_from_context(dc, context, pipes, validate_mode); DC_FP_END(); for (i = 0, pipe_cnt = 0; i < dc->res_pool->pipe_count; i++) { @@ -1627,7 +1627,7 @@ noinline bool dcn30_internal_validate_bw( display_e2e_pipe_params_st *pipes, int *pipe_cnt_out, int 
*vlevel_out, - bool fast_validate, + enum dc_validate_mode validate_mode, bool allow_self_refresh_only) { bool out = false; @@ -1646,7 +1646,7 @@ noinline bool dcn30_internal_validate_bw( context->bw_ctx.dml.vba.VoltageLevel = 0; context->bw_ctx.dml.vba.DRAMClockChangeSupport[0][0] = dm_dram_clock_change_vactive; dc->res_pool->funcs->update_soc_for_wm_a(dc, context); - pipe_cnt = dc->res_pool->funcs->populate_dml_pipes(dc, context, pipes, fast_validate); + pipe_cnt = dc->res_pool->funcs->populate_dml_pipes(dc, context, pipes, validate_mode); if (!pipe_cnt) { out = true; @@ -1655,7 +1655,7 @@ noinline bool dcn30_internal_validate_bw( dml_log_pipe_params(&context->bw_ctx.dml, pipes, pipe_cnt); - if (!fast_validate || !allow_self_refresh_only) { + if (validate_mode == DC_VALIDATE_MODE_AND_PROGRAMMING || !allow_self_refresh_only) { /* * DML favors voltage over p-state, but we're more interested in * supporting p-state over voltage. We can't support p-state in @@ -1669,7 +1669,7 @@ noinline bool dcn30_internal_validate_bw( vlevel = dcn20_validate_apply_pipe_split_flags(dc, context, vlevel, split, merge); } if (allow_self_refresh_only && - (fast_validate || vlevel == context->bw_ctx.dml.soc.num_states || + (validate_mode != DC_VALIDATE_MODE_AND_PROGRAMMING || vlevel == context->bw_ctx.dml.soc.num_states || vba->DRAMClockChangeSupport[vlevel][vba->maxMpcComb] == dm_dram_clock_change_unsupported)) { /* * If mode is unsupported or there's still no p-state support @@ -1678,7 +1678,7 @@ noinline bool dcn30_internal_validate_bw( * We don't actually support prefetch mode 2, so require that we * at least support prefetch mode 1. */ - context->bw_ctx.dml.validate_max_state = fast_validate; + context->bw_ctx.dml.validate_max_state = (validate_mode != DC_VALIDATE_MODE_AND_PROGRAMMING); context->bw_ctx.dml.soc.allow_dram_self_refresh_or_dram_clock_change_in_vblank = dm_allow_self_refresh; @@ -1865,7 +1865,7 @@ noinline bool dcn30_internal_validate_bw( } if (repopulate_pipes) - pipe_cnt = dc->res_pool->funcs->populate_dml_pipes(dc, context, pipes, fast_validate); + pipe_cnt = dc->res_pool->funcs->populate_dml_pipes(dc, context, pipes, validate_mode); context->bw_ctx.dml.vba.VoltageLevel = vlevel; *vlevel_out = vlevel; *pipe_cnt_out = pipe_cnt; @@ -2037,7 +2037,7 @@ void dcn30_calculate_wm_and_dlg( enum dc_status dcn30_validate_bandwidth(struct dc *dc, struct dc_state *context, - bool fast_validate) + enum dc_validate_mode validate_mode) { bool out = false; @@ -2055,7 +2055,7 @@ enum dc_status dcn30_validate_bandwidth(struct dc *dc, goto validate_fail; DC_FP_START(); - out = dcn30_internal_validate_bw(dc, context, pipes, &pipe_cnt, &vlevel, fast_validate, true); + out = dcn30_internal_validate_bw(dc, context, pipes, &pipe_cnt, &vlevel, validate_mode, true); DC_FP_END(); if (pipe_cnt == 0) @@ -2066,7 +2066,7 @@ enum dc_status dcn30_validate_bandwidth(struct dc *dc, BW_VAL_TRACE_END_VOLTAGE_LEVEL(); - if (fast_validate) { + if (validate_mode != DC_VALIDATE_MODE_AND_PROGRAMMING) { BW_VAL_TRACE_SKIP(fast); goto validate_out; } @@ -2192,7 +2192,7 @@ void dcn30_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_params j = 0; // create the final dcfclk and uclk table while (i < num_dcfclk_sta_targets && j < num_uclk_states && num_states < DC__VOLTAGE_STATES) { - if (dcfclk_sta_targets[i] < optimal_dcfclk_for_uclk[j] && i < num_dcfclk_sta_targets) { + if (dcfclk_sta_targets[i] < optimal_dcfclk_for_uclk[j]) { dcfclk_mhz[num_states] = dcfclk_sta_targets[i]; dram_speed_mts[num_states++] = 
optimal_uclk_for_dcfclk_sta_targets[i++]; } else { @@ -2586,6 +2586,8 @@ static bool dcn30_resource_construct( for (i = 0; i < dc->caps.max_planes; ++i) dc->caps.planes[i] = plane_cap; + dc->caps.max_odm_combine_factor = 4; + dc->cap_funcs = cap_funcs; if (dc->ctx->dc_bios->fw_info.oem_i2c_present) { diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn30/dcn30_resource.h b/drivers/gpu/drm/amd/display/dc/resource/dcn30/dcn30_resource.h index 689d9bdace81..2c967fe55712 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn30/dcn30_resource.h +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn30/dcn30_resource.h @@ -57,14 +57,14 @@ unsigned int dcn30_calc_max_scaled_time( unsigned int urgent_watermark); enum dc_status dcn30_validate_bandwidth(struct dc *dc, struct dc_state *context, - bool fast_validate); + enum dc_validate_mode validate_mode); bool dcn30_internal_validate_bw( struct dc *dc, struct dc_state *context, display_e2e_pipe_params_st *pipes, int *pipe_cnt_out, int *vlevel_out, - bool fast_validate, + enum dc_validate_mode validate_mode, bool allow_self_refresh_only); void dcn30_calculate_wm_and_dlg( struct dc *dc, struct dc_state *context, @@ -78,7 +78,7 @@ void dcn30_populate_dml_writeback_from_context( int dcn30_populate_dml_pipes_from_context( struct dc *dc, struct dc_state *context, display_e2e_pipe_params_st *pipes, - bool fast_validate); + enum dc_validate_mode validate_mode); bool dcn30_acquire_post_bldn_3dlut( struct resource_context *res_ctx, diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn301/dcn301_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn301/dcn301_resource.c index 121a86a59833..82a205a7c25c 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn301/dcn301_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn301/dcn301_resource.c @@ -1706,6 +1706,8 @@ static bool dcn301_resource_construct( for (i = 0; i < dc->caps.max_planes; ++i) dc->caps.planes[i] = plane_cap; + dc->caps.max_odm_combine_factor = 4; + dc->cap_funcs = cap_funcs; return true; diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn302/dcn302_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn302/dcn302_resource.c index 012c5fd52cb1..61623cb518d9 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn302/dcn302_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn302/dcn302_resource.c @@ -47,7 +47,8 @@ #include "dcn10/dcn10_resource.h" -#include "link.h" +#include "link_service.h" + #include "dce/dce_abm.h" #include "dce/dce_audio.h" #include "dce/dce_aux.h" @@ -1481,6 +1482,8 @@ static bool dcn302_resource_construct( for (i = 0; i < dc->caps.max_planes; ++i) dc->caps.planes[i] = plane_cap; + dc->caps.max_odm_combine_factor = 4; + dc->cap_funcs = cap_funcs; if (dc->ctx->dc_bios->fw_info.oem_i2c_present) { diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn303/dcn303_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn303/dcn303_resource.c index a8d0b4686f9a..02b9a84f2db3 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn303/dcn303_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn303/dcn303_resource.c @@ -47,7 +47,7 @@ #include "dcn10/dcn10_resource.h" -#include "link.h" +#include "link_service.h" #include "dce/dce_abm.h" #include "dce/dce_audio.h" @@ -1414,6 +1414,8 @@ static bool dcn303_resource_construct( for (i = 0; i < dc->caps.max_planes; ++i) dc->caps.planes[i] = plane_cap; + dc->caps.max_odm_combine_factor = 4; + dc->cap_funcs = cap_funcs; if (dc->ctx->dc_bios->fw_info.oem_i2c_present) { diff --git 
a/drivers/gpu/drm/amd/display/dc/resource/dcn31/dcn31_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn31/dcn31_resource.c index 7e0af5297dc4..3ed7f50554e2 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn31/dcn31_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn31/dcn31_resource.c @@ -1616,14 +1616,14 @@ static bool is_dual_plane(enum surface_pixel_format format) int dcn31x_populate_dml_pipes_from_context(struct dc *dc, struct dc_state *context, display_e2e_pipe_params_st *pipes, - bool fast_validate) + enum dc_validate_mode validate_mode) { uint32_t pipe_cnt; int i; dc_assert_fp_enabled(); - pipe_cnt = dcn20_populate_dml_pipes_from_context(dc, context, pipes, fast_validate); + pipe_cnt = dcn20_populate_dml_pipes_from_context(dc, context, pipes, validate_mode); for (i = 0; i < pipe_cnt; i++) { pipes[i].pipe.src.gpuvm = 1; @@ -1641,7 +1641,7 @@ int dcn31x_populate_dml_pipes_from_context(struct dc *dc, int dcn31_populate_dml_pipes_from_context( struct dc *dc, struct dc_state *context, display_e2e_pipe_params_st *pipes, - bool fast_validate) + enum dc_validate_mode validate_mode) { int i, pipe_cnt; struct resource_context *res_ctx = &context->res_ctx; @@ -1649,7 +1649,7 @@ int dcn31_populate_dml_pipes_from_context( bool upscaled = false; DC_FP_START(); - dcn31x_populate_dml_pipes_from_context(dc, context, pipes, fast_validate); + dcn31x_populate_dml_pipes_from_context(dc, context, pipes, validate_mode); DC_FP_END(); for (i = 0, pipe_cnt = 0; i < dc->res_pool->pipe_count; i++) { @@ -1760,7 +1760,7 @@ dcn31_set_mcif_arb_params(struct dc *dc, enum dc_status dcn31_validate_bandwidth(struct dc *dc, struct dc_state *context, - bool fast_validate) + enum dc_validate_mode validate_mode) { bool out = false; @@ -1778,19 +1778,19 @@ enum dc_status dcn31_validate_bandwidth(struct dc *dc, goto validate_fail; DC_FP_START(); - out = dcn30_internal_validate_bw(dc, context, pipes, &pipe_cnt, &vlevel, fast_validate, true); + out = dcn30_internal_validate_bw(dc, context, pipes, &pipe_cnt, &vlevel, validate_mode, true); DC_FP_END(); - // Disable fast_validate to set min dcfclk in calculate_wm_and_dlg + // Disable DC_VALIDATE_MODE_ONLY and DC_VALIDATE_MODE_AND_STATE_INDEX to set min dcfclk in calculate_wm_and_dlg if (pipe_cnt == 0) - fast_validate = false; + validate_mode = DC_VALIDATE_MODE_AND_PROGRAMMING; if (!out) goto validate_fail; BW_VAL_TRACE_END_VOLTAGE_LEVEL(); - if (fast_validate) { + if (validate_mode != DC_VALIDATE_MODE_AND_PROGRAMMING) { BW_VAL_TRACE_SKIP(fast); goto validate_out; } @@ -1850,7 +1850,9 @@ static struct resource_funcs dcn31_res_pool_funcs = { .patch_unknown_plane_state = dcn20_patch_unknown_plane_state, .get_panel_config_defaults = dcn31_get_panel_config_defaults, .get_det_buffer_size = dcn31_get_det_buffer_size, - .get_vstartup_for_pipe = dcn10_get_vstartup_for_pipe + .get_vstartup_for_pipe = dcn10_get_vstartup_for_pipe, + .update_dc_state_for_encoder_switch = dcn31_update_dc_state_for_encoder_switch, + .build_pipe_pix_clk_params = dcn20_build_pipe_pix_clk_params }; static struct clock_source *dcn30_clock_source_create( @@ -1954,6 +1956,9 @@ static bool dcn31_resource_construct( dc->caps.color.mpc.ogam_rom_caps.hlg = 0; dc->caps.color.mpc.ocsc = 1; + dc->caps.num_of_host_routers = 2; + dc->caps.num_of_dpias_per_host_router = 2; + /* Use pipe context based otg sync logic */ dc->config.use_pipe_ctx_sync_logic = true; dc->config.disable_hbr_audio_dp2 = true; @@ -2199,6 +2204,8 @@ static bool dcn31_resource_construct( for (i = 0; i < dc->caps.max_planes; ++i) 
dc->caps.planes[i] = plane_cap; + dc->caps.max_odm_combine_factor = 4; + dc->cap_funcs = cap_funcs; dc->dcn_ip->max_num_dpp = dcn3_1_ip.max_num_dpp; @@ -2228,3 +2235,35 @@ struct resource_pool *dcn31_create_resource_pool( kfree(pool); return NULL; } + +enum dc_status dcn31_update_dc_state_for_encoder_switch(struct dc_link *link, + struct dc_link_settings *link_setting, + uint8_t pipe_count, + struct pipe_ctx *pipes, + struct audio_output *audio_output) +{ + struct dc_state *state = link->dc->current_state; + int i; + +#if defined(CONFIG_DRM_AMD_DC_FP) + for (i = 0; i < state->stream_count; i++) + if (state->streams[i] && state->streams[i]->link && state->streams[i]->link == link) + link->dc->hwss.calculate_pix_rate_divider((struct dc *)link->dc, state, state->streams[i]); + + for (i = 0; i < pipe_count; i++) { + link->dc->res_pool->funcs->build_pipe_pix_clk_params(&pipes[i]); + + // Setup audio + if (pipes[i].stream_res.audio != NULL) + build_audio_output(state, &pipes[i], &audio_output[i]); + } +#else + /* This DCN requires rate divider updates and audio reprogramming to allow DP1<-->DP2 link rate switching, + * but the above will not compile on architectures without an FPU. + */ + DC_LOG_WARNING("%s: DP1<-->DP2 link retraining will not work on this DCN on non-FPU platforms", __func__); + ASSERT(0); +#endif + + return DC_OK; +} diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn31/dcn31_resource.h b/drivers/gpu/drm/amd/display/dc/resource/dcn31/dcn31_resource.h index dd82815d7efe..c32c85ef0ba4 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn31/dcn31_resource.h +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn31/dcn31_resource.h @@ -39,7 +39,7 @@ struct dcn31_resource_pool { enum dc_status dcn31_validate_bandwidth(struct dc *dc, struct dc_state *context, - bool fast_validate); + enum dc_validate_mode validate_mode); void dcn31_calculate_wm_and_dlg( struct dc *dc, struct dc_state *context, display_e2e_pipe_params_st *pipes, @@ -48,7 +48,7 @@ void dcn31_calculate_wm_and_dlg( int dcn31_populate_dml_pipes_from_context( struct dc *dc, struct dc_state *context, display_e2e_pipe_params_st *pipes, - bool fast_validate); + enum dc_validate_mode validate_mode); void dcn31_populate_dml_writeback_from_context(struct dc *dc, struct resource_context *res_ctx, @@ -66,6 +66,12 @@ struct resource_pool *dcn31_create_resource_pool( unsigned int dcn31_get_det_buffer_size( const struct dc_state *context); +enum dc_status dcn31_update_dc_state_for_encoder_switch(struct dc_link *link, + struct dc_link_settings *link_setting, + uint8_t pipe_count, + struct pipe_ctx *pipes, + struct audio_output *audio_output); + /*temp: B0 specific before switch to dcn313 headers*/ #ifndef regPHYPLLF_PIXCLK_RESYNC_CNTL #define regPHYPLLF_PIXCLK_RESYNC_CNTL 0x007e diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn314/dcn314_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn314/dcn314_resource.c index d96bc6cb73ad..d4917a35b991 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn314/dcn314_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn314/dcn314_resource.c @@ -926,6 +926,8 @@ static const struct dc_debug_options debug_defaults_drv = { .seamless_boot_odm_combine = true, .enable_legacy_fast_update = true, .using_dml2 = false, + .disable_dsc_power_gate = true, + .min_disp_clk_khz = 100000, }; static const struct dc_panel_config panel_config_defaults = { @@ -1667,12 +1669,12 @@ static struct clock_source *dcn31_clock_source_create( static int dcn314_populate_dml_pipes_from_context( struct dc 
*dc, struct dc_state *context, display_e2e_pipe_params_st *pipes, - bool fast_validate) + enum dc_validate_mode validate_mode) { int pipe_cnt; DC_FP_START(); - pipe_cnt = dcn314_populate_dml_pipes_from_context_fpu(dc, context, pipes, fast_validate); + pipe_cnt = dcn314_populate_dml_pipes_from_context_fpu(dc, context, pipes, validate_mode); DC_FP_END(); return pipe_cnt; @@ -1696,7 +1698,7 @@ static void dcn314_get_panel_config_defaults(struct dc_panel_config *panel_confi enum dc_status dcn314_validate_bandwidth(struct dc *dc, struct dc_state *context, - bool fast_validate) + enum dc_validate_mode validate_mode) { bool out = false; @@ -1715,19 +1717,19 @@ enum dc_status dcn314_validate_bandwidth(struct dc *dc, DC_FP_START(); // do not support self refresh only - out = dcn30_internal_validate_bw(dc, context, pipes, &pipe_cnt, &vlevel, fast_validate, false); + out = dcn30_internal_validate_bw(dc, context, pipes, &pipe_cnt, &vlevel, validate_mode, false); DC_FP_END(); - // Disable fast_validate to set min dcfclk in calculate_wm_and_dlg + // Disable DC_VALIDATE_MODE_ONLY and DC_VALIDATE_MODE_AND_STATE_INDEX to set min dcfclk in calculate_wm_and_dlg if (pipe_cnt == 0) - fast_validate = false; + validate_mode = DC_VALIDATE_MODE_AND_PROGRAMMING; if (!out) goto validate_fail; BW_VAL_TRACE_END_VOLTAGE_LEVEL(); - if (fast_validate) { + if (validate_mode != DC_VALIDATE_MODE_AND_PROGRAMMING) { BW_VAL_TRACE_SKIP(fast); goto validate_out; } @@ -1779,7 +1781,9 @@ static struct resource_funcs dcn314_res_pool_funcs = { .get_panel_config_defaults = dcn314_get_panel_config_defaults, .get_preferred_eng_id_dpia = dcn314_get_preferred_eng_id_dpia, .get_det_buffer_size = dcn31_get_det_buffer_size, - .get_vstartup_for_pipe = dcn10_get_vstartup_for_pipe + .get_vstartup_for_pipe = dcn10_get_vstartup_for_pipe, + .update_dc_state_for_encoder_switch = dcn31_update_dc_state_for_encoder_switch, + .build_pipe_pix_clk_params = dcn20_build_pipe_pix_clk_params }; static struct clock_source *dcn30_clock_source_create( @@ -1885,6 +1889,9 @@ static bool dcn314_resource_construct( dc->caps.max_disp_clock_khz_at_vmin = 650000; + dc->caps.num_of_host_routers = 2; + dc->caps.num_of_dpias_per_host_router = 2; + /* Use pipe context based otg sync logic */ dc->config.use_pipe_ctx_sync_logic = true; @@ -2114,6 +2121,8 @@ static bool dcn314_resource_construct( for (i = 0; i < dc->caps.max_planes; ++i) dc->caps.planes[i] = plane_cap; + dc->caps.max_odm_combine_factor = 4; + dc->cap_funcs = cap_funcs; dc->dcn_ip->max_num_dpp = dcn3_14_ip.max_num_dpp; diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn314/dcn314_resource.h b/drivers/gpu/drm/amd/display/dc/resource/dcn314/dcn314_resource.h index f8ba531d6342..ac9bb7f097d5 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn314/dcn314_resource.h +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn314/dcn314_resource.h @@ -41,7 +41,7 @@ struct dcn314_resource_pool { enum dc_status dcn314_validate_bandwidth(struct dc *dc, struct dc_state *context, - bool fast_validate); + enum dc_validate_mode validate_mode); struct resource_pool *dcn314_create_resource_pool( const struct dc_init_data *init_data, diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn315/dcn315_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn315/dcn315_resource.c index 6c2bb3f63be1..82cc78c291d8 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn315/dcn315_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn315/dcn315_resource.c @@ -1664,7 +1664,7 @@ static bool allow_pixel_rate_crb(struct dc *dc, 
struct dc_state *context) static int dcn315_populate_dml_pipes_from_context( struct dc *dc, struct dc_state *context, display_e2e_pipe_params_st *pipes, - bool fast_validate) + enum dc_validate_mode validate_mode) { int i, pipe_cnt, crb_idx, crb_pipes; struct resource_context *res_ctx = &context->res_ctx; @@ -1674,7 +1674,7 @@ static int dcn315_populate_dml_pipes_from_context( bool pixel_rate_crb = allow_pixel_rate_crb(dc, context); DC_FP_START(); - dcn31x_populate_dml_pipes_from_context(dc, context, pipes, fast_validate); + dcn31x_populate_dml_pipes_from_context(dc, context, pipes, validate_mode); DC_FP_END(); for (i = 0, pipe_cnt = 0, crb_pipes = 0; i < dc->res_pool->pipe_count; i++) { @@ -1844,7 +1844,9 @@ static struct resource_funcs dcn315_res_pool_funcs = { .get_panel_config_defaults = dcn315_get_panel_config_defaults, .get_power_profile = dcn315_get_power_profile, .get_det_buffer_size = dcn31_get_det_buffer_size, - .get_vstartup_for_pipe = dcn10_get_vstartup_for_pipe + .get_vstartup_for_pipe = dcn10_get_vstartup_for_pipe, + .update_dc_state_for_encoder_switch = dcn31_update_dc_state_for_encoder_switch, + .build_pipe_pix_clk_params = dcn20_build_pipe_pix_clk_params }; static bool dcn315_resource_construct( @@ -2140,6 +2142,8 @@ static bool dcn315_resource_construct( for (i = 0; i < dc->caps.max_planes; ++i) dc->caps.planes[i] = plane_cap; + dc->caps.max_odm_combine_factor = 4; + dc->cap_funcs = cap_funcs; dc->dcn_ip->max_num_dpp = dcn3_15_ip.max_num_dpp; diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn316/dcn316_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn316/dcn316_resource.c index 568094827212..636110e48d01 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn316/dcn316_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn316/dcn316_resource.c @@ -1610,7 +1610,7 @@ static bool is_dual_plane(enum surface_pixel_format format) static int dcn316_populate_dml_pipes_from_context( struct dc *dc, struct dc_state *context, display_e2e_pipe_params_st *pipes, - bool fast_validate) + enum dc_validate_mode validate_mode) { int i, pipe_cnt; struct resource_context *res_ctx = &context->res_ctx; @@ -1618,7 +1618,7 @@ static int dcn316_populate_dml_pipes_from_context( const int max_usable_det = context->bw_ctx.dml.ip.config_return_buffer_size_in_kbytes - DCN3_16_MIN_COMPBUF_SIZE_KB; DC_FP_START(); - dcn31x_populate_dml_pipes_from_context(dc, context, pipes, fast_validate); + dcn31x_populate_dml_pipes_from_context(dc, context, pipes, validate_mode); DC_FP_END(); for (i = 0, pipe_cnt = 0; i < dc->res_pool->pipe_count; i++) { @@ -1720,7 +1720,9 @@ static struct resource_funcs dcn316_res_pool_funcs = { .patch_unknown_plane_state = dcn20_patch_unknown_plane_state, .get_panel_config_defaults = dcn316_get_panel_config_defaults, .get_det_buffer_size = dcn31_get_det_buffer_size, - .get_vstartup_for_pipe = dcn10_get_vstartup_for_pipe + .get_vstartup_for_pipe = dcn10_get_vstartup_for_pipe, + .update_dc_state_for_encoder_switch = dcn31_update_dc_state_for_encoder_switch, + .build_pipe_pix_clk_params = dcn20_build_pipe_pix_clk_params }; static bool dcn316_resource_construct( @@ -2008,6 +2010,8 @@ static bool dcn316_resource_construct( for (i = 0; i < dc->caps.max_planes; ++i) dc->caps.planes[i] = plane_cap; + dc->caps.max_odm_combine_factor = 4; + dc->cap_funcs = cap_funcs; dc->dcn_ip->max_num_dpp = dcn3_16_ip.max_num_dpp; diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.c index 
bb0dae0be5b8..3965a7f1b64b 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.c @@ -69,7 +69,7 @@ #include "dml/display_mode_vba.h" #include "dcn32/dcn32_dccg.h" #include "dcn10/dcn10_resource.h" -#include "link.h" +#include "link_service.h" #include "dcn31/dcn31_panel_cntl.h" #include "dcn30/dcn30_dwb.h" @@ -739,6 +739,7 @@ static const struct dc_debug_options debug_defaults_drv = { .fpo_vactive_min_active_margin_us = 200, .fpo_vactive_max_blank_us = 1000, .enable_legacy_fast_update = false, + .disable_stutter_for_wm_program = true }; static struct dce_aux *dcn32_aux_engine_create( @@ -1742,7 +1743,7 @@ void dcn32_add_phantom_pipes(struct dc *dc, struct dc_state *context, } } -static bool dml1_validate(struct dc *dc, struct dc_state *context, bool fast_validate) +static bool dml1_validate(struct dc *dc, struct dc_state *context, enum dc_validate_mode validate_mode) { bool out = false; @@ -1767,7 +1768,7 @@ static bool dml1_validate(struct dc *dc, struct dc_state *context, bool fast_val goto validate_fail; DC_FP_START(); - out = dcn32_internal_validate_bw(dc, context, pipes, &pipe_cnt, &vlevel, fast_validate); + out = dcn32_internal_validate_bw(dc, context, pipes, &pipe_cnt, &vlevel, validate_mode); DC_FP_END(); if (pipe_cnt == 0) @@ -1778,7 +1779,7 @@ static bool dml1_validate(struct dc *dc, struct dc_state *context, bool fast_val BW_VAL_TRACE_END_VOLTAGE_LEVEL(); - if (fast_validate) { + if (validate_mode != DC_VALIDATE_MODE_AND_PROGRAMMING) { BW_VAL_TRACE_SKIP(fast); goto validate_out; } @@ -1809,7 +1810,7 @@ validate_out: enum dc_status dcn32_validate_bandwidth(struct dc *dc, struct dc_state *context, - bool fast_validate) + enum dc_validate_mode validate_mode) { unsigned int i; enum dc_status status; @@ -1827,11 +1828,11 @@ enum dc_status dcn32_validate_bandwidth(struct dc *dc, if (dc->debug.using_dml2) status = dml2_validate(dc, context, context->power_source == DC_POWER_SOURCE_DC ? context->bw_ctx.dml2_dc_power_source : context->bw_ctx.dml2, - fast_validate) ? DC_OK : DC_FAIL_BANDWIDTH_VALIDATE; + validate_mode) ? DC_OK : DC_FAIL_BANDWIDTH_VALIDATE; else - status = dml1_validate(dc, context, fast_validate) ? DC_OK : DC_FAIL_BANDWIDTH_VALIDATE; + status = dml1_validate(dc, context, validate_mode) ? DC_OK : DC_FAIL_BANDWIDTH_VALIDATE; - if (!fast_validate && status == DC_OK && dc_state_is_subvp_in_use(context)) { + if (validate_mode == DC_VALIDATE_MODE_AND_PROGRAMMING && status == DC_OK && dc_state_is_subvp_in_use(context)) { /* check new stream configuration still supports cursor if subvp used */ for (i = 0; i < context->stream_count; i++) { stream = context->streams[i]; @@ -1846,14 +1847,14 @@ enum dc_status dcn32_validate_bandwidth(struct dc *dc, }; } - if (!fast_validate && status == DC_FAIL_HW_CURSOR_SUPPORT) { + if (validate_mode == DC_VALIDATE_MODE_AND_PROGRAMMING && status == DC_FAIL_HW_CURSOR_SUPPORT) { /* attempt to validate again with subvp disabled due to cursor */ if (dc->debug.using_dml2) status = dml2_validate(dc, context, context->power_source == DC_POWER_SOURCE_DC ? context->bw_ctx.dml2_dc_power_source : context->bw_ctx.dml2, - fast_validate) ? DC_OK : DC_FAIL_BANDWIDTH_VALIDATE; + validate_mode) ? DC_OK : DC_FAIL_BANDWIDTH_VALIDATE; else - status = dml1_validate(dc, context, fast_validate) ? DC_OK : DC_FAIL_BANDWIDTH_VALIDATE; + status = dml1_validate(dc, context, validate_mode) ? 
DC_OK : DC_FAIL_BANDWIDTH_VALIDATE; } return status; @@ -1862,7 +1863,7 @@ enum dc_status dcn32_validate_bandwidth(struct dc *dc, int dcn32_populate_dml_pipes_from_context( struct dc *dc, struct dc_state *context, display_e2e_pipe_params_st *pipes, - bool fast_validate) + enum dc_validate_mode validate_mode) { int i, pipe_cnt; struct resource_context *res_ctx = &context->res_ctx; @@ -1878,7 +1879,7 @@ int dcn32_populate_dml_pipes_from_context( int num_subvp_none = 0; int odm_slice_count; - dcn20_populate_dml_pipes_from_context(dc, context, pipes, fast_validate); + dcn20_populate_dml_pipes_from_context(dc, context, pipes, validate_mode); /* For single display subvp, look for subvp main so if we have phantom * pipe, we can set odm policy to match main pipe @@ -1960,7 +1961,7 @@ int dcn32_populate_dml_pipes_from_context( /* Only populate DML input with subvp info for full updates. * This is just a workaround -- needs a proper fix. */ - if (!fast_validate) { + if (validate_mode == DC_VALIDATE_MODE_AND_PROGRAMMING) { switch (dc_state_get_pipe_subvp_type(context, pipe)) { case SUBVP_MAIN: pipes[pipe_cnt].pipe.src.use_mall_for_pstate_change = dm_use_mall_pstate_change_sub_viewport; @@ -2061,21 +2062,15 @@ void dcn32_calculate_wm_and_dlg(struct dc *dc, struct dc_state *context, static void dcn32_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_params) { - struct dml2_configuration_options *dml2_opt = &dc->dml2_tmp; - - memcpy(dml2_opt, &dc->dml2_options, sizeof(dc->dml2_options)); - DC_FP_START(); dcn32_update_bw_bounding_box_fpu(dc, bw_params); - dml2_opt->use_clock_dc_limits = false; if (dc->debug.using_dml2 && dc->current_state && dc->current_state->bw_ctx.dml2) - dml2_reinit(dc, dml2_opt, &dc->current_state->bw_ctx.dml2); + dml2_reinit(dc, &dc->dml2_options, &dc->current_state->bw_ctx.dml2); - dml2_opt->use_clock_dc_limits = true; if (dc->debug.using_dml2 && dc->current_state && dc->current_state->bw_ctx.dml2_dc_power_source) - dml2_reinit(dc, dml2_opt, &dc->current_state->bw_ctx.dml2_dc_power_source); + dml2_reinit(dc, &dc->dml2_dc_power_options, &dc->current_state->bw_ctx.dml2_dc_power_source); DC_FP_END(); } @@ -2257,7 +2252,7 @@ static bool dcn32_resource_construct( dc->caps.color.dpp.gamma_corr = 1; dc->caps.color.dpp.dgam_rom_for_yuv = 0; - dc->caps.color.dpp.hw_3d_lut = 1; + dc->caps.color.dpp.hw_3d_lut = 0; dc->caps.color.dpp.ogam_ram = 0; // no OGAM in DPP since DCN1 // no OGAM ROM on DCN2 and later ASICs dc->caps.color.dpp.ogam_rom_caps.srgb = 0; @@ -2276,6 +2271,7 @@ static bool dcn32_resource_construct( dc->caps.color.mpc.ogam_rom_caps.pq = 0; dc->caps.color.mpc.ogam_rom_caps.hlg = 0; dc->caps.color.mpc.ocsc = 1; + dc->caps.color.mpc.preblend = true; /* Use pipe context based otg sync logic */ dc->config.use_pipe_ctx_sync_logic = true; @@ -2505,6 +2501,8 @@ static bool dcn32_resource_construct( for (i = 0; i < dc->caps.max_planes; ++i) dc->caps.planes[i] = plane_cap; + dc->caps.max_odm_combine_factor = 4; + dc->cap_funcs = cap_funcs; if (dc->ctx->dc_bios->fw_info.oem_i2c_present) { @@ -2519,7 +2517,6 @@ static bool dcn32_resource_construct( } dc->dml2_options.dcn_pipe_count = pool->base.pipe_count; - dc->dml2_options.use_native_pstate_optimization = false; dc->dml2_options.use_native_soc_bb_construction = true; dc->dml2_options.minimize_dispclk_using_odm = true; @@ -2551,6 +2548,10 @@ static bool dcn32_resource_construct( if (ASICREV_IS_GC_11_0_3(dc->ctx->asic_id.hw_internal_rev) && (dc->config.sdpif_request_limit_words_per_umc == 0)) 
dc->config.sdpif_request_limit_words_per_umc = 16; + /* init DC limited DML2 options */ + memcpy(&dc->dml2_dc_power_options, &dc->dml2_options, sizeof(struct dml2_configuration_options)); + dc->dml2_dc_power_options.use_clock_dc_limits = true; + return true; create_fail: @@ -2852,7 +2853,7 @@ struct pipe_ctx *dcn32_acquire_free_pipe_as_secondary_opp_head( free_pipe->plane_res.xfm = pool->transforms[free_pipe_idx]; free_pipe->plane_res.dpp = pool->dpps[free_pipe_idx]; free_pipe->plane_res.mpcc_inst = pool->dpps[free_pipe_idx]->inst; - free_pipe->hblank_borrow = otg_master->hblank_borrow; + free_pipe->dsc_padding_params = otg_master->dsc_padding_params; if (free_pipe->stream->timing.flags.DSC == 1) { dcn20_acquire_dsc(free_pipe->stream->ctx->dc, &new_ctx->res_ctx, diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.h b/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.h index d60ed77eda80..99f0432288b4 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.h +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.h @@ -100,12 +100,12 @@ void dcn32_add_phantom_pipes(struct dc *dc, enum dc_status dcn32_validate_bandwidth(struct dc *dc, struct dc_state *context, - bool fast_validate); + enum dc_validate_mode validate_mode); int dcn32_populate_dml_pipes_from_context( struct dc *dc, struct dc_state *context, display_e2e_pipe_params_st *pipes, - bool fast_validate); + enum dc_validate_mode validate_mode); void dcn32_calculate_wm_and_dlg( struct dc *dc, struct dc_state *context, @@ -1141,7 +1141,8 @@ unsigned int dcn32_get_max_hw_cursor_size(const struct dc *dc, SRI_ARR(DCN_SURF1_TTU_CNTL1, HUBPREQ, id), \ SRI_ARR(DCN_CUR0_TTU_CNTL0, HUBPREQ, id), \ SRI_ARR(DCN_CUR0_TTU_CNTL1, HUBPREQ, id), \ - SRI_ARR(HUBP_CLK_CNTL, HUBP, id) + SRI_ARR(HUBP_CLK_CNTL, HUBP, id), \ + SRI_ARR(HUBPRET_READ_LINE_VALUE, HUBPRET, id) #define HUBP_REG_LIST_DCN2_COMMON_RI(id) \ HUBP_REG_LIST_DCN_RI(id), HUBP_REG_LIST_DCN_VM_RI(id), \ SRI_ARR(PREFETCH_SETTINGS, HUBPREQ, id), \ @@ -1229,7 +1230,8 @@ unsigned int dcn32_get_max_hw_cursor_size(const struct dc *dc, SR(DCHUBBUB_ARB_MALL_CNTL), \ SR(DCN_VM_FAULT_ADDR_MSB), SR(DCN_VM_FAULT_ADDR_LSB), \ SR(DCN_VM_FAULT_CNTL), SR(DCN_VM_FAULT_STATUS), \ - SR(SDPIF_REQUEST_RATE_LIMIT) + SR(SDPIF_REQUEST_RATE_LIMIT), \ + SR(DCHUBBUB_SDPIF_CFG0) /* DCCG */ diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn321/dcn321_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn321/dcn321_resource.c index 7db1f7a5613f..ad214986f7ac 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn321/dcn321_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn321/dcn321_resource.c @@ -72,7 +72,7 @@ #include "dml/display_mode_vba.h" #include "dcn32/dcn32_dccg.h" #include "dcn10/dcn10_resource.h" -#include "link.h" +#include "link_service.h" #include "dcn31/dcn31_panel_cntl.h" #include "dcn30/dcn30_dwb.h" @@ -1580,21 +1580,15 @@ static struct dc_cap_funcs cap_funcs = { static void dcn321_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_params) { - struct dml2_configuration_options *dml2_opt = &dc->dml2_tmp; - - memcpy(dml2_opt, &dc->dml2_options, sizeof(dc->dml2_options)); - DC_FP_START(); dcn321_update_bw_bounding_box_fpu(dc, bw_params); - dml2_opt->use_clock_dc_limits = false; if (dc->debug.using_dml2 && dc->current_state && dc->current_state->bw_ctx.dml2) - dml2_reinit(dc, dml2_opt, &dc->current_state->bw_ctx.dml2); + dml2_reinit(dc, &dc->dml2_options, &dc->current_state->bw_ctx.dml2); - 
dml2_opt->use_clock_dc_limits = true; if (dc->debug.using_dml2 && dc->current_state && dc->current_state->bw_ctx.dml2_dc_power_source) - dml2_reinit(dc, dml2_opt, &dc->current_state->bw_ctx.dml2_dc_power_source); + dml2_reinit(dc, &dc->dml2_dc_power_options, &dc->current_state->bw_ctx.dml2_dc_power_source); DC_FP_END(); } @@ -1761,8 +1755,8 @@ static bool dcn321_resource_construct( dc->caps.color.dpp.gamma_corr = 1; dc->caps.color.dpp.dgam_rom_for_yuv = 0; - dc->caps.color.dpp.hw_3d_lut = 1; - dc->caps.color.dpp.ogam_ram = 1; + dc->caps.color.dpp.hw_3d_lut = 0; + dc->caps.color.dpp.ogam_ram = 0; // no OGAM ROM on DCN2 and later ASICs dc->caps.color.dpp.ogam_rom_caps.srgb = 0; dc->caps.color.dpp.ogam_rom_caps.bt2020 = 0; @@ -1780,6 +1774,7 @@ static bool dcn321_resource_construct( dc->caps.color.mpc.ogam_rom_caps.pq = 0; dc->caps.color.mpc.ogam_rom_caps.hlg = 0; dc->caps.color.mpc.ocsc = 1; + dc->caps.color.mpc.preblend = true; /* Use pipe context based otg sync logic */ dc->config.use_pipe_ctx_sync_logic = true; @@ -2004,6 +1999,8 @@ static bool dcn321_resource_construct( for (i = 0; i < dc->caps.max_planes; ++i) dc->caps.planes[i] = plane_cap; + dc->caps.max_odm_combine_factor = 4; + dc->cap_funcs = cap_funcs; if (dc->ctx->dc_bios->fw_info.oem_i2c_present) { @@ -2018,7 +2015,6 @@ static bool dcn321_resource_construct( } dc->dml2_options.dcn_pipe_count = pool->base.pipe_count; - dc->dml2_options.use_native_pstate_optimization = false; dc->dml2_options.use_native_soc_bb_construction = true; dc->dml2_options.minimize_dispclk_using_odm = true; @@ -2046,6 +2042,10 @@ static bool dcn321_resource_construct( dc->dml2_options.max_segments_per_hubp = 18; dc->dml2_options.det_segment_size = DCN3_2_DET_SEG_SIZE; + /* init DC limited DML2 options */ + memcpy(&dc->dml2_dc_power_options, &dc->dml2_options, sizeof(struct dml2_configuration_options)); + dc->dml2_dc_power_options.use_clock_dc_limits = true; + return true; create_fail: diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c index 72c6cf047db0..fff57f23f4f7 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c @@ -61,7 +61,7 @@ #include "dcn31/dcn31_hpo_dp_stream_encoder.h" #include "dcn31/dcn31_hpo_dp_link_encoder.h" #include "dcn32/dcn32_hpo_dp_link_encoder.h" -#include "link.h" +#include "link_service.h" #include "dcn31/dcn31_apg.h" #include "dcn32/dcn32_dio_link_encoder.h" #include "dcn31/dcn31_vpg.h" @@ -1734,15 +1734,15 @@ static void dcn35_get_panel_config_defaults(struct dc_panel_config *panel_config static enum dc_status dcn35_validate_bandwidth(struct dc *dc, struct dc_state *context, - bool fast_validate) + enum dc_validate_mode validate_mode) { bool out = false; out = dml2_validate(dc, context, context->power_source == DC_POWER_SOURCE_DC ? context->bw_ctx.dml2_dc_power_source : context->bw_ctx.dml2, - fast_validate); + validate_mode); - if (fast_validate) + if (validate_mode != DC_VALIDATE_MODE_AND_PROGRAMMING) return out ? 
DC_OK : DC_FAIL_BANDWIDTH_VALIDATE; DC_FP_START(); @@ -1760,6 +1760,20 @@ enum dc_status dcn35_patch_unknown_plane_state(struct dc_plane_state *plane_stat } +static int populate_dml_pipes_from_context_fpu(struct dc *dc, + struct dc_state *context, + display_e2e_pipe_params_st *pipes, + enum dc_validate_mode validate_mode) +{ + int ret; + + DC_FP_START(); + ret = dcn35_populate_dml_pipes_from_context_fpu(dc, context, pipes, validate_mode); + DC_FP_END(); + + return ret; +} + static struct resource_funcs dcn35_res_pool_funcs = { .destroy = dcn35_destroy_resource_pool, .link_enc_create = dcn35_link_encoder_create, @@ -1770,7 +1784,7 @@ static struct resource_funcs dcn35_res_pool_funcs = { .validate_bandwidth = dcn35_validate_bandwidth, .calculate_wm_and_dlg = NULL, .update_soc_for_wm_a = dcn31_update_soc_for_wm_a, - .populate_dml_pipes = dcn35_populate_dml_pipes_from_context_fpu, + .populate_dml_pipes = populate_dml_pipes_from_context_fpu, .acquire_free_pipe_as_secondary_dpp_pipe = dcn20_acquire_free_pipe_for_layer, .release_pipe = dcn20_release_pipe, .add_stream_to_ctx = dcn30_add_stream_to_ctx, @@ -1786,7 +1800,9 @@ static struct resource_funcs dcn35_res_pool_funcs = { .get_panel_config_defaults = dcn35_get_panel_config_defaults, .get_preferred_eng_id_dpia = dcn35_get_preferred_eng_id_dpia, .get_det_buffer_size = dcn31_get_det_buffer_size, - .get_vstartup_for_pipe = dcn10_get_vstartup_for_pipe + .get_vstartup_for_pipe = dcn10_get_vstartup_for_pipe, + .update_dc_state_for_encoder_switch = dcn31_update_dc_state_for_encoder_switch, + .build_pipe_pix_clk_params = dcn20_build_pipe_pix_clk_params }; static bool dcn35_resource_construct( @@ -1874,7 +1890,7 @@ static bool dcn35_resource_construct( dc->caps.color.dpp.gamma_corr = 1; dc->caps.color.dpp.dgam_rom_for_yuv = 0; - dc->caps.color.dpp.hw_3d_lut = 1; + dc->caps.color.dpp.hw_3d_lut = 0; dc->caps.color.dpp.ogam_ram = 0; // no OGAM in DPP since DCN1 // no OGAM ROM on DCN301 dc->caps.color.dpp.ogam_rom_caps.srgb = 0; @@ -1893,6 +1909,10 @@ static bool dcn35_resource_construct( dc->caps.color.mpc.ogam_rom_caps.pq = 0; dc->caps.color.mpc.ogam_rom_caps.hlg = 0; dc->caps.color.mpc.ocsc = 1; + dc->caps.color.mpc.preblend = true; + + dc->caps.num_of_host_routers = 2; + dc->caps.num_of_dpias_per_host_router = 2; /* max_disp_clock_khz_at_vmin is slightly lower than the STA value in order * to provide some margin. 
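A note on the wrapper added above: DML calculations use floating point, and in the kernel any FP code must run between DC_FP_START() and DC_FP_END() (the DC wrappers around the kernel's FPU begin/end primitives). Because the .populate_dml_pipes hook can now be called from a context that does not already hold the FPU, the *_fpu routines are no longer wired into the resource tables directly; each table instead points at a thin wrapper that takes and releases the FPU around the call. A minimal sketch of the convention follows; example_compute and example_compute_fpu are hypothetical names used only for illustration, and the shape mirrors the dcn35/dcn351/dcn36 wrappers in this patch.

	/* Sketch only: hypothetical helpers showing the DC_FP guard convention. */
	static int example_compute_fpu(struct dc *dc, struct dc_state *context)
	{
		/* FP-using code may verify that the FPU context is held. */
		dc_assert_fp_enabled();

		/* ... floating point DML work ... */
		return 0;
	}

	static int example_compute(struct dc *dc, struct dc_state *context)
	{
		int ret;

		DC_FP_START();          /* enter kernel FPU context */
		ret = example_compute_fpu(dc, context);
		DC_FP_END();            /* leave kernel FPU context */

		return ret;
	}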
@@ -2148,12 +2168,13 @@ static bool dcn35_resource_construct( for (i = 0; i < dc->caps.max_planes; ++i) dc->caps.planes[i] = plane_cap; + dc->caps.max_odm_combine_factor = 4; + dc->cap_funcs = cap_funcs; dc->dcn_ip->max_num_dpp = pool->base.pipe_count; dc->dml2_options.dcn_pipe_count = pool->base.pipe_count; - dc->dml2_options.use_native_pstate_optimization = true; dc->dml2_options.use_native_soc_bb_construction = true; dc->dml2_options.minimize_dispclk_using_odm = false; if (dc->config.EnableMinDispClkODM) diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn351/dcn351_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn351/dcn351_resource.c index 989a270f7dea..0abd163b425e 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn351/dcn351_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn351/dcn351_resource.c @@ -40,7 +40,7 @@ #include "dcn31/dcn31_hpo_dp_stream_encoder.h" #include "dcn31/dcn31_hpo_dp_link_encoder.h" #include "dcn32/dcn32_hpo_dp_link_encoder.h" -#include "link.h" +#include "link_service.h" #include "dcn31/dcn31_apg.h" #include "dcn32/dcn32_dio_link_encoder.h" #include "dcn31/dcn31_vpg.h" @@ -1714,15 +1714,15 @@ static void dcn35_get_panel_config_defaults(struct dc_panel_config *panel_config static enum dc_status dcn351_validate_bandwidth(struct dc *dc, struct dc_state *context, - bool fast_validate) + enum dc_validate_mode validate_mode) { bool out = false; out = dml2_validate(dc, context, context->power_source == DC_POWER_SOURCE_DC ? context->bw_ctx.dml2_dc_power_source : context->bw_ctx.dml2, - fast_validate); + validate_mode); - if (fast_validate) + if (validate_mode != DC_VALIDATE_MODE_AND_PROGRAMMING) return out ? DC_OK : DC_FAIL_BANDWIDTH_VALIDATE; DC_FP_START(); @@ -1732,6 +1732,21 @@ static enum dc_status dcn351_validate_bandwidth(struct dc *dc, return out ? 
DC_OK : DC_FAIL_BANDWIDTH_VALIDATE; } +static int populate_dml_pipes_from_context_fpu(struct dc *dc, + struct dc_state *context, + display_e2e_pipe_params_st *pipes, + enum dc_validate_mode validate_mode) +{ + int ret; + + DC_FP_START(); + ret = dcn351_populate_dml_pipes_from_context_fpu(dc, context, pipes, validate_mode); + DC_FP_END(); + + return ret; + +} + static struct resource_funcs dcn351_res_pool_funcs = { .destroy = dcn351_destroy_resource_pool, .link_enc_create = dcn35_link_encoder_create, @@ -1742,7 +1757,7 @@ static struct resource_funcs dcn351_res_pool_funcs = { .validate_bandwidth = dcn351_validate_bandwidth, .calculate_wm_and_dlg = NULL, .update_soc_for_wm_a = dcn31_update_soc_for_wm_a, - .populate_dml_pipes = dcn351_populate_dml_pipes_from_context_fpu, + .populate_dml_pipes = populate_dml_pipes_from_context_fpu, .acquire_free_pipe_as_secondary_dpp_pipe = dcn20_acquire_free_pipe_for_layer, .release_pipe = dcn20_release_pipe, .add_stream_to_ctx = dcn30_add_stream_to_ctx, @@ -1758,7 +1773,9 @@ static struct resource_funcs dcn351_res_pool_funcs = { .get_panel_config_defaults = dcn35_get_panel_config_defaults, .get_preferred_eng_id_dpia = dcn351_get_preferred_eng_id_dpia, .get_det_buffer_size = dcn31_get_det_buffer_size, - .get_vstartup_for_pipe = dcn10_get_vstartup_for_pipe + .get_vstartup_for_pipe = dcn10_get_vstartup_for_pipe, + .update_dc_state_for_encoder_switch = dcn31_update_dc_state_for_encoder_switch, + .build_pipe_pix_clk_params = dcn20_build_pipe_pix_clk_params }; static bool dcn351_resource_construct( @@ -1846,7 +1863,7 @@ static bool dcn351_resource_construct( dc->caps.color.dpp.gamma_corr = 1; dc->caps.color.dpp.dgam_rom_for_yuv = 0; - dc->caps.color.dpp.hw_3d_lut = 1; + dc->caps.color.dpp.hw_3d_lut = 0; dc->caps.color.dpp.ogam_ram = 0; // no OGAM in DPP since DCN1 // no OGAM ROM on DCN301 dc->caps.color.dpp.ogam_rom_caps.srgb = 0; @@ -1865,6 +1882,10 @@ static bool dcn351_resource_construct( dc->caps.color.mpc.ogam_rom_caps.pq = 0; dc->caps.color.mpc.ogam_rom_caps.hlg = 0; dc->caps.color.mpc.ocsc = 1; + dc->caps.color.mpc.preblend = true; + + dc->caps.num_of_host_routers = 2; + dc->caps.num_of_dpias_per_host_router = 2; /* max_disp_clock_khz_at_vmin is slightly lower than the STA value in order * to provide some margin. 
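The validate_bandwidth conversions above replace the old bool fast_validate parameter with enum dc_validate_mode. Only DC_VALIDATE_MODE_AND_PROGRAMMING runs the full path, including the FPU-guarded watermark/DLG calculation; any other mode returns straight after DML validation with DC_OK or DC_FAIL_BANDWIDTH_VALIDATE. A condensed sketch of the pattern is shown below; example_validate_bandwidth is a hypothetical name, and the body mirrors dcn35_validate_bandwidth and dcn351_validate_bandwidth from this patch.

	/* Sketch only: hypothetical hook showing the dc_validate_mode convention. */
	static enum dc_status example_validate_bandwidth(struct dc *dc,
			struct dc_state *context,
			enum dc_validate_mode validate_mode)
	{
		bool out = dml2_validate(dc, context,
				context->power_source == DC_POWER_SOURCE_DC ?
					context->bw_ctx.dml2_dc_power_source :
					context->bw_ctx.dml2,
				validate_mode);

		/* Mode-check only: report support and skip any programming work. */
		if (validate_mode != DC_VALIDATE_MODE_AND_PROGRAMMING)
			return out ? DC_OK : DC_FAIL_BANDWIDTH_VALIDATE;

		/* Full validation: derive watermarks/DLG under the FPU guard. */
		DC_FP_START();
		/* ... calculate watermarks and DLG for the context ... */
		DC_FP_END();

		return out ? DC_OK : DC_FAIL_BANDWIDTH_VALIDATE;
	}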
@@ -2119,13 +2140,14 @@ static bool dcn351_resource_construct( for (i = 0; i < dc->caps.max_planes; ++i) dc->caps.planes[i] = plane_cap; + dc->caps.max_odm_combine_factor = 4; + dc->cap_funcs = cap_funcs; dc->dcn_ip->max_num_dpp = pool->base.pipe_count; dc->dml2_options.dcn_pipe_count = pool->base.pipe_count; - dc->dml2_options.use_native_pstate_optimization = true; dc->dml2_options.use_native_soc_bb_construction = true; dc->dml2_options.minimize_dispclk_using_odm = false; if (dc->config.EnableMinDispClkODM) diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn36/dcn36_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn36/dcn36_resource.c index 48e1f234185f..ca125ee6c2fb 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn36/dcn36_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn36/dcn36_resource.c @@ -40,7 +40,7 @@ #include "dcn31/dcn31_hpo_dp_stream_encoder.h" #include "dcn31/dcn31_hpo_dp_link_encoder.h" #include "dcn32/dcn32_hpo_dp_link_encoder.h" -#include "link.h" +#include "link_service.h" #include "dcn31/dcn31_apg.h" #include "dcn32/dcn32_dio_link_encoder.h" #include "dcn31/dcn31_vpg.h" @@ -1715,15 +1715,15 @@ static void dcn35_get_panel_config_defaults(struct dc_panel_config *panel_config static enum dc_status dcn35_validate_bandwidth(struct dc *dc, struct dc_state *context, - bool fast_validate) + enum dc_validate_mode validate_mode) { bool out = false; out = dml2_validate(dc, context, context->power_source == DC_POWER_SOURCE_DC ? context->bw_ctx.dml2_dc_power_source : context->bw_ctx.dml2, - fast_validate); + validate_mode); - if (fast_validate) + if (validate_mode != DC_VALIDATE_MODE_AND_PROGRAMMING) return out ? DC_OK : DC_FAIL_BANDWIDTH_VALIDATE; DC_FP_START(); @@ -1734,6 +1734,20 @@ static enum dc_status dcn35_validate_bandwidth(struct dc *dc, } +static int populate_dml_pipes_from_context_fpu(struct dc *dc, + struct dc_state *context, + display_e2e_pipe_params_st *pipes, + enum dc_validate_mode validate_mode) +{ + int ret; + + DC_FP_START(); + ret = dcn35_populate_dml_pipes_from_context_fpu(dc, context, pipes, validate_mode); + DC_FP_END(); + + return ret; +} + static struct resource_funcs dcn36_res_pool_funcs = { .destroy = dcn36_destroy_resource_pool, .link_enc_create = dcn35_link_encoder_create, @@ -1744,7 +1758,7 @@ static struct resource_funcs dcn36_res_pool_funcs = { .validate_bandwidth = dcn35_validate_bandwidth, .calculate_wm_and_dlg = NULL, .update_soc_for_wm_a = dcn31_update_soc_for_wm_a, - .populate_dml_pipes = dcn35_populate_dml_pipes_from_context_fpu, + .populate_dml_pipes = populate_dml_pipes_from_context_fpu, .acquire_free_pipe_as_secondary_dpp_pipe = dcn20_acquire_free_pipe_for_layer, .release_pipe = dcn20_release_pipe, .add_stream_to_ctx = dcn30_add_stream_to_ctx, @@ -1759,7 +1773,9 @@ static struct resource_funcs dcn36_res_pool_funcs = { .patch_unknown_plane_state = dcn20_patch_unknown_plane_state, .get_panel_config_defaults = dcn35_get_panel_config_defaults, .get_preferred_eng_id_dpia = dcn36_get_preferred_eng_id_dpia, - .get_vstartup_for_pipe = dcn10_get_vstartup_for_pipe + .get_vstartup_for_pipe = dcn10_get_vstartup_for_pipe, + .update_dc_state_for_encoder_switch = dcn31_update_dc_state_for_encoder_switch, + .build_pipe_pix_clk_params = dcn20_build_pipe_pix_clk_params }; static bool dcn36_resource_construct( @@ -1847,7 +1863,7 @@ static bool dcn36_resource_construct( dc->caps.color.dpp.gamma_corr = 1; dc->caps.color.dpp.dgam_rom_for_yuv = 0; - dc->caps.color.dpp.hw_3d_lut = 1; + dc->caps.color.dpp.hw_3d_lut = 0; 
dc->caps.color.dpp.ogam_ram = 0; // no OGAM in DPP since DCN1 // no OGAM ROM on DCN301 dc->caps.color.dpp.ogam_rom_caps.srgb = 0; @@ -1866,6 +1882,10 @@ static bool dcn36_resource_construct( dc->caps.color.mpc.ogam_rom_caps.pq = 0; dc->caps.color.mpc.ogam_rom_caps.hlg = 0; dc->caps.color.mpc.ocsc = 1; + dc->caps.color.mpc.preblend = true; + + dc->caps.num_of_host_routers = 2; + dc->caps.num_of_dpias_per_host_router = 2; /* max_disp_clock_khz_at_vmin is slightly lower than the STA value in order * to provide some margin. @@ -2121,12 +2141,13 @@ static bool dcn36_resource_construct( for (i = 0; i < dc->caps.max_planes; ++i) dc->caps.planes[i] = plane_cap; + dc->caps.max_odm_combine_factor = 4; + dc->cap_funcs = cap_funcs; dc->dcn_ip->max_num_dpp = pool->base.pipe_count; dc->dml2_options.dcn_pipe_count = pool->base.pipe_count; - dc->dml2_options.use_native_pstate_optimization = true; dc->dml2_options.use_native_soc_bb_construction = true; dc->dml2_options.minimize_dispclk_using_odm = false; if (dc->config.EnableMinDispClkODM) diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.c index f420c4dafa03..1d18807e4749 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.c @@ -50,7 +50,7 @@ #include "dml/display_mode_vba.h" #include "dcn401/dcn401_dccg.h" #include "dcn10/dcn10_resource.h" -#include "link.h" +#include "link_service.h" #include "link_enc_cfg.h" #include "dcn31/dcn31_panel_cntl.h" @@ -70,7 +70,6 @@ #include "dml/dcn30/display_mode_vba_30.h" #include "vm_helper.h" #include "dcn20/dcn20_vmid.h" -#include "dml/dcn401/dcn401_fpu.h" #include "dc_state_priv.h" @@ -709,6 +708,7 @@ static const struct dc_debug_options debug_defaults_drv = { }, .use_max_lb = true, .force_disable_subvp = false, + .disable_force_pstate_allow_on_hw_release = false, .exit_idle_opt_for_cursor_updates = true, .using_dml2 = true, .using_dml21 = true, @@ -1608,10 +1608,6 @@ static struct dc_cap_funcs cap_funcs = { static void dcn401_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_params) { - struct dml2_configuration_options *dml2_opt = &dc->dml2_tmp; - - memcpy(dml2_opt, &dc->dml2_options, sizeof(dc->dml2_options)); - /* re-calculate the available MALL size if required */ if (bw_params->num_channels > 0) { dc->caps.max_cab_allocation_bytes = dcn401_calc_num_avail_chans_for_mall( @@ -1622,15 +1618,11 @@ static void dcn401_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *b DC_FP_START(); - dcn401_update_bw_bounding_box_fpu(dc, bw_params); - - dml2_opt->use_clock_dc_limits = false; if (dc->debug.using_dml2 && dc->current_state && dc->current_state->bw_ctx.dml2) - dml2_reinit(dc, dml2_opt, &dc->current_state->bw_ctx.dml2); + dml2_reinit(dc, &dc->dml2_options, &dc->current_state->bw_ctx.dml2); - dml2_opt->use_clock_dc_limits = true; if (dc->debug.using_dml2 && dc->current_state && dc->current_state->bw_ctx.dml2_dc_power_source) - dml2_reinit(dc, dml2_opt, &dc->current_state->bw_ctx.dml2_dc_power_source); + dml2_reinit(dc, &dc->dml2_dc_power_options, &dc->current_state->bw_ctx.dml2_dc_power_source); DC_FP_END(); } @@ -1644,7 +1636,7 @@ enum dc_status dcn401_patch_unknown_plane_state(struct dc_plane_state *plane_sta enum dc_status dcn401_validate_bandwidth(struct dc *dc, struct dc_state *context, - bool fast_validate) + enum dc_validate_mode validate_mode) { unsigned int i; enum dc_status status = DC_OK; @@ -1662,9 
+1654,9 @@ enum dc_status dcn401_validate_bandwidth(struct dc *dc, if (dc->debug.using_dml2) status = dml2_validate(dc, context, context->power_source == DC_POWER_SOURCE_DC ? context->bw_ctx.dml2_dc_power_source : context->bw_ctx.dml2, - fast_validate) ? DC_OK : DC_FAIL_BANDWIDTH_VALIDATE; + validate_mode) ? DC_OK : DC_FAIL_BANDWIDTH_VALIDATE; - if (!fast_validate && status == DC_OK && dc_state_is_subvp_in_use(context)) { + if (validate_mode == DC_VALIDATE_MODE_AND_PROGRAMMING && status == DC_OK && dc_state_is_subvp_in_use(context)) { /* check new stream configuration still supports cursor if subvp used */ for (i = 0; i < context->stream_count; i++) { stream = context->streams[i]; @@ -1679,12 +1671,12 @@ enum dc_status dcn401_validate_bandwidth(struct dc *dc, }; } - if (!fast_validate && status == DC_FAIL_HW_CURSOR_SUPPORT) { + if (validate_mode == DC_VALIDATE_MODE_AND_PROGRAMMING && status == DC_FAIL_HW_CURSOR_SUPPORT) { /* attempt to validate again with subvp disabled due to cursor */ if (dc->debug.using_dml2) status = dml2_validate(dc, context, context->power_source == DC_POWER_SOURCE_DC ? context->bw_ctx.dml2_dc_power_source : context->bw_ctx.dml2, - fast_validate) ? DC_OK : DC_FAIL_BANDWIDTH_VALIDATE; + validate_mode) ? DC_OK : DC_FAIL_BANDWIDTH_VALIDATE; } return status; @@ -1707,6 +1699,9 @@ static void dcn401_build_pipe_pix_clk_params(struct pipe_ctx *pipe_ctx) pixel_clk_params->requested_pix_clk_100hz = stream->timing.pix_clk_100hz; + if (pipe_ctx->dsc_padding_params.dsc_hactive_padding != 0) + pixel_clk_params->requested_pix_clk_100hz = pipe_ctx->dsc_padding_params.dsc_pix_clk_100hz; + if (!pipe_ctx->stream->ctx->dc->config.unify_link_enc_assignment) link_enc = link_enc_cfg_get_link_enc(link); if (link_enc) @@ -1957,8 +1952,30 @@ static bool dcn401_resource_construct( dc->caps.color.mpc.ogam_rom_caps.pq = 0; dc->caps.color.mpc.ogam_rom_caps.hlg = 0; dc->caps.color.mpc.ocsc = 1; + dc->caps.color.mpc.preblend = true; dc->config.use_spl = true; dc->config.prefer_easf = true; + + dc->config.dcn_sharpness_range.sdr_rgb_min = 0; + dc->config.dcn_sharpness_range.sdr_rgb_max = 1750; + dc->config.dcn_sharpness_range.sdr_rgb_mid = 750; + dc->config.dcn_sharpness_range.sdr_yuv_min = 0; + dc->config.dcn_sharpness_range.sdr_yuv_max = 3500; + dc->config.dcn_sharpness_range.sdr_yuv_mid = 1500; + dc->config.dcn_sharpness_range.hdr_rgb_min = 0; + dc->config.dcn_sharpness_range.hdr_rgb_max = 2750; + dc->config.dcn_sharpness_range.hdr_rgb_mid = 1500; + + dc->config.dcn_override_sharpness_range.sdr_rgb_min = 0; + dc->config.dcn_override_sharpness_range.sdr_rgb_max = 3250; + dc->config.dcn_override_sharpness_range.sdr_rgb_mid = 1250; + dc->config.dcn_override_sharpness_range.sdr_yuv_min = 0; + dc->config.dcn_override_sharpness_range.sdr_yuv_max = 3500; + dc->config.dcn_override_sharpness_range.sdr_yuv_mid = 1500; + dc->config.dcn_override_sharpness_range.hdr_rgb_min = 0; + dc->config.dcn_override_sharpness_range.hdr_rgb_max = 2750; + dc->config.dcn_override_sharpness_range.hdr_rgb_mid = 1500; + dc->config.dc_mode_clk_limit_support = true; dc->config.enable_windowed_mpo_odm = true; dc->config.set_pipe_unlock_order = true; /* Need to ensure DET gets freed before allocating */ @@ -2177,6 +2194,8 @@ static bool dcn401_resource_construct( for (i = 0; i < dc->caps.max_planes; ++i) dc->caps.planes[i] = plane_cap; + dc->caps.max_odm_combine_factor = 4; + dc->cap_funcs = cap_funcs; if (dc->ctx->dc_bios->fw_info.oem_i2c_present) { @@ -2195,7 +2214,6 @@ static bool dcn401_resource_construct( 
dc->config.sdpif_request_limit_words_per_umc = 16; dc->dml2_options.dcn_pipe_count = pool->base.pipe_count; - dc->dml2_options.use_native_pstate_optimization = false; dc->dml2_options.use_native_soc_bb_construction = true; dc->dml2_options.minimize_dispclk_using_odm = true; dc->dml2_options.map_dc_pipes_with_callbacks = true; @@ -2228,6 +2246,10 @@ static bool dcn401_resource_construct( /* SPL */ dc->caps.scl_caps.sharpener_support = true; + /* init DC limited DML2 options */ + memcpy(&dc->dml2_dc_power_options, &dc->dml2_options, sizeof(struct dml2_configuration_options)); + dc->dml2_dc_power_options.use_clock_dc_limits = true; + return true; create_fail: diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.h b/drivers/gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.h index dc52a30991af..0fc66487d800 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.h +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.h @@ -24,7 +24,7 @@ enum dc_status dcn401_patch_unknown_plane_state(struct dc_plane_state *plane_sta enum dc_status dcn401_validate_bandwidth(struct dc *dc, struct dc_state *context, - bool fast_validate); + enum dc_validate_mode validate_mode); void dcn401_prepare_mcache_programming(struct dc *dc, struct dc_state *context); @@ -140,7 +140,8 @@ void dcn401_prepare_mcache_programming(struct dc *dc, struct dc_state *context); SRI_ARR(UCLK_PSTATE_FORCE, HUBPREQ, id), \ HUBP_3DLUT_FL_REG_LIST_DCN401(id), \ SRI_ARR(DCSURF_VIEWPORT_MCACHE_SPLIT_COORDINATE, HUBP, id), \ - SRI_ARR(DCHUBP_MCACHEID_CONFIG, HUBP, id) + SRI_ARR(DCHUBP_MCACHEID_CONFIG, HUBP, id), \ + SRI_ARR(HUBPRET_READ_LINE_VALUE, HUBPRET, id) /* ABM */ #define ABM_DCN401_REG_LIST_RI(id) \ diff --git a/drivers/gpu/drm/amd/display/dc/soc_and_ip_translator/Makefile b/drivers/gpu/drm/amd/display/dc/soc_and_ip_translator/Makefile new file mode 100644 index 000000000000..bc93356a0b5b --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/soc_and_ip_translator/Makefile @@ -0,0 +1,19 @@ +# SPDX-License-Identifier: MIT +# +# Copyright 2025 Advanced Micro Devices, Inc. +# Makefile for bounding box component. +# Floating point required due to nature of bounding box values + +soc_and_ip_translator_ccflags := $(CC_FLAGS_FPU) +soc_and_ip_translator_rcflags := $(CC_FLAGS_NO_FPU) + +CFLAGS_$(AMDDALPATH)/dc/soc_and_ip_translator/dcn401/dcn401_soc_and_ip_translator.o := $(soc_and_ip_translator_ccflags) + +CFLAGS_REMOVE_$(AMDDALPATH)/dc/soc_and_ip_translator/dcn401/dcn401_soc_and_ip_translator.o := $(soc_and_ip_translator_rcflags) + +soc_and_ip_translator := soc_and_ip_translator.o +soc_and_ip_translator += dcn401/dcn401_soc_and_ip_translator.o + +AMD_DAL_soc_and_ip_translator := $(addprefix $(AMDDALPATH)/dc/soc_and_ip_translator/, $(soc_and_ip_translator)) + +AMD_DISPLAY_FILES += $(AMD_DAL_soc_and_ip_translator) diff --git a/drivers/gpu/drm/amd/display/dc/soc_and_ip_translator/dcn401/dcn401_soc_and_ip_translator.c b/drivers/gpu/drm/amd/display/dc/soc_and_ip_translator/dcn401/dcn401_soc_and_ip_translator.c new file mode 100644 index 000000000000..3190c76eb482 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/soc_and_ip_translator/dcn401/dcn401_soc_and_ip_translator.c @@ -0,0 +1,304 @@ +// SPDX-License-Identifier: MIT +// +// Copyright 2025 Advanced Micro Devices, Inc. + +#include "dcn401_soc_and_ip_translator.h" +#include "bounding_boxes/dcn4_soc_bb.h" + +/* soc_and_ip_translator component used to get up-to-date values for bounding box. 
+ * Bounding box values are stored in several locations and locations can vary with DCN revision. + * This component provides an interface to get DCN-specific bounding box values. + */ + +static void get_default_soc_bb(struct dml2_soc_bb *soc_bb) +{ + memcpy(soc_bb, &dml2_socbb_dcn401, sizeof(struct dml2_soc_bb)); + memcpy(&soc_bb->qos_parameters, &dml_dcn4_variant_a_soc_qos_params, sizeof(struct dml2_soc_qos_parameters)); +} + +/* + * DC clock table is obtained from SMU during runtime. + * SMU stands for System Management Unit. It is a power management processor. + * It owns the initialization of dc's clock table and programming of clock values + * based on dc's requests. + * Our clock values in base soc bb is a dummy placeholder. The real clock values + * are retrieved from SMU firmware to dc clock table at runtime. + * This function overrides our dummy placeholder values with real values in dc + * clock table. + */ +static void dcn401_convert_dc_clock_table_to_soc_bb_clock_table( + struct dml2_soc_state_table *dml_clk_table, + const struct clk_bw_params *dc_bw_params, + bool use_clock_dc_limits) +{ + int i; + const struct clk_limit_table *dc_clk_table; + + if (dc_bw_params == NULL) + /* skip if bw params could not be obtained from smu */ + return; + + dc_clk_table = &dc_bw_params->clk_table; + + /* dcfclk */ + if (dc_clk_table->num_entries_per_clk.num_dcfclk_levels) { + dml_clk_table->dcfclk.num_clk_values = dc_clk_table->num_entries_per_clk.num_dcfclk_levels; + for (i = 0; i < min(DML_MAX_CLK_TABLE_SIZE, MAX_NUM_DPM_LVL); i++) { + if (i < dml_clk_table->dcfclk.num_clk_values) { + if (use_clock_dc_limits && dc_bw_params->dc_mode_limit.dcfclk_mhz && + dc_clk_table->entries[i].dcfclk_mhz > dc_bw_params->dc_mode_limit.dcfclk_mhz) { + if (i == 0 || dc_clk_table->entries[i-1].dcfclk_mhz < dc_bw_params->dc_mode_limit.dcfclk_mhz) { + dml_clk_table->dcfclk.clk_values_khz[i] = dc_bw_params->dc_mode_limit.dcfclk_mhz * 1000; + dml_clk_table->dcfclk.num_clk_values = i + 1; + } else { + dml_clk_table->dcfclk.clk_values_khz[i] = 0; + dml_clk_table->dcfclk.num_clk_values = i; + } + } else { + dml_clk_table->dcfclk.clk_values_khz[i] = dc_clk_table->entries[i].dcfclk_mhz * 1000; + } + } else { + dml_clk_table->dcfclk.clk_values_khz[i] = 0; + } + } + } + + /* fclk */ + if (dc_clk_table->num_entries_per_clk.num_fclk_levels) { + dml_clk_table->fclk.num_clk_values = dc_clk_table->num_entries_per_clk.num_fclk_levels; + for (i = 0; i < min(DML_MAX_CLK_TABLE_SIZE, MAX_NUM_DPM_LVL); i++) { + if (i < dml_clk_table->fclk.num_clk_values) { + if (use_clock_dc_limits && dc_bw_params->dc_mode_limit.fclk_mhz && + dc_clk_table->entries[i].fclk_mhz > dc_bw_params->dc_mode_limit.fclk_mhz) { + if (i == 0 || dc_clk_table->entries[i-1].fclk_mhz < dc_bw_params->dc_mode_limit.fclk_mhz) { + dml_clk_table->fclk.clk_values_khz[i] = dc_bw_params->dc_mode_limit.fclk_mhz * 1000; + dml_clk_table->fclk.num_clk_values = i + 1; + } else { + dml_clk_table->fclk.clk_values_khz[i] = 0; + dml_clk_table->fclk.num_clk_values = i; + } + } else { + dml_clk_table->fclk.clk_values_khz[i] = dc_clk_table->entries[i].fclk_mhz * 1000; + } + } else { + dml_clk_table->fclk.clk_values_khz[i] = 0; + } + } + } + + /* uclk */ + if (dc_clk_table->num_entries_per_clk.num_memclk_levels) { + dml_clk_table->uclk.num_clk_values = dc_clk_table->num_entries_per_clk.num_memclk_levels; + for (i = 0; i < min(DML_MAX_CLK_TABLE_SIZE, MAX_NUM_DPM_LVL); i++) { + if (i < dml_clk_table->uclk.num_clk_values) { + if (use_clock_dc_limits && 
dc_bw_params->dc_mode_limit.memclk_mhz && + dc_clk_table->entries[i].memclk_mhz > dc_bw_params->dc_mode_limit.memclk_mhz) { + if (i == 0 || dc_clk_table->entries[i-1].memclk_mhz < dc_bw_params->dc_mode_limit.memclk_mhz) { + dml_clk_table->uclk.clk_values_khz[i] = dc_bw_params->dc_mode_limit.memclk_mhz * 1000; + dml_clk_table->uclk.num_clk_values = i + 1; + } else { + dml_clk_table->uclk.clk_values_khz[i] = 0; + dml_clk_table->uclk.num_clk_values = i; + } + } else { + dml_clk_table->uclk.clk_values_khz[i] = dc_clk_table->entries[i].memclk_mhz * 1000; + } + } else { + dml_clk_table->uclk.clk_values_khz[i] = 0; + } + } + } + + /* dispclk */ + if (dc_clk_table->num_entries_per_clk.num_dispclk_levels) { + dml_clk_table->dispclk.num_clk_values = dc_clk_table->num_entries_per_clk.num_dispclk_levels; + for (i = 0; i < min(DML_MAX_CLK_TABLE_SIZE, MAX_NUM_DPM_LVL); i++) { + if (i < dml_clk_table->dispclk.num_clk_values) { + if (use_clock_dc_limits && dc_bw_params->dc_mode_limit.dispclk_mhz && + dc_clk_table->entries[i].dispclk_mhz > dc_bw_params->dc_mode_limit.dispclk_mhz) { + if (i == 0 || dc_clk_table->entries[i-1].dispclk_mhz < dc_bw_params->dc_mode_limit.dispclk_mhz) { + dml_clk_table->dispclk.clk_values_khz[i] = dc_bw_params->dc_mode_limit.dispclk_mhz * 1000; + dml_clk_table->dispclk.num_clk_values = i + 1; + } else { + dml_clk_table->dispclk.clk_values_khz[i] = 0; + dml_clk_table->dispclk.num_clk_values = i; + } + } else { + dml_clk_table->dispclk.clk_values_khz[i] = dc_clk_table->entries[i].dispclk_mhz * 1000; + } + } else { + dml_clk_table->dispclk.clk_values_khz[i] = 0; + } + } + } + + /* dppclk */ + if (dc_clk_table->num_entries_per_clk.num_dppclk_levels) { + dml_clk_table->dppclk.num_clk_values = dc_clk_table->num_entries_per_clk.num_dppclk_levels; + for (i = 0; i < min(DML_MAX_CLK_TABLE_SIZE, MAX_NUM_DPM_LVL); i++) { + if (i < dml_clk_table->dppclk.num_clk_values) { + if (use_clock_dc_limits && dc_bw_params->dc_mode_limit.dppclk_mhz && + dc_clk_table->entries[i].dppclk_mhz > dc_bw_params->dc_mode_limit.dppclk_mhz) { + if (i == 0 || dc_clk_table->entries[i-1].dppclk_mhz < dc_bw_params->dc_mode_limit.dppclk_mhz) { + dml_clk_table->dppclk.clk_values_khz[i] = dc_bw_params->dc_mode_limit.dppclk_mhz * 1000; + dml_clk_table->dppclk.num_clk_values = i + 1; + } else { + dml_clk_table->dppclk.clk_values_khz[i] = 0; + dml_clk_table->dppclk.num_clk_values = i; + } + } else { + dml_clk_table->dppclk.clk_values_khz[i] = dc_clk_table->entries[i].dppclk_mhz * 1000; + } + } else { + dml_clk_table->dppclk.clk_values_khz[i] = 0; + } + } + } + + /* dtbclk */ + if (dc_clk_table->num_entries_per_clk.num_dtbclk_levels) { + dml_clk_table->dtbclk.num_clk_values = dc_clk_table->num_entries_per_clk.num_dtbclk_levels; + for (i = 0; i < min(DML_MAX_CLK_TABLE_SIZE, MAX_NUM_DPM_LVL); i++) { + if (i < dml_clk_table->dtbclk.num_clk_values) { + if (use_clock_dc_limits && dc_bw_params->dc_mode_limit.dtbclk_mhz && + dc_clk_table->entries[i].dtbclk_mhz > dc_bw_params->dc_mode_limit.dtbclk_mhz) { + if (i == 0 || dc_clk_table->entries[i-1].dtbclk_mhz < dc_bw_params->dc_mode_limit.dtbclk_mhz) { + dml_clk_table->dtbclk.clk_values_khz[i] = dc_bw_params->dc_mode_limit.dtbclk_mhz * 1000; + dml_clk_table->dtbclk.num_clk_values = i + 1; + } else { + dml_clk_table->dtbclk.clk_values_khz[i] = 0; + dml_clk_table->dtbclk.num_clk_values = i; + } + } else { + dml_clk_table->dtbclk.clk_values_khz[i] = dc_clk_table->entries[i].dtbclk_mhz * 1000; + } + } else { + dml_clk_table->dtbclk.clk_values_khz[i] = 0; + } + } + } + + /* socclk */ + if 
(dc_clk_table->num_entries_per_clk.num_socclk_levels) { + dml_clk_table->socclk.num_clk_values = dc_clk_table->num_entries_per_clk.num_socclk_levels; + for (i = 0; i < min(DML_MAX_CLK_TABLE_SIZE, MAX_NUM_DPM_LVL); i++) { + if (i < dml_clk_table->socclk.num_clk_values) { + if (use_clock_dc_limits && dc_bw_params->dc_mode_limit.socclk_mhz && + dc_clk_table->entries[i].socclk_mhz > dc_bw_params->dc_mode_limit.socclk_mhz) { + if (i == 0 || dc_clk_table->entries[i-1].socclk_mhz < dc_bw_params->dc_mode_limit.socclk_mhz) { + dml_clk_table->socclk.clk_values_khz[i] = dc_bw_params->dc_mode_limit.socclk_mhz * 1000; + dml_clk_table->socclk.num_clk_values = i + 1; + } else { + dml_clk_table->socclk.clk_values_khz[i] = 0; + dml_clk_table->socclk.num_clk_values = i; + } + } else { + dml_clk_table->socclk.clk_values_khz[i] = dc_clk_table->entries[i].socclk_mhz * 1000; + } + } else { + dml_clk_table->socclk.clk_values_khz[i] = 0; + } + } + } + + /* dram config */ + dml_clk_table->dram_config.channel_count = dc_bw_params->num_channels; + dml_clk_table->dram_config.channel_width_bytes = dc_bw_params->dram_channel_width_bytes; +} + +void dcn401_update_soc_bb_with_values_from_clk_mgr(struct dml2_soc_bb *soc_bb, const struct dc *dc, const struct dml2_configuration_options *config) +{ + soc_bb->dprefclk_mhz = dc->clk_mgr->dprefclk_khz / 1000; + soc_bb->dispclk_dppclk_vco_speed_mhz = dc->clk_mgr->dentist_vco_freq_khz / 1000.0; + soc_bb->mall_allocated_for_dcn_mbytes = dc->caps.mall_size_total / (1024 * 1024); + + if (dc->clk_mgr->funcs->is_smu_present && + dc->clk_mgr->funcs->is_smu_present(dc->clk_mgr)) { + dcn401_convert_dc_clock_table_to_soc_bb_clock_table(&soc_bb->clk_table, + dc->clk_mgr->bw_params, + config->use_clock_dc_limits); + } +} + +void dcn401_update_soc_bb_with_values_from_vbios(struct dml2_soc_bb *soc_bb, const struct dc *dc) +{ + soc_bb->dchub_refclk_mhz = dc->res_pool->ref_clocks.dchub_ref_clock_inKhz / 1000; + soc_bb->xtalclk_mhz = dc->ctx->dc_bios->fw_info.pll_info.crystal_frequency / 1000; + + /* latencies in vbios are platform specific and should be used if provided */ + if (dc->ctx->dc_bios->bb_info.dram_clock_change_latency_100ns) + soc_bb->power_management_parameters.dram_clk_change_blackout_us = + dc->ctx->dc_bios->bb_info.dram_clock_change_latency_100ns / 10.0; + + if (dc->ctx->dc_bios->bb_info.dram_sr_enter_exit_latency_100ns) + soc_bb->power_management_parameters.stutter_enter_plus_exit_latency_us = + dc->ctx->dc_bios->bb_info.dram_sr_enter_exit_latency_100ns / 10.0; + + if (dc->ctx->dc_bios->bb_info.dram_sr_exit_latency_100ns) + soc_bb->power_management_parameters.stutter_exit_latency_us = + dc->ctx->dc_bios->bb_info.dram_sr_exit_latency_100ns / 10.0; +} + +void dcn401_update_soc_bb_with_values_from_software_policy(struct dml2_soc_bb *soc_bb, const struct dc *dc) +{ + /* set if the value is provided */ + if (dc->bb_overrides.sr_exit_time_ns) + soc_bb->power_management_parameters.stutter_exit_latency_us = + dc->bb_overrides.sr_exit_time_ns / 1000.0; + + if (dc->bb_overrides.sr_enter_plus_exit_time_ns) + soc_bb->power_management_parameters.stutter_enter_plus_exit_latency_us = + dc->bb_overrides.sr_enter_plus_exit_time_ns / 1000.0; + + if (dc->bb_overrides.dram_clock_change_latency_ns) + soc_bb->power_management_parameters.dram_clk_change_blackout_us = + dc->bb_overrides.dram_clock_change_latency_ns / 1000.0; + + if (dc->bb_overrides.fclk_clock_change_latency_ns) + soc_bb->power_management_parameters.fclk_change_blackout_us = + dc->bb_overrides.fclk_clock_change_latency_ns / 1000.0; + 
+ //Z8 values not expected nor used on DCN401 but still added for completeness + if (dc->bb_overrides.sr_exit_z8_time_ns) + soc_bb->power_management_parameters.z8_stutter_exit_latency_us = + dc->bb_overrides.sr_exit_z8_time_ns / 1000.0; + + if (dc->bb_overrides.sr_enter_plus_exit_z8_time_ns) + soc_bb->power_management_parameters.z8_stutter_enter_plus_exit_latency_us = + dc->bb_overrides.sr_enter_plus_exit_z8_time_ns / 1000.0; +} + +static void apply_soc_bb_updates(struct dml2_soc_bb *soc_bb, const struct dc *dc, const struct dml2_configuration_options *config) +{ + /* Individual modification can be overwritten even if it was obtained by a previous function. + * Modifications are acquired in order of priority (lowest to highest). + */ + dc_assert_fp_enabled(); + + dcn401_update_soc_bb_with_values_from_clk_mgr(soc_bb, dc, config); + dcn401_update_soc_bb_with_values_from_vbios(soc_bb, dc); + dcn401_update_soc_bb_with_values_from_software_policy(soc_bb, dc); +} + +void dcn401_get_soc_bb(struct dml2_soc_bb *soc_bb, const struct dc *dc, const struct dml2_configuration_options *config) +{ + //get default soc_bb with static values + get_default_soc_bb(soc_bb); + //update soc_bb values with more accurate values + apply_soc_bb_updates(soc_bb, dc, config); +} + +static void dcn401_get_ip_caps(struct dml2_ip_capabilities *ip_caps) +{ + *ip_caps = dml2_dcn401_max_ip_caps; +} + +static struct soc_and_ip_translator_funcs dcn401_translator_funcs = { + .get_soc_bb = dcn401_get_soc_bb, + .get_ip_caps = dcn401_get_ip_caps, +}; + +void dcn401_construct_soc_and_ip_translator(struct soc_and_ip_translator *soc_and_ip_translator) +{ + soc_and_ip_translator->translator_funcs = &dcn401_translator_funcs; +} diff --git a/drivers/gpu/drm/amd/display/dc/soc_and_ip_translator/dcn401/dcn401_soc_and_ip_translator.h b/drivers/gpu/drm/amd/display/dc/soc_and_ip_translator/dcn401/dcn401_soc_and_ip_translator.h new file mode 100644 index 000000000000..21d842857601 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/soc_and_ip_translator/dcn401/dcn401_soc_and_ip_translator.h @@ -0,0 +1,22 @@ +// SPDX-License-Identifier: MIT +// +// Copyright 2025 Advanced Micro Devices, Inc. 
+ +#ifndef _DCN401_SOC_AND_IP_TRANSLATOR_H_ +#define _DCN401_SOC_AND_IP_TRANSLATOR_H_ + +#include "core_types.h" +#include "dc.h" +#include "clk_mgr.h" +#include "soc_and_ip_translator.h" +#include "dml2/dml21/inc/dml_top_soc_parameter_types.h" + +void dcn401_construct_soc_and_ip_translator(struct soc_and_ip_translator *soc_and_ip_translator); + +/* Functions that can be re-used by higher DCN revisions of this component */ +void dcn401_get_soc_bb(struct dml2_soc_bb *soc_bb, const struct dc *dc, const struct dml2_configuration_options *config); +void dcn401_update_soc_bb_with_values_from_clk_mgr(struct dml2_soc_bb *soc_bb, const struct dc *dc, const struct dml2_configuration_options *config); +void dcn401_update_soc_bb_with_values_from_vbios(struct dml2_soc_bb *soc_bb, const struct dc *dc); +void dcn401_update_soc_bb_with_values_from_software_policy(struct dml2_soc_bb *soc_bb, const struct dc *dc); + +#endif /* _DCN401_SOC_AND_IP_TRANSLATOR_H_ */ diff --git a/drivers/gpu/drm/amd/display/dc/soc_and_ip_translator/dcn42/dcn42_soc_and_ip_translator.c b/drivers/gpu/drm/amd/display/dc/soc_and_ip_translator/dcn42/dcn42_soc_and_ip_translator.c new file mode 100644 index 000000000000..c9e224d262c9 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/soc_and_ip_translator/dcn42/dcn42_soc_and_ip_translator.c @@ -0,0 +1,27 @@ +// SPDX-License-Identifier: MIT +// +// Copyright 2025 Advanced Micro Devices, Inc. + +#include "dcn42_soc_and_ip_translator.h" +#include "soc_and_ip_translator/dcn401/dcn401_soc_and_ip_translator.h" +#include "bounding_boxes/dcn42_soc_bb.h" + +/* soc_and_ip_translator component used to get up-to-date values for bounding box. + * Bounding box values are stored in several locations and locations can vary with DCN revision. + * This component provides an interface to get DCN-specific bounding box values. + */ + +static void dcn42_get_ip_caps(struct dml2_ip_capabilities *ip_caps) +{ + *ip_caps = dml2_dcn42_max_ip_caps; +} + +static struct soc_and_ip_translator_funcs dcn42_translator_funcs = { + .get_soc_bb = dcn401_get_soc_bb, + .get_ip_caps = dcn42_get_ip_caps, +}; + +void dcn42_construct_soc_and_ip_translator(struct soc_and_ip_translator *soc_and_ip_translator) +{ + soc_and_ip_translator->translator_funcs = &dcn42_translator_funcs; +} diff --git a/drivers/gpu/drm/amd/display/dc/soc_and_ip_translator/dcn42/dcn42_soc_and_ip_translator.h b/drivers/gpu/drm/amd/display/dc/soc_and_ip_translator/dcn42/dcn42_soc_and_ip_translator.h new file mode 100644 index 000000000000..914dcbb369a7 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/soc_and_ip_translator/dcn42/dcn42_soc_and_ip_translator.h @@ -0,0 +1,16 @@ +// SPDX-License-Identifier: MIT +// +// Copyright 2025 Advanced Micro Devices, Inc. + +#ifndef _DCN42_SOC_AND_IP_TRANSLATOR_H_ +#define _DCN42_SOC_AND_IP_TRANSLATOR_H_ + +#include "core_types.h" +#include "dc.h" +#include "clk_mgr.h" +#include "dml_top_soc_parameter_types.h" +#include "soc_and_ip_translator.h" + +void dcn42_construct_soc_and_ip_translator(struct soc_and_ip_translator *soc_and_ip_translator); + +#endif /* _DCN42_SOC_AND_IP_TRANSLATOR_H_ */ diff --git a/drivers/gpu/drm/amd/display/dc/soc_and_ip_translator/soc_and_ip_translator.c b/drivers/gpu/drm/amd/display/dc/soc_and_ip_translator/soc_and_ip_translator.c new file mode 100644 index 000000000000..0fc0e5a6c171 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/soc_and_ip_translator/soc_and_ip_translator.c @@ -0,0 +1,37 @@ +// SPDX-License-Identifier: MIT +// +// Copyright 2025 Advanced Micro Devices, Inc. 
+ +#include "soc_and_ip_translator.h" +#include "soc_and_ip_translator/dcn401/dcn401_soc_and_ip_translator.h" + +static void dc_construct_soc_and_ip_translator(struct soc_and_ip_translator *soc_and_ip_translator, + enum dce_version dc_version) +{ + switch (dc_version) { + case DCN_VERSION_4_01: + dcn401_construct_soc_and_ip_translator(soc_and_ip_translator); + break; + default: + break; + } +} + +struct soc_and_ip_translator *dc_create_soc_and_ip_translator(enum dce_version dc_version) +{ + struct soc_and_ip_translator *soc_and_ip_translator; + + soc_and_ip_translator = kzalloc(sizeof(*soc_and_ip_translator), GFP_KERNEL); + if (!soc_and_ip_translator) + return NULL; + + dc_construct_soc_and_ip_translator(soc_and_ip_translator, dc_version); + + return soc_and_ip_translator; +} + +void dc_destroy_soc_and_ip_translator(struct soc_and_ip_translator **soc_and_ip_translator) +{ + kfree(*soc_and_ip_translator); + *soc_and_ip_translator = NULL; +} diff --git a/drivers/gpu/drm/amd/display/dc/sspl/dc_spl.c b/drivers/gpu/drm/amd/display/dc/sspl/dc_spl.c index e0008c5f08ad..b1fb0f8a253a 100644 --- a/drivers/gpu/drm/amd/display/dc/sspl/dc_spl.c +++ b/drivers/gpu/drm/amd/display/dc/sspl/dc_spl.c @@ -196,7 +196,12 @@ static struct spl_rect calculate_mpc_slice_in_timing_active( int epimo = mpc_slice_count - plane_clip_rec->width % mpc_slice_count - 1; struct spl_rect mpc_rec; - if (use_recout_width_aligned) { + if (spl_in->basic_in.custom_width != 0) { + mpc_rec.width = spl_in->basic_in.custom_width; + mpc_rec.x = spl_in->basic_in.custom_x; + mpc_rec.height = plane_clip_rec->height; + mpc_rec.y = plane_clip_rec->y; + } else if (use_recout_width_aligned) { mpc_rec.width = recout_width_align; if ((mpc_rec.width * (mpc_slice_idx + 1)) > plane_clip_rec->width) { mpc_rec.width = plane_clip_rec->width % recout_width_align; @@ -219,7 +224,7 @@ static struct spl_rect calculate_mpc_slice_in_timing_active( /* extra pixels in the division remainder need to go to pipes after * the extra pixel index minus one(epimo) defined here as: */ - if (mpc_slice_idx > epimo) { + if (mpc_slice_idx > epimo && spl_in->basic_in.custom_width == 0) { mpc_rec.x += mpc_slice_idx - epimo - 1; mpc_rec.width += 1; } @@ -252,10 +257,10 @@ static struct spl_rect calculate_odm_slice_in_timing_active(struct spl_in *spl_i odm_rec.x = odm_slice_width * odm_slice_idx; odm_rec.width = is_last_odm_slice ? - /* last slice width is the reminder of h_active */ - h_active - odm_slice_width * (odm_slice_count - 1) : - /* odm slice width is the floor of h_active / count */ - odm_slice_width; + /* last slice width is the reminder of h_active */ + h_active - odm_slice_width * (odm_slice_count - 1) : + /* odm slice width is the floor of h_active / count */ + odm_slice_width; odm_rec.y = 0; odm_rec.height = v_active; @@ -636,16 +641,16 @@ static void spl_calculate_inits_and_viewports(struct spl_in *spl_in, /* this gives the direction of the cositing (negative will move * left, right otherwise) */ - int sign = 1; + int h_sign = flip_horz_scan_dir ? -1 : 1; + int v_sign = flip_vert_scan_dir ? 
-1 : 1; switch (spl_in->basic_in.cositing) { - case CHROMA_COSITING_TOPLEFT: - init_adj_h = spl_fixpt_from_fraction(sign, 4); - init_adj_v = spl_fixpt_from_fraction(sign, 4); + init_adj_h = spl_fixpt_from_fraction(h_sign, 4); + init_adj_v = spl_fixpt_from_fraction(v_sign, 4); break; case CHROMA_COSITING_LEFT: - init_adj_h = spl_fixpt_from_fraction(sign, 4); + init_adj_h = spl_fixpt_from_fraction(h_sign, 4); init_adj_v = spl_fixpt_zero; break; case CHROMA_COSITING_NONE: @@ -884,7 +889,9 @@ static bool spl_get_isharp_en(struct spl_in *spl_in, /* Calculate number of tap with adaptive scaling off */ static void spl_get_taps_non_adaptive_scaler( - struct spl_scratch *spl_scratch, const struct spl_taps *in_taps, bool always_scale) + struct spl_scratch *spl_scratch, + const struct spl_taps *in_taps, + bool is_subsampled) { bool check_max_downscale = false; @@ -945,14 +952,15 @@ static void spl_get_taps_non_adaptive_scaler( SPL_ASSERT(check_max_downscale); - if (IDENTITY_RATIO(spl_scratch->scl_data.ratios.horz) && !always_scale) + if (IDENTITY_RATIO(spl_scratch->scl_data.ratios.horz)) spl_scratch->scl_data.taps.h_taps = 1; - if (IDENTITY_RATIO(spl_scratch->scl_data.ratios.vert) && !always_scale) + if (IDENTITY_RATIO(spl_scratch->scl_data.ratios.vert)) spl_scratch->scl_data.taps.v_taps = 1; - if (IDENTITY_RATIO(spl_scratch->scl_data.ratios.horz_c) && !always_scale) + if (IDENTITY_RATIO(spl_scratch->scl_data.ratios.horz_c) && !is_subsampled) spl_scratch->scl_data.taps.h_taps_c = 1; - if (IDENTITY_RATIO(spl_scratch->scl_data.ratios.vert_c) && !always_scale) + if (IDENTITY_RATIO(spl_scratch->scl_data.ratios.vert_c) && !is_subsampled) spl_scratch->scl_data.taps.v_taps_c = 1; + } /* Calculate optimal number of taps */ @@ -965,15 +973,13 @@ static bool spl_get_optimal_number_of_taps( unsigned int max_taps_y, max_taps_c; unsigned int min_taps_y, min_taps_c; enum lb_memory_config lb_config; - bool skip_easf = false; - bool always_scale = spl_in->basic_out.always_scale; + bool skip_easf = false; bool is_subsampled = spl_is_subsampled_format(spl_in->basic_in.format); - if (spl_scratch->scl_data.viewport.width > spl_scratch->scl_data.h_active && max_downscale_src_width != 0 && spl_scratch->scl_data.viewport.width > max_downscale_src_width) { - spl_get_taps_non_adaptive_scaler(spl_scratch, in_taps, always_scale); + spl_get_taps_non_adaptive_scaler(spl_scratch, in_taps, is_subsampled); *enable_easf_v = false; *enable_easf_h = false; *enable_isharp = false; @@ -982,7 +988,7 @@ static bool spl_get_optimal_number_of_taps( /* Disable adaptive scaler and sharpener when integer scaling is enabled */ if (spl_in->scaling_quality.integer_scaling) { - spl_get_taps_non_adaptive_scaler(spl_scratch, in_taps, always_scale); + spl_get_taps_non_adaptive_scaler(spl_scratch, in_taps, is_subsampled); *enable_easf_v = false; *enable_easf_h = false; *enable_isharp = false; @@ -997,8 +1003,9 @@ static bool spl_get_optimal_number_of_taps( * From programming guide: taps = min{ ceil(2*H_RATIO,1), 8} for downscaling * taps = 4 for upscaling */ - if (skip_easf) - spl_get_taps_non_adaptive_scaler(spl_scratch, in_taps, always_scale); + if (skip_easf) { + spl_get_taps_non_adaptive_scaler(spl_scratch, in_taps, is_subsampled); + } else { if (spl_is_video_format(spl_in->basic_in.format)) { spl_scratch->scl_data.taps.h_taps = 6; @@ -1124,7 +1131,6 @@ static bool spl_get_optimal_number_of_taps( (IDENTITY_RATIO(spl_scratch->scl_data.ratios.vert))) { spl_scratch->scl_data.taps.h_taps = 1; spl_scratch->scl_data.taps.v_taps = 1; - if 
(IDENTITY_RATIO(spl_scratch->scl_data.ratios.horz_c) && !is_subsampled) spl_scratch->scl_data.taps.h_taps_c = 1; @@ -1149,6 +1155,7 @@ static bool spl_get_optimal_number_of_taps( if ((!*enable_easf_v) && !is_subsampled && (IDENTITY_RATIO(spl_scratch->scl_data.ratios.vert_c))) spl_scratch->scl_data.taps.v_taps_c = 1; + } } return true; diff --git a/drivers/gpu/drm/amd/display/dc/sspl/dc_spl_types.h b/drivers/gpu/drm/amd/display/dc/sspl/dc_spl_types.h index 36a284305a70..23d254dea18f 100644 --- a/drivers/gpu/drm/amd/display/dc/sspl/dc_spl_types.h +++ b/drivers/gpu/drm/amd/display/dc/sspl/dc_spl_types.h @@ -460,6 +460,8 @@ struct basic_in { enum spl_color_space color_space; // Color Space unsigned int max_luminance; // Max Luminance TODO: Is determined in dc_hw_sequencer.c is_sdr bool film_grain_applied; // Film Grain Applied // TODO: To check from where to get this? + int custom_width; // Width for non-standard segmentation - used when != 0 + int custom_x; // Start x for non-standard segmentation - used when custom_width != 0 }; // Basic output information diff --git a/drivers/gpu/drm/amd/display/dc/virtual/virtual_stream_encoder.c b/drivers/gpu/drm/amd/display/dc/virtual/virtual_stream_encoder.c index ad088d70e189..6ffc74fc9dcd 100644 --- a/drivers/gpu/drm/amd/display/dc/virtual/virtual_stream_encoder.c +++ b/drivers/gpu/drm/amd/display/dc/virtual/virtual_stream_encoder.c @@ -44,6 +44,11 @@ static void virtual_stream_encoder_dvi_set_stream_attribute( struct dc_crtc_timing *crtc_timing, bool is_dual_link) {} +static void virtual_stream_encoder_lvds_set_stream_attribute( + struct stream_encoder *enc, + struct dc_crtc_timing *crtc_timing) +{} + static void virtual_stream_encoder_set_throttled_vcp_size( struct stream_encoder *enc, struct fixed31_32 avg_time_slots_per_mtp) @@ -115,6 +120,8 @@ static const struct stream_encoder_funcs virtual_str_enc_funcs = { virtual_stream_encoder_hdmi_set_stream_attribute, .dvi_set_stream_attribute = virtual_stream_encoder_dvi_set_stream_attribute, + .lvds_set_stream_attribute = + virtual_stream_encoder_lvds_set_stream_attribute, .set_throttled_vcp_size = virtual_stream_encoder_set_throttled_vcp_size, .update_hdmi_info_packets = diff --git a/drivers/gpu/drm/amd/display/dmub/dmub_srv.h b/drivers/gpu/drm/amd/display/dmub/dmub_srv.h index 3f3fa1b6a69e..338fdc651f2c 100644 --- a/drivers/gpu/drm/amd/display/dmub/dmub_srv.h +++ b/drivers/gpu/drm/amd/display/dmub/dmub_srv.h @@ -129,7 +129,9 @@ enum dmub_window_id { DMUB_WINDOW_5_TRACEBUFF, DMUB_WINDOW_6_FW_STATE, DMUB_WINDOW_7_SCRATCH_MEM, + DMUB_WINDOW_IB_MEM, DMUB_WINDOW_SHARED_STATE, + DMUB_WINDOW_LSDMA_BUFFER, DMUB_WINDOW_TOTAL, }; @@ -314,6 +316,7 @@ struct dmub_srv_hw_params { bool disable_sldo_opt; bool enable_non_transparent_setconfig; bool lower_hbr3_phy_ssc; + bool override_hbr3_pll_vco; }; /** @@ -355,6 +358,7 @@ struct dmub_diagnostic_data { uint8_t is_traceport_en : 1; uint8_t is_cw0_enabled : 1; uint8_t is_cw6_enabled : 1; + uint8_t is_pwait : 1; }; struct dmub_srv_inbox { @@ -539,6 +543,7 @@ struct dmub_srv { uint32_t fw_version; bool is_virtual; struct dmub_fb scratch_mem_fb; + struct dmub_fb ib_mem_gart; volatile struct dmub_shared_state_feature_block *shared_state; volatile const struct dmub_fw_state *fw_state; @@ -563,6 +568,7 @@ struct dmub_srv { bool sw_init; bool hw_init; + bool dpia_supported; uint64_t fb_base; uint64_t fb_offset; @@ -576,6 +582,7 @@ struct dmub_srv { enum dmub_srv_power_state_type power_state; struct dmub_diagnostic_data debug; + struct dmub_fb lsdma_rb_fb; }; /** @@ -592,6 
+599,8 @@ struct dmub_notification { enum dmub_notification_type type; uint8_t link_index; uint8_t result; + /* notify instance from DMUB */ + uint8_t instance; bool pending_notification; union { struct aux_reply_data aux_reply; @@ -602,14 +611,6 @@ struct dmub_notification { }; }; -/* enum dmub_ips_mode - IPS mode identifier */ -enum dmub_ips_mode { - DMUB_IPS_MODE_IPS1_MAX = 0, - DMUB_IPS_MODE_IPS2, - DMUB_IPS_MODE_IPS1_RCG, - DMUB_IPS_MODE_IPS1_ONO2_ON -}; - /** * DMUB firmware version helper macro - useful for checking if the version * of a firmware to know if feature or functionality is supported or present. diff --git a/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h b/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h index b66bd10cdc9b..92248224b713 100644 --- a/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h +++ b/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h @@ -104,6 +104,14 @@ */ #define DMUB_MAX_FPO_STREAMS 4 +/* Define to ensure that the "common" members always appear in the same + * order in different structs for back compat purposes + */ +#define COMMON_STREAM_STATIC_SUB_STATE \ + struct dmub_fams2_cmd_legacy_stream_static_state legacy; \ + struct dmub_fams2_cmd_subvp_stream_static_state subvp; \ + struct dmub_fams2_cmd_drr_stream_static_state drr; + /* Maximum number of streams on any ASIC. */ #define DMUB_MAX_STREAMS 6 @@ -291,6 +299,31 @@ union dmub_addr { } u; /*<< Low/high bit access */ uint64_t quad_part; /*<< 64 bit address */ }; + +/* Flattened structure containing SOC BB parameters stored in the VBIOS + * It is not practical to store the entire bounding box in VBIOS since the bounding box struct can gain new parameters. + * This also prevents alighment issues when new parameters are added to the SoC BB. + * The following parameters should be added since these values can't be obtained elsewhere: + * -dml2_soc_power_management_parameters + * -dml2_soc_vmin_clock_limits + */ +struct dmub_soc_bb_params { + uint32_t dram_clk_change_blackout_ns; + uint32_t dram_clk_change_read_only_ns; + uint32_t dram_clk_change_write_only_ns; + uint32_t fclk_change_blackout_ns; + uint32_t g7_ppt_blackout_ns; + uint32_t stutter_enter_plus_exit_latency_ns; + uint32_t stutter_exit_latency_ns; + uint32_t z8_stutter_enter_plus_exit_latency_ns; + uint32_t z8_stutter_exit_latency_ns; + uint32_t z8_min_idle_time_ns; + uint32_t type_b_dram_clk_change_blackout_ns; + uint32_t type_b_ppt_blackout_ns; + uint32_t vmin_limit_dispclk_khz; + uint32_t vmin_limit_dcfclk_khz; + uint32_t g7_temperature_read_blackout_ns; +}; #pragma pack(pop) /** @@ -757,11 +790,29 @@ enum dmub_ips_rcg_disable_type { DMUB_IPS_RCG_DISABLE = 3 }; +enum dmub_ips_in_vpb_disable_type { + DMUB_IPS_VPB_RCG_ONLY = 0, // Legacy behaviour + DMUB_IPS_VPB_DISABLE_ALL = 1, + DMUB_IPS_VPB_ENABLE_IPS1_AND_RCG = 2, + DMUB_IPS_VPB_ENABLE_ALL = 3 // Enable IPS1 Z8, IPS1 and RCG +}; + #define DMUB_IPS1_ALLOW_MASK 0x00000001 #define DMUB_IPS2_ALLOW_MASK 0x00000002 #define DMUB_IPS1_COMMIT_MASK 0x00000004 #define DMUB_IPS2_COMMIT_MASK 0x00000008 +enum dmub_ips_comand_type { + /** + * Start/stop IPS residency measurements for a given IPS mode + */ + DMUB_CMD__IPS_RESIDENCY_CNTL = 0, + /** + * Query IPS residency information for a given IPS mode + */ + DMUB_CMD__IPS_QUERY_RESIDENCY_INFO = 1, +}; + /** * union dmub_fw_boot_options - Boot option definitions for SCRATCH14 */ @@ -792,7 +843,8 @@ union dmub_fw_boot_options { uint32_t ips_sequential_ono: 1; /**< 1 to enable sequential ONO IPS sequence */ uint32_t disable_sldo_opt: 1; /**< 1 to disable 
SLDO optimizations */ uint32_t lower_hbr3_phy_ssc: 1; /**< 1 to lower hbr3 phy ssc to 0.125 percent */ - uint32_t reserved : 6; /**< reserved */ + uint32_t override_hbr3_pll_vco: 1; /**< 1 to override the hbr3 pll vco to 0 */ + uint32_t reserved : 5; /**< reserved */ } bits; /**< boot bits */ uint32_t all; /**< 32-bit access to bits */ }; @@ -856,7 +908,9 @@ union dmub_shared_state_ips_driver_signals { uint32_t allow_ips0_rcg : 1; /**< 1 is IPS0 RCG is allowed */ uint32_t allow_ips1_rcg : 1; /**< 1 is IPS1 RCG is allowed */ uint32_t allow_ips1z8 : 1; /**< 1 is IPS1 Z8 Retention is allowed */ - uint32_t reserved_bits : 24; /**< Reversed bits */ + uint32_t allow_dynamic_ips1 : 1; /**< 1 if IPS1 is allowed in dynamic use cases such as VPB */ + uint32_t allow_dynamic_ips1_z8: 1; /**< 1 if IPS1 z8 ret is allowed in dynamic use cases such as VPB */ + uint32_t reserved_bits : 22; /**< Reversed bits */ } bits; uint32_t all; }; @@ -1508,6 +1562,16 @@ enum dmub_cmd_type { */ DMUB_CMD__FUSED_IO = 89, + /** + * Command type used for all LSDMA commands. + */ + DMUB_CMD__LSDMA = 90, + + /** + * Command type use for all IPS commands. + */ + DMUB_CMD__IPS = 91, + DMUB_CMD__VBIOS = 128, }; @@ -1918,6 +1982,154 @@ struct dmub_rb_cmd_fams2_flip { struct dmub_fams2_flip_info flip_info; }; +struct dmub_cmd_lsdma_data { + union { + struct lsdma_init_data { + union dmub_addr gpu_addr_base; + uint32_t ring_size; + } init_data; + struct lsdma_tiled_copy_data { + uint32_t src_addr_lo; + uint32_t src_addr_hi; + + uint32_t dst_addr_lo; + uint32_t dst_addr_hi; + + uint32_t src_x : 16; + uint32_t src_y : 16; + + uint32_t dst_x : 16; + uint32_t dst_y : 16; + + uint32_t src_width : 16; + uint32_t src_height : 16; + + uint32_t dst_width : 16; + uint32_t dst_height : 16; + + uint32_t rect_x : 16; + uint32_t rect_y : 16; + + uint32_t src_swizzle_mode : 5; + uint32_t src_mip_max : 5; + uint32_t src_mip_id : 5; + uint32_t dst_mip_max : 5; + uint32_t dst_swizzle_mode : 5; + uint32_t dst_mip_id : 5; + uint32_t tmz : 1; + uint32_t dcc : 1; + + uint32_t data_format : 6; + uint32_t padding1 : 4; + uint32_t dst_element_size : 3; + uint32_t num_type : 3; + uint32_t src_element_size : 3; + uint32_t write_compress : 2; + uint32_t cache_policy_dst : 2; + uint32_t cache_policy_src : 2; + uint32_t read_compress : 2; + uint32_t src_dim : 2; + uint32_t dst_dim : 2; + uint32_t max_uncom : 1; + + uint32_t max_com : 2; + uint32_t padding : 30; + } tiled_copy_data; + struct lsdma_linear_copy_data { + uint32_t src_lo; + uint32_t src_hi; + + uint32_t dst_lo; + uint32_t dst_hi; + + uint32_t count : 30; + uint32_t cache_policy_dst : 2; + + uint32_t tmz : 1; + uint32_t cache_policy_src : 2; + uint32_t padding : 29; + } linear_copy_data; + struct lsdma_linear_sub_window_copy_data { + uint32_t src_lo; + uint32_t src_hi; + + uint32_t dst_lo; + uint32_t dst_hi; + + uint32_t src_x : 16; + uint32_t src_y : 16; + + uint32_t dst_x : 16; + uint32_t dst_y : 16; + + uint32_t rect_x : 16; + uint32_t rect_y : 16; + + uint32_t src_pitch : 16; + uint32_t dst_pitch : 16; + + uint32_t src_slice_pitch; + uint32_t dst_slice_pitch; + + uint32_t tmz : 1; + uint32_t element_size : 3; + uint32_t src_cache_policy : 3; + uint32_t dst_cache_policy : 3; + uint32_t reserved0 : 22; + } linear_sub_window_copy_data; + struct lsdma_reg_write_data { + uint32_t reg_addr; + uint32_t reg_data; + } reg_write_data; + struct lsdma_pio_copy_data { + uint32_t src_lo; + uint32_t src_hi; + + uint32_t dst_lo; + uint32_t dst_hi; + + union { + struct { + uint32_t byte_count : 26; + uint32_t 
src_loc : 1; + uint32_t dst_loc : 1; + uint32_t src_addr_inc : 1; + uint32_t dst_addr_inc : 1; + uint32_t overlap_disable : 1; + uint32_t constant_fill : 1; + } fields; + uint32_t raw; + } packet; + } pio_copy_data; + struct lsdma_pio_constfill_data { + uint32_t dst_lo; + uint32_t dst_hi; + + union { + struct { + uint32_t byte_count : 26; + uint32_t src_loc : 1; + uint32_t dst_loc : 1; + uint32_t src_addr_inc : 1; + uint32_t dst_addr_inc : 1; + uint32_t overlap_disable : 1; + uint32_t constant_fill : 1; + } fields; + uint32_t raw; + } packet; + + uint32_t data; + } pio_constfill_data; + + uint32_t all[14]; + } u; +}; + +struct dmub_rb_cmd_lsdma { + struct dmub_cmd_header header; + struct dmub_cmd_lsdma_data lsdma_data; +}; + struct dmub_optc_state_v2 { uint32_t v_total_min; uint32_t v_total_max; @@ -1949,6 +2161,28 @@ enum fams2_stream_type { FAMS2_STREAM_TYPE_SUBVP = 4, }; +struct dmub_rect16 { + /** + * Dirty rect x offset. + */ + uint16_t x; + + /** + * Dirty rect y offset. + */ + uint16_t y; + + /** + * Dirty rect width. + */ + uint16_t width; + + /** + * Dirty rect height. + */ + uint16_t height; +}; + /* static stream state */ struct dmub_fams2_legacy_stream_static_state { uint8_t vactive_det_fill_delay_otg_vlines; @@ -2021,11 +2255,13 @@ union dmub_fams2_stream_static_sub_state { }; //v0 union dmub_fams2_cmd_stream_static_sub_state { - struct dmub_fams2_cmd_legacy_stream_static_state legacy; - struct dmub_fams2_cmd_subvp_stream_static_state subvp; - struct dmub_fams2_cmd_drr_stream_static_state drr; + COMMON_STREAM_STATIC_SUB_STATE }; //v1 +union dmub_fams2_stream_static_sub_state_v2 { + COMMON_STREAM_STATIC_SUB_STATE +}; //v2 + struct dmub_fams2_stream_static_state { enum fams2_stream_type type; uint32_t otg_vline_time_ns; @@ -2091,7 +2327,7 @@ struct dmub_fams2_cmd_stream_static_base_state { struct dmub_fams2_stream_static_state_v1 { struct dmub_fams2_cmd_stream_static_base_state base; - union dmub_fams2_cmd_stream_static_sub_state sub_state; + union dmub_fams2_stream_static_sub_state_v2 sub_state; }; //v1 /** @@ -2128,6 +2364,7 @@ struct dmub_cmd_fams2_global_config { union dmub_fams2_global_feature_config features; uint32_t recovery_timeout_us; uint32_t hwfq_flip_programming_delay_us; + uint32_t max_allow_to_target_delta_us; // how early DCN could assert P-State allow compared to the P-State target }; union dmub_cmd_fams2_config { @@ -2139,6 +2376,11 @@ union dmub_cmd_fams2_config { } stream_v1; //v1 }; +struct dmub_fams2_config_v2 { + struct dmub_cmd_fams2_global_config global; + struct dmub_fams2_stream_static_state_v1 stream_v1[DMUB_MAX_STREAMS]; //v1 +}; + /** * DMUB rb command definition for FAMS2 (merged SubVP, FPO, Legacy) */ @@ -2148,6 +2390,22 @@ struct dmub_rb_cmd_fams2 { }; /** + * Indirect buffer descriptor + */ +struct dmub_ib_data { + union dmub_addr src; // location of indirect buffer in memory + uint16_t size; // indirect buffer size in bytes +}; + +/** + * DMUB rb command definition for commands passed over indirect buffer + */ +struct dmub_rb_cmd_ib { + struct dmub_cmd_header header; + struct dmub_ib_data ib_data; +}; + +/** * enum dmub_cmd_idle_opt_type - Idle optimization command type. */ enum dmub_cmd_idle_opt_type { @@ -2170,6 +2428,11 @@ enum dmub_cmd_idle_opt_type { * DCN hardware notify power state. */ DMUB_CMD__IDLE_OPT_SET_DC_POWER_STATE = 3, + + /** + * DCN notify to release HW. 
+ */ + DMUB_CMD__IDLE_OPT_RELEASE_HW = 4, }; /** @@ -2315,7 +2578,8 @@ struct dmub_dig_transmitter_control_data_v1_7 { uint8_t connobj_id; /**< Connector Object Id defined in ObjectId.h */ uint8_t HPO_instance; /**< HPO instance (0: inst0, 1: inst1) */ uint8_t reserved1; /**< For future use */ - uint8_t reserved2[3]; /**< For future use */ + uint8_t skip_phy_ssc_reduction; + uint8_t reserved2[2]; /**< For future use */ uint32_t reserved3[11]; /**< For future use */ }; @@ -2933,6 +3197,7 @@ enum dmub_cmd_fams_type { DMUB_CMD__FAMS2_CONFIG = 4, DMUB_CMD__FAMS2_DRR_UPDATE = 5, DMUB_CMD__FAMS2_FLIP = 6, + DMUB_CMD__FAMS2_IB_CONFIG = 7, }; /** @@ -3756,6 +4021,10 @@ enum dmub_cmd_replay_type { */ DMUB_CMD__REPLAY_DISABLED_ADAPTIVE_SYNC_SDP = 8, /** + * Set version + */ + DMUB_CMD__REPLAY_SET_VERSION = 9, + /** * Set Replay General command. */ DMUB_CMD__REPLAY_SET_GENERAL_CMD = 16, @@ -3785,6 +4054,10 @@ struct dmub_alpm_auxless_data { uint16_t lfps_t1_t2_override_us; short lfps_t1_t2_offset_us; uint8_t lttpr_count; + /* + * Padding to align structure to 4 byte boundary. + */ + uint8_t pad[1]; }; /** @@ -3861,6 +4134,79 @@ struct dmub_cmd_replay_copy_settings_data { * Use for AUX-less ALPM LFPS wake operation */ struct dmub_alpm_auxless_data auxless_alpm_data; + /** + * @hpo_stream_enc_inst: HPO stream encoder instance + */ + uint8_t hpo_stream_enc_inst; + /** + * @hpo_link_enc_inst: HPO link encoder instance + */ + uint8_t hpo_link_enc_inst; + /** + * Determines if fast resync in ultra sleep mode is enabled/disabled. + */ + uint8_t replay_support_fast_resync_in_ultra_sleep_mode; + /** + * @pad: Align structure to 4 byte boundary. + */ + uint8_t pad[1]; +}; + + +/** + * Replay versions. + */ +enum replay_version { + /** + * FreeSync Replay + */ + REPLAY_VERSION_FREESYNC_REPLAY = 0, + /** + * Panel Replay + */ + REPLAY_VERSION_PANEL_REPLAY = 1, + /** + * Replay not supported. + */ + REPLAY_VERSION_UNSUPPORTED = 0xFF, +}; + +/** + * Data passed from driver to FW in a DMUB_CMD___SET_REPLAY_VERSION command. + */ +struct dmub_cmd_replay_set_version_data { + /** + * Panel Instance. + * Panel instance to identify which psr_state to use + * Currently the support is only for 0 or 1 + */ + uint8_t panel_inst; + /** + * PSR version that FW should implement. + */ + enum replay_version version; + /** + * PSR control version. + */ + uint8_t cmd_version; + /** + * Explicit padding to 4 byte boundary. + */ + uint8_t pad[2]; +}; + +/** + * Definition of a DMUB_CMD__REPLAY_SET_VERSION command. + */ +struct dmub_rb_cmd_replay_set_version { + /** + * Command header. + */ + struct dmub_cmd_header header; + /** + * Data passed from driver to FW in a DMUB_CMD__REPLAY_SET_VERSION command. + */ + struct dmub_cmd_replay_set_version_data replay_set_version_data; }; /** @@ -4215,6 +4561,10 @@ union dmub_replay_cmd_set { */ struct dmub_cmd_replay_disabled_adaptive_sync_sdp_data disabled_adaptive_sync_sdp_data; /** + * Definition of DMUB_CMD__REPLAY_SET_VERSION command data. + */ + struct dmub_cmd_replay_set_version_data version_data; + /** * Definition of DMUB_CMD__REPLAY_SET_GENERAL_CMD command data. */ struct dmub_cmd_replay_set_general_cmd_data set_general_cmd_data; @@ -4416,6 +4766,41 @@ enum dmub_cmd_abm_type { DMUB_CMD__ABM_GET_HISTOGRAM_DATA = 11, }; +/** + * LSDMA command sub-types. + */ +enum dmub_cmd_lsdma_type { + /** + * Initialize parameters for LSDMA. 
+ * Ring buffer is mapped to the ring buffer + */ + DMUB_CMD__LSDMA_INIT_CONFIG = 0, + /** + * LSDMA copies data from source to destination linearly + */ + DMUB_CMD__LSDMA_LINEAR_COPY = 1, + /** + * LSDMA copies data from source to destination linearly in sub window + */ + DMUB_CMD__LSDMA_LINEAR_SUB_WINDOW_COPY = 2, + /** + * Send the tiled-to-tiled copy command + */ + DMUB_CMD__LSDMA_TILED_TO_TILED_COPY = 3, + /** + * Send the poll reg write command + */ + DMUB_CMD__LSDMA_POLL_REG_WRITE = 4, + /** + * Send the pio copy command + */ + DMUB_CMD__LSDMA_PIO_COPY = 5, + /** + * Send the pio constfill command + */ + DMUB_CMD__LSDMA_PIO_CONSTFILL = 6, +}; + struct abm_ace_curve { /** * @offsets: ACE curve offsets. @@ -5621,6 +6006,64 @@ struct dmub_rb_cmd_assr_enable { }; /** + * Current definition of "ips_mode" from driver + */ +enum ips_residency_mode { + IPS_RESIDENCY__IPS1_MAX, + IPS_RESIDENCY__IPS2, + IPS_RESIDENCY__IPS1_RCG, + IPS_RESIDENCY__IPS1_ONO2_ON, + IPS_RESIDENCY__IPS1_Z8_RETENTION, + IPS_RESIDENCY__PG_ONO_LAST_SEEN_IN_IPS, + IPS_RESIDENCY__PG_ONO_CURRENT_STATE +}; + +#define NUM_IPS_HISTOGRAM_BUCKETS 16 + +/** + * IPS residency statistics to be sent to driver - subset of struct dmub_ips_residency_stats + */ +struct dmub_ips_residency_info { + uint32_t residency_millipercent; + uint32_t entry_counter; + uint32_t histogram[NUM_IPS_HISTOGRAM_BUCKETS]; + uint64_t total_time_us; + uint64_t total_inactive_time_us; + uint32_t ono_pg_state_at_collection; + uint32_t ono_pg_state_last_seen_in_ips; +}; + +/** + * Data passed from driver to FW in a DMUB_CMD__IPS_RESIDENCY_CNTL command. + */ +struct dmub_cmd_ips_residency_cntl_data { + uint8_t panel_inst; + uint8_t start_measurement; + uint8_t padding[2]; // align to 4-byte boundary +}; + +struct dmub_rb_cmd_ips_residency_cntl { + struct dmub_cmd_header header; + struct dmub_cmd_ips_residency_cntl_data cntl_data; +}; + +/** + * Data passed from FW to driver in a DMUB_CMD__IPS_QUERY_RESIDENCY_INFO command. + */ +struct dmub_cmd_ips_query_residency_info_data { + union dmub_addr dest; + uint32_t size; + uint32_t ips_mode; + uint8_t panel_inst; + uint8_t padding[3]; // align to 4-byte boundary +}; + +struct dmub_rb_cmd_ips_query_residency_info { + struct dmub_cmd_header header; + struct dmub_cmd_ips_query_residency_info_data info_data; +}; + +/** * union dmub_rb_cmd - DMUB inbox command. */ union dmub_rb_cmd { @@ -5884,6 +6327,10 @@ union dmub_rb_cmd { * Definition of a DMUB_CMD__IDLE_OPT_SET_DC_POWER_STATE command. */ struct dmub_rb_cmd_idle_opt_set_dc_power_state idle_opt_set_dc_power_state; + /** + * Definition of a DMUB_CMD__REPLAY_SET_VERSION command. + */ + struct dmub_rb_cmd_replay_set_version replay_set_version; /* * Definition of a DMUB_CMD__REPLAY_COPY_SETTINGS command. */ @@ -5926,13 +6373,25 @@ union dmub_rb_cmd { * Definition of a DMUB_CMD__PSP_ASSR_ENABLE command. */ struct dmub_rb_cmd_assr_enable assr_enable; + struct dmub_rb_cmd_fams2 fams2_config; + struct dmub_rb_cmd_ib ib_fams2_config; + struct dmub_rb_cmd_fams2_drr_update fams2_drr_update; struct dmub_rb_cmd_fams2_flip fams2_flip; struct dmub_rb_cmd_fused_io fused_io; + + /** + * Definition of a DMUB_CMD__LSDMA command. 
+ */ + struct dmub_rb_cmd_lsdma lsdma; + + struct dmub_rb_cmd_ips_residency_cntl ips_residency_cntl; + + struct dmub_rb_cmd_ips_query_residency_info ips_query_residency_info; }; /** @@ -6080,15 +6539,18 @@ static inline bool dmub_rb_full(struct dmub_rb *rb) static inline bool dmub_rb_push_front(struct dmub_rb *rb, const union dmub_rb_cmd *cmd) { - uint64_t volatile *dst = (uint64_t volatile *)((uint8_t *)(rb->base_address) + rb->wrpt); - const uint64_t *src = (const uint64_t *)cmd; + uint8_t *dst = (uint8_t *)(rb->base_address) + rb->wrpt; + const uint8_t *src = (const uint8_t *)cmd; uint8_t i; + if (rb->capacity == 0) + return false; + if (dmub_rb_full(rb)) return false; // copying data - for (i = 0; i < DMUB_RB_CMD_SIZE / sizeof(uint64_t); i++) + for (i = 0; i < DMUB_RB_CMD_SIZE; i++) *dst++ = *src++; rb->wrpt += DMUB_RB_CMD_SIZE; @@ -6113,6 +6575,9 @@ static inline bool dmub_rb_out_push_front(struct dmub_rb *rb, uint8_t *dst = (uint8_t *)(rb->base_address) + rb->wrpt; const uint8_t *src = (const uint8_t *)cmd; + if (rb->capacity == 0) + return false; + if (dmub_rb_full(rb)) return false; @@ -6158,6 +6623,9 @@ static inline void dmub_rb_get_rptr_with_offset(struct dmub_rb *rb, uint32_t num_cmds, uint32_t *next_rptr) { + if (rb->capacity == 0) + return; + *next_rptr = rb->rptr + DMUB_RB_CMD_SIZE * num_cmds; if (*next_rptr >= rb->capacity) @@ -6221,6 +6689,9 @@ static inline bool dmub_rb_out_front(struct dmub_rb *rb, */ static inline bool dmub_rb_pop_front(struct dmub_rb *rb) { + if (rb->capacity == 0) + return false; + if (dmub_rb_empty(rb)) return false; @@ -6245,6 +6716,9 @@ static inline void dmub_rb_flush_pending(const struct dmub_rb *rb) uint32_t rptr = rb->rptr; uint32_t wptr = rb->wrpt; + if (rb->capacity == 0) + return; + while (rptr != wptr) { uint64_t *data = (uint64_t *)((uint8_t *)(rb->base_address) + rptr); uint8_t i; diff --git a/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn31.c b/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn31.c index a308bd604677..4777c7203b2c 100644 --- a/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn31.c +++ b/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn31.c @@ -377,6 +377,7 @@ void dmub_dcn31_enable_dmub_boot_options(struct dmub_srv *dmub, const struct dmu boot_options.bits.dpia_hpd_int_enable_supported = params->dpia_hpd_int_enable_supported; boot_options.bits.power_optimization = params->power_optimization; boot_options.bits.lower_hbr3_phy_ssc = params->lower_hbr3_phy_ssc; + boot_options.bits.override_hbr3_pll_vco = params->override_hbr3_pll_vco; boot_options.bits.sel_mux_phy_c_d_phy_f_g = (dmub->asic == DMUB_ASIC_DCN31B) ? 
1 : 0; @@ -416,7 +417,7 @@ uint32_t dmub_dcn31_get_current_time(struct dmub_srv *dmub) void dmub_dcn31_get_diagnostic_data(struct dmub_srv *dmub) { - uint32_t is_dmub_enabled, is_soft_reset, is_sec_reset; + uint32_t is_dmub_enabled, is_soft_reset, is_sec_reset, is_pwait; uint32_t is_traceport_enabled, is_cw0_enabled, is_cw6_enabled; struct dmub_timeout_info timeout = {0}; @@ -466,6 +467,9 @@ void dmub_dcn31_get_diagnostic_data(struct dmub_srv *dmub) REG_GET(DMCUB_CNTL, DMCUB_ENABLE, &is_dmub_enabled); dmub->debug.is_dmcub_enabled = is_dmub_enabled; + REG_GET(DMCUB_CNTL, DMCUB_PWAIT_MODE_STATUS, &is_pwait); + dmub->debug.is_pwait = is_pwait; + REG_GET(DMCUB_CNTL2, DMCUB_SOFT_RESET, &is_soft_reset); dmub->debug.is_dmcub_soft_reset = is_soft_reset; diff --git a/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn32.c b/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn32.c index e7056205b050..ce041f6239dc 100644 --- a/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn32.c +++ b/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn32.c @@ -89,44 +89,50 @@ static inline void dmub_dcn32_translate_addr(const union dmub_addr *addr_in, void dmub_dcn32_reset(struct dmub_srv *dmub) { union dmub_gpint_data_register cmd; - const uint32_t timeout = 30; - uint32_t in_reset, scratch, i; + const uint32_t timeout = 100000; + uint32_t in_reset, is_enabled, scratch, i, pwait_mode; REG_GET(DMCUB_CNTL2, DMCUB_SOFT_RESET, &in_reset); + REG_GET(DMCUB_CNTL, DMCUB_ENABLE, &is_enabled); - if (in_reset == 0) { + if (in_reset == 0 && is_enabled != 0) { cmd.bits.status = 1; cmd.bits.command_code = DMUB_GPINT__STOP_FW; cmd.bits.param = 0; dmub->hw_funcs.set_gpint(dmub, cmd); - /** - * Timeout covers both the ACK and the wait - * for remaining work to finish. - * - * This is mostly bound by the PHY disable sequence. - * Each register check will be greater than 1us, so - * don't bother using udelay. - */ - for (i = 0; i < timeout; ++i) { if (dmub->hw_funcs.is_gpint_acked(dmub, cmd)) break; + + udelay(1); } for (i = 0; i < timeout; ++i) { - scratch = dmub->hw_funcs.get_gpint_response(dmub); + scratch = REG_READ(DMCUB_SCRATCH7); if (scratch == DMUB_GPINT__STOP_FW_RESPONSE) break; + + udelay(1); } + for (i = 0; i < timeout; ++i) { + REG_GET(DMCUB_CNTL, DMCUB_PWAIT_MODE_STATUS, &pwait_mode); + if (pwait_mode & (1 << 0)) + break; + + udelay(1); + } /* Force reset in case we timed out, DMCUB is likely hung. */ } - REG_UPDATE(DMCUB_CNTL2, DMCUB_SOFT_RESET, 1); - REG_UPDATE(DMCUB_CNTL, DMCUB_ENABLE, 0); - REG_UPDATE(MMHUBBUB_SOFT_RESET, DMUIF_SOFT_RESET, 1); + if (is_enabled) { + REG_UPDATE(DMCUB_CNTL2, DMCUB_SOFT_RESET, 1); + udelay(1); + REG_UPDATE(DMCUB_CNTL, DMCUB_ENABLE, 0); + } + REG_WRITE(DMCUB_INBOX1_RPTR, 0); REG_WRITE(DMCUB_INBOX1_WPTR, 0); REG_WRITE(DMCUB_OUTBOX1_RPTR, 0); @@ -135,7 +141,7 @@ void dmub_dcn32_reset(struct dmub_srv *dmub) REG_WRITE(DMCUB_OUTBOX0_WPTR, 0); REG_WRITE(DMCUB_SCRATCH0, 0); - /* Clear the GPINT command manually so we don't reset again. */ + /* Clear the GPINT command manually so we don't send anything during boot. 
*/ cmd.all = 0; dmub->hw_funcs.set_gpint(dmub, cmd); } @@ -419,8 +425,8 @@ uint32_t dmub_dcn32_get_current_time(struct dmub_srv *dmub) void dmub_dcn32_get_diagnostic_data(struct dmub_srv *dmub) { - uint32_t is_dmub_enabled, is_soft_reset, is_sec_reset; - uint32_t is_traceport_enabled, is_cw0_enabled, is_cw6_enabled; + uint32_t is_dmub_enabled, is_soft_reset, is_pwait; + uint32_t is_traceport_enabled, is_cw6_enabled; struct dmub_timeout_info timeout = {0}; if (!dmub) @@ -470,18 +476,15 @@ void dmub_dcn32_get_diagnostic_data(struct dmub_srv *dmub) REG_GET(DMCUB_CNTL, DMCUB_ENABLE, &is_dmub_enabled); dmub->debug.is_dmcub_enabled = is_dmub_enabled; + REG_GET(DMCUB_CNTL, DMCUB_PWAIT_MODE_STATUS, &is_pwait); + dmub->debug.is_pwait = is_pwait; + REG_GET(DMCUB_CNTL2, DMCUB_SOFT_RESET, &is_soft_reset); dmub->debug.is_dmcub_soft_reset = is_soft_reset; - REG_GET(DMCUB_SEC_CNTL, DMCUB_SEC_RESET_STATUS, &is_sec_reset); - dmub->debug.is_dmcub_secure_reset = is_sec_reset; - REG_GET(DMCUB_CNTL, DMCUB_TRACEPORT_EN, &is_traceport_enabled); dmub->debug.is_traceport_en = is_traceport_enabled; - REG_GET(DMCUB_REGION3_CW0_TOP_ADDRESS, DMCUB_REGION3_CW0_ENABLE, &is_cw0_enabled); - dmub->debug.is_cw0_enabled = is_cw0_enabled; - REG_GET(DMCUB_REGION3_CW6_TOP_ADDRESS, DMCUB_REGION3_CW6_ENABLE, &is_cw6_enabled); dmub->debug.is_cw6_enabled = is_cw6_enabled; diff --git a/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn32.h b/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn32.h index 1a229450c53d..daf81027d663 100644 --- a/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn32.h +++ b/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn32.h @@ -89,6 +89,9 @@ struct dmub_srv; DMUB_SR(DMCUB_REGION5_OFFSET) \ DMUB_SR(DMCUB_REGION5_OFFSET_HIGH) \ DMUB_SR(DMCUB_REGION5_TOP_ADDRESS) \ + DMUB_SR(DMCUB_REGION6_OFFSET) \ + DMUB_SR(DMCUB_REGION6_OFFSET_HIGH) \ + DMUB_SR(DMCUB_REGION6_TOP_ADDRESS) \ DMUB_SR(DMCUB_SCRATCH0) \ DMUB_SR(DMCUB_SCRATCH1) \ DMUB_SR(DMCUB_SCRATCH2) \ @@ -155,6 +158,8 @@ struct dmub_srv; DMUB_SF(DMCUB_REGION4_TOP_ADDRESS, DMCUB_REGION4_ENABLE) \ DMUB_SF(DMCUB_REGION5_TOP_ADDRESS, DMCUB_REGION5_TOP_ADDRESS) \ DMUB_SF(DMCUB_REGION5_TOP_ADDRESS, DMCUB_REGION5_ENABLE) \ + DMUB_SF(DMCUB_REGION6_TOP_ADDRESS, DMCUB_REGION6_TOP_ADDRESS) \ + DMUB_SF(DMCUB_REGION6_TOP_ADDRESS, DMCUB_REGION6_ENABLE) \ DMUB_SF(CC_DC_PIPE_DIS, DC_DMCUB_ENABLE) \ DMUB_SF(MMHUBBUB_SOFT_RESET, DMUIF_SOFT_RESET) \ DMUB_SF(DCN_VM_FB_LOCATION_BASE, FB_BASE) \ @@ -162,7 +167,8 @@ struct dmub_srv; DMUB_SF(DMCUB_INBOX0_WPTR, DMCUB_INBOX0_WPTR) \ DMUB_SF(DMCUB_REGION3_TMR_AXI_SPACE, DMCUB_REGION3_TMR_AXI_SPACE) \ DMUB_SF(DMCUB_INTERRUPT_ENABLE, DMCUB_GPINT_IH_INT_EN) \ - DMUB_SF(DMCUB_INTERRUPT_ACK, DMCUB_GPINT_IH_INT_ACK) + DMUB_SF(DMCUB_INTERRUPT_ACK, DMCUB_GPINT_IH_INT_ACK) \ + DMUB_SF(DMCUB_CNTL, DMCUB_PWAIT_MODE_STATUS) struct dmub_srv_dcn32_reg_offset { #define DMUB_SR(reg) uint32_t reg; diff --git a/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn35.c b/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn35.c index 72a0f078cd1a..834e5434ccb8 100644 --- a/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn35.c +++ b/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn35.c @@ -92,19 +92,15 @@ void dmub_dcn35_reset(struct dmub_srv *dmub) uint32_t in_reset, is_enabled, scratch, i, pwait_mode; REG_GET(DMCUB_CNTL2, DMCUB_SOFT_RESET, &in_reset); + REG_GET(DMCUB_CNTL, DMCUB_ENABLE, &is_enabled); - if (in_reset == 0) { + if (in_reset == 0 && is_enabled != 0) { cmd.bits.status = 1; cmd.bits.command_code = DMUB_GPINT__STOP_FW; cmd.bits.param = 0; dmub->hw_funcs.set_gpint(dmub, cmd); - /** - 
* Timeout covers both the ACK and the wait - * for remaining work to finish. - */ - for (i = 0; i < timeout; ++i) { if (dmub->hw_funcs.is_gpint_acked(dmub, cmd)) break; @@ -130,11 +126,9 @@ void dmub_dcn35_reset(struct dmub_srv *dmub) /* Force reset in case we timed out, DMCUB is likely hung. */ } - REG_GET(DMCUB_CNTL, DMCUB_ENABLE, &is_enabled); - if (is_enabled) { REG_UPDATE(DMCUB_CNTL2, DMCUB_SOFT_RESET, 1); - REG_UPDATE(MMHUBBUB_SOFT_RESET, DMUIF_SOFT_RESET, 1); + udelay(1); REG_UPDATE(DMCUB_CNTL, DMCUB_ENABLE, 0); } @@ -160,11 +154,7 @@ void dmub_dcn35_reset_release(struct dmub_srv *dmub) LONO_SOCCLK_GATE_DISABLE, 1, LONO_DMCUBCLK_GATE_DISABLE, 1); - REG_UPDATE(MMHUBBUB_SOFT_RESET, DMUIF_SOFT_RESET, 1); - udelay(1); REG_UPDATE_2(DMCUB_CNTL, DMCUB_ENABLE, 1, DMCUB_TRACEPORT_EN, 1); - REG_UPDATE(DMCUB_CNTL2, DMCUB_SOFT_RESET, 1); - udelay(1); REG_UPDATE(MMHUBBUB_SOFT_RESET, DMUIF_SOFT_RESET, 0); REG_UPDATE(DMCUB_CNTL2, DMCUB_SOFT_RESET, 0); } @@ -410,13 +400,14 @@ union dmub_fw_boot_options dmub_dcn35_get_fw_boot_option(struct dmub_srv *dmub) void dmub_dcn35_enable_dmub_boot_options(struct dmub_srv *dmub, const struct dmub_srv_hw_params *params) { union dmub_fw_boot_options boot_options = {0}; - union dmub_fw_boot_options cur_boot_options = {0}; - cur_boot_options = dmub_dcn35_get_fw_boot_option(dmub); + if (!dmub->dpia_supported) { + dmub->dpia_supported = dmub_dcn35_get_fw_boot_option(dmub).bits.enable_dpia; + } boot_options.bits.z10_disable = params->disable_z10; boot_options.bits.dpia_supported = params->dpia_supported; - boot_options.bits.enable_dpia = cur_boot_options.bits.enable_dpia && !params->disable_dpia; + boot_options.bits.enable_dpia = dmub->dpia_supported && !params->disable_dpia; boot_options.bits.usb4_cm_version = params->usb4_cm_version; boot_options.bits.dpia_hpd_int_enable_supported = params->dpia_hpd_int_enable_supported; boot_options.bits.power_optimization = params->power_optimization; @@ -464,7 +455,7 @@ uint32_t dmub_dcn35_get_current_time(struct dmub_srv *dmub) void dmub_dcn35_get_diagnostic_data(struct dmub_srv *dmub) { - uint32_t is_dmub_enabled, is_soft_reset; + uint32_t is_dmub_enabled, is_soft_reset, is_pwait; uint32_t is_traceport_enabled, is_cw6_enabled; struct dmub_timeout_info timeout = {0}; @@ -515,6 +506,9 @@ void dmub_dcn35_get_diagnostic_data(struct dmub_srv *dmub) REG_GET(DMCUB_CNTL, DMCUB_ENABLE, &is_dmub_enabled); dmub->debug.is_dmcub_enabled = is_dmub_enabled; + REG_GET(DMCUB_CNTL, DMCUB_PWAIT_MODE_STATUS, &is_pwait); + dmub->debug.is_pwait = is_pwait; + REG_GET(DMCUB_CNTL2, DMCUB_SOFT_RESET, &is_soft_reset); dmub->debug.is_dmcub_soft_reset = is_soft_reset; diff --git a/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn401.c b/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn401.c index 2575dbc448f7..b31adbd0d685 100644 --- a/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn401.c +++ b/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn401.c @@ -413,7 +413,7 @@ uint32_t dmub_dcn401_get_current_time(struct dmub_srv *dmub) void dmub_dcn401_get_diagnostic_data(struct dmub_srv *dmub) { - uint32_t is_dmub_enabled, is_soft_reset, is_sec_reset; + uint32_t is_dmub_enabled, is_soft_reset, is_sec_reset, is_pwait; uint32_t is_traceport_enabled, is_cw0_enabled, is_cw6_enabled; struct dmub_timeout_info timeout = {0}; @@ -464,6 +464,9 @@ void dmub_dcn401_get_diagnostic_data(struct dmub_srv *dmub) REG_GET(DMCUB_CNTL, DMCUB_ENABLE, &is_dmub_enabled); dmub->debug.is_dmcub_enabled = is_dmub_enabled; + REG_GET(DMCUB_CNTL, DMCUB_PWAIT_MODE_STATUS, &is_pwait); + dmub->debug.is_pwait 
= is_pwait; + REG_GET(DMCUB_CNTL2, DMCUB_SOFT_RESET, &is_soft_reset); dmub->debug.is_dmcub_soft_reset = is_soft_reset; diff --git a/drivers/gpu/drm/amd/display/dmub/src/dmub_srv.c b/drivers/gpu/drm/amd/display/dmub/src/dmub_srv.c index acca7943a8c8..b17a19400c06 100644 --- a/drivers/gpu/drm/amd/display/dmub/src/dmub_srv.c +++ b/drivers/gpu/drm/amd/display/dmub/src/dmub_srv.c @@ -65,6 +65,12 @@ /* Default scratch mem size. */ #define DMUB_SCRATCH_MEM_SIZE (1024) +/* Default indirect buffer size. */ +#define DMUB_IB_MEM_SIZE (1280) + +/* Default LSDMA ring buffer size. */ +#define DMUB_LSDMA_RB_SIZE (64 * 1024) + /* Number of windows in use. */ #define DMUB_NUM_WINDOWS (DMUB_WINDOW_TOTAL) /* Base addresses. */ @@ -559,7 +565,9 @@ enum dmub_status window_sizes[DMUB_WINDOW_5_TRACEBUFF] = trace_buffer_size; window_sizes[DMUB_WINDOW_6_FW_STATE] = fw_state_size; window_sizes[DMUB_WINDOW_7_SCRATCH_MEM] = DMUB_SCRATCH_MEM_SIZE; + window_sizes[DMUB_WINDOW_IB_MEM] = DMUB_IB_MEM_SIZE; window_sizes[DMUB_WINDOW_SHARED_STATE] = max(DMUB_FW_HEADER_SHARED_STATE_SIZE, shared_state_size); + window_sizes[DMUB_WINDOW_LSDMA_BUFFER] = DMUB_LSDMA_RB_SIZE; out->fb_size = dmub_srv_calc_regions_for_memory_type(params, out, window_sizes, DMUB_WINDOW_MEMORY_TYPE_FB); @@ -645,6 +653,7 @@ enum dmub_status dmub_srv_hw_init(struct dmub_srv *dmub, struct dmub_fb *tracebuff_fb = params->fb[DMUB_WINDOW_5_TRACEBUFF]; struct dmub_fb *fw_state_fb = params->fb[DMUB_WINDOW_6_FW_STATE]; struct dmub_fb *scratch_mem_fb = params->fb[DMUB_WINDOW_7_SCRATCH_MEM]; + struct dmub_fb *ib_mem_gart = params->fb[DMUB_WINDOW_IB_MEM]; struct dmub_fb *shared_state_fb = params->fb[DMUB_WINDOW_SHARED_STATE]; struct dmub_rb_init_params rb_params, outbox0_rb_params; @@ -655,7 +664,7 @@ enum dmub_status dmub_srv_hw_init(struct dmub_srv *dmub, return DMUB_STATUS_INVALID; if (!inst_fb || !stack_fb || !data_fb || !bios_fb || !mail_fb || - !tracebuff_fb || !fw_state_fb || !scratch_mem_fb) { + !tracebuff_fb || !fw_state_fb || !scratch_mem_fb || !ib_mem_gart) { ASSERT(0); return DMUB_STATUS_INVALID; } @@ -741,6 +750,8 @@ enum dmub_status dmub_srv_hw_init(struct dmub_srv *dmub, dmub->scratch_mem_fb = *scratch_mem_fb; + dmub->ib_mem_gart = *ib_mem_gart; + if (dmub->hw_funcs.setup_windows) dmub->hw_funcs.setup_windows(dmub, &cw2, &cw3, &cw4, &cw5, &cw6, ®ion6); diff --git a/drivers/gpu/drm/amd/display/dmub/src/dmub_srv_stat.c b/drivers/gpu/drm/amd/display/dmub/src/dmub_srv_stat.c index 567c5b1aeb7a..e7a58b140388 100644 --- a/drivers/gpu/drm/amd/display/dmub/src/dmub_srv_stat.c +++ b/drivers/gpu/drm/amd/display/dmub/src/dmub_srv_stat.c @@ -71,7 +71,7 @@ enum dmub_status dmub_srv_stat_get_notification(struct dmub_srv *dmub, switch (cmd.cmd_common.header.type) { case DMUB_OUT_CMD__DP_AUX_REPLY: notify->type = DMUB_NOTIFICATION_AUX_REPLY; - notify->link_index = cmd.dp_aux_reply.control.instance; + notify->instance = cmd.dp_aux_reply.control.instance; notify->result = cmd.dp_aux_reply.control.result; dmub_memcpy((void *)¬ify->aux_reply, (void *)&cmd.dp_aux_reply.reply_data, sizeof(struct aux_reply_data)); @@ -84,17 +84,17 @@ enum dmub_status dmub_srv_stat_get_notification(struct dmub_srv *dmub, notify->type = DMUB_NOTIFICATION_HPD_IRQ; } - notify->link_index = cmd.dp_hpd_notify.hpd_data.instance; + notify->instance = cmd.dp_hpd_notify.hpd_data.instance; notify->result = AUX_RET_SUCCESS; break; case DMUB_OUT_CMD__SET_CONFIG_REPLY: notify->type = DMUB_NOTIFICATION_SET_CONFIG_REPLY; - notify->link_index = cmd.set_config_reply.set_config_reply_control.instance; + 
notify->instance = cmd.set_config_reply.set_config_reply_control.instance; notify->sc_status = cmd.set_config_reply.set_config_reply_control.status; break; case DMUB_OUT_CMD__DPIA_NOTIFICATION: notify->type = DMUB_NOTIFICATION_DPIA_NOTIFICATION; - notify->link_index = cmd.dpia_notification.payload.header.instance; + notify->instance = cmd.dpia_notification.payload.header.instance; break; case DMUB_OUT_CMD__HPD_SENSE_NOTIFY: notify->type = DMUB_NOTIFICATION_HPD_SENSE_NOTIFY; diff --git a/drivers/gpu/drm/amd/display/include/dal_asic_id.h b/drivers/gpu/drm/amd/display/include/dal_asic_id.h index 5fc29164e4b4..8aea50aa9533 100644 --- a/drivers/gpu/drm/amd/display/include/dal_asic_id.h +++ b/drivers/gpu/drm/amd/display/include/dal_asic_id.h @@ -213,6 +213,11 @@ enum { #endif #define DEVICE_ID_NV_13FE 0x13FE // CYAN_SKILLFISH #define DEVICE_ID_NV_143F 0x143F +#define DEVICE_ID_NV_13F9 0x13F9 +#define DEVICE_ID_NV_13FA 0x13FA +#define DEVICE_ID_NV_13FB 0x13FB +#define DEVICE_ID_NV_13FC 0x13FC +#define DEVICE_ID_NV_13DB 0x13DB #define FAMILY_VGH 144 #define DEVICE_ID_VGH_163F 0x163F #define DEVICE_ID_VGH_1435 0x1435 diff --git a/drivers/gpu/drm/amd/display/modules/freesync/freesync.c b/drivers/gpu/drm/amd/display/modules/freesync/freesync.c index 3ba9b62ba70b..ce421bcddcb0 100644 --- a/drivers/gpu/drm/amd/display/modules/freesync/freesync.c +++ b/drivers/gpu/drm/amd/display/modules/freesync/freesync.c @@ -147,7 +147,7 @@ unsigned int mod_freesync_calc_v_total_from_refresh( ((unsigned int)(div64_u64((1000000000ULL * 1000000), refresh_in_uhz))); - if (MICRO_HZ_TO_HZ(refresh_in_uhz) <= stream->timing.min_refresh_in_uhz) { + if (refresh_in_uhz <= stream->timing.min_refresh_in_uhz) { /* When the target refresh rate is the minimum panel refresh rate, * round down the vtotal value to avoid stretching vblank over * panel's vtotal boundary. @@ -155,6 +155,14 @@ unsigned int mod_freesync_calc_v_total_from_refresh( v_total = div64_u64(div64_u64(((unsigned long long)( frame_duration_in_ns) * (stream->timing.pix_clk_100hz / 10)), stream->timing.h_total), 1000000); + } else if (refresh_in_uhz >= stream->timing.max_refresh_in_uhz) { + /* When the target refresh rate is the maximum panel refresh rate + * round up the vtotal value to prevent off-by-one error causing + * v_total_min to be below the panel's lower bound + */ + v_total = div64_u64(div64_u64(((unsigned long long)( + frame_duration_in_ns) * (stream->timing.pix_clk_100hz / 10)), + stream->timing.h_total) + (1000000 - 1), 1000000); } else { v_total = div64_u64(div64_u64(((unsigned long long)( frame_duration_in_ns) * (stream->timing.pix_clk_100hz / 10)), @@ -218,8 +226,8 @@ static void update_v_total_for_static_ramp( unsigned int target_duration_in_us = calc_duration_in_us_from_refresh_in_uhz( in_out_vrr->fixed.target_refresh_in_uhz); - bool ramp_direction_is_up = (current_duration_in_us > - target_duration_in_us) ? 
true : false; + bool ramp_direction_is_up = current_duration_in_us > + target_duration_in_us; /* Calculate ratio between new and current frame duration with 3 digit */ unsigned int frame_duration_ratio = div64_u64(1000000, diff --git a/drivers/gpu/drm/amd/display/modules/hdcp/hdcp.c b/drivers/gpu/drm/amd/display/modules/hdcp/hdcp.c index 5e01c6e24cbc..c760216a6240 100644 --- a/drivers/gpu/drm/amd/display/modules/hdcp/hdcp.c +++ b/drivers/gpu/drm/amd/display/modules/hdcp/hdcp.c @@ -29,6 +29,7 @@ static void push_error_status(struct mod_hdcp *hdcp, enum mod_hdcp_status status) { struct mod_hdcp_trace *trace = &hdcp->connection.trace; + const uint8_t retry_limit = hdcp->connection.link.adjust.retry_limit; if (trace->error_count < MAX_NUM_OF_ERROR_TRACE) { trace->errors[trace->error_count].status = status; @@ -39,11 +40,11 @@ static void push_error_status(struct mod_hdcp *hdcp, if (is_hdcp1(hdcp)) { hdcp->connection.hdcp1_retry_count++; - if (hdcp->connection.hdcp1_retry_count == MAX_NUM_OF_ATTEMPTS) + if (hdcp->connection.hdcp1_retry_count == retry_limit) hdcp->connection.link.adjust.hdcp1.disable = 1; } else if (is_hdcp2(hdcp)) { hdcp->connection.hdcp2_retry_count++; - if (hdcp->connection.hdcp2_retry_count == MAX_NUM_OF_ATTEMPTS) + if (hdcp->connection.hdcp2_retry_count == retry_limit) hdcp->connection.link.adjust.hdcp2.disable = 1; } } diff --git a/drivers/gpu/drm/amd/display/modules/hdcp/hdcp_psp.c b/drivers/gpu/drm/amd/display/modules/hdcp/hdcp_psp.c index e58e7b93810b..6b7db8ec9a53 100644 --- a/drivers/gpu/drm/amd/display/modules/hdcp/hdcp_psp.c +++ b/drivers/gpu/drm/amd/display/modules/hdcp/hdcp_psp.c @@ -260,6 +260,9 @@ enum mod_hdcp_status mod_hdcp_hdcp1_create_session(struct mod_hdcp *hdcp) return MOD_HDCP_STATUS_FAILURE; } + if (!display) + return MOD_HDCP_STATUS_DISPLAY_NOT_FOUND; + hdcp_cmd = (struct ta_hdcp_shared_memory *)psp->hdcp_context.context.mem_context.shared_buf; mutex_lock(&psp->hdcp_context.mutex); diff --git a/drivers/gpu/drm/amd/display/modules/inc/mod_hdcp.h b/drivers/gpu/drm/amd/display/modules/inc/mod_hdcp.h index c42468bb70ac..b51ddf2846df 100644 --- a/drivers/gpu/drm/amd/display/modules/inc/mod_hdcp.h +++ b/drivers/gpu/drm/amd/display/modules/inc/mod_hdcp.h @@ -220,6 +220,7 @@ struct mod_hdcp_link_adjustment_hdcp2 { struct mod_hdcp_link_adjustment { uint8_t auth_delay; + uint8_t retry_limit; struct mod_hdcp_link_adjustment_hdcp1 hdcp1; struct mod_hdcp_link_adjustment_hdcp2 hdcp2; }; diff --git a/drivers/gpu/drm/amd/display/modules/power/power_helpers.h b/drivers/gpu/drm/amd/display/modules/power/power_helpers.h index 758a8aa31fbe..391209a3bf29 100644 --- a/drivers/gpu/drm/amd/display/modules/power/power_helpers.h +++ b/drivers/gpu/drm/amd/display/modules/power/power_helpers.h @@ -79,4 +79,6 @@ bool psr_su_set_dsc_slice_height(struct dc *dc, struct dc_link *link, bool fill_custom_backlight_caps(unsigned int config_no, struct dm_acpi_atif_backlight_caps *caps); void reset_replay_dsync_error_count(struct dc_link *link); +void change_replay_to_psr(struct dc_link *link); +void change_psr_to_replay(struct dc_link *link); #endif /* MODULES_POWER_POWER_HELPERS_H_ */ diff --git a/drivers/gpu/drm/amd/include/amd_cper.h b/drivers/gpu/drm/amd/include/amd_cper.h index 086869264425..a252ee4c7874 100644 --- a/drivers/gpu/drm/amd/include/amd_cper.h +++ b/drivers/gpu/drm/amd/include/amd_cper.h @@ -1,4 +1,4 @@ -/* SPDX-License-Identifier: GPL-2.0 */ +/* SPDX-License-Identifier: MIT */ /* * Copyright 2025 Advanced Micro Devices, Inc. 
* diff --git a/drivers/gpu/drm/amd/include/amd_shared.h b/drivers/gpu/drm/amd/include/amd_shared.h index 11374a2cbab8..75efda2969cf 100644 --- a/drivers/gpu/drm/amd/include/amd_shared.h +++ b/drivers/gpu/drm/amd/include/amd_shared.h @@ -239,18 +239,51 @@ enum amd_harvest_ip_mask { AMD_HARVEST_IP_DMU_MASK = 0x4, }; +/** + * enum DC_FEATURE_MASK - Bits that control DC feature defaults + */ enum DC_FEATURE_MASK { //Default value can be found at "uint amdgpu_dc_feature_mask" - DC_FBC_MASK = (1 << 0), //0x1, disabled by default - DC_MULTI_MON_PP_MCLK_SWITCH_MASK = (1 << 1), //0x2, enabled by default - DC_DISABLE_FRACTIONAL_PWM_MASK = (1 << 2), //0x4, disabled by default - DC_PSR_MASK = (1 << 3), //0x8, disabled by default for dcn < 3.1 - DC_EDP_NO_POWER_SEQUENCING = (1 << 4), //0x10, disabled by default - DC_DISABLE_LTTPR_DP1_4A = (1 << 5), //0x20, disabled by default - DC_DISABLE_LTTPR_DP2_0 = (1 << 6), //0x40, disabled by default - DC_PSR_ALLOW_SMU_OPT = (1 << 7), //0x80, disabled by default - DC_PSR_ALLOW_MULTI_DISP_OPT = (1 << 8), //0x100, disabled by default - DC_REPLAY_MASK = (1 << 9), //0x200, disabled by default for dcn < 3.1.4 + /** + * @DC_FBC_MASK: (0x1) disabled by default + */ + DC_FBC_MASK = (1 << 0), + /** + * @DC_MULTI_MON_PP_MCLK_SWITCH_MASK: (0x2) enabled by default + */ + DC_MULTI_MON_PP_MCLK_SWITCH_MASK = (1 << 1), + /** + * @DC_DISABLE_FRACTIONAL_PWM_MASK: (0x4) disabled by default + */ + DC_DISABLE_FRACTIONAL_PWM_MASK = (1 << 2), + /** + * @DC_PSR_MASK: (0x8) disabled by default for DCN < 3.1 + */ + DC_PSR_MASK = (1 << 3), + /** + * @DC_EDP_NO_POWER_SEQUENCING: (0x10) disabled by default + */ + DC_EDP_NO_POWER_SEQUENCING = (1 << 4), + /** + * @DC_DISABLE_LTTPR_DP1_4A: (0x20) disabled by default + */ + DC_DISABLE_LTTPR_DP1_4A = (1 << 5), + /** + * @DC_DISABLE_LTTPR_DP2_0: (0x40) disabled by default + */ + DC_DISABLE_LTTPR_DP2_0 = (1 << 6), + /** + * @DC_PSR_ALLOW_SMU_OPT: (0x80) disabled by default + */ + DC_PSR_ALLOW_SMU_OPT = (1 << 7), + /** + * @DC_PSR_ALLOW_MULTI_DISP_OPT: (0x100) disabled by default + */ + DC_PSR_ALLOW_MULTI_DISP_OPT = (1 << 8), + /** + * @DC_REPLAY_MASK: (0x200) disabled by default for DCN < 3.1.4 + */ + DC_REPLAY_MASK = (1 << 9), }; /** @@ -258,64 +291,64 @@ enum DC_FEATURE_MASK { */ enum DC_DEBUG_MASK { /** - * @DC_DISABLE_PIPE_SPLIT: If set, disable pipe-splitting + * @DC_DISABLE_PIPE_SPLIT: (0x1) If set, disable pipe-splitting */ DC_DISABLE_PIPE_SPLIT = 0x1, /** - * @DC_DISABLE_STUTTER: If set, disable memory stutter mode + * @DC_DISABLE_STUTTER: (0x2) If set, disable memory stutter mode */ DC_DISABLE_STUTTER = 0x2, /** - * @DC_DISABLE_DSC: If set, disable display stream compression + * @DC_DISABLE_DSC: (0x4) If set, disable display stream compression */ DC_DISABLE_DSC = 0x4, /** - * @DC_DISABLE_CLOCK_GATING: If set, disable clock gating optimizations + * @DC_DISABLE_CLOCK_GATING: (0x8) If set, disable clock gating optimizations */ DC_DISABLE_CLOCK_GATING = 0x8, /** - * @DC_DISABLE_PSR: If set, disable Panel self refresh v1 and PSR-SU + * @DC_DISABLE_PSR: (0x10) If set, disable Panel self refresh v1 and PSR-SU */ DC_DISABLE_PSR = 0x10, /** - * @DC_FORCE_SUBVP_MCLK_SWITCH: If set, force mclk switch in subvp, even + * @DC_FORCE_SUBVP_MCLK_SWITCH: (0x20) If set, force mclk switch in subvp, even * if mclk switch in vblank is possible */ DC_FORCE_SUBVP_MCLK_SWITCH = 0x20, /** - * @DC_DISABLE_MPO: If set, disable multi-plane offloading + * @DC_DISABLE_MPO: (0x40) If set, disable multi-plane offloading */ DC_DISABLE_MPO = 0x40, /** - * 
@DC_ENABLE_DPIA_TRACE: If set, enable trace logging for DPIA + * @DC_ENABLE_DPIA_TRACE: (0x80) If set, enable trace logging for DPIA */ DC_ENABLE_DPIA_TRACE = 0x80, /** - * @DC_ENABLE_DML2: If set, force usage of DML2, even if the DCN version + * @DC_ENABLE_DML2: (0x100) If set, force usage of DML2, even if the DCN version * does not default to it. */ DC_ENABLE_DML2 = 0x100, /** - * @DC_DISABLE_PSR_SU: If set, disable PSR SU + * @DC_DISABLE_PSR_SU: (0x200) If set, disable PSR SU */ DC_DISABLE_PSR_SU = 0x200, /** - * @DC_DISABLE_REPLAY: If set, disable Panel Replay + * @DC_DISABLE_REPLAY: (0x400) If set, disable Panel Replay */ DC_DISABLE_REPLAY = 0x400, /** - * @DC_DISABLE_IPS: If set, disable all Idle Power States, all the time. + * @DC_DISABLE_IPS: (0x800) If set, disable all Idle Power States, all the time. * If more than one IPS debug bit is set, the lowest bit takes * precedence. For example, if DC_FORCE_IPS_ENABLE and * DC_DISABLE_IPS_DYNAMIC are set, then DC_DISABLE_IPS_DYNAMIC takes @@ -324,56 +357,57 @@ enum DC_DEBUG_MASK { DC_DISABLE_IPS = 0x800, /** - * @DC_DISABLE_IPS_DYNAMIC: If set, disable all IPS, all the time, + * @DC_DISABLE_IPS_DYNAMIC: (0x1000) If set, disable all IPS, all the time, * *except* when driver goes into suspend. */ DC_DISABLE_IPS_DYNAMIC = 0x1000, /** - * @DC_DISABLE_IPS2_DYNAMIC: If set, disable IPS2 (IPS1 allowed) if + * @DC_DISABLE_IPS2_DYNAMIC: (0x2000) If set, disable IPS2 (IPS1 allowed) if * there is an enabled display. Otherwise, enable all IPS. */ DC_DISABLE_IPS2_DYNAMIC = 0x2000, /** - * @DC_FORCE_IPS_ENABLE: If set, force enable all IPS, all the time. + * @DC_FORCE_IPS_ENABLE: (0x4000) If set, force enable all IPS, all the time. */ DC_FORCE_IPS_ENABLE = 0x4000, /** - * @DC_DISABLE_ACPI_EDID: If set, don't attempt to fetch EDID for + * @DC_DISABLE_ACPI_EDID: (0x8000) If set, don't attempt to fetch EDID for * eDP display from ACPI _DDC method. */ DC_DISABLE_ACPI_EDID = 0x8000, /** - * @DC_DISABLE_HDMI_CEC: If set, disable HDMI-CEC feature in amdgpu driver. + * @DC_DISABLE_HDMI_CEC: (0x10000) If set, disable HDMI-CEC feature in amdgpu driver. */ DC_DISABLE_HDMI_CEC = 0x10000, /** - * @DC_DISABLE_SUBVP_FAMS: If set, disable DCN Sub-Viewport & Firmware Assisted + * @DC_DISABLE_SUBVP_FAMS: (0x20000) If set, disable DCN Sub-Viewport & Firmware Assisted * Memory Clock Switching (FAMS) feature in amdgpu driver. */ DC_DISABLE_SUBVP_FAMS = 0x20000, /** - * @DC_DISABLE_CUSTOM_BRIGHTNESS_CURVE: If set, disable support for custom brightness curves + * @DC_DISABLE_CUSTOM_BRIGHTNESS_CURVE: (0x40000) If set, disable support for custom + * brightness curves */ DC_DISABLE_CUSTOM_BRIGHTNESS_CURVE = 0x40000, /** - * @DC_HDCP_LC_FORCE_FW_ENABLE: If set, use HDCP Locality Check FW + * @DC_HDCP_LC_FORCE_FW_ENABLE: (0x80000) If set, use HDCP Locality Check FW * path regardless of reported HW capabilities. */ DC_HDCP_LC_FORCE_FW_ENABLE = 0x80000, /** - * @DC_HDCP_LC_ENABLE_SW_FALLBACK: If set, upon HDCP Locality Check FW + * @DC_HDCP_LC_ENABLE_SW_FALLBACK: (0x100000) If set, upon HDCP Locality Check FW * path failure, retry using legacy SW path. 
*/ DC_HDCP_LC_ENABLE_SW_FALLBACK = 0x100000, /** - * @DC_SKIP_DETECTION_LT: If set, skip detection link training + * @DC_SKIP_DETECTION_LT: (0x200000) If set, skip detection link training */ DC_SKIP_DETECTION_LT = 0x200000, }; @@ -396,6 +430,7 @@ enum amd_dpm_forced_level; * (such as allocating any required memory) * @suspend: handles IP specific hw/sw changes for suspend * @resume: handles IP specific hw/sw changes for resume + * @complete: handles IP specific changes after resume * @is_idle: returns current IP block idle status * @wait_for_idle: poll for idle * @check_soft_reset: check soft reset the IP block @@ -427,6 +462,7 @@ struct amd_ip_funcs { int (*prepare_suspend)(struct amdgpu_ip_block *ip_block); int (*suspend)(struct amdgpu_ip_block *ip_block); int (*resume)(struct amdgpu_ip_block *ip_block); + void (*complete)(struct amdgpu_ip_block *ip_block); bool (*is_idle)(struct amdgpu_ip_block *ip_block); int (*wait_for_idle)(struct amdgpu_ip_block *ip_block); bool (*check_soft_reset)(struct amdgpu_ip_block *ip_block); diff --git a/drivers/gpu/drm/amd/include/asic_reg/dce/dce_6_0_d.h b/drivers/gpu/drm/amd/include/asic_reg/dce/dce_6_0_d.h index 9de01ae574c0..067eddd9c62d 100644 --- a/drivers/gpu/drm/amd/include/asic_reg/dce/dce_6_0_d.h +++ b/drivers/gpu/drm/amd/include/asic_reg/dce/dce_6_0_d.h @@ -4115,6 +4115,7 @@ #define mmSCL0_SCL_COEF_RAM_CONFLICT_STATUS 0x1B55 #define mmSCL0_SCL_COEF_RAM_SELECT 0x1B40 #define mmSCL0_SCL_COEF_RAM_TAP_DATA 0x1B41 +#define mmSCL0_SCL_SCALER_ENABLE 0x1B42 #define mmSCL0_SCL_CONTROL 0x1B44 #define mmSCL0_SCL_DEBUG 0x1B6A #define mmSCL0_SCL_DEBUG2 0x1B69 @@ -4144,6 +4145,7 @@ #define mmSCL1_SCL_COEF_RAM_CONFLICT_STATUS 0x1E55 #define mmSCL1_SCL_COEF_RAM_SELECT 0x1E40 #define mmSCL1_SCL_COEF_RAM_TAP_DATA 0x1E41 +#define mmSCL1_SCL_SCALER_ENABLE 0x1E42 #define mmSCL1_SCL_CONTROL 0x1E44 #define mmSCL1_SCL_DEBUG 0x1E6A #define mmSCL1_SCL_DEBUG2 0x1E69 @@ -4173,6 +4175,7 @@ #define mmSCL2_SCL_COEF_RAM_CONFLICT_STATUS 0x4155 #define mmSCL2_SCL_COEF_RAM_SELECT 0x4140 #define mmSCL2_SCL_COEF_RAM_TAP_DATA 0x4141 +#define mmSCL2_SCL_SCALER_ENABLE 0x4142 #define mmSCL2_SCL_CONTROL 0x4144 #define mmSCL2_SCL_DEBUG 0x416A #define mmSCL2_SCL_DEBUG2 0x4169 @@ -4202,6 +4205,7 @@ #define mmSCL3_SCL_COEF_RAM_CONFLICT_STATUS 0x4455 #define mmSCL3_SCL_COEF_RAM_SELECT 0x4440 #define mmSCL3_SCL_COEF_RAM_TAP_DATA 0x4441 +#define mmSCL3_SCL_SCALER_ENABLE 0x4442 #define mmSCL3_SCL_CONTROL 0x4444 #define mmSCL3_SCL_DEBUG 0x446A #define mmSCL3_SCL_DEBUG2 0x4469 @@ -4231,6 +4235,7 @@ #define mmSCL4_SCL_COEF_RAM_CONFLICT_STATUS 0x4755 #define mmSCL4_SCL_COEF_RAM_SELECT 0x4740 #define mmSCL4_SCL_COEF_RAM_TAP_DATA 0x4741 +#define mmSCL4_SCL_SCALER_ENABLE 0x4742 #define mmSCL4_SCL_CONTROL 0x4744 #define mmSCL4_SCL_DEBUG 0x476A #define mmSCL4_SCL_DEBUG2 0x4769 @@ -4260,6 +4265,7 @@ #define mmSCL5_SCL_COEF_RAM_CONFLICT_STATUS 0x4A55 #define mmSCL5_SCL_COEF_RAM_SELECT 0x4A40 #define mmSCL5_SCL_COEF_RAM_TAP_DATA 0x4A41 +#define mmSCL5_SCL_SCALER_ENABLE 0x4A42 #define mmSCL5_SCL_CONTROL 0x4A44 #define mmSCL5_SCL_DEBUG 0x4A6A #define mmSCL5_SCL_DEBUG2 0x4A69 @@ -4287,6 +4293,7 @@ #define mmSCL_COEF_RAM_CONFLICT_STATUS 0x1B55 #define mmSCL_COEF_RAM_SELECT 0x1B40 #define mmSCL_COEF_RAM_TAP_DATA 0x1B41 +#define mmSCL_SCALER_ENABLE 0x1B42 #define mmSCL_CONTROL 0x1B44 #define mmSCL_DEBUG 0x1B6A #define mmSCL_DEBUG2 0x1B69 diff --git a/drivers/gpu/drm/amd/include/asic_reg/dce/dce_6_0_sh_mask.h b/drivers/gpu/drm/amd/include/asic_reg/dce/dce_6_0_sh_mask.h index 2d6a598a6c25..9317a7afa621 100644 --- 
a/drivers/gpu/drm/amd/include/asic_reg/dce/dce_6_0_sh_mask.h +++ b/drivers/gpu/drm/amd/include/asic_reg/dce/dce_6_0_sh_mask.h @@ -8650,6 +8650,8 @@ #define REGAMMA_LUT_INDEX__REGAMMA_LUT_INDEX__SHIFT 0x00000000 #define REGAMMA_LUT_WRITE_EN_MASK__REGAMMA_LUT_WRITE_EN_MASK_MASK 0x00000007L #define REGAMMA_LUT_WRITE_EN_MASK__REGAMMA_LUT_WRITE_EN_MASK__SHIFT 0x00000000 +#define SCL_SCALER_ENABLE__SCL_SCALE_EN_MASK 0x00000001L +#define SCL_SCALER_ENABLE__SCL_SCALE_EN__SHIFT 0x00000000 #define SCL_ALU_CONTROL__SCL_ALU_DISABLE_MASK 0x00000001L #define SCL_ALU_CONTROL__SCL_ALU_DISABLE__SHIFT 0x00000000 #define SCL_BYPASS_CONTROL__SCL_BYPASS_MODE_MASK 0x00000003L diff --git a/drivers/gpu/drm/amd/include/atomfirmware.h b/drivers/gpu/drm/amd/include/atomfirmware.h index 5c86423c2e92..3d083010e734 100644 --- a/drivers/gpu/drm/amd/include/atomfirmware.h +++ b/drivers/gpu/drm/amd/include/atomfirmware.h @@ -211,7 +211,7 @@ atom_bios_string = "ATOM" }; */ -#pragma pack(1) /* BIOS data must use byte aligment*/ +#pragma pack(1) /* BIOS data must use byte alignment*/ enum atombios_image_offset{ OFFSET_TO_ATOM_ROM_HEADER_POINTER = 0x00000048, @@ -255,8 +255,8 @@ struct atom_rom_header_v2_2 uint16_t subsystem_vendor_id; uint16_t subsystem_id; uint16_t pci_info_offset; - uint16_t masterhwfunction_offset; //Offest for SW to get all command function offsets, Don't change the position - uint16_t masterdatatable_offset; //Offest for SW to get all data table offsets, Don't change the position + uint16_t masterhwfunction_offset; //Offset for SW to get all command function offsets, Don't change the position + uint16_t masterdatatable_offset; //Offset for SW to get all data table offsets, Don't change the position uint16_t reserved; uint32_t pspdirtableoffset; }; @@ -453,7 +453,7 @@ struct atom_dtd_format uint8_t refreshrate; }; -/* atom_dtd_format.modemiscinfo defintion */ +/* atom_dtd_format.modemiscinfo definition */ enum atom_dtd_format_modemiscinfo{ ATOM_HSYNC_POLARITY = 0x0002, ATOM_VSYNC_POLARITY = 0x0004, @@ -678,7 +678,7 @@ struct lcd_info_v2_1 uint32_t reserved1[8]; }; -/* lcd_info_v2_1.panel_misc defintion */ +/* lcd_info_v2_1.panel_misc definition */ enum atom_lcd_info_panel_misc{ ATOM_PANEL_MISC_FPDI =0x0002, }; @@ -716,7 +716,7 @@ enum atom_gpio_pin_assignment_gpio_id { /* gpio_id pre-define id for multiple usage */ /* GPIO use to control PCIE_VDDC in certain SLT board */ PCIE_VDDC_CONTROL_GPIO_PINID = 56, - /* if PP_AC_DC_SWITCH_GPIO_PINID in Gpio_Pin_LutTable, AC/DC swithing feature is enable */ + /* if PP_AC_DC_SWITCH_GPIO_PINID in Gpio_Pin_LutTable, AC/DC switching feature is enable */ PP_AC_DC_SWITCH_GPIO_PINID = 60, /* VDDC_REGULATOR_VRHOT_GPIO_PINID in Gpio_Pin_LutTable, VRHot feature is enable */ VDDC_VRHOT_GPIO_PINID = 61, @@ -734,7 +734,7 @@ enum atom_gpio_pin_assignment_gpio_id { struct atom_gpio_pin_lut_v2_1 { struct atom_common_table_header table_header; - /*the real number of this included in the structure is calcualted by using the (whole structure size - the header size)/size of atom_gpio_pin_lut */ + /*the real number of this included in the structure is calculated by using the (whole structure size - the header size)/size of atom_gpio_pin_lut */ struct atom_gpio_pin_assignment gpio_pin[]; }; @@ -997,7 +997,7 @@ enum atom_connector_layout_info_mini_type_def { enum atom_display_device_tag_def{ ATOM_DISPLAY_LCD1_SUPPORT = 0x0002, //an embedded display is either an LVDS or eDP signal type of display - ATOM_DISPLAY_LCD2_SUPPORT = 0x0020, //second edp device tag 0x0020 for backward compability 
+ ATOM_DISPLAY_LCD2_SUPPORT = 0x0020, //second edp device tag 0x0020 for backward compatibility ATOM_DISPLAY_DFP1_SUPPORT = 0x0008, ATOM_DISPLAY_DFP2_SUPPORT = 0x0080, ATOM_DISPLAY_DFP3_SUPPORT = 0x0200, @@ -1011,7 +1011,7 @@ struct atom_display_object_path_v2 { uint16_t display_objid; //Connector Object ID or Misc Object ID uint16_t disp_recordoffset; - uint16_t encoderobjid; //first encoder closer to the connector, could be either an external or intenal encoder + uint16_t encoderobjid; //first encoder closer to the connector, could be either an external or internal encoder uint16_t extencoderobjid; //2nd encoder after the first encoder, from the connector point of view; uint16_t encoder_recordoffset; uint16_t extencoder_recordoffset; @@ -1023,7 +1023,7 @@ struct atom_display_object_path_v2 struct atom_display_object_path_v3 { uint16_t display_objid; //Connector Object ID or Misc Object ID uint16_t disp_recordoffset; - uint16_t encoderobjid; //first encoder closer to the connector, could be either an external or intenal encoder + uint16_t encoderobjid; //first encoder closer to the connector, could be either an external or internal encoder uint16_t reserved1; //only on USBC case, otherwise always = 0 uint16_t reserved2; //reserved and always = 0 uint16_t reserved3; //reserved and always = 0 @@ -3547,7 +3547,7 @@ struct atom_voltage_object_header_v4{ enum atom_voltage_object_mode { VOLTAGE_OBJ_GPIO_LUT = 0, //VOLTAGE and GPIO Lookup table ->atom_gpio_voltage_object_v4 - VOLTAGE_OBJ_VR_I2C_INIT_SEQ = 3, //VOLTAGE REGULATOR INIT sequece through I2C -> atom_i2c_voltage_object_v4 + VOLTAGE_OBJ_VR_I2C_INIT_SEQ = 3, //VOLTAGE REGULATOR INIT sequence through I2C -> atom_i2c_voltage_object_v4 VOLTAGE_OBJ_PHASE_LUT = 4, //Set Vregulator Phase lookup table ->atom_gpio_voltage_object_v4 VOLTAGE_OBJ_SVID2 = 7, //Indicate voltage control by SVID2 ->atom_svid2_voltage_object_v4 VOLTAGE_OBJ_EVV = 8, @@ -3585,7 +3585,7 @@ struct atom_gpio_voltage_object_v4 { struct atom_voltage_object_header_v4 header; // voltage mode = VOLTAGE_OBJ_GPIO_LUT or VOLTAGE_OBJ_PHASE_LUT uint8_t gpio_control_id; // default is 0 which indicate control through CG VID mode - uint8_t gpio_entry_num; // indiate the entry numbers of Votlage/Gpio value Look up table + uint8_t gpio_entry_num; // indicate the entry numbers of Votlage/Gpio value Look up table uint8_t phase_delay_us; // phase delay in unit of micro second uint8_t reserved; uint32_t gpio_mask_val; // GPIO Mask value @@ -4507,8 +4507,8 @@ struct amd_acpi_description_header{ struct uefi_acpi_vfct{ struct amd_acpi_description_header sheader; uint8_t tableUUID[16]; //0x24 - uint32_t vbiosimageoffset; //0x34. Offset to the first GOP_VBIOS_CONTENT block from the beginning of the stucture. - uint32_t lib1Imageoffset; //0x38. Offset to the first GOP_LIB1_CONTENT block from the beginning of the stucture. + uint32_t vbiosimageoffset; //0x34. Offset to the first GOP_VBIOS_CONTENT block from the beginning of the structure. + uint32_t lib1Imageoffset; //0x38. Offset to the first GOP_LIB1_CONTENT block from the beginning of the structure. 
uint32_t reserved[4]; //0x3C }; @@ -4540,7 +4540,7 @@ struct gop_lib1_content { /* *************************************************************************** Scratch Register definitions - Each number below indicates which scratch regiser request, Active and + Each number below indicates which scratch register request, Active and Connect all share the same definitions as display_device_tag defines *************************************************************************** */ diff --git a/drivers/gpu/drm/amd/include/dm_pp_interface.h b/drivers/gpu/drm/amd/include/dm_pp_interface.h index acd1cef61b7c..349544504c93 100644 --- a/drivers/gpu/drm/amd/include/dm_pp_interface.h +++ b/drivers/gpu/drm/amd/include/dm_pp_interface.h @@ -65,6 +65,7 @@ struct single_display_configuration { uint32_t view_resolution_cy; enum amd_pp_display_config_type displayconfigtype; uint32_t vertical_refresh; /* for active display */ + uint32_t pixel_clock; /* Pixel clock in KHz (for HDMI only: normalized) */ }; #define MAX_NUM_DISPLAY 32 diff --git a/drivers/gpu/drm/amd/include/ivsrcid/vcn/irqsrcs_vcn_5_0.h b/drivers/gpu/drm/amd/include/ivsrcid/vcn/irqsrcs_vcn_5_0.h index 64b553e7de1a..e7fdcee22a71 100644 --- a/drivers/gpu/drm/amd/include/ivsrcid/vcn/irqsrcs_vcn_5_0.h +++ b/drivers/gpu/drm/amd/include/ivsrcid/vcn/irqsrcs_vcn_5_0.h @@ -1,4 +1,4 @@ -/* SPDX-License-Identifier: GPL-2.0 */ +/* SPDX-License-Identifier: MIT */ /* * Copyright 2024 Advanced Micro Devices, Inc. All rights reserved. diff --git a/drivers/gpu/drm/amd/include/kgd_pp_interface.h b/drivers/gpu/drm/amd/include/kgd_pp_interface.h index f4d914dc731f..2b0cdb2a2775 100644 --- a/drivers/gpu/drm/amd/include/kgd_pp_interface.h +++ b/drivers/gpu/drm/amd/include/kgd_pp_interface.h @@ -30,6 +30,12 @@ extern const struct amdgpu_ip_block_version smu_v12_0_ip_block; extern const struct amdgpu_ip_block_version smu_v13_0_ip_block; extern const struct amdgpu_ip_block_version smu_v14_0_ip_block; +enum smu_temp_metric_type { + SMU_TEMP_METRIC_BASEBOARD, + SMU_TEMP_METRIC_GPUBOARD, + SMU_TEMP_METRIC_MAX, +}; + enum smu_event_type { SMU_EVENT_RESET_COMPLETE = 0, }; @@ -108,6 +114,8 @@ enum pp_clock_type { PP_VCLK1, PP_DCLK, PP_DCLK1, + PP_ISPICLK, + PP_ISPXCLK, OD_SCLK, OD_MCLK, OD_VDDC_CURVE, @@ -154,6 +162,10 @@ enum amd_pp_sensors { AMDGPU_PP_SENSOR_PEAK_PSTATE_SCLK, AMDGPU_PP_SENSOR_PEAK_PSTATE_MCLK, AMDGPU_PP_SENSOR_VCN_LOAD, + AMDGPU_PP_SENSOR_NODEPOWERLIMIT, + AMDGPU_PP_SENSOR_NODEPOWER, + AMDGPU_PP_SENSOR_GPPTRESIDENCY, + AMDGPU_PP_SENSOR_MAXNODEPOWERLIMIT, }; enum amd_pp_task { @@ -494,6 +506,8 @@ struct amd_pm_funcs { int (*set_df_cstate)(void *handle, enum pp_df_cstate state); int (*set_xgmi_pstate)(void *handle, uint32_t pstate); ssize_t (*get_gpu_metrics)(void *handle, void **table); + ssize_t (*get_temp_metrics)(void *handle, enum smu_temp_metric_type type, void *table); + bool (*temp_metrics_is_supported)(void *handle, enum smu_temp_metric_type type); ssize_t (*get_xcp_metrics)(void *handle, int xcp_id, void *table); ssize_t (*get_pm_metrics)(void *handle, void *pmmetrics, size_t size); int (*set_watermarks_for_clock_ranges)(void *handle, @@ -1593,6 +1607,79 @@ struct amdgpu_pm_metrics { uint8_t data[]; }; +enum amdgpu_vr_temp { + AMDGPU_VDDCR_VDD0_TEMP, + AMDGPU_VDDCR_VDD1_TEMP, + AMDGPU_VDDCR_VDD2_TEMP, + AMDGPU_VDDCR_VDD3_TEMP, + AMDGPU_VDDCR_SOC_A_TEMP, + AMDGPU_VDDCR_SOC_C_TEMP, + AMDGPU_VDDCR_SOCIO_A_TEMP, + AMDGPU_VDDCR_SOCIO_C_TEMP, + AMDGPU_VDD_085_HBM_TEMP, + AMDGPU_VDDCR_11_HBM_B_TEMP, + AMDGPU_VDDCR_11_HBM_D_TEMP, + AMDGPU_VDD_USR_TEMP, + 
AMDGPU_VDDIO_11_E32_TEMP, + AMDGPU_VR_MAX_TEMP_ENTRIES, +}; + +enum amdgpu_system_temp { + AMDGPU_UBB_FPGA_TEMP, + AMDGPU_UBB_FRONT_TEMP, + AMDGPU_UBB_BACK_TEMP, + AMDGPU_UBB_OAM7_TEMP, + AMDGPU_UBB_IBC_TEMP, + AMDGPU_UBB_UFPGA_TEMP, + AMDGPU_UBB_OAM1_TEMP, + AMDGPU_OAM_0_1_HSC_TEMP, + AMDGPU_OAM_2_3_HSC_TEMP, + AMDGPU_OAM_4_5_HSC_TEMP, + AMDGPU_OAM_6_7_HSC_TEMP, + AMDGPU_UBB_FPGA_0V72_VR_TEMP, + AMDGPU_UBB_FPGA_3V3_VR_TEMP, + AMDGPU_RETIMER_0_1_2_3_1V2_VR_TEMP, + AMDGPU_RETIMER_4_5_6_7_1V2_VR_TEMP, + AMDGPU_RETIMER_0_1_0V9_VR_TEMP, + AMDGPU_RETIMER_4_5_0V9_VR_TEMP, + AMDGPU_RETIMER_2_3_0V9_VR_TEMP, + AMDGPU_RETIMER_6_7_0V9_VR_TEMP, + AMDGPU_OAM_0_1_2_3_3V3_VR_TEMP, + AMDGPU_OAM_4_5_6_7_3V3_VR_TEMP, + AMDGPU_IBC_HSC_TEMP, + AMDGPU_IBC_TEMP, + AMDGPU_SYSTEM_MAX_TEMP_ENTRIES = 32, +}; + +enum amdgpu_node_temp { + AMDGPU_RETIMER_X_TEMP, + AMDGPU_OAM_X_IBC_TEMP, + AMDGPU_OAM_X_IBC_2_TEMP, + AMDGPU_OAM_X_VDD18_VR_TEMP, + AMDGPU_OAM_X_04_HBM_B_VR_TEMP, + AMDGPU_OAM_X_04_HBM_D_VR_TEMP, + AMDGPU_NODE_MAX_TEMP_ENTRIES = 12, +}; + +struct amdgpu_gpuboard_temp_metrics_v1_0 { + struct metrics_table_header common_header; + uint16_t label_version; + uint16_t node_id; + uint64_t accumulation_counter; + /* Encoded temperature in Celcius, 24:31 is sensor id 0:23 is temp value */ + uint32_t node_temp[AMDGPU_NODE_MAX_TEMP_ENTRIES]; + uint32_t vr_temp[AMDGPU_VR_MAX_TEMP_ENTRIES]; +}; + +struct amdgpu_baseboard_temp_metrics_v1_0 { + struct metrics_table_header common_header; + uint16_t label_version; + uint16_t node_id; + uint64_t accumulation_counter; + /* Encoded temperature in Celcius, 24:31 is sensor id 0:23 is temp value */ + uint32_t system_temp[AMDGPU_SYSTEM_MAX_TEMP_ENTRIES]; +}; + struct amdgpu_partition_metrics_v1_0 { struct metrics_table_header common_header; /* Current clocks (Mhz) */ diff --git a/drivers/gpu/drm/amd/include/mes_v11_api_def.h b/drivers/gpu/drm/amd/include/mes_v11_api_def.h index 15680c3f4970..ab1cfc92dbeb 100644 --- a/drivers/gpu/drm/amd/include/mes_v11_api_def.h +++ b/drivers/gpu/drm/amd/include/mes_v11_api_def.h @@ -238,7 +238,8 @@ union MESAPI_SET_HW_RESOURCES { uint32_t enable_mes_sch_stb_log : 1; uint32_t limit_single_process : 1; uint32_t is_strix_tmz_wa_enabled :1; - uint32_t reserved : 13; + uint32_t enable_lr_compute_wa : 1; + uint32_t reserved : 12; }; uint32_t uint32_t_all; }; diff --git a/drivers/gpu/drm/amd/include/mes_v12_api_def.h b/drivers/gpu/drm/amd/include/mes_v12_api_def.h index d85ffab2aff9..69611c7e30e3 100644 --- a/drivers/gpu/drm/amd/include/mes_v12_api_def.h +++ b/drivers/gpu/drm/amd/include/mes_v12_api_def.h @@ -66,6 +66,7 @@ enum MES_SCH_API_OPCODE { MES_SCH_API_SET_SE_MODE = 17, MES_SCH_API_SET_GANG_SUBMIT = 18, MES_SCH_API_SET_HW_RSRC_1 = 19, + MES_SCH_API_INV_TLBS = 20, MES_SCH_API_MAX = 0xFF }; @@ -286,7 +287,8 @@ union MESAPI_SET_HW_RESOURCES { uint32_t limit_single_process : 1; uint32_t unmapped_doorbell_handling: 2; uint32_t enable_mes_fence_int: 1; - uint32_t reserved : 10; + uint32_t enable_lr_compute_wa : 1; + uint32_t reserved : 9; }; uint32_t uint32_all; }; @@ -870,6 +872,35 @@ union MESAPI__SET_GANG_SUBMIT { uint32_t max_dwords_in_api[API_FRAME_SIZE_IN_DWORDS]; }; +/* + * @inv_sel 0-select pasid as input to do the invalidation , 1-select vmid + * @flush_type 0-old style, 1-light weight, 2-heavyweight, 3-heavyweight2 + * @inv_sel_id specific pasid when inv_sel is 0 and specific vmid if inv_sel is 1 + * @hub_id 0-gc_hub, 1-mm_hub + */ +struct INV_TLBS { + uint8_t inv_sel; + uint8_t flush_type; + uint16_t inv_sel_id; + uint32_t hub_id; + /* If 
following two inv_range setting are all 0 , whole VM will be invalidated, + * otherwise only required range be invalidated + */ + uint64_t inv_range_va_start; + uint64_t inv_range_size; + uint64_t reserved; +}; + +union MESAPI__INV_TLBS { + struct { + union MES_API_HEADER header; + struct MES_API_STATUS api_status; + struct INV_TLBS invalidate_tlbs; + }; + + uint32_t max_dwords_in_api[API_FRAME_SIZE_IN_DWORDS]; +}; + #pragma pack(pop) #endif diff --git a/drivers/gpu/drm/amd/pm/amdgpu_dpm.c b/drivers/gpu/drm/amd/pm/amdgpu_dpm.c index 5c1cbdc122d2..518d07afc7df 100644 --- a/drivers/gpu/drm/amd/pm/amdgpu_dpm.c +++ b/drivers/gpu/drm/amd/pm/amdgpu_dpm.c @@ -98,6 +98,7 @@ int amdgpu_dpm_set_powergating_by_smu(struct amdgpu_device *adev, case AMD_IP_BLOCK_TYPE_GMC: case AMD_IP_BLOCK_TYPE_ACP: case AMD_IP_BLOCK_TYPE_VPE: + case AMD_IP_BLOCK_TYPE_ISP: if (pp_funcs && pp_funcs->set_powergating_by_smu) ret = (pp_funcs->set_powergating_by_smu( (adev)->powerplay.pp_handle, block_type, gate, 0)); @@ -763,10 +764,6 @@ int amdgpu_dpm_send_rma_reason(struct amdgpu_device *adev) ret = smu_send_rma_reason(smu); mutex_unlock(&adev->pm.mutex); - if (adev->cper.enabled) - if (amdgpu_cper_generate_bp_threshold_record(adev)) - dev_warn(adev->dev, "fail to generate bad page threshold cper records\n"); - return ret; } @@ -823,6 +820,21 @@ int amdgpu_dpm_reset_vcn(struct amdgpu_device *adev, uint32_t inst_mask) return ret; } +bool amdgpu_dpm_reset_vcn_is_supported(struct amdgpu_device *adev) +{ + struct smu_context *smu = adev->powerplay.pp_handle; + bool ret; + + if (!is_support_sw_smu(adev)) + return false; + + mutex_lock(&adev->pm.mutex); + ret = smu_reset_vcn_is_supported(smu); + mutex_unlock(&adev->pm.mutex); + + return ret; +} + int amdgpu_dpm_get_dpm_freq_range(struct amdgpu_device *adev, enum pp_clock_type type, uint32_t *min, @@ -852,22 +864,16 @@ int amdgpu_dpm_set_soft_freq_range(struct amdgpu_device *adev, uint32_t max) { struct smu_context *smu = adev->powerplay.pp_handle; - int ret = 0; - - if (type != PP_SCLK) - return -EINVAL; if (!is_support_sw_smu(adev)) return -EOPNOTSUPP; - mutex_lock(&adev->pm.mutex); - ret = smu_set_soft_freq_range(smu, - SMU_SCLK, + guard(mutex)(&adev->pm.mutex); + + return smu_set_soft_freq_range(smu, + type, min, max); - mutex_unlock(&adev->pm.mutex); - - return ret; } int amdgpu_dpm_write_watermarks_table(struct amdgpu_device *adev) @@ -2043,6 +2049,66 @@ int amdgpu_dpm_get_dpm_clock_table(struct amdgpu_device *adev, } /** + * amdgpu_dpm_get_temp_metrics - Retrieve metrics for a specific compute + * partition + * @adev: Pointer to the device. + * @type: Identifier for the temperature type metrics to be fetched. + * @table: Pointer to a buffer where the metrics will be stored. If NULL, the + * function returns the size of the metrics structure. + * + * This function retrieves metrics for a specific temperature type, If the + * table parameter is NULL, the function returns the size of the metrics + * structure without populating it. + * + * Return: Size of the metrics structure on success, or a negative error code on failure. 
+ */ +ssize_t amdgpu_dpm_get_temp_metrics(struct amdgpu_device *adev, + enum smu_temp_metric_type type, void *table) +{ + const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs; + int ret; + + if (!pp_funcs->get_temp_metrics || + !amdgpu_dpm_is_temp_metrics_supported(adev, type)) + return -EOPNOTSUPP; + + mutex_lock(&adev->pm.mutex); + ret = pp_funcs->get_temp_metrics(adev->powerplay.pp_handle, type, table); + mutex_unlock(&adev->pm.mutex); + + return ret; +} + +/** + * amdgpu_dpm_is_temp_metrics_supported - Return if specific temperature metrics support + * is available + * @adev: Pointer to the device. + * @type: Identifier for the temperature type metrics to be fetched. + * + * This function returns metrics if specific temperature metrics type is supported or not. + * + * Return: True in case of metrics type supported else false. + */ +bool amdgpu_dpm_is_temp_metrics_supported(struct amdgpu_device *adev, + enum smu_temp_metric_type type) +{ + const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs; + bool support_temp_metrics = false; + + if (!pp_funcs->temp_metrics_is_supported) + return support_temp_metrics; + + if (is_support_sw_smu(adev)) { + mutex_lock(&adev->pm.mutex); + support_temp_metrics = + pp_funcs->temp_metrics_is_supported(adev->powerplay.pp_handle, type); + mutex_unlock(&adev->pm.mutex); + } + + return support_temp_metrics; +} + +/** * amdgpu_dpm_get_xcp_metrics - Retrieve metrics for a specific compute * partition * @adev: Pointer to the device. diff --git a/drivers/gpu/drm/amd/pm/amdgpu_dpm_internal.c b/drivers/gpu/drm/amd/pm/amdgpu_dpm_internal.c index 42efe838fa85..b5e9c3ecf703 100644 --- a/drivers/gpu/drm/amd/pm/amdgpu_dpm_internal.c +++ b/drivers/gpu/drm/amd/pm/amdgpu_dpm_internal.c @@ -27,69 +27,69 @@ #include "amdgpu_smu.h" #include "amdgpu_dpm_internal.h" -void amdgpu_dpm_get_active_displays(struct amdgpu_device *adev) +void amdgpu_dpm_get_display_cfg(struct amdgpu_device *adev) { struct drm_device *ddev = adev_to_drm(adev); + struct amd_pp_display_configuration *cfg = &adev->pm.pm_display_cfg; + struct single_display_configuration *display_cfg; struct drm_crtc *crtc; struct amdgpu_crtc *amdgpu_crtc; + struct amdgpu_connector *conn; + int num_crtcs = 0; + int vrefresh; + u32 vblank_in_pixels, vblank_time_us; + + cfg->min_vblank_time = 0xffffffff; /* if the displays are off, vblank time is max */ - adev->pm.dpm.new_active_crtcs = 0; - adev->pm.dpm.new_active_crtc_count = 0; if (adev->mode_info.num_crtc && adev->mode_info.mode_config_initialized) { - list_for_each_entry(crtc, - &ddev->mode_config.crtc_list, head) { + list_for_each_entry(crtc, &ddev->mode_config.crtc_list, head) { amdgpu_crtc = to_amdgpu_crtc(crtc); - if (amdgpu_crtc->enabled) { - adev->pm.dpm.new_active_crtcs |= (1 << amdgpu_crtc->crtc_id); - adev->pm.dpm.new_active_crtc_count++; - } - } - } -} -u32 amdgpu_dpm_get_vblank_time(struct amdgpu_device *adev) -{ - struct drm_device *dev = adev_to_drm(adev); - struct drm_crtc *crtc; - struct amdgpu_crtc *amdgpu_crtc; - u32 vblank_in_pixels; - u32 vblank_time_us = 0xffffffff; /* if the displays are off, vblank time is max */ + /* The array should only contain active displays. 
*/ + if (!amdgpu_crtc->enabled) + continue; + + conn = to_amdgpu_connector(amdgpu_crtc->connector); + display_cfg = &adev->pm.pm_display_cfg.displays[num_crtcs++]; + + if (amdgpu_crtc->hw_mode.clock) { + vrefresh = drm_mode_vrefresh(&amdgpu_crtc->hw_mode); - if (adev->mode_info.num_crtc && adev->mode_info.mode_config_initialized) { - list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) { - amdgpu_crtc = to_amdgpu_crtc(crtc); - if (crtc->enabled && amdgpu_crtc->enabled && amdgpu_crtc->hw_mode.clock) { vblank_in_pixels = amdgpu_crtc->hw_mode.crtc_htotal * (amdgpu_crtc->hw_mode.crtc_vblank_end - amdgpu_crtc->hw_mode.crtc_vdisplay + (amdgpu_crtc->v_border * 2)); - vblank_time_us = vblank_in_pixels * 1000 / amdgpu_crtc->hw_mode.clock; - break; - } - } - } + vblank_time_us = + vblank_in_pixels * 1000 / amdgpu_crtc->hw_mode.clock; - return vblank_time_us; -} + /* The legacy (non-DC) code has issues with mclk switching + * with refresh rates over 120 Hz. Disable mclk switching. + */ + if (vrefresh > 120) + vblank_time_us = 0; -u32 amdgpu_dpm_get_vrefresh(struct amdgpu_device *adev) -{ - struct drm_device *dev = adev_to_drm(adev); - struct drm_crtc *crtc; - struct amdgpu_crtc *amdgpu_crtc; - u32 vrefresh = 0; + /* Find minimum vblank time. */ + if (vblank_time_us < cfg->min_vblank_time) + cfg->min_vblank_time = vblank_time_us; - if (adev->mode_info.num_crtc && adev->mode_info.mode_config_initialized) { - list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) { - amdgpu_crtc = to_amdgpu_crtc(crtc); - if (crtc->enabled && amdgpu_crtc->enabled && amdgpu_crtc->hw_mode.clock) { - vrefresh = drm_mode_vrefresh(&amdgpu_crtc->hw_mode); - break; + /* Find vertical refresh rate of first active display. */ + if (!cfg->vrefresh) + cfg->vrefresh = vrefresh; } + + if (amdgpu_crtc->crtc_id < cfg->crtc_index) { + /* Find first active CRTC and its line time. */ + cfg->crtc_index = amdgpu_crtc->crtc_id; + cfg->line_time_in_us = amdgpu_crtc->line_time; + } + + display_cfg->controller_id = amdgpu_crtc->crtc_id; + display_cfg->pixel_clock = conn->pixelclock_for_modeset; } } - return vrefresh; + cfg->display_clk = adev->clock.default_dispclk; + cfg->num_display = num_crtcs; } diff --git a/drivers/gpu/drm/amd/pm/amdgpu_pm.c b/drivers/gpu/drm/amd/pm/amdgpu_pm.c index edd9895b46c0..b5fbb0fd1dc0 100644 --- a/drivers/gpu/drm/amd/pm/amdgpu_pm.c +++ b/drivers/gpu/drm/amd/pm/amdgpu_pm.c @@ -110,9 +110,10 @@ static int amdgpu_pm_dev_state_check(struct amdgpu_device *adev, bool runpm) bool runpm_check = runpm ? 
adev->in_runpm : false; if (amdgpu_in_reset(adev)) - return -EPERM; + return -EBUSY; + if (adev->in_suspend && !runpm_check) - return -EPERM; + return -EBUSY; return 0; } @@ -1398,6 +1399,8 @@ static ssize_t amdgpu_set_pp_power_profile_mode(struct device *dev, if (ret) return -EINVAL; parameter_size++; + if (!tmp_str) + break; while (isspace(*tmp_str)) tmp_str++; } @@ -1418,9 +1421,9 @@ static ssize_t amdgpu_set_pp_power_profile_mode(struct device *dev, return -EINVAL; } -static int amdgpu_hwmon_get_sensor_generic(struct amdgpu_device *adev, - enum amd_pp_sensors sensor, - void *query) +static int amdgpu_pm_get_sensor_generic(struct amdgpu_device *adev, + enum amd_pp_sensors sensor, + void *query) { int r, size = sizeof(uint32_t); @@ -1453,7 +1456,7 @@ static ssize_t amdgpu_get_gpu_busy_percent(struct device *dev, unsigned int value; int r; - r = amdgpu_hwmon_get_sensor_generic(adev, AMDGPU_PP_SENSOR_GPU_LOAD, &value); + r = amdgpu_pm_get_sensor_generic(adev, AMDGPU_PP_SENSOR_GPU_LOAD, &value); if (r) return r; @@ -1477,7 +1480,7 @@ static ssize_t amdgpu_get_mem_busy_percent(struct device *dev, unsigned int value; int r; - r = amdgpu_hwmon_get_sensor_generic(adev, AMDGPU_PP_SENSOR_MEM_LOAD, &value); + r = amdgpu_pm_get_sensor_generic(adev, AMDGPU_PP_SENSOR_MEM_LOAD, &value); if (r) return r; @@ -1501,7 +1504,7 @@ static ssize_t amdgpu_get_vcn_busy_percent(struct device *dev, unsigned int value; int r; - r = amdgpu_hwmon_get_sensor_generic(adev, AMDGPU_PP_SENSOR_VCN_LOAD, &value); + r = amdgpu_pm_get_sensor_generic(adev, AMDGPU_PP_SENSOR_VCN_LOAD, &value); if (r) return r; @@ -1780,7 +1783,7 @@ static int amdgpu_show_powershift_percent(struct device *dev, uint32_t ss_power; int r = 0, i; - r = amdgpu_hwmon_get_sensor_generic(adev, sensor, (void *)&ss_power); + r = amdgpu_pm_get_sensor_generic(adev, sensor, (void *)&ss_power); if (r == -EOPNOTSUPP) { /* sensor not available on dGPU, try to read from APU */ adev = NULL; @@ -1793,7 +1796,7 @@ static int amdgpu_show_powershift_percent(struct device *dev, } mutex_unlock(&mgpu_info.mutex); if (adev) - r = amdgpu_hwmon_get_sensor_generic(adev, sensor, (void *)&ss_power); + r = amdgpu_pm_get_sensor_generic(adev, sensor, (void *)&ss_power); } if (r) @@ -1890,7 +1893,7 @@ out: static int ss_power_attr_update(struct amdgpu_device *adev, struct amdgpu_device_attr *attr, uint32_t mask, enum amdgpu_device_attr_states *states) { - if (!amdgpu_device_supports_smart_shift(adev_to_drm(adev))) + if (!amdgpu_device_supports_smart_shift(adev)) *states = ATTR_STATE_UNSUPPORTED; return 0; @@ -1901,13 +1904,13 @@ static int ss_bias_attr_update(struct amdgpu_device *adev, struct amdgpu_device_ { uint32_t ss_power; - if (!amdgpu_device_supports_smart_shift(adev_to_drm(adev))) + if (!amdgpu_device_supports_smart_shift(adev)) *states = ATTR_STATE_UNSUPPORTED; - else if (amdgpu_hwmon_get_sensor_generic(adev, AMDGPU_PP_SENSOR_SS_APU_SHARE, - (void *)&ss_power)) + else if (amdgpu_pm_get_sensor_generic(adev, AMDGPU_PP_SENSOR_SS_APU_SHARE, + (void *)&ss_power)) *states = ATTR_STATE_UNSUPPORTED; - else if (amdgpu_hwmon_get_sensor_generic(adev, AMDGPU_PP_SENSOR_SS_DGPU_SHARE, - (void *)&ss_power)) + else if (amdgpu_pm_get_sensor_generic(adev, AMDGPU_PP_SENSOR_SS_DGPU_SHARE, + (void *)&ss_power)) *states = ATTR_STATE_UNSUPPORTED; return 0; @@ -2071,6 +2074,265 @@ static int pp_dpm_clk_default_attr_update(struct amdgpu_device *adev, struct amd return 0; } +/** + * DOC: board + * + * Certain SOCs can support various board attributes reporting. 
This is useful + * for user applications to monitor various board related attributes. + * + * The amdgpu driver provides a sysfs API for reporting board attributes. Presently, + * seven types of attributes are reported. Baseboard temperature and + * GPU board temperature are reported as binary files. NPM status, current node power limit, + * max node power limit, node power and global PPT residency are reported as ASCII text files. + * + * .. code-block:: console + * + * hexdump /sys/bus/pci/devices/.../board/baseboard_temp + * + * hexdump /sys/bus/pci/devices/.../board/gpuboard_temp + * + * hexdump /sys/bus/pci/devices/.../board/npm_status + * + * hexdump /sys/bus/pci/devices/.../board/cur_node_power_limit + * + * hexdump /sys/bus/pci/devices/.../board/max_node_power_limit + * + * hexdump /sys/bus/pci/devices/.../board/node_power + * + * hexdump /sys/bus/pci/devices/.../board/global_ppt_resid + */ + +/** + * DOC: baseboard_temp + * + * The amdgpu driver provides a sysfs API for retrieving current baseboard + * temperature metrics data. The file baseboard_temp is used for this. + * Reading the file will dump all the current baseboard temperature metrics data. + */ +static ssize_t amdgpu_get_baseboard_temp_metrics(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct drm_device *ddev = dev_get_drvdata(dev); + struct amdgpu_device *adev = drm_to_adev(ddev); + ssize_t size; + int ret; + + ret = amdgpu_pm_get_access_if_active(adev); + if (ret) + return ret; + + size = amdgpu_dpm_get_temp_metrics(adev, SMU_TEMP_METRIC_BASEBOARD, NULL); + if (size <= 0) + goto out; + if (size >= PAGE_SIZE) { + ret = -ENOSPC; + goto out; + } + + amdgpu_dpm_get_temp_metrics(adev, SMU_TEMP_METRIC_BASEBOARD, buf); + +out: + amdgpu_pm_put_access(adev); + + if (ret) + return ret; + + return size; +} + +/** + * DOC: gpuboard_temp + * + * The amdgpu driver provides a sysfs API for retrieving current gpuboard + * temperature metrics data. The file gpuboard_temp is used for this. + * Reading the file will dump all the current gpuboard temperature metrics data. + */ +static ssize_t amdgpu_get_gpuboard_temp_metrics(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct drm_device *ddev = dev_get_drvdata(dev); + struct amdgpu_device *adev = drm_to_adev(ddev); + ssize_t size; + int ret; + + ret = amdgpu_pm_get_access_if_active(adev); + if (ret) + return ret; + + size = amdgpu_dpm_get_temp_metrics(adev, SMU_TEMP_METRIC_GPUBOARD, NULL); + if (size <= 0) + goto out; + if (size >= PAGE_SIZE) { + ret = -ENOSPC; + goto out; + } + + amdgpu_dpm_get_temp_metrics(adev, SMU_TEMP_METRIC_GPUBOARD, buf); + +out: + amdgpu_pm_put_access(adev); + + if (ret) + return ret; + + return size; +} + +/** + * DOC: cur_node_power_limit + * + * The amdgpu driver provides a sysfs API for retrieving current node power limit. + * The file cur_node_power_limit is used for this. + */ +static ssize_t amdgpu_show_cur_node_power_limit(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct drm_device *ddev = dev_get_drvdata(dev); + struct amdgpu_device *adev = drm_to_adev(ddev); + u32 nplimit; + int r; + + /* get the current node power limit */ + r = amdgpu_pm_get_sensor_generic(adev, AMDGPU_PP_SENSOR_NODEPOWERLIMIT, + (void *)&nplimit); + if (r) + return r; + + return sysfs_emit(buf, "%u\n", nplimit); +} + +/** + * DOC: node_power + * + * The amdgpu driver provides a sysfs API for retrieving current node power. + * The file node_power is used for this.
+ */ +static ssize_t amdgpu_show_node_power(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct drm_device *ddev = dev_get_drvdata(dev); + struct amdgpu_device *adev = drm_to_adev(ddev); + u32 npower; + int r; + + /* get the node power */ + r = amdgpu_pm_get_sensor_generic(adev, AMDGPU_PP_SENSOR_NODEPOWER, + (void *)&npower); + if (r) + return r; + + return sysfs_emit(buf, "%u\n", npower); +} + +/** + * DOC: npm_status + * + * The amdgpu driver provides a sysfs API for retrieving current node power management status. + * The file npm_status is used for this. It shows the status as enabled or disabled based on + * current node power value. If node power is zero, status is disabled else enabled. + */ +static ssize_t amdgpu_show_npm_status(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct drm_device *ddev = dev_get_drvdata(dev); + struct amdgpu_device *adev = drm_to_adev(ddev); + u32 npower; + int r; + + /* get the node power */ + r = amdgpu_pm_get_sensor_generic(adev, AMDGPU_PP_SENSOR_NODEPOWER, + (void *)&npower); + if (r) + return r; + + return sysfs_emit(buf, "%s\n", npower ? "enabled" : "disabled"); +} + +/** + * DOC: global_ppt_resid + * + * The amdgpu driver provides a sysfs API for retrieving global ppt residency. + * The file global_ppt_resid is used for this. + */ +static ssize_t amdgpu_show_global_ppt_resid(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct drm_device *ddev = dev_get_drvdata(dev); + struct amdgpu_device *adev = drm_to_adev(ddev); + u32 gpptresid; + int r; + + /* get the global ppt residency */ + r = amdgpu_pm_get_sensor_generic(adev, AMDGPU_PP_SENSOR_GPPTRESIDENCY, + (void *)&gpptresid); + if (r) + return r; + + return sysfs_emit(buf, "%u\n", gpptresid); +} + +/** + * DOC: max_node_power_limit + * + * The amdgpu driver provides a sysfs API for retrieving maximum node power limit. + * The file max_node_power_limit is used for this. 
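As a usage illustration (not part of the patch), the ASCII attributes described above can be read like any other sysfs file from userspace; the PCI address below is a placeholder for a real device:

    /* Hypothetical userspace reader for the node_power attribute. */
    #include <stdio.h>

    int main(void)
    {
        FILE *f = fopen("/sys/bus/pci/devices/0000:03:00.0/board/node_power", "r");
        unsigned int val;

        if (!f)
            return 1;
        if (fscanf(f, "%u", &val) == 1)
            printf("node power: %u\n", val);
        fclose(f);
        return 0;
    }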
+ */ +static ssize_t amdgpu_show_max_node_power_limit(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct drm_device *ddev = dev_get_drvdata(dev); + struct amdgpu_device *adev = drm_to_adev(ddev); + u32 max_nplimit; + int r; + + /* get the max node power limit */ + r = amdgpu_pm_get_sensor_generic(adev, AMDGPU_PP_SENSOR_MAXNODEPOWERLIMIT, + (void *)&max_nplimit); + if (r) + return r; + + return sysfs_emit(buf, "%u\n", max_nplimit); +} + +static DEVICE_ATTR(baseboard_temp, 0444, amdgpu_get_baseboard_temp_metrics, NULL); +static DEVICE_ATTR(gpuboard_temp, 0444, amdgpu_get_gpuboard_temp_metrics, NULL); +static DEVICE_ATTR(cur_node_power_limit, 0444, amdgpu_show_cur_node_power_limit, NULL); +static DEVICE_ATTR(node_power, 0444, amdgpu_show_node_power, NULL); +static DEVICE_ATTR(global_ppt_resid, 0444, amdgpu_show_global_ppt_resid, NULL); +static DEVICE_ATTR(max_node_power_limit, 0444, amdgpu_show_max_node_power_limit, NULL); +static DEVICE_ATTR(npm_status, 0444, amdgpu_show_npm_status, NULL); + +static struct attribute *board_attrs[] = { + &dev_attr_baseboard_temp.attr, + &dev_attr_gpuboard_temp.attr, + NULL +}; + +static umode_t amdgpu_board_attr_visible(struct kobject *kobj, struct attribute *attr, int n) +{ + struct device *dev = kobj_to_dev(kobj); + struct drm_device *ddev = dev_get_drvdata(dev); + struct amdgpu_device *adev = drm_to_adev(ddev); + + if (attr == &dev_attr_baseboard_temp.attr) { + if (!amdgpu_dpm_is_temp_metrics_supported(adev, SMU_TEMP_METRIC_BASEBOARD)) + return 0; + } + + if (attr == &dev_attr_gpuboard_temp.attr) { + if (!amdgpu_dpm_is_temp_metrics_supported(adev, SMU_TEMP_METRIC_GPUBOARD)) + return 0; + } + + return attr->mode; +} + +const struct attribute_group amdgpu_board_attr_group = { + .name = "board", + .attrs = board_attrs, + .is_visible = amdgpu_board_attr_visible, +}; + /* pm policy attributes */ struct amdgpu_pm_policy_attr { struct device_attribute dev_attr; @@ -2505,18 +2767,18 @@ static ssize_t amdgpu_hwmon_show_temp(struct device *dev, switch (channel) { case PP_TEMP_JUNCTION: /* get current junction temperature */ - r = amdgpu_hwmon_get_sensor_generic(adev, AMDGPU_PP_SENSOR_HOTSPOT_TEMP, - (void *)&temp); + r = amdgpu_pm_get_sensor_generic(adev, AMDGPU_PP_SENSOR_HOTSPOT_TEMP, + (void *)&temp); break; case PP_TEMP_EDGE: /* get current edge temperature */ - r = amdgpu_hwmon_get_sensor_generic(adev, AMDGPU_PP_SENSOR_EDGE_TEMP, - (void *)&temp); + r = amdgpu_pm_get_sensor_generic(adev, AMDGPU_PP_SENSOR_EDGE_TEMP, + (void *)&temp); break; case PP_TEMP_MEM: /* get current memory temperature */ - r = amdgpu_hwmon_get_sensor_generic(adev, AMDGPU_PP_SENSOR_MEM_TEMP, - (void *)&temp); + r = amdgpu_pm_get_sensor_generic(adev, AMDGPU_PP_SENSOR_MEM_TEMP, + (void *)&temp); break; default: r = -EINVAL; @@ -2778,8 +3040,8 @@ static ssize_t amdgpu_hwmon_get_fan1_min(struct device *dev, u32 min_rpm = 0; int r; - r = amdgpu_hwmon_get_sensor_generic(adev, AMDGPU_PP_SENSOR_MIN_FAN_RPM, - (void *)&min_rpm); + r = amdgpu_pm_get_sensor_generic(adev, AMDGPU_PP_SENSOR_MIN_FAN_RPM, + (void *)&min_rpm); if (r) return r; @@ -2795,8 +3057,8 @@ static ssize_t amdgpu_hwmon_get_fan1_max(struct device *dev, u32 max_rpm = 0; int r; - r = amdgpu_hwmon_get_sensor_generic(adev, AMDGPU_PP_SENSOR_MAX_FAN_RPM, - (void *)&max_rpm); + r = amdgpu_pm_get_sensor_generic(adev, AMDGPU_PP_SENSOR_MAX_FAN_RPM, + (void *)&max_rpm); if (r) return r; @@ -2929,8 +3191,8 @@ static ssize_t amdgpu_hwmon_show_vddgfx(struct device *dev, int r; /* get the voltage */ - r = 
amdgpu_hwmon_get_sensor_generic(adev, AMDGPU_PP_SENSOR_VDDGFX, - (void *)&vddgfx); + r = amdgpu_pm_get_sensor_generic(adev, AMDGPU_PP_SENSOR_VDDGFX, + (void *)&vddgfx); if (r) return r; @@ -2946,8 +3208,8 @@ static ssize_t amdgpu_hwmon_show_vddboard(struct device *dev, int r; /* get the voltage */ - r = amdgpu_hwmon_get_sensor_generic(adev, AMDGPU_PP_SENSOR_VDDBOARD, - (void *)&vddboard); + r = amdgpu_pm_get_sensor_generic(adev, AMDGPU_PP_SENSOR_VDDBOARD, + (void *)&vddboard); if (r) return r; @@ -2980,8 +3242,8 @@ static ssize_t amdgpu_hwmon_show_vddnb(struct device *dev, return -EINVAL; /* get the voltage */ - r = amdgpu_hwmon_get_sensor_generic(adev, AMDGPU_PP_SENSOR_VDDNB, - (void *)&vddnb); + r = amdgpu_pm_get_sensor_generic(adev, AMDGPU_PP_SENSOR_VDDNB, + (void *)&vddnb); if (r) return r; @@ -3003,7 +3265,7 @@ static int amdgpu_hwmon_get_power(struct device *dev, u32 query = 0; int r; - r = amdgpu_hwmon_get_sensor_generic(adev, sensor, (void *)&query); + r = amdgpu_pm_get_sensor_generic(adev, sensor, (void *)&query); if (r) return r; @@ -3123,9 +3385,6 @@ static ssize_t amdgpu_hwmon_set_power_cap(struct device *dev, int err; u32 value; - if (amdgpu_sriov_vf(adev)) - return -EINVAL; - err = kstrtou32(buf, 10, &value); if (err) return err; @@ -3156,8 +3415,8 @@ static ssize_t amdgpu_hwmon_show_sclk(struct device *dev, int r; /* get the sclk */ - r = amdgpu_hwmon_get_sensor_generic(adev, AMDGPU_PP_SENSOR_GFX_SCLK, - (void *)&sclk); + r = amdgpu_pm_get_sensor_generic(adev, AMDGPU_PP_SENSOR_GFX_SCLK, + (void *)&sclk); if (r) return r; @@ -3180,8 +3439,8 @@ static ssize_t amdgpu_hwmon_show_mclk(struct device *dev, int r; /* get the sclk */ - r = amdgpu_hwmon_get_sensor_generic(adev, AMDGPU_PP_SENSOR_GFX_MCLK, - (void *)&mclk); + r = amdgpu_pm_get_sensor_generic(adev, AMDGPU_PP_SENSOR_GFX_MCLK, + (void *)&mclk); if (r) return r; @@ -3456,14 +3715,20 @@ static umode_t hwmon_attributes_visible(struct kobject *kobj, effective_mode &= ~S_IWUSR; /* not implemented yet for APUs other than GC 10.3.1 (vangogh) and 9.4.3 */ - if (((adev->family == AMDGPU_FAMILY_SI) || - ((adev->flags & AMD_IS_APU) && (gc_ver != IP_VERSION(10, 3, 1)) && - (gc_ver != IP_VERSION(9, 4, 3) && gc_ver != IP_VERSION(9, 4, 4)))) && - (attr == &sensor_dev_attr_power1_cap_max.dev_attr.attr || - attr == &sensor_dev_attr_power1_cap_min.dev_attr.attr || - attr == &sensor_dev_attr_power1_cap.dev_attr.attr || - attr == &sensor_dev_attr_power1_cap_default.dev_attr.attr)) - return 0; + if (attr == &sensor_dev_attr_power1_cap_max.dev_attr.attr || + attr == &sensor_dev_attr_power1_cap_min.dev_attr.attr || + attr == &sensor_dev_attr_power1_cap.dev_attr.attr || + attr == &sensor_dev_attr_power1_cap_default.dev_attr.attr) { + if (adev->family == AMDGPU_FAMILY_SI || + ((adev->flags & AMD_IS_APU) && gc_ver != IP_VERSION(10, 3, 1) && + (gc_ver != IP_VERSION(9, 4, 3) && gc_ver != IP_VERSION(9, 4, 4))) || + (amdgpu_sriov_vf(adev) && gc_ver == IP_VERSION(11, 0, 3))) + return 0; + } + + if (attr == &sensor_dev_attr_power1_cap.dev_attr.attr && + amdgpu_virt_cap_is_rw(&adev->virt.virt_caps, AMDGPU_VIRT_CAP_POWER_LIMIT)) + effective_mode |= S_IWUSR; /* not implemented yet for APUs having < GC 9.3.0 (Renoir) */ if (((adev->family == AMDGPU_FAMILY_SI) || @@ -3473,10 +3738,12 @@ static umode_t hwmon_attributes_visible(struct kobject *kobj, /* not all products support both average and instantaneous */ if (attr == &sensor_dev_attr_power1_average.dev_attr.attr && - amdgpu_hwmon_get_sensor_generic(adev, AMDGPU_PP_SENSOR_GPU_AVG_POWER, (void *)&tmp) == 
-EOPNOTSUPP) + amdgpu_pm_get_sensor_generic(adev, AMDGPU_PP_SENSOR_GPU_AVG_POWER, + (void *)&tmp) == -EOPNOTSUPP) return 0; if (attr == &sensor_dev_attr_power1_input.dev_attr.attr && - amdgpu_hwmon_get_sensor_generic(adev, AMDGPU_PP_SENSOR_GPU_INPUT_POWER, (void *)&tmp) == -EOPNOTSUPP) + amdgpu_pm_get_sensor_generic(adev, AMDGPU_PP_SENSOR_GPU_INPUT_POWER, + (void *)&tmp) == -EOPNOTSUPP) return 0; /* hide max/min values if we can't both query and manage the fan */ @@ -3515,8 +3782,8 @@ static umode_t hwmon_attributes_visible(struct kobject *kobj, /* only few boards support vddboard */ if ((attr == &sensor_dev_attr_in2_input.dev_attr.attr || attr == &sensor_dev_attr_in2_label.dev_attr.attr) && - amdgpu_hwmon_get_sensor_generic(adev, AMDGPU_PP_SENSOR_VDDBOARD, - (void *)&tmp) == -EOPNOTSUPP) + amdgpu_pm_get_sensor_generic(adev, AMDGPU_PP_SENSOR_VDDBOARD, + (void *)&tmp) == -EOPNOTSUPP) return 0; /* no mclk on APUs other than gc 9,4,3*/ @@ -3645,6 +3912,9 @@ static int parse_input_od_command_lines(const char *buf, return -EINVAL; parameter_size++; + if (!tmp_str) + break; + while (isspace(*tmp_str)) tmp_str++; } @@ -4395,6 +4665,7 @@ int amdgpu_pm_sysfs_init(struct amdgpu_device *adev) { enum amdgpu_sriov_vf_mode mode; uint32_t mask = 0; + uint32_t tmp; int ret; if (adev->pm.sysfs_initialized) @@ -4456,6 +4727,28 @@ int amdgpu_pm_sysfs_init(struct amdgpu_device *adev) goto err_out0; } + if (amdgpu_dpm_is_temp_metrics_supported(adev, SMU_TEMP_METRIC_GPUBOARD)) { + ret = devm_device_add_group(adev->dev, + &amdgpu_board_attr_group); + if (ret) + goto err_out0; + if (amdgpu_pm_get_sensor_generic(adev, AMDGPU_PP_SENSOR_MAXNODEPOWERLIMIT, + (void *)&tmp) != -EOPNOTSUPP) { + sysfs_add_file_to_group(&adev->dev->kobj, + &dev_attr_cur_node_power_limit.attr, + amdgpu_board_attr_group.name); + sysfs_add_file_to_group(&adev->dev->kobj, &dev_attr_node_power.attr, + amdgpu_board_attr_group.name); + sysfs_add_file_to_group(&adev->dev->kobj, &dev_attr_global_ppt_resid.attr, + amdgpu_board_attr_group.name); + sysfs_add_file_to_group(&adev->dev->kobj, + &dev_attr_max_node_power_limit.attr, + amdgpu_board_attr_group.name); + sysfs_add_file_to_group(&adev->dev->kobj, &dev_attr_npm_status.attr, + amdgpu_board_attr_group.name); + } + } + adev->pm.sysfs_initialized = true; return 0; diff --git a/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h b/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h index 768317ee1486..65c1d98af26c 100644 --- a/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h +++ b/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h @@ -263,10 +263,6 @@ struct amdgpu_dpm { u32 voltage_response_time; u32 backbias_response_time; void *priv; - u32 new_active_crtcs; - int new_active_crtc_count; - u32 current_active_crtcs; - int current_active_crtc_count; struct amdgpu_dpm_dynamic_state dyn_state; struct amdgpu_dpm_fan fan; u32 tdp_limit; @@ -526,6 +522,8 @@ int amdgpu_dpm_set_power_profile_mode(struct amdgpu_device *adev, int amdgpu_dpm_get_gpu_metrics(struct amdgpu_device *adev, void **table); ssize_t amdgpu_dpm_get_xcp_metrics(struct amdgpu_device *adev, int xcp_id, void *table); +ssize_t amdgpu_dpm_get_temp_metrics(struct amdgpu_device *adev, + enum smu_temp_metric_type type, void *table); /** * @get_pm_metrics: Get one snapshot of power management metrics from PMFW. 
The @@ -613,5 +611,8 @@ ssize_t amdgpu_dpm_get_pm_policy_info(struct amdgpu_device *adev, int amdgpu_dpm_reset_sdma(struct amdgpu_device *adev, uint32_t inst_mask); bool amdgpu_dpm_reset_sdma_is_supported(struct amdgpu_device *adev); int amdgpu_dpm_reset_vcn(struct amdgpu_device *adev, uint32_t inst_mask); +bool amdgpu_dpm_reset_vcn_is_supported(struct amdgpu_device *adev); +bool amdgpu_dpm_is_temp_metrics_supported(struct amdgpu_device *adev, + enum smu_temp_metric_type type); #endif diff --git a/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm_internal.h b/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm_internal.h index 5c2a89f0d5d5..cc6d7ba040e9 100644 --- a/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm_internal.h +++ b/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm_internal.h @@ -23,10 +23,6 @@ #ifndef __AMDGPU_DPM_INTERNAL_H__ #define __AMDGPU_DPM_INTERNAL_H__ -void amdgpu_dpm_get_active_displays(struct amdgpu_device *adev); - -u32 amdgpu_dpm_get_vblank_time(struct amdgpu_device *adev); - -u32 amdgpu_dpm_get_vrefresh(struct amdgpu_device *adev); +void amdgpu_dpm_get_display_cfg(struct amdgpu_device *adev); #endif diff --git a/drivers/gpu/drm/amd/pm/legacy-dpm/kv_dpm.c b/drivers/gpu/drm/amd/pm/legacy-dpm/kv_dpm.c index 34e71727b27d..33eb85dd68e9 100644 --- a/drivers/gpu/drm/amd/pm/legacy-dpm/kv_dpm.c +++ b/drivers/gpu/drm/amd/pm/legacy-dpm/kv_dpm.c @@ -1242,7 +1242,7 @@ static void kv_dpm_enable_bapm(void *handle, bool enable) if (pi->bapm_enable) { ret = amdgpu_kv_smc_bapm_enable(adev, enable); if (ret) - DRM_ERROR("amdgpu_kv_smc_bapm_enable failed\n"); + drm_err(adev_to_drm(adev), "amdgpu_kv_smc_bapm_enable failed\n"); } } @@ -1266,40 +1266,40 @@ static int kv_dpm_enable(struct amdgpu_device *adev) ret = kv_process_firmware_header(adev); if (ret) { - DRM_ERROR("kv_process_firmware_header failed\n"); + drm_err(adev_to_drm(adev), "kv_process_firmware_header failed\n"); return ret; } kv_init_fps_limits(adev); kv_init_graphics_levels(adev); ret = kv_program_bootup_state(adev); if (ret) { - DRM_ERROR("kv_program_bootup_state failed\n"); + drm_err(adev_to_drm(adev), "kv_program_bootup_state failed\n"); return ret; } kv_calculate_dfs_bypass_settings(adev); ret = kv_upload_dpm_settings(adev); if (ret) { - DRM_ERROR("kv_upload_dpm_settings failed\n"); + drm_err(adev_to_drm(adev), "kv_upload_dpm_settings failed\n"); return ret; } ret = kv_populate_uvd_table(adev); if (ret) { - DRM_ERROR("kv_populate_uvd_table failed\n"); + drm_err(adev_to_drm(adev), "kv_populate_uvd_table failed\n"); return ret; } ret = kv_populate_vce_table(adev); if (ret) { - DRM_ERROR("kv_populate_vce_table failed\n"); + drm_err(adev_to_drm(adev), "kv_populate_vce_table failed\n"); return ret; } ret = kv_populate_samu_table(adev); if (ret) { - DRM_ERROR("kv_populate_samu_table failed\n"); + drm_err(adev_to_drm(adev), "kv_populate_samu_table failed\n"); return ret; } ret = kv_populate_acp_table(adev); if (ret) { - DRM_ERROR("kv_populate_acp_table failed\n"); + drm_err(adev_to_drm(adev), "kv_populate_acp_table failed\n"); return ret; } kv_program_vc(adev); @@ -1310,39 +1310,39 @@ static int kv_dpm_enable(struct amdgpu_device *adev) if (pi->enable_auto_thermal_throttling) { ret = kv_enable_auto_thermal_throttling(adev); if (ret) { - DRM_ERROR("kv_enable_auto_thermal_throttling failed\n"); + drm_err(adev_to_drm(adev), "kv_enable_auto_thermal_throttling failed\n"); return ret; } } ret = kv_enable_dpm_voltage_scaling(adev); if (ret) { - DRM_ERROR("kv_enable_dpm_voltage_scaling failed\n"); + drm_err(adev_to_drm(adev), "kv_enable_dpm_voltage_scaling failed\n"); return 
ret; } ret = kv_set_dpm_interval(adev); if (ret) { - DRM_ERROR("kv_set_dpm_interval failed\n"); + drm_err(adev_to_drm(adev), "kv_set_dpm_interval failed\n"); return ret; } ret = kv_set_dpm_boot_state(adev); if (ret) { - DRM_ERROR("kv_set_dpm_boot_state failed\n"); + drm_err(adev_to_drm(adev), "kv_set_dpm_boot_state failed\n"); return ret; } ret = kv_enable_ulv(adev, true); if (ret) { - DRM_ERROR("kv_enable_ulv failed\n"); + drm_err(adev_to_drm(adev), "kv_enable_ulv failed\n"); return ret; } kv_start_dpm(adev); ret = kv_enable_didt(adev, true); if (ret) { - DRM_ERROR("kv_enable_didt failed\n"); + drm_err(adev_to_drm(adev), "kv_enable_didt failed\n"); return ret; } ret = kv_enable_smc_cac(adev, true); if (ret) { - DRM_ERROR("kv_enable_smc_cac failed\n"); + drm_err(adev_to_drm(adev), "kv_enable_smc_cac failed\n"); return ret; } @@ -1350,7 +1350,7 @@ static int kv_dpm_enable(struct amdgpu_device *adev) ret = amdgpu_kv_smc_bapm_enable(adev, false); if (ret) { - DRM_ERROR("amdgpu_kv_smc_bapm_enable failed\n"); + drm_err(adev_to_drm(adev), "amdgpu_kv_smc_bapm_enable failed\n"); return ret; } @@ -1358,7 +1358,7 @@ static int kv_dpm_enable(struct amdgpu_device *adev) kv_is_internal_thermal_sensor(adev->pm.int_thermal_type)) { ret = kv_set_thermal_temperature_range(adev, KV_TEMP_RANGE_MIN, KV_TEMP_RANGE_MAX); if (ret) { - DRM_ERROR("kv_set_thermal_temperature_range failed\n"); + drm_err(adev_to_drm(adev), "kv_set_thermal_temperature_range failed\n"); return ret; } amdgpu_irq_get(adev, &adev->pm.dpm.thermal.irq, @@ -1382,7 +1382,7 @@ static void kv_dpm_disable(struct amdgpu_device *adev) err = amdgpu_kv_smc_bapm_enable(adev, false); if (err) - DRM_ERROR("amdgpu_kv_smc_bapm_enable failed\n"); + drm_err(adev_to_drm(adev), "amdgpu_kv_smc_bapm_enable failed\n"); if (adev->asic_type == CHIP_MULLINS) kv_enable_nb_dpm(adev, false); @@ -1920,7 +1920,7 @@ static int kv_dpm_set_power_state(void *handle) if (pi->bapm_enable) { ret = amdgpu_kv_smc_bapm_enable(adev, adev->pm.ac_power); if (ret) { - DRM_ERROR("amdgpu_kv_smc_bapm_enable failed\n"); + drm_err(adev_to_drm(adev), "amdgpu_kv_smc_bapm_enable failed\n"); return ret; } } @@ -1931,7 +1931,7 @@ static int kv_dpm_set_power_state(void *handle) kv_update_dfs_bypass_settings(adev, new_ps); ret = kv_calculate_ds_divider(adev); if (ret) { - DRM_ERROR("kv_calculate_ds_divider failed\n"); + drm_err(adev_to_drm(adev), "kv_calculate_ds_divider failed\n"); return ret; } kv_calculate_nbps_level_settings(adev); @@ -1947,7 +1947,7 @@ static int kv_dpm_set_power_state(void *handle) ret = kv_update_vce_dpm(adev, new_ps, old_ps); if (ret) { - DRM_ERROR("kv_update_vce_dpm failed\n"); + drm_err(adev_to_drm(adev), "kv_update_vce_dpm failed\n"); return ret; } kv_update_sclk_t(adev); @@ -1960,7 +1960,7 @@ static int kv_dpm_set_power_state(void *handle) kv_update_dfs_bypass_settings(adev, new_ps); ret = kv_calculate_ds_divider(adev); if (ret) { - DRM_ERROR("kv_calculate_ds_divider failed\n"); + drm_err(adev_to_drm(adev), "kv_calculate_ds_divider failed\n"); return ret; } kv_calculate_nbps_level_settings(adev); @@ -1972,7 +1972,7 @@ static int kv_dpm_set_power_state(void *handle) kv_set_enabled_levels(adev); ret = kv_update_vce_dpm(adev, new_ps, old_ps); if (ret) { - DRM_ERROR("kv_update_vce_dpm failed\n"); + drm_err(adev_to_drm(adev), "kv_update_vce_dpm failed\n"); return ret; } kv_update_acp_boot_level(adev); @@ -2299,7 +2299,7 @@ static void kv_apply_state_adjust_rules(struct amdgpu_device *adev, if (pi->sys_info.nb_dpm_enable) { force_high = (mclk >= 
pi->sys_info.nbp_memory_clock[3]) || - pi->video_start || (adev->pm.dpm.new_active_crtc_count >= 3) || + pi->video_start || (adev->pm.pm_display_cfg.num_display >= 3) || pi->disable_nb_ps3_in_battery; ps->dpm0_pg_nb_ps_lo = force_high ? 0x2 : 0x3; ps->dpm0_pg_nb_ps_hi = 0x2; @@ -2358,7 +2358,7 @@ static int kv_calculate_nbps_level_settings(struct amdgpu_device *adev) return 0; force_high = ((mclk >= pi->sys_info.nbp_memory_clock[3]) || - (adev->pm.dpm.new_active_crtc_count >= 3) || pi->video_start); + (adev->pm.pm_display_cfg.num_display >= 3) || pi->video_start); if (force_high) { for (i = pi->lowest_valid; i <= pi->highest_valid; i++) @@ -2521,7 +2521,7 @@ static int kv_set_thermal_temperature_range(struct amdgpu_device *adev, if (high_temp > max_temp) high_temp = max_temp; if (high_temp < low_temp) { - DRM_ERROR("invalid thermal range: %d - %d\n", low_temp, high_temp); + drm_err(adev_to_drm(adev), "invalid thermal range: %d - %d\n", low_temp, high_temp); return -EINVAL; } @@ -2563,7 +2563,7 @@ static int kv_parse_sys_info_table(struct amdgpu_device *adev) data_offset); if (crev != 8) { - DRM_ERROR("Unsupported IGP table: %d %d\n", frev, crev); + drm_err(adev_to_drm(adev), "Unsupported IGP table: %d %d\n", frev, crev); return -EINVAL; } pi->sys_info.bootup_sclk = le32_to_cpu(igp_info->info_8.ulBootUpEngineClock); @@ -2579,7 +2579,7 @@ static int kv_parse_sys_info_table(struct amdgpu_device *adev) else pi->sys_info.htc_hyst_lmt = igp_info->info_8.ucHtcHystLmt; if (pi->sys_info.htc_tmp_lmt <= pi->sys_info.htc_hyst_lmt) { - DRM_ERROR("The htcTmpLmt should be larger than htcHystLmt.\n"); + drm_err(adev_to_drm(adev), "The htcTmpLmt should be larger than htcHystLmt.\n"); } if (le32_to_cpu(igp_info->info_8.ulSystemConfig) & (1 << 3)) @@ -2886,16 +2886,18 @@ kv_dpm_print_power_state(void *handle, void *request_ps) struct kv_ps *ps = kv_get_ps(rps); struct amdgpu_device *adev = (struct amdgpu_device *)handle; - amdgpu_dpm_print_class_info(rps->class, rps->class2); - amdgpu_dpm_print_cap_info(rps->caps); - printk("\tuvd vclk: %d dclk: %d\n", rps->vclk, rps->dclk); + amdgpu_dpm_dbg_print_class_info(adev, rps->class, rps->class2); + amdgpu_dpm_dbg_print_cap_info(adev, rps->caps); + drm_dbg(adev_to_drm(adev), "vclk: %d, dclk: %d\n", + rps->vclk, rps->dclk); for (i = 0; i < ps->num_levels; i++) { struct kv_pl *pl = &ps->levels[i]; - printk("\t\tpower level %d sclk: %u vddc: %u\n", - i, pl->sclk, - kv_convert_8bit_index_to_voltage(adev, pl->vddc_index)); + drm_dbg(adev_to_drm(adev), + "power level %d sclk: %u vddc: %u\n", + i, pl->sclk, + kv_convert_8bit_index_to_voltage(adev, pl->vddc_index)); } - amdgpu_dpm_print_ps_status(adev, rps); + amdgpu_dpm_dbg_print_ps_status(adev, rps); } static void kv_dpm_fini(struct amdgpu_device *adev) @@ -3013,13 +3015,13 @@ static int kv_dpm_sw_init(struct amdgpu_ip_block *ip_block) adev->pm.dpm.current_ps = adev->pm.dpm.requested_ps = adev->pm.dpm.boot_ps; if (amdgpu_dpm == 1) amdgpu_pm_print_power_states(adev); - DRM_INFO("amdgpu: dpm initialized\n"); + drm_info(adev_to_drm(adev), "dpm initialized\n"); return 0; dpm_failed: kv_dpm_fini(adev); - DRM_ERROR("amdgpu: dpm initialization failed\n"); + drm_err(adev_to_drm(adev), "dpm initialization failed: %d\n", ret); return ret; } diff --git a/drivers/gpu/drm/amd/pm/legacy-dpm/legacy_dpm.c b/drivers/gpu/drm/amd/pm/legacy-dpm/legacy_dpm.c index c7518b13e787..c7ed0b457129 100644 --- a/drivers/gpu/drm/amd/pm/legacy-dpm/legacy_dpm.c +++ b/drivers/gpu/drm/amd/pm/legacy-dpm/legacy_dpm.c @@ -47,7 +47,7 @@ #define 
amdgpu_dpm_check_state_equal(adev, cps, rps, equal) \ ((adev)->powerplay.pp_funcs->check_state_equal((adev)->powerplay.pp_handle, (cps), (rps), (equal))) -void amdgpu_dpm_print_class_info(u32 class, u32 class2) +void amdgpu_dpm_dbg_print_class_info(struct amdgpu_device *adev, u32 class, u32 class2) { const char *s; @@ -66,71 +66,45 @@ void amdgpu_dpm_print_class_info(u32 class, u32 class2) s = "performance"; break; } - printk("\tui class: %s\n", s); - printk("\tinternal class:"); + drm_dbg(adev_to_drm(adev), "\tui class: %s\n", s); if (((class & ~ATOM_PPLIB_CLASSIFICATION_UI_MASK) == 0) && (class2 == 0)) - pr_cont(" none"); - else { - if (class & ATOM_PPLIB_CLASSIFICATION_BOOT) - pr_cont(" boot"); - if (class & ATOM_PPLIB_CLASSIFICATION_THERMAL) - pr_cont(" thermal"); - if (class & ATOM_PPLIB_CLASSIFICATION_LIMITEDPOWERSOURCE) - pr_cont(" limited_pwr"); - if (class & ATOM_PPLIB_CLASSIFICATION_REST) - pr_cont(" rest"); - if (class & ATOM_PPLIB_CLASSIFICATION_FORCED) - pr_cont(" forced"); - if (class & ATOM_PPLIB_CLASSIFICATION_3DPERFORMANCE) - pr_cont(" 3d_perf"); - if (class & ATOM_PPLIB_CLASSIFICATION_OVERDRIVETEMPLATE) - pr_cont(" ovrdrv"); - if (class & ATOM_PPLIB_CLASSIFICATION_UVDSTATE) - pr_cont(" uvd"); - if (class & ATOM_PPLIB_CLASSIFICATION_3DLOW) - pr_cont(" 3d_low"); - if (class & ATOM_PPLIB_CLASSIFICATION_ACPI) - pr_cont(" acpi"); - if (class & ATOM_PPLIB_CLASSIFICATION_HD2STATE) - pr_cont(" uvd_hd2"); - if (class & ATOM_PPLIB_CLASSIFICATION_HDSTATE) - pr_cont(" uvd_hd"); - if (class & ATOM_PPLIB_CLASSIFICATION_SDSTATE) - pr_cont(" uvd_sd"); - if (class2 & ATOM_PPLIB_CLASSIFICATION2_LIMITEDPOWERSOURCE_2) - pr_cont(" limited_pwr2"); - if (class2 & ATOM_PPLIB_CLASSIFICATION2_ULV) - pr_cont(" ulv"); - if (class2 & ATOM_PPLIB_CLASSIFICATION2_MVC) - pr_cont(" uvd_mvc"); - } - pr_cont("\n"); + drm_dbg(adev_to_drm(adev), "\tinternal class: none\n"); + else + drm_dbg(adev_to_drm(adev), "\tinternal class: %s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n", + (class & ATOM_PPLIB_CLASSIFICATION_BOOT) ? " boot" : "", + (class & ATOM_PPLIB_CLASSIFICATION_THERMAL) ? " thermal" : "", + (class & ATOM_PPLIB_CLASSIFICATION_LIMITEDPOWERSOURCE) ? " limited_pwr" : "", + (class & ATOM_PPLIB_CLASSIFICATION_REST) ? " rest" : "", + (class & ATOM_PPLIB_CLASSIFICATION_FORCED) ? " forced" : "", + (class & ATOM_PPLIB_CLASSIFICATION_3DPERFORMANCE) ? " 3d_perf" : "", + (class & ATOM_PPLIB_CLASSIFICATION_OVERDRIVETEMPLATE) ? " ovrdrv" : "", + (class & ATOM_PPLIB_CLASSIFICATION_UVDSTATE) ? " uvd" : "", + (class & ATOM_PPLIB_CLASSIFICATION_3DLOW) ? " 3d_low" : "", + (class & ATOM_PPLIB_CLASSIFICATION_ACPI) ? " acpi" : "", + (class & ATOM_PPLIB_CLASSIFICATION_HD2STATE) ? " uvd_hd2" : "", + (class & ATOM_PPLIB_CLASSIFICATION_HDSTATE) ? " uvd_hd" : "", + (class & ATOM_PPLIB_CLASSIFICATION_SDSTATE) ? " uvd_sd" : "", + (class2 & ATOM_PPLIB_CLASSIFICATION2_LIMITEDPOWERSOURCE_2) ? " limited_pwr2" : "", + (class2 & ATOM_PPLIB_CLASSIFICATION2_ULV) ? " ulv" : "", + (class2 & ATOM_PPLIB_CLASSIFICATION2_MVC) ? " uvd_mvc" : ""); } -void amdgpu_dpm_print_cap_info(u32 caps) +void amdgpu_dpm_dbg_print_cap_info(struct amdgpu_device *adev, u32 caps) { - printk("\tcaps:"); - if (caps & ATOM_PPLIB_SINGLE_DISPLAY_ONLY) - pr_cont(" single_disp"); - if (caps & ATOM_PPLIB_SUPPORTS_VIDEO_PLAYBACK) - pr_cont(" video"); - if (caps & ATOM_PPLIB_DISALLOW_ON_DC) - pr_cont(" no_dc"); - pr_cont("\n"); + drm_dbg(adev_to_drm(adev), "\tcaps: %s%s%s\n", + (caps & ATOM_PPLIB_SINGLE_DISPLAY_ONLY) ? 
" single_disp" : "", + (caps & ATOM_PPLIB_SUPPORTS_VIDEO_PLAYBACK) ? " video" : "", + (caps & ATOM_PPLIB_DISALLOW_ON_DC) ? " no_dc" : ""); } -void amdgpu_dpm_print_ps_status(struct amdgpu_device *adev, +void amdgpu_dpm_dbg_print_ps_status(struct amdgpu_device *adev, struct amdgpu_ps *rps) { - printk("\tstatus:"); - if (rps == adev->pm.dpm.current_ps) - pr_cont(" c"); - if (rps == adev->pm.dpm.requested_ps) - pr_cont(" r"); - if (rps == adev->pm.dpm.boot_ps) - pr_cont(" b"); - pr_cont("\n"); + drm_dbg(adev_to_drm(adev), "\tstatus:%s%s%s\n", + rps == adev->pm.dpm.current_ps ? " c" : "", + rps == adev->pm.dpm.requested_ps ? " r" : "", + rps == adev->pm.dpm.boot_ps ? " b" : ""); } void amdgpu_pm_print_power_states(struct amdgpu_device *adev) @@ -699,64 +673,64 @@ void amdgpu_add_thermal_controller(struct amdgpu_device *adev) adev->pm.fan_max_rpm = controller->ucFanMaxRPM; } if (controller->ucType == ATOM_PP_THERMALCONTROLLER_RV6xx) { - DRM_INFO("Internal thermal controller %s fan control\n", + drm_info(adev_to_drm(adev), "Internal thermal controller %s fan control\n", (controller->ucFanParameters & ATOM_PP_FANPARAMETERS_NOFAN) ? "without" : "with"); adev->pm.int_thermal_type = THERMAL_TYPE_RV6XX; } else if (controller->ucType == ATOM_PP_THERMALCONTROLLER_RV770) { - DRM_INFO("Internal thermal controller %s fan control\n", + drm_info(adev_to_drm(adev), "Internal thermal controller %s fan control\n", (controller->ucFanParameters & ATOM_PP_FANPARAMETERS_NOFAN) ? "without" : "with"); adev->pm.int_thermal_type = THERMAL_TYPE_RV770; } else if (controller->ucType == ATOM_PP_THERMALCONTROLLER_EVERGREEN) { - DRM_INFO("Internal thermal controller %s fan control\n", + drm_info(adev_to_drm(adev), "Internal thermal controller %s fan control\n", (controller->ucFanParameters & ATOM_PP_FANPARAMETERS_NOFAN) ? "without" : "with"); adev->pm.int_thermal_type = THERMAL_TYPE_EVERGREEN; } else if (controller->ucType == ATOM_PP_THERMALCONTROLLER_SUMO) { - DRM_INFO("Internal thermal controller %s fan control\n", + drm_info(adev_to_drm(adev), "Internal thermal controller %s fan control\n", (controller->ucFanParameters & ATOM_PP_FANPARAMETERS_NOFAN) ? "without" : "with"); adev->pm.int_thermal_type = THERMAL_TYPE_SUMO; } else if (controller->ucType == ATOM_PP_THERMALCONTROLLER_NISLANDS) { - DRM_INFO("Internal thermal controller %s fan control\n", + drm_info(adev_to_drm(adev), "Internal thermal controller %s fan control\n", (controller->ucFanParameters & ATOM_PP_FANPARAMETERS_NOFAN) ? "without" : "with"); adev->pm.int_thermal_type = THERMAL_TYPE_NI; } else if (controller->ucType == ATOM_PP_THERMALCONTROLLER_SISLANDS) { - DRM_INFO("Internal thermal controller %s fan control\n", + drm_info(adev_to_drm(adev), "Internal thermal controller %s fan control\n", (controller->ucFanParameters & ATOM_PP_FANPARAMETERS_NOFAN) ? "without" : "with"); adev->pm.int_thermal_type = THERMAL_TYPE_SI; } else if (controller->ucType == ATOM_PP_THERMALCONTROLLER_CISLANDS) { - DRM_INFO("Internal thermal controller %s fan control\n", + drm_info(adev_to_drm(adev), "Internal thermal controller %s fan control\n", (controller->ucFanParameters & ATOM_PP_FANPARAMETERS_NOFAN) ? "without" : "with"); adev->pm.int_thermal_type = THERMAL_TYPE_CI; } else if (controller->ucType == ATOM_PP_THERMALCONTROLLER_KAVERI) { - DRM_INFO("Internal thermal controller %s fan control\n", + drm_info(adev_to_drm(adev), "Internal thermal controller %s fan control\n", (controller->ucFanParameters & ATOM_PP_FANPARAMETERS_NOFAN) ? 
"without" : "with"); adev->pm.int_thermal_type = THERMAL_TYPE_KV; } else if (controller->ucType == ATOM_PP_THERMALCONTROLLER_EXTERNAL_GPIO) { - DRM_INFO("External GPIO thermal controller %s fan control\n", + drm_info(adev_to_drm(adev), "External GPIO thermal controller %s fan control\n", (controller->ucFanParameters & ATOM_PP_FANPARAMETERS_NOFAN) ? "without" : "with"); adev->pm.int_thermal_type = THERMAL_TYPE_EXTERNAL_GPIO; } else if (controller->ucType == ATOM_PP_THERMALCONTROLLER_ADT7473_WITH_INTERNAL) { - DRM_INFO("ADT7473 with internal thermal controller %s fan control\n", + drm_info(adev_to_drm(adev), "ADT7473 with internal thermal controller %s fan control\n", (controller->ucFanParameters & ATOM_PP_FANPARAMETERS_NOFAN) ? "without" : "with"); adev->pm.int_thermal_type = THERMAL_TYPE_ADT7473_WITH_INTERNAL; } else if (controller->ucType == ATOM_PP_THERMALCONTROLLER_EMC2103_WITH_INTERNAL) { - DRM_INFO("EMC2103 with internal thermal controller %s fan control\n", + drm_info(adev_to_drm(adev), "EMC2103 with internal thermal controller %s fan control\n", (controller->ucFanParameters & ATOM_PP_FANPARAMETERS_NOFAN) ? "without" : "with"); adev->pm.int_thermal_type = THERMAL_TYPE_EMC2103_WITH_INTERNAL; } else if (controller->ucType < ARRAY_SIZE(pp_lib_thermal_controller_names)) { - DRM_INFO("Possible %s thermal controller at 0x%02x %s fan control\n", + drm_info(adev_to_drm(adev), "Possible %s thermal controller at 0x%02x %s fan control\n", pp_lib_thermal_controller_names[controller->ucType], controller->ucI2cAddress >> 1, (controller->ucFanParameters & @@ -772,7 +746,7 @@ void amdgpu_add_thermal_controller(struct amdgpu_device *adev) i2c_new_client_device(&adev->pm.i2c_bus->adapter, &info); } } else { - DRM_INFO("Unknown thermal controller type %d at 0x%02x %s fan control\n", + drm_info(adev_to_drm(adev), "Unknown thermal controller type %d at 0x%02x %s fan control\n", controller->ucType, controller->ucI2cAddress >> 1, (controller->ucFanParameters & @@ -797,8 +771,7 @@ static struct amdgpu_ps *amdgpu_dpm_pick_power_state(struct amdgpu_device *adev, int i; struct amdgpu_ps *ps; u32 ui_class; - bool single_display = (adev->pm.dpm.new_active_crtc_count < 2) ? 
- true : false; + bool single_display = adev->pm.pm_display_cfg.num_display < 2; /* check if the vblank period is too short to adjust the mclk */ if (single_display && adev->powerplay.pp_funcs->vblank_too_short) { @@ -943,9 +916,9 @@ static int amdgpu_dpm_change_power_state_locked(struct amdgpu_device *adev) return -EINVAL; if (amdgpu_dpm == 1 && pp_funcs->print_power_state) { - printk("switching from power state:\n"); + drm_dbg(adev_to_drm(adev), "switching from power state\n"); amdgpu_dpm_print_power_state(adev, adev->pm.dpm.current_ps); - printk("switching to power state:\n"); + drm_dbg(adev_to_drm(adev), "switching to power state\n"); amdgpu_dpm_print_power_state(adev, adev->pm.dpm.requested_ps); } @@ -971,9 +944,6 @@ static int amdgpu_dpm_change_power_state_locked(struct amdgpu_device *adev) amdgpu_dpm_post_set_power_state(adev); - adev->pm.dpm.current_active_crtcs = adev->pm.dpm.new_active_crtcs; - adev->pm.dpm.current_active_crtc_count = adev->pm.dpm.new_active_crtc_count; - if (pp_funcs->force_performance_level) { if (adev->pm.dpm.thermal_active) { enum amd_dpm_forced_level level = adev->pm.dpm.forced_level; @@ -994,7 +964,8 @@ void amdgpu_legacy_dpm_compute_clocks(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - amdgpu_dpm_get_active_displays(adev); + if (!adev->dc_enabled) + amdgpu_dpm_get_display_cfg(adev); amdgpu_dpm_change_power_state_locked(adev); } diff --git a/drivers/gpu/drm/amd/pm/legacy-dpm/legacy_dpm.h b/drivers/gpu/drm/amd/pm/legacy-dpm/legacy_dpm.h index 93bd3973330c..7120eef30509 100644 --- a/drivers/gpu/drm/amd/pm/legacy-dpm/legacy_dpm.h +++ b/drivers/gpu/drm/amd/pm/legacy-dpm/legacy_dpm.h @@ -23,10 +23,9 @@ #ifndef __LEGACY_DPM_H__ #define __LEGACY_DPM_H__ -void amdgpu_dpm_print_class_info(u32 class, u32 class2); -void amdgpu_dpm_print_cap_info(u32 caps); -void amdgpu_dpm_print_ps_status(struct amdgpu_device *adev, - struct amdgpu_ps *rps); +void amdgpu_dpm_dbg_print_class_info(struct amdgpu_device *adev, u32 class, u32 class2); +void amdgpu_dpm_dbg_print_cap_info(struct amdgpu_device *adev, u32 caps); +void amdgpu_dpm_dbg_print_ps_status(struct amdgpu_device *adev, struct amdgpu_ps *rps); int amdgpu_get_platform_caps(struct amdgpu_device *adev); int amdgpu_parse_extended_power_table(struct amdgpu_device *adev); void amdgpu_free_extended_power_table(struct amdgpu_device *adev); diff --git a/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c b/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c index 4c0e976004ba..3a9522c17fee 100644 --- a/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c +++ b/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c @@ -3081,11 +3081,17 @@ static int si_get_vce_clock_voltage(struct amdgpu_device *adev, static bool si_dpm_vblank_too_short(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - u32 vblank_time = amdgpu_dpm_get_vblank_time(adev); + u32 vblank_time = adev->pm.pm_display_cfg.min_vblank_time; /* we never hit the non-gddr5 limit so disable it */ u32 switch_limit = adev->gmc.vram_type == AMDGPU_VRAM_TYPE_GDDR5 ? 450 : 0; - if (vblank_time < switch_limit) + /* Consider zero vblank time too short and disable MCLK switching. + * Note that the vblank time is set to maximum when no displays are attached, + * so we'll still enable MCLK switching in that case. 
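For a sense of scale (illustrative numbers assuming standard CEA 1080p60 timing, not taken from this patch): with htotal = 2200, 45 blanking lines and a 148500 kHz pixel clock, amdgpu_dpm_get_display_cfg() above computes

    vblank_in_pixels = 2200 * 45 = 99000
    vblank_time_us   = 99000 * 1000 / 148500 ~= 666 us

which is well above the 450 us GDDR5 switch limit, so MCLK switching stays enabled. A display whose vblank time was clamped to zero by the over-120 Hz workaround in amdgpu_dpm_get_display_cfg() is treated as too short here.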
+ */ + if (vblank_time == 0) + return true; + else if (vblank_time < switch_limit) return true; else return false; @@ -3441,6 +3447,8 @@ static void rv770_get_engine_memory_ss(struct amdgpu_device *adev) static void si_apply_state_adjust_rules(struct amdgpu_device *adev, struct amdgpu_ps *rps) { + const struct amd_pp_display_configuration *display_cfg = + &adev->pm.pm_display_cfg; struct si_ps *ps = si_get_ps(rps); struct amdgpu_clock_and_voltage_limits *max_limits; bool disable_mclk_switching = false; @@ -3449,6 +3457,7 @@ static void si_apply_state_adjust_rules(struct amdgpu_device *adev, u16 vddc, vddci, min_vce_voltage = 0; u32 max_sclk_vddc, max_mclk_vddci, max_mclk_vddc; u32 max_sclk = 0, max_mclk = 0; + u32 high_pixelclock_count = 0; int i; if (adev->asic_type == CHIP_HAINAN) { @@ -3476,6 +3485,35 @@ static void si_apply_state_adjust_rules(struct amdgpu_device *adev, } } + /* We define "high pixelclock" for SI as higher than necessary for 4K 30Hz. + * For example, 4K 60Hz and 1080p 144Hz fall into this category. + * Find number of such displays connected. + */ + for (i = 0; i < display_cfg->num_display; i++) { + /* The array only contains active displays. */ + if (display_cfg->displays[i].pixel_clock > 297000) + high_pixelclock_count++; + } + + /* These are some ad-hoc fixes to some issues observed with SI GPUs. + * They are necessary because we don't have something like dce_calcs + * for these GPUs to calculate bandwidth requirements. + */ + if (high_pixelclock_count) { + /* Work around flickering lines at the bottom edge + * of the screen when using a single 4K 60Hz monitor. + */ + disable_mclk_switching = true; + + /* On Oland, we observe some flickering when two 4K 60Hz + * displays are connected, possibly because voltage is too low. + * Raise the voltage by requiring a higher SCLK. + * (Voltage cannot be adjusted independently without also SCLK.) 
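The 297000 kHz cutoff used above lines up with CTA-861 4K 30Hz timing; as a quick check (illustrative arithmetic, not from the patch):

    4K 30Hz: 4400 * 2250 * 30 = 297,000,000 Hz = 297 MHz  (the threshold itself)
    4K 60Hz: 4400 * 2250 * 60 = 594 MHz                    (counted as high pixelclock)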
+ */ + if (high_pixelclock_count > 1 && adev->asic_type == CHIP_OLAND) + disable_sclk_switching = true; + } + if (rps->vce_active) { rps->evclk = adev->pm.dpm.vce_states[adev->pm.dpm.vce_level].evclk; rps->ecclk = adev->pm.dpm.vce_states[adev->pm.dpm.vce_level].ecclk; @@ -3486,7 +3524,7 @@ static void si_apply_state_adjust_rules(struct amdgpu_device *adev, rps->ecclk = 0; } - if ((adev->pm.dpm.new_active_crtc_count > 1) || + if ((adev->pm.pm_display_cfg.num_display > 1) || si_dpm_vblank_too_short(adev)) disable_mclk_switching = true; @@ -3634,7 +3672,7 @@ static void si_apply_state_adjust_rules(struct amdgpu_device *adev, ps->performance_levels[i].mclk, max_limits->vddc, &ps->performance_levels[i].vddc); btc_apply_voltage_dependency_rules(&adev->pm.dpm.dyn_state.vddc_dependency_on_dispclk, - adev->clock.current_dispclk, + display_cfg->display_clk, max_limits->vddc, &ps->performance_levels[i].vddc); } @@ -4159,16 +4197,16 @@ static void si_program_ds_registers(struct amdgpu_device *adev) static void si_program_display_gap(struct amdgpu_device *adev) { + const struct amd_pp_display_configuration *cfg = &adev->pm.pm_display_cfg; u32 tmp, pipe; - int i; tmp = RREG32(mmCG_DISPLAY_GAP_CNTL) & ~(CG_DISPLAY_GAP_CNTL__DISP1_GAP_MASK | CG_DISPLAY_GAP_CNTL__DISP2_GAP_MASK); - if (adev->pm.dpm.new_active_crtc_count > 0) + if (cfg->num_display > 0) tmp |= R600_PM_DISPLAY_GAP_VBLANK_OR_WM << CG_DISPLAY_GAP_CNTL__DISP1_GAP__SHIFT; else tmp |= R600_PM_DISPLAY_GAP_IGNORE << CG_DISPLAY_GAP_CNTL__DISP1_GAP__SHIFT; - if (adev->pm.dpm.new_active_crtc_count > 1) + if (cfg->num_display > 1) tmp |= R600_PM_DISPLAY_GAP_VBLANK_OR_WM << CG_DISPLAY_GAP_CNTL__DISP2_GAP__SHIFT; else tmp |= R600_PM_DISPLAY_GAP_IGNORE << CG_DISPLAY_GAP_CNTL__DISP2_GAP__SHIFT; @@ -4178,17 +4216,8 @@ static void si_program_display_gap(struct amdgpu_device *adev) tmp = RREG32(DCCG_DISP_SLOW_SELECT_REG); pipe = (tmp & DCCG_DISP1_SLOW_SELECT_MASK) >> DCCG_DISP1_SLOW_SELECT_SHIFT; - if ((adev->pm.dpm.new_active_crtc_count > 0) && - (!(adev->pm.dpm.new_active_crtcs & (1 << pipe)))) { - /* find the first active crtc */ - for (i = 0; i < adev->mode_info.num_crtc; i++) { - if (adev->pm.dpm.new_active_crtcs & (1 << i)) - break; - } - if (i == adev->mode_info.num_crtc) - pipe = 0; - else - pipe = i; + if (cfg->num_display > 0 && pipe != cfg->crtc_index) { + pipe = cfg->crtc_index; tmp &= ~DCCG_DISP1_SLOW_SELECT_MASK; tmp |= DCCG_DISP1_SLOW_SELECT(pipe); @@ -4199,7 +4228,7 @@ static void si_program_display_gap(struct amdgpu_device *adev) * This can be a problem on PowerXpress systems or if you want to use the card * for offscreen rendering or compute if there are no crtcs enabled. 
*/ - si_notify_smc_display_change(adev, adev->pm.dpm.new_active_crtc_count > 0); + si_notify_smc_display_change(adev, cfg->num_display > 0); } static void si_enable_spread_spectrum(struct amdgpu_device *adev, bool enable) @@ -5508,7 +5537,7 @@ static int si_convert_power_level_to_smc(struct amdgpu_device *adev, (pl->mclk <= pi->mclk_stutter_mode_threshold) && !eg_pi->uvd_enabled && (RREG32(mmDPG_PIPE_STUTTER_CONTROL) & DPG_PIPE_STUTTER_CONTROL__STUTTER_ENABLE_MASK) && - (adev->pm.dpm.new_active_crtc_count <= 2)) { + (adev->pm.pm_display_cfg.num_display <= 2)) { level->mcFlags |= SISLANDS_SMC_MC_STUTTER_EN; } @@ -5637,14 +5666,10 @@ static int si_populate_smc_t(struct amdgpu_device *adev, static int si_disable_ulv(struct amdgpu_device *adev) { - struct si_power_info *si_pi = si_get_pi(adev); - struct si_ulv_param *ulv = &si_pi->ulv; + PPSMC_Result r; - if (ulv->supported) - return (amdgpu_si_send_msg_to_smc(adev, PPSMC_MSG_DisableULV) == PPSMC_Result_OK) ? - 0 : -EINVAL; - - return 0; + r = amdgpu_si_send_msg_to_smc(adev, PPSMC_MSG_DisableULV); + return (r == PPSMC_Result_OK) ? 0 : -EINVAL; } static bool si_is_state_ulv_compatible(struct amdgpu_device *adev, @@ -5661,7 +5686,7 @@ static bool si_is_state_ulv_compatible(struct amdgpu_device *adev, /* XXX validate against display requirements! */ for (i = 0; i < adev->pm.dpm.dyn_state.vddc_dependency_on_dispclk.count; i++) { - if (adev->clock.current_dispclk <= + if (adev->pm.pm_display_cfg.display_clk <= adev->pm.dpm.dyn_state.vddc_dependency_on_dispclk.entries[i].clk) { if (ulv->pl.vddc < adev->pm.dpm.dyn_state.vddc_dependency_on_dispclk.entries[i].v) @@ -5815,39 +5840,36 @@ static int si_upload_ulv_state(struct amdgpu_device *adev) static int si_upload_smc_data(struct amdgpu_device *adev) { - struct amdgpu_crtc *amdgpu_crtc = NULL; - int i; + const struct amd_pp_display_configuration *cfg = &adev->pm.pm_display_cfg; + u32 crtc_index = 0; + u32 mclk_change_block_cp_min = 0; + u32 mclk_change_block_cp_max = 0; - if (adev->pm.dpm.new_active_crtc_count == 0) - return 0; + /* When a display is plugged in, program these so that the SMC + * performs MCLK switching when it doesn't cause flickering. + * When no display is plugged in, there is no need to restrict + * MCLK switching, so program them to zero. 
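To make the numbers concrete (the line time is hypothetical, not from the patch): with line_time_in_us = 20, the code below programs

    mclk_change_block_cp_min = 200 / 20 = 10
    mclk_change_block_cp_max = 100 / 20 = 5

presumably the fixed 200 us and 100 us budgets expressed in display lines, replacing the earlier per-CRTC watermark-derived values.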
+ */ + if (cfg->num_display) { + crtc_index = cfg->crtc_index; - for (i = 0; i < adev->mode_info.num_crtc; i++) { - if (adev->pm.dpm.new_active_crtcs & (1 << i)) { - amdgpu_crtc = adev->mode_info.crtcs[i]; - break; + if (cfg->line_time_in_us) { + mclk_change_block_cp_min = 200 / cfg->line_time_in_us; + mclk_change_block_cp_max = 100 / cfg->line_time_in_us; } } - if (amdgpu_crtc == NULL) - return 0; - - if (amdgpu_crtc->line_time <= 0) - return 0; - - if (si_write_smc_soft_register(adev, - SI_SMC_SOFT_REGISTER_crtc_index, - amdgpu_crtc->crtc_id) != PPSMC_Result_OK) - return 0; + si_write_smc_soft_register(adev, + SI_SMC_SOFT_REGISTER_crtc_index, + crtc_index); - if (si_write_smc_soft_register(adev, - SI_SMC_SOFT_REGISTER_mclk_change_block_cp_min, - amdgpu_crtc->wm_high / amdgpu_crtc->line_time) != PPSMC_Result_OK) - return 0; + si_write_smc_soft_register(adev, + SI_SMC_SOFT_REGISTER_mclk_change_block_cp_min, + mclk_change_block_cp_min); - if (si_write_smc_soft_register(adev, - SI_SMC_SOFT_REGISTER_mclk_change_block_cp_max, - amdgpu_crtc->wm_low / amdgpu_crtc->line_time) != PPSMC_Result_OK) - return 0; + si_write_smc_soft_register(adev, + SI_SMC_SOFT_REGISTER_mclk_change_block_cp_max, + mclk_change_block_cp_max); return 0; } @@ -7951,15 +7973,16 @@ static void si_dpm_print_power_state(void *handle, struct rv7xx_pl *pl; int i; - amdgpu_dpm_print_class_info(rps->class, rps->class2); - amdgpu_dpm_print_cap_info(rps->caps); - DRM_INFO("\tuvd vclk: %d dclk: %d\n", rps->vclk, rps->dclk); + amdgpu_dpm_dbg_print_class_info(adev, rps->class, rps->class2); + amdgpu_dpm_dbg_print_cap_info(adev, rps->caps); + drm_dbg(adev_to_drm(adev), "\tuvd vclk: %d dclk: %d\n", rps->vclk, rps->dclk); + drm_dbg(adev_to_drm(adev), "\tvce evclk: %d ecclk: %d\n", rps->evclk, rps->ecclk); for (i = 0; i < ps->performance_level_count; i++) { pl = &ps->performance_levels[i]; - DRM_INFO("\t\tpower level %d sclk: %u mclk: %u vddc: %u vddci: %u pcie gen: %u\n", + drm_dbg(adev_to_drm(adev), "\t\tpower level %d sclk: %u mclk: %u vddc: %u vddci: %u pcie gen: %u\n", i, pl->sclk, pl->mclk, pl->vddc, pl->vddci, pl->pcie_gen + 1); } - amdgpu_dpm_print_ps_status(adev, rps); + amdgpu_dpm_dbg_print_ps_status(adev, rps); } static int si_dpm_early_init(struct amdgpu_ip_block *ip_block) diff --git a/drivers/gpu/drm/amd/pm/legacy-dpm/si_smc.c b/drivers/gpu/drm/amd/pm/legacy-dpm/si_smc.c index 4e65ab9e931c..281a5e377aee 100644 --- a/drivers/gpu/drm/amd/pm/legacy-dpm/si_smc.c +++ b/drivers/gpu/drm/amd/pm/legacy-dpm/si_smc.c @@ -172,20 +172,42 @@ PPSMC_Result amdgpu_si_send_msg_to_smc(struct amdgpu_device *adev, { u32 tmp; int i; + int usec_timeout; + + /* SMC seems to process some messages exceptionally slowly. 
*/ + switch (msg) { + case PPSMC_MSG_NoForcedLevel: + case PPSMC_MSG_SetEnabledLevels: + case PPSMC_MSG_SetForcedLevels: + case PPSMC_MSG_DisableULV: + case PPSMC_MSG_SwitchToSwState: + usec_timeout = 1000000; /* 1 sec */ + break; + default: + usec_timeout = 200000; /* 200 ms */ + break; + } if (!amdgpu_si_is_smc_running(adev)) return PPSMC_Result_Failed; WREG32(mmSMC_MESSAGE_0, msg); - for (i = 0; i < adev->usec_timeout; i++) { + for (i = 0; i < usec_timeout; i++) { tmp = RREG32(mmSMC_RESP_0); if (tmp != 0) break; udelay(1); } - return (PPSMC_Result)RREG32(mmSMC_RESP_0); + tmp = RREG32(mmSMC_RESP_0); + if (tmp == 0) { + drm_warn(adev_to_drm(adev), + "%s timeout on message: %x (SMC_SCRATCH0: %x)\n", + __func__, msg, RREG32(mmSMC_SCRATCH0)); + } + + return (PPSMC_Result)tmp; } PPSMC_Result amdgpu_si_wait_for_smc_inactive(struct amdgpu_device *adev) diff --git a/drivers/gpu/drm/amd/pm/powerplay/amd_powerplay.c b/drivers/gpu/drm/amd/pm/powerplay/amd_powerplay.c index b48a031cbba0..554492dfa3c0 100644 --- a/drivers/gpu/drm/amd/pm/powerplay/amd_powerplay.c +++ b/drivers/gpu/drm/amd/pm/powerplay/amd_powerplay.c @@ -1554,16 +1554,7 @@ static void pp_pm_compute_clocks(void *handle) struct amdgpu_device *adev = hwmgr->adev; if (!adev->dc_enabled) { - amdgpu_dpm_get_active_displays(adev); - adev->pm.pm_display_cfg.num_display = adev->pm.dpm.new_active_crtc_count; - adev->pm.pm_display_cfg.vrefresh = amdgpu_dpm_get_vrefresh(adev); - adev->pm.pm_display_cfg.min_vblank_time = amdgpu_dpm_get_vblank_time(adev); - /* we have issues with mclk switching with - * refresh rates over 120 hz on the non-DC code. - */ - if (adev->pm.pm_display_cfg.vrefresh > 120) - adev->pm.pm_display_cfg.min_vblank_time = 0; - + amdgpu_dpm_get_display_cfg(adev); pp_display_configuration_change(handle, &adev->pm.pm_display_cfg); } diff --git a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/ppatomctrl.c b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/ppatomctrl.c index 8d40ed0f0e83..ce166a7f8e42 100644 --- a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/ppatomctrl.c +++ b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/ppatomctrl.c @@ -563,8 +563,8 @@ bool atomctrl_is_voltage_controlled_by_gpio_v3( PP_ASSERT_WITH_CODE((NULL != voltage_info), "Could not find Voltage Table in BIOS.", return false;); - ret = (NULL != atomctrl_lookup_voltage_type_v3 - (voltage_info, voltage_type, voltage_mode)) ? 
true : false; + ret = atomctrl_lookup_voltage_type_v3 + (voltage_info, voltage_type, voltage_mode) != NULL; return ret; } diff --git a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu10_hwmgr.c b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu10_hwmgr.c index 9a821563bc8e..14ccd743ca1d 100644 --- a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu10_hwmgr.c +++ b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu10_hwmgr.c @@ -1032,7 +1032,7 @@ static int smu10_print_clock_levels(struct pp_hwmgr *hwmgr, data->clock_vol_info.vdd_dep_on_fclk; uint32_t i, now, size = 0; uint32_t min_freq, max_freq = 0; - uint32_t ret = 0; + int ret = 0; switch (type) { case PP_SCLK: diff --git a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_hwmgr.c b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_hwmgr.c index 8da882c51856..9b28c0728269 100644 --- a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_hwmgr.c +++ b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_hwmgr.c @@ -5444,8 +5444,7 @@ static int smu7_get_thermal_temperature_range(struct pp_hwmgr *hwmgr, thermal_data->max = table_info->cac_dtp_table->usSoftwareShutdownTemp * PP_TEMPERATURE_UNITS_PER_CENTIGRADES; else if (hwmgr->pp_table_version == PP_TABLE_V0) - thermal_data->max = data->thermal_temp_setting.temperature_shutdown * - PP_TEMPERATURE_UNITS_PER_CENTIGRADES; + thermal_data->max = data->thermal_temp_setting.temperature_shutdown; thermal_data->sw_ctf_threshold = thermal_data->max; diff --git a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu_helper.c b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu_helper.c index 79a566f3564a..c305ea4ec17d 100644 --- a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu_helper.c +++ b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu_helper.c @@ -149,7 +149,7 @@ int phm_wait_on_indirect_register(struct pp_hwmgr *hwmgr, } cgs_write_register(hwmgr->device, indirect_port, index); - return phm_wait_on_register(hwmgr, indirect_port + 1, mask, value); + return phm_wait_on_register(hwmgr, indirect_port + 1, value, mask); } int phm_wait_for_register_unequal(struct pp_hwmgr *hwmgr, diff --git a/drivers/gpu/drm/amd/pm/powerplay/smumgr/fiji_smumgr.c b/drivers/gpu/drm/amd/pm/powerplay/smumgr/fiji_smumgr.c index 5e43ad2b2956..0a876c840c79 100644 --- a/drivers/gpu/drm/amd/pm/powerplay/smumgr/fiji_smumgr.c +++ b/drivers/gpu/drm/amd/pm/powerplay/smumgr/fiji_smumgr.c @@ -2024,7 +2024,7 @@ static int fiji_init_smc_table(struct pp_hwmgr *hwmgr) table->VoltageResponseTime = 0; table->PhaseResponseTime = 0; table->MemoryThermThrottleEnable = 1; - table->PCIeBootLinkLevel = 0; /* 0:Gen1 1:Gen2 2:Gen3*/ + table->PCIeBootLinkLevel = (uint8_t) (data->dpm_table.pcie_speed_table.count); table->PCIeGenInterval = 1; table->VRConfig = 0; @@ -2540,9 +2540,8 @@ static int fiji_initialize_mc_reg_table(struct pp_hwmgr *hwmgr) static bool fiji_is_dpm_running(struct pp_hwmgr *hwmgr) { - return (1 == PHM_READ_INDIRECT_FIELD(hwmgr->device, - CGS_IND_REG__SMC, FEATURE_STATUS, VOLTAGE_CONTROLLER_ON)) - ? 
true : false; + return PHM_READ_INDIRECT_FIELD(hwmgr->device, + CGS_IND_REG__SMC, FEATURE_STATUS, VOLTAGE_CONTROLLER_ON) == 1; } static int fiji_update_dpm_settings(struct pp_hwmgr *hwmgr, diff --git a/drivers/gpu/drm/amd/pm/powerplay/smumgr/iceland_smumgr.c b/drivers/gpu/drm/amd/pm/powerplay/smumgr/iceland_smumgr.c index 17d2f5bff4a7..aa3ae9b115c4 100644 --- a/drivers/gpu/drm/amd/pm/powerplay/smumgr/iceland_smumgr.c +++ b/drivers/gpu/drm/amd/pm/powerplay/smumgr/iceland_smumgr.c @@ -2028,7 +2028,7 @@ static int iceland_init_smc_table(struct pp_hwmgr *hwmgr) table->VoltageResponseTime = 0; table->PhaseResponseTime = 0; table->MemoryThermThrottleEnable = 1; - table->PCIeBootLinkLevel = 0; + table->PCIeBootLinkLevel = (uint8_t) (data->dpm_table.pcie_speed_table.count); table->PCIeGenInterval = 1; result = iceland_populate_smc_svi2_config(hwmgr, table); @@ -2655,9 +2655,8 @@ static int iceland_initialize_mc_reg_table(struct pp_hwmgr *hwmgr) static bool iceland_is_dpm_running(struct pp_hwmgr *hwmgr) { - return (1 == PHM_READ_INDIRECT_FIELD(hwmgr->device, - CGS_IND_REG__SMC, FEATURE_STATUS, VOLTAGE_CONTROLLER_ON)) - ? true : false; + return PHM_READ_INDIRECT_FIELD(hwmgr->device, + CGS_IND_REG__SMC, FEATURE_STATUS, VOLTAGE_CONTROLLER_ON) == 1; } const struct pp_smumgr_func iceland_smu_funcs = { diff --git a/drivers/gpu/drm/amd/pm/powerplay/smumgr/polaris10_smumgr.c b/drivers/gpu/drm/amd/pm/powerplay/smumgr/polaris10_smumgr.c index ff6b563ecbf5..bf6d09572cfc 100644 --- a/drivers/gpu/drm/amd/pm/powerplay/smumgr/polaris10_smumgr.c +++ b/drivers/gpu/drm/amd/pm/powerplay/smumgr/polaris10_smumgr.c @@ -2578,9 +2578,8 @@ static int polaris10_initialize_mc_reg_table(struct pp_hwmgr *hwmgr) static bool polaris10_is_dpm_running(struct pp_hwmgr *hwmgr) { - return (1 == PHM_READ_INDIRECT_FIELD(hwmgr->device, - CGS_IND_REG__SMC, FEATURE_STATUS, VOLTAGE_CONTROLLER_ON)) - ? true : false; + return PHM_READ_INDIRECT_FIELD(hwmgr->device, + CGS_IND_REG__SMC, FEATURE_STATUS, VOLTAGE_CONTROLLER_ON) == 1; } static int polaris10_update_dpm_settings(struct pp_hwmgr *hwmgr, diff --git a/drivers/gpu/drm/amd/pm/powerplay/smumgr/smu7_smumgr.c b/drivers/gpu/drm/amd/pm/powerplay/smumgr/smu7_smumgr.c index baf51cd82a35..0d4cbe4113a0 100644 --- a/drivers/gpu/drm/amd/pm/powerplay/smumgr/smu7_smumgr.c +++ b/drivers/gpu/drm/amd/pm/powerplay/smumgr/smu7_smumgr.c @@ -401,7 +401,7 @@ failed: int smu7_check_fw_load_finish(struct pp_hwmgr *hwmgr, uint32_t fw_type) { struct smu7_smumgr *smu_data = (struct smu7_smumgr *)(hwmgr->smu_backend); - uint32_t ret; + int ret; ret = phm_wait_on_indirect_register(hwmgr, mmSMC_IND_INDEX_11, smu_data->soft_regs_start + smum_get_offsetof(hwmgr, diff --git a/drivers/gpu/drm/amd/pm/powerplay/smumgr/tonga_smumgr.c b/drivers/gpu/drm/amd/pm/powerplay/smumgr/tonga_smumgr.c index 6fe6e6abb5d8..2e21f9d066cb 100644 --- a/drivers/gpu/drm/amd/pm/powerplay/smumgr/tonga_smumgr.c +++ b/drivers/gpu/drm/amd/pm/powerplay/smumgr/tonga_smumgr.c @@ -3139,9 +3139,8 @@ static int tonga_initialize_mc_reg_table(struct pp_hwmgr *hwmgr) static bool tonga_is_dpm_running(struct pp_hwmgr *hwmgr) { - return (1 == PHM_READ_INDIRECT_FIELD(hwmgr->device, - CGS_IND_REG__SMC, FEATURE_STATUS, VOLTAGE_CONTROLLER_ON)) - ? 
true : false; + return PHM_READ_INDIRECT_FIELD(hwmgr->device, + CGS_IND_REG__SMC, FEATURE_STATUS, VOLTAGE_CONTROLLER_ON) == 1; } static int tonga_update_dpm_settings(struct pp_hwmgr *hwmgr, diff --git a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c index d79a1d94661a..fb8086859857 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c +++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c @@ -76,6 +76,10 @@ static void smu_power_profile_mode_get(struct smu_context *smu, enum PP_SMC_POWER_PROFILE profile_mode); static void smu_power_profile_mode_put(struct smu_context *smu, enum PP_SMC_POWER_PROFILE profile_mode); +static enum smu_clk_type smu_convert_to_smuclk(enum pp_clock_type type); +static int smu_od_edit_dpm_table(void *handle, + enum PP_OD_DPM_TABLE_COMMAND type, + long *input, uint32_t size); static int smu_sys_get_pp_feature_mask(void *handle, char *buf) @@ -134,12 +138,17 @@ int smu_get_status_gfxoff(struct smu_context *smu, uint32_t *value) } int smu_set_soft_freq_range(struct smu_context *smu, - enum smu_clk_type clk_type, + enum pp_clock_type type, uint32_t min, uint32_t max) { + enum smu_clk_type clk_type; int ret = 0; + clk_type = smu_convert_to_smuclk(type); + if (clk_type == SMU_CLK_COUNT) + return -EINVAL; + if (smu->ppt_funcs->set_soft_freq_limited_range) ret = smu->ppt_funcs->set_soft_freq_limited_range(smu, clk_type, @@ -307,6 +316,26 @@ static int smu_dpm_set_vpe_enable(struct smu_context *smu, return ret; } +static int smu_dpm_set_isp_enable(struct smu_context *smu, + bool enable) +{ + struct smu_power_context *smu_power = &smu->smu_power; + struct smu_power_gate *power_gate = &smu_power->power_gate; + int ret; + + if (!smu->ppt_funcs->dpm_set_isp_enable) + return 0; + + if (atomic_read(&power_gate->isp_gated) ^ enable) + return 0; + + ret = smu->ppt_funcs->dpm_set_isp_enable(smu, enable); + if (!ret) + atomic_set(&power_gate->isp_gated, !enable); + + return ret; +} + static int smu_dpm_set_umsch_mm_enable(struct smu_context *smu, bool enable) { @@ -408,6 +437,12 @@ static int smu_dpm_set_power_gate(void *handle, dev_err(smu->adev->dev, "Failed to power %s VPE!\n", gate ? "gate" : "ungate"); break; + case AMD_IP_BLOCK_TYPE_ISP: + ret = smu_dpm_set_isp_enable(smu, !gate); + if (ret) + dev_err(smu->adev->dev, "Failed to power %s ISP!\n", + gate ? 
"gate" : "ungate"); + break; default: dev_err(smu->adev->dev, "Unsupported block type!\n"); return -EINVAL; @@ -1004,6 +1039,21 @@ static int smu_fini_fb_allocations(struct smu_context *smu) return 0; } +static void smu_update_gpu_addresses(struct smu_context *smu) +{ + struct smu_table_context *smu_table = &smu->smu_table; + struct smu_table *pm_status_table = smu_table->tables + SMU_TABLE_PMSTATUSLOG; + struct smu_table *driver_table = &(smu_table->driver_table); + struct smu_table *dummy_read_1_table = &smu_table->dummy_read_1_table; + + if (pm_status_table->bo) + pm_status_table->mc_address = amdgpu_bo_fb_aper_addr(pm_status_table->bo); + if (driver_table->bo) + driver_table->mc_address = amdgpu_bo_fb_aper_addr(driver_table->bo); + if (dummy_read_1_table->bo) + dummy_read_1_table->mc_address = amdgpu_bo_fb_aper_addr(dummy_read_1_table->bo); +} + /** * smu_alloc_memory_pool - allocate memory pool in the system memory * @@ -1265,6 +1315,33 @@ static void smu_init_power_profile(struct smu_context *smu) smu_power_profile_mode_get(smu, smu->power_profile_mode); } +void smu_feature_cap_set(struct smu_context *smu, enum smu_feature_cap_id fea_id) +{ + struct smu_feature_cap *fea_cap = &smu->fea_cap; + + if (fea_id >= SMU_FEATURE_CAP_ID__COUNT) + return; + + set_bit(fea_id, fea_cap->cap_map); +} + +bool smu_feature_cap_test(struct smu_context *smu, enum smu_feature_cap_id fea_id) +{ + struct smu_feature_cap *fea_cap = &smu->fea_cap; + + if (fea_id >= SMU_FEATURE_CAP_ID__COUNT) + return false; + + return test_bit(fea_id, fea_cap->cap_map); +} + +static void smu_feature_cap_init(struct smu_context *smu) +{ + struct smu_feature_cap *fea_cap = &smu->fea_cap; + + bitmap_zero(fea_cap->cap_map, SMU_FEATURE_CAP_ID__COUNT); +} + static int smu_sw_init(struct amdgpu_ip_block *ip_block) { struct amdgpu_device *adev = ip_block->adev; @@ -1285,6 +1362,7 @@ static int smu_sw_init(struct amdgpu_ip_block *ip_block) atomic_set(&smu->smu_power.power_gate.vcn_gated[i], 1); atomic_set(&smu->smu_power.power_gate.jpeg_gated, 1); atomic_set(&smu->smu_power.power_gate.vpe_gated, 1); + atomic_set(&smu->smu_power.power_gate.isp_gated, 1); atomic_set(&smu->smu_power.power_gate.umsch_mm_gated, 1); smu_init_power_profile(smu); @@ -1296,6 +1374,8 @@ static int smu_sw_init(struct amdgpu_ip_block *ip_block) INIT_DELAYED_WORK(&smu->swctf_delayed_work, smu_swctf_delayed_work_handler); + smu_feature_cap_init(smu); + ret = smu_smc_table_sw_init(smu); if (ret) { dev_err(adev->dev, "Failed to sw init smc table!\n"); @@ -1672,37 +1752,6 @@ static int smu_smc_hw_setup(struct smu_context *smu) } } - ret = smu_system_features_control(smu, true); - if (ret) { - dev_err(adev->dev, "Failed to enable requested dpm features!\n"); - return ret; - } - - smu_init_xgmi_plpd_mode(smu); - - ret = smu_feature_get_enabled_mask(smu, &features_supported); - if (ret) { - dev_err(adev->dev, "Failed to retrieve supported dpm features!\n"); - return ret; - } - bitmap_copy(feature->supported, - (unsigned long *)&features_supported, - feature->feature_num); - - if (!smu_is_dpm_running(smu)) - dev_info(adev->dev, "dpm has been disabled\n"); - - /* - * Set initialized values (get from vbios) to dpm tables context such as - * gfxclk, memclk, dcefclk, and etc. And enable the DPM feature for each - * type of clks. 
- */ - ret = smu_set_default_dpm_table(smu); - if (ret) { - dev_err(adev->dev, "Failed to setup default dpm clock tables!\n"); - return ret; - } - if (adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN5) pcie_gen = 4; else if (adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4) @@ -1738,6 +1787,37 @@ static int smu_smc_hw_setup(struct smu_context *smu) return ret; } + ret = smu_system_features_control(smu, true); + if (ret) { + dev_err(adev->dev, "Failed to enable requested dpm features!\n"); + return ret; + } + + smu_init_xgmi_plpd_mode(smu); + + ret = smu_feature_get_enabled_mask(smu, &features_supported); + if (ret) { + dev_err(adev->dev, "Failed to retrieve supported dpm features!\n"); + return ret; + } + bitmap_copy(feature->supported, + (unsigned long *)&features_supported, + feature->feature_num); + + if (!smu_is_dpm_running(smu)) + dev_info(adev->dev, "dpm has been disabled\n"); + + /* + * Set initialized values (get from vbios) to dpm tables context such as + * gfxclk, memclk, dcefclk, and etc. And enable the DPM feature for each + * type of clks. + */ + ret = smu_set_default_dpm_table(smu); + if (ret) { + dev_err(adev->dev, "Failed to setup default dpm clock tables!\n"); + return ret; + } + ret = smu_get_thermal_temperature_range(smu); if (ret) { dev_err(adev->dev, "Failed to get thermal temperature ranges!\n"); @@ -1780,6 +1860,9 @@ static int smu_start_smc_engine(struct smu_context *smu) struct amdgpu_device *adev = smu->adev; int ret = 0; + if (amdgpu_virt_xgmi_migrate_enabled(adev)) + smu_update_gpu_addresses(smu); + smu->smc_fw_state = SMU_FW_INIT; if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) { @@ -1842,7 +1925,6 @@ static int smu_hw_init(struct amdgpu_ip_block *ip_block) for (i = 0; i < adev->vcn.num_vcn_inst; i++) smu_dpm_set_vcn_enable(smu, true, i); smu_dpm_set_jpeg_enable(smu, true); - smu_dpm_set_vpe_enable(smu, true); smu_dpm_set_umsch_mm_enable(smu, true); smu_set_mall_enable(smu); smu_set_gfx_cgpg(smu, true); @@ -2050,7 +2132,6 @@ static int smu_hw_fini(struct amdgpu_ip_block *ip_block) } smu_dpm_set_jpeg_enable(smu, false); adev->jpeg.cur_state = AMD_PG_STATE_GATE; - smu_dpm_set_vpe_enable(smu, false); smu_dpm_set_umsch_mm_enable(smu, false); if (!smu->pm_enabled) @@ -2144,6 +2225,7 @@ static int smu_resume(struct amdgpu_ip_block *ip_block) int ret; struct amdgpu_device *adev = ip_block->adev; struct smu_context *smu = adev->powerplay.pp_handle; + struct smu_dpm_context *smu_dpm_ctx = &(smu->smu_dpm); if (amdgpu_sriov_multi_vf_mode(adev)) return 0; @@ -2175,6 +2257,18 @@ static int smu_resume(struct amdgpu_ip_block *ip_block) adev->pm.dpm_enabled = true; + if (smu->current_power_limit) { + ret = smu_set_power_limit(smu, smu->current_power_limit); + if (ret && ret != -EOPNOTSUPP) + return ret; + } + + if (smu_dpm_ctx->dpm_level == AMD_DPM_FORCED_LEVEL_MANUAL && smu->od_enabled) { + ret = smu_od_edit_dpm_table(smu, PP_OD_COMMIT_DPM_TABLE, NULL, 0); + if (ret) + return ret; + } + dev_info(adev->dev, "SMU is resumed successfully!\n"); return 0; @@ -2935,6 +3029,12 @@ static enum smu_clk_type smu_convert_to_smuclk(enum pp_clock_type type) clk_type = SMU_DCLK; break; case PP_DCLK1: clk_type = SMU_DCLK1; break; + case PP_ISPICLK: + clk_type = SMU_ISPICLK; + break; + case PP_ISPXCLK: + clk_type = SMU_ISPXCLK; + break; case OD_SCLK: clk_type = SMU_OD_SCLK; break; case OD_MCLK: @@ -3434,15 +3534,10 @@ bool smu_mode1_reset_is_support(struct smu_context *smu) bool smu_link_reset_is_support(struct smu_context *smu) { - bool ret = false; - if 
(!smu->pm_enabled) return false; - if (smu->ppt_funcs && smu->ppt_funcs->link_reset_is_support) - ret = smu->ppt_funcs->link_reset_is_support(smu); - - return ret; + return smu_feature_cap_test(smu, SMU_FEATURE_CAP_ID__LINK_RESET); } int smu_mode1_reset(struct smu_context *smu) @@ -3758,6 +3853,51 @@ int smu_set_pm_policy(struct smu_context *smu, enum pp_pm_policy p_type, return ret; } +static ssize_t smu_sys_get_temp_metrics(void *handle, enum smu_temp_metric_type type, void *table) +{ + struct smu_context *smu = handle; + struct smu_table_context *smu_table = &smu->smu_table; + struct smu_table *tables = smu_table->tables; + enum smu_table_id table_id; + + if (!smu->pm_enabled || !smu->adev->pm.dpm_enabled) + return -EOPNOTSUPP; + + if (!smu->smu_temp.temp_funcs || !smu->smu_temp.temp_funcs->get_temp_metrics) + return -EOPNOTSUPP; + + table_id = smu_metrics_get_temp_table_id(type); + + if (table_id == SMU_TABLE_COUNT) + return -EINVAL; + + /* If the request is to get size alone, return the cached table size */ + if (!table && tables[table_id].cache.size) + return tables[table_id].cache.size; + + if (smu_table_cache_is_valid(&tables[table_id])) { + memcpy(table, tables[table_id].cache.buffer, + tables[table_id].cache.size); + return tables[table_id].cache.size; + } + + return smu->smu_temp.temp_funcs->get_temp_metrics(smu, type, table); +} + +static bool smu_temp_metrics_is_supported(void *handle, enum smu_temp_metric_type type) +{ + struct smu_context *smu = handle; + bool ret = false; + + if (!smu->pm_enabled) + return false; + + if (smu->smu_temp.temp_funcs && smu->smu_temp.temp_funcs->temp_metrics_is_supported) + ret = smu->smu_temp.temp_funcs->temp_metrics_is_supported(smu, type); + + return ret; +} + static ssize_t smu_sys_get_xcp_metrics(void *handle, int xcp_id, void *table) { struct smu_context *smu = handle; @@ -3830,6 +3970,8 @@ static const struct amd_pm_funcs swsmu_pm_funcs = { .get_dpm_clock_table = smu_get_dpm_clock_table, .get_smu_prv_buf_details = smu_get_prv_buffer_details, .get_xcp_metrics = smu_sys_get_xcp_metrics, + .get_temp_metrics = smu_sys_get_temp_metrics, + .temp_metrics_is_supported = smu_temp_metrics_is_supported, }; int smu_wait_for_event(struct smu_context *smu, enum smu_event_type event, @@ -3985,12 +4127,7 @@ int smu_send_rma_reason(struct smu_context *smu) */ bool smu_reset_sdma_is_supported(struct smu_context *smu) { - bool ret = false; - - if (smu->ppt_funcs && smu->ppt_funcs->reset_sdma_is_supported) - ret = smu->ppt_funcs->reset_sdma_is_supported(smu); - - return ret; + return smu_feature_cap_test(smu, SMU_FEATURE_CAP_ID__SDMA_RESET); } int smu_reset_sdma(struct smu_context *smu, uint32_t inst_mask) @@ -4003,6 +4140,11 @@ int smu_reset_sdma(struct smu_context *smu, uint32_t inst_mask) return ret; } +bool smu_reset_vcn_is_supported(struct smu_context *smu) +{ + return smu_feature_cap_test(smu, SMU_FEATURE_CAP_ID__VCN_RESET); +} + int smu_reset_vcn(struct smu_context *smu, uint32_t inst_mask) { if (smu->ppt_funcs && smu->ppt_funcs->dpm_reset_vcn) diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h index 9aacc7bc1c69..582c186d8b62 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h @@ -249,6 +249,14 @@ struct smu_user_dpm_profile { tables[table_id].domain = d; \ } while (0) +struct smu_table_cache { + void *buffer; + size_t size; + /* interval in ms*/ + uint32_t interval; + unsigned long last_cache_time; +}; + struct smu_table { uint64_t 
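/*
 * Illustration only: the capability-bitmap idea behind smu_feature_cap_set()
 * and smu_feature_cap_test() above, reduced to a single machine word.  The
 * driver uses DECLARE_BITMAP()/set_bit()/test_bit(); this sketch just uses a
 * uint64_t and made-up capability ids.
 */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

enum cap_id { CAP_LINK_RESET, CAP_SDMA_RESET, CAP_VCN_RESET, CAP_COUNT };

struct caps { uint64_t map; };

static void cap_set(struct caps *c, enum cap_id id)
{
        if (id >= CAP_COUNT)
                return;                 /* ignore out-of-range ids, as the driver does */
        c->map |= UINT64_C(1) << id;
}

static bool cap_test(const struct caps *c, enum cap_id id)
{
        if (id >= CAP_COUNT)
                return false;
        return c->map & (UINT64_C(1) << id);
}

int main(void)
{
        struct caps c = { 0 };

        cap_set(&c, CAP_SDMA_RESET);
        printf("sdma reset supported: %d\n", cap_test(&c, CAP_SDMA_RESET));
        printf("link reset supported: %d\n", cap_test(&c, CAP_LINK_RESET));
        return 0;
}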
size; uint32_t align; @@ -257,6 +265,7 @@ struct smu_table { void *cpu_addr; struct amdgpu_bo *bo; uint32_t version; + struct smu_table_cache cache; }; enum smu_perf_level_designation { @@ -322,6 +331,9 @@ enum smu_table_id { SMU_TABLE_ECCINFO, SMU_TABLE_COMBO_PPTABLE, SMU_TABLE_WIFIBAND, + SMU_TABLE_GPUBOARD_TEMP_METRICS, + SMU_TABLE_BASEBOARD_TEMP_METRICS, + SMU_TABLE_PMFW_SYSTEM_METRICS, SMU_TABLE_COUNT, }; @@ -396,12 +408,17 @@ struct smu_dpm_context { struct smu_dpm_policy_ctxt *dpm_policies; }; +struct smu_temp_context { + const struct smu_temp_funcs *temp_funcs; +}; + struct smu_power_gate { bool uvd_gated; bool vce_gated; atomic_t vcn_gated[AMDGPU_MAX_VCN_INSTANCES]; atomic_t jpeg_gated; atomic_t vpe_gated; + atomic_t isp_gated; atomic_t umsch_mm_gated; }; @@ -511,6 +528,17 @@ enum smu_fw_status { */ #define SMU_WBRF_EVENT_HANDLING_PACE 10 +enum smu_feature_cap_id { + SMU_FEATURE_CAP_ID__LINK_RESET = 0, + SMU_FEATURE_CAP_ID__SDMA_RESET, + SMU_FEATURE_CAP_ID__VCN_RESET, + SMU_FEATURE_CAP_ID__COUNT, +}; + +struct smu_feature_cap { + DECLARE_BITMAP(cap_map, SMU_FEATURE_CAP_ID__COUNT); +}; + struct smu_context { struct amdgpu_device *adev; struct amdgpu_irq_src irq_source; @@ -528,10 +556,12 @@ struct smu_context { struct smu_table_context smu_table; struct smu_dpm_context smu_dpm; struct smu_power_context smu_power; + struct smu_temp_context smu_temp; struct smu_feature smu_feature; struct amd_pp_display_configuration *display_config; struct smu_baco_context smu_baco; struct smu_temperature_range thermal_range; + struct smu_feature_cap fea_cap; void *od_settings; struct smu_umd_pstate_table pstate_table; @@ -623,6 +653,28 @@ struct smu_context { struct i2c_adapter; /** + * struct smu_temp_funcs - Callbacks used to get temperature data. + */ +struct smu_temp_funcs { + /** + * @get_temp_metrics: Calibrate voltage/frequency curve to fit the system's + * power delivery and voltage margins. Required for adaptive + * @type Temperature metrics type(baseboard/gpuboard) + * Return: Size of &table + */ + ssize_t (*get_temp_metrics)(struct smu_context *smu, + enum smu_temp_metric_type type, void *table); + + /** + * @temp_metrics_is_support: Get if specific temperature metrics is supported + * @type Temperature metrics type(baseboard/gpuboard) + * Return: true if supported else false + */ + bool (*temp_metrics_is_supported)(struct smu_context *smu, enum smu_temp_metric_type type); + +}; + +/** * struct pptable_funcs - Callbacks used to interact with the SMU. */ struct pptable_funcs { @@ -1233,11 +1285,6 @@ struct pptable_funcs { bool (*mode1_reset_is_support)(struct smu_context *smu); /** - * @link_reset_is_support: Check if GPU supports link reset. - */ - bool (*link_reset_is_support)(struct smu_context *smu); - - /** * @mode1_reset: Perform mode1 reset. * * Complete GPU reset. @@ -1387,10 +1434,6 @@ struct pptable_funcs { * @reset_sdma: message SMU to soft reset sdma instance. */ int (*reset_sdma)(struct smu_context *smu, uint32_t inst_mask); - /** - * @reset_sdma_is_supported: Check if support resets the SDMA engine. - */ - bool (*reset_sdma_is_supported)(struct smu_context *smu); /** * @reset_vcn: message SMU to soft reset vcn instance. @@ -1436,6 +1479,12 @@ struct pptable_funcs { int (*dpm_set_vpe_enable)(struct smu_context *smu, bool enable); /** + * @dpm_set_isp_enable: Enable/disable ISP engine dynamic power + * management. + */ + int (*dpm_set_isp_enable)(struct smu_context *smu, bool enable); + + /** * @dpm_set_umsch_mm_enable: Enable/disable UMSCH engine dynamic power * management. 
*/ @@ -1615,6 +1664,71 @@ typedef struct { struct smu_dpm_policy *smu_get_pm_policy(struct smu_context *smu, enum pp_pm_policy p_type); +static inline enum smu_table_id +smu_metrics_get_temp_table_id(enum smu_temp_metric_type type) +{ + switch (type) { + case SMU_TEMP_METRIC_BASEBOARD: + return SMU_TABLE_BASEBOARD_TEMP_METRICS; + case SMU_TEMP_METRIC_GPUBOARD: + return SMU_TABLE_GPUBOARD_TEMP_METRICS; + default: + return SMU_TABLE_COUNT; + } + + return SMU_TABLE_COUNT; +} + +static inline void smu_table_cache_update_time(struct smu_table *table, + unsigned long time) +{ + table->cache.last_cache_time = time; +} + +static inline bool smu_table_cache_is_valid(struct smu_table *table) +{ + if (!table->cache.buffer || !table->cache.last_cache_time || + !table->cache.interval || !table->cache.size || + time_after(jiffies, + table->cache.last_cache_time + + msecs_to_jiffies(table->cache.interval))) + return false; + + return true; +} + +static inline int smu_table_cache_init(struct smu_context *smu, + enum smu_table_id table_id, size_t size, + uint32_t cache_interval) +{ + struct smu_table_context *smu_table = &smu->smu_table; + struct smu_table *tables = smu_table->tables; + + tables[table_id].cache.buffer = kzalloc(size, GFP_KERNEL); + if (!tables[table_id].cache.buffer) + return -ENOMEM; + + tables[table_id].cache.last_cache_time = 0; + tables[table_id].cache.interval = cache_interval; + tables[table_id].cache.size = size; + + return 0; +} + +static inline void smu_table_cache_fini(struct smu_context *smu, + enum smu_table_id table_id) +{ + struct smu_table_context *smu_table = &smu->smu_table; + struct smu_table *tables = smu_table->tables; + + if (tables[table_id].cache.buffer) { + kfree(tables[table_id].cache.buffer); + tables[table_id].cache.buffer = NULL; + tables[table_id].cache.last_cache_time = 0; + tables[table_id].cache.interval = 0; + } +} + #if !defined(SWSMU_CODE_LAYER_L2) && !defined(SWSMU_CODE_LAYER_L3) && !defined(SWSMU_CODE_LAYER_L4) int smu_get_power_limit(void *handle, uint32_t *limit, @@ -1635,7 +1749,7 @@ int smu_write_watermarks_table(struct smu_context *smu); int smu_get_dpm_freq_range(struct smu_context *smu, enum smu_clk_type clk_type, uint32_t *min, uint32_t *max); -int smu_set_soft_freq_range(struct smu_context *smu, enum smu_clk_type clk_type, +int smu_set_soft_freq_range(struct smu_context *smu, enum pp_clock_type clk_type, uint32_t min, uint32_t max); int smu_set_gfx_power_up_by_imu(struct smu_context *smu); @@ -1666,10 +1780,14 @@ int smu_send_rma_reason(struct smu_context *smu); int smu_reset_sdma(struct smu_context *smu, uint32_t inst_mask); bool smu_reset_sdma_is_supported(struct smu_context *smu); int smu_reset_vcn(struct smu_context *smu, uint32_t inst_mask); +bool smu_reset_vcn_is_supported(struct smu_context *smu); int smu_set_pm_policy(struct smu_context *smu, enum pp_pm_policy p_type, int level); ssize_t smu_get_pm_policy_info(struct smu_context *smu, enum pp_pm_policy p_type, char *sysbuf); #endif + +void smu_feature_cap_set(struct smu_context *smu, enum smu_feature_cap_id fea_id); +bool smu_feature_cap_test(struct smu_context *smu, enum smu_feature_cap_id fea_id); #endif diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu14_driver_if_v14_0_0.h b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu14_driver_if_v14_0_0.h index 1bc30db22f9c..cd44f4254134 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu14_driver_if_v14_0_0.h +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu14_driver_if_v14_0_0.h @@ -106,6 +106,7 @@ typedef struct { #define 
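/*
 * Illustration only: the freshness rule implemented by
 * smu_table_cache_is_valid() above, restated with a plain millisecond clock
 * instead of jiffies.  The structure layout and now_ms() helper are
 * simplified stand-ins, not the driver's types.
 */
#define _POSIX_C_SOURCE 199309L
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <stdlib.h>
#include <time.h>

struct table_cache {
        void *buffer;
        size_t size;
        uint32_t interval_ms;     /* how long a cached copy stays fresh */
        uint64_t last_update_ms;  /* 0 means "never filled" */
};

static uint64_t now_ms(void)
{
        struct timespec ts;

        clock_gettime(CLOCK_MONOTONIC, &ts);
        return (uint64_t)ts.tv_sec * 1000 + ts.tv_nsec / 1000000;
}

static bool cache_is_valid(const struct table_cache *c)
{
        /* Unallocated, never filled, or configured with no interval: always stale. */
        if (!c->buffer || !c->size || !c->interval_ms || !c->last_update_ms)
                return false;
        return now_ms() <= c->last_update_ms + c->interval_ms;
}

static int cache_init(struct table_cache *c, size_t size, uint32_t interval_ms)
{
        c->buffer = calloc(1, size);
        if (!c->buffer)
                return -1;
        c->size = size;
        c->interval_ms = interval_ms;
        c->last_update_ms = 0;
        return 0;
}

int main(void)
{
        struct table_cache c;
        int valid;

        if (cache_init(&c, 256, 50))    /* 256-byte table, 50 ms freshness window */
                return 1;
        c.last_update_ms = now_ms();    /* pretend the table was just refreshed */
        valid = cache_is_valid(&c);
        free(c.buffer);
        return valid ? 0 : 1;
}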
NUM_FCLK_DPM_LEVELS 8 #define NUM_MEM_PSTATE_LEVELS 4 +#define ISP_ALL_TILES_MASK 0x7FF typedef struct { uint32_t UClk; diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_12_pmfw.h b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_12_pmfw.h index d7505cfc433a..bf6aa9620911 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_12_pmfw.h +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_12_pmfw.h @@ -86,8 +86,10 @@ typedef enum { /*36*/ FEATURE_PIT = 36, /*37*/ FEATURE_DVO = 37, /*38*/ FEATURE_XVMINORPSM_CLKSTOP_DS = 38, +/*39*/ FEATURE_GLOBAL_DPM = 39, +/*40*/ FEATURE_NODE_POWER_MANAGER = 40, -/*39*/ NUM_FEATURES = 39 +/*41*/ NUM_FEATURES = 41 } FEATURE_LIST_e; //enum for MPIO PCIe gen speed msgs @@ -133,7 +135,63 @@ typedef enum { GFX_DVM_MARGIN_COUNT } GFX_DVM_MARGIN_e; -#define SMU_METRICS_TABLE_VERSION 0x12 +typedef enum{ + SYSTEM_TEMP_UBB_FPGA, + SYSTEM_TEMP_UBB_FRONT, + SYSTEM_TEMP_UBB_BACK, + SYSTEM_TEMP_UBB_OAM7, + SYSTEM_TEMP_UBB_IBC, + SYSTEM_TEMP_UBB_UFPGA, + SYSTEM_TEMP_UBB_OAM1, + SYSTEM_TEMP_OAM_0_1_HSC, + SYSTEM_TEMP_OAM_2_3_HSC, + SYSTEM_TEMP_OAM_4_5_HSC, + SYSTEM_TEMP_OAM_6_7_HSC, + SYSTEM_TEMP_UBB_FPGA_0V72_VR, + SYSTEM_TEMP_UBB_FPGA_3V3_VR, + SYSTEM_TEMP_RETIMER_0_1_2_3_1V2_VR, + SYSTEM_TEMP_RETIMER_4_5_6_7_1V2_VR, + SYSTEM_TEMP_RETIMER_0_1_0V9_VR, + SYSTEM_TEMP_RETIMER_4_5_0V9_VR, + SYSTEM_TEMP_RETIMER_2_3_0V9_VR, + SYSTEM_TEMP_RETIMER_6_7_0V9_VR, + SYSTEM_TEMP_OAM_0_1_2_3_3V3_VR, + SYSTEM_TEMP_OAM_4_5_6_7_3V3_VR, + SYSTEM_TEMP_IBC_HSC, + SYSTEM_TEMP_IBC, + SYSTEM_TEMP_MAX_ENTRIES = 32 +} SYSTEM_TEMP_e; + +typedef enum{ + NODE_TEMP_RETIMER, + NODE_TEMP_IBC_TEMP, + NODE_TEMP_IBC_2_TEMP, + NODE_TEMP_VDD18_VR_TEMP, + NODE_TEMP_04_HBM_B_VR_TEMP, + NODE_TEMP_04_HBM_D_VR_TEMP, + NODE_TEMP_MAX_TEMP_ENTRIES = 12 +} NODE_TEMP_e; + +typedef enum { + SVI_VDDCR_VDD0_TEMP, + SVI_VDDCR_VDD1_TEMP, + SVI_VDDCR_VDD2_TEMP, + SVI_VDDCR_VDD3_TEMP, + SVI_VDDCR_SOC_A_TEMP, + SVI_VDDCR_SOC_C_TEMP, + SVI_VDDCR_SOCIO_A_TEMP, + SVI_VDDCR_SOCIO_C_TEMP, + SVI_VDD_085_HBM_TEMP, + SVI_VDDCR_11_HBM_B_TEMP, + SVI_VDDCR_11_HBM_D_TEMP, + SVI_VDD_USR_TEMP, + SVI_VDDIO_11_E32_TEMP, + SVI_MAX_TEMP_ENTRIES, // 13 +} SVI_TEMP_e; + +#define SMU_METRICS_TABLE_VERSION 0x14 + +#define SMU_SYSTEM_METRICS_TABLE_VERSION 0x1 typedef struct __attribute__((packed, aligned(4))) { uint64_t AccumulationCounter; @@ -229,11 +287,32 @@ typedef struct __attribute__((packed, aligned(4))) { uint64_t GfxclkBelowHostLimitThmAcc[8]; uint64_t GfxclkBelowHostLimitTotalAcc[8]; uint64_t GfxclkLowUtilizationAcc[8]; + + uint32_t AidTemperature[4]; + uint32_t XcdTemperature[8]; + uint32_t HbmTemperature[8]; } MetricsTable_t; #define SMU_VF_METRICS_TABLE_MASK (1 << 31) #define SMU_VF_METRICS_TABLE_VERSION (0x6 | SMU_VF_METRICS_TABLE_MASK) +#pragma pack(push, 4) +typedef struct { + uint64_t AccumulationCounter; // Last update timestamp + uint16_t LabelVersion; // Defaults to 0. + uint16_t NodeIdentifier; // Unique identifier to each node on system. 
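/*
 * Illustration only: how a consumer might walk the temperature arrays declared
 * just below (SystemTemperatures[]/NodeTemperatures[]), where unused slots
 * carry the 0xFFFF sentinel (-1 when read as signed 16-bit).  The packed table
 * here is a made-up subset, not the real SystemMetricsTable_t.
 */
#include <stdint.h>
#include <stdio.h>

#define TEMP_UNUSED ((int16_t)0xFFFF)   /* -1: slot not populated by firmware */

#pragma pack(push, 4)
struct mini_metrics {
        uint64_t accumulation_counter;
        int16_t system_temps[8];        /* degrees Celsius, signed */
};
#pragma pack(pop)

static void print_valid_temps(const struct mini_metrics *m)
{
        for (unsigned int i = 0; i < 8; i++) {
                if (m->system_temps[i] == TEMP_UNUSED)
                        continue;       /* skip sensors this board does not expose */
                printf("sensor %u: %d C\n", i, m->system_temps[i]);
        }
}

int main(void)
{
        struct mini_metrics m = {
                .accumulation_counter = 1,
                .system_temps = { 35, 41, TEMP_UNUSED, 38,
                                  TEMP_UNUSED, TEMP_UNUSED, 44, 40 },
        };

        print_valid_temps(&m);
        return 0;
}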
+ int16_t SystemTemperatures[SYSTEM_TEMP_MAX_ENTRIES]; // Signed integer temperature value in Celsius, unused fields are set to 0xFFFF + int16_t NodeTemperatures[NODE_TEMP_MAX_TEMP_ENTRIES]; // Signed integer temperature value in Celsius, unused fields are set to 0xFFFF + int16_t VrTemperatures[SVI_MAX_TEMP_ENTRIES]; // Signed integer temperature value in Celsius + int16_t spare[7]; + + //NPM: NODE POWER MANAGEMENT + uint32_t NodePowerLimit; + uint32_t NodePower; + uint32_t GlobalPPTResidencyAcc; +} SystemMetricsTable_t; +#pragma pack(pop) + typedef struct __attribute__((packed, aligned(4))) { uint32_t AccumulationCounter; uint32_t InstGfxclk_TargFreq; @@ -275,6 +354,19 @@ typedef struct { //PSNs uint64_t PublicSerialNumber_AID[4]; uint64_t PublicSerialNumber_XCD[8]; + + //XGMI + uint32_t MaxXgmiWidth; + uint32_t MaxXgmiBitrate; + + // Telemetry + uint32_t InputTelemetryVoltageInmV; + + // General info + uint32_t pldmVersion[2]; + + //Node Power Limit + uint32_t MaxNodePowerLimit; } StaticMetricsTable_t; #pragma pack(pop) diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_12_ppsmc.h b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_12_ppsmc.h index e1f490b6ce64..4b066c42e0ec 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_12_ppsmc.h +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_12_ppsmc.h @@ -116,7 +116,12 @@ #define PPSMC_MSG_DumpErrorRecord 0x57 #define PPSMC_MSG_EraseRasTable 0x58 #define PPSMC_MSG_GetStaticMetricsTable 0x59 -#define PPSMC_Message_Count 0x5A +#define PPSMC_MSG_ResetVfArbitersByIndex 0x5A +#define PPSMC_MSG_GetBadPageSeverity 0x5B +#define PPSMC_MSG_GetSystemMetricsTable 0x5C +#define PPSMC_MSG_GetSystemMetricsVersion 0x5D +#define PPSMC_MSG_ResetVCN 0x5E +#define PPSMC_Message_Count 0x5F //PPSMC Reset Types for driver msg argument #define PPSMC_RESET_TYPE_DRIVER_MODE_1_RESET 0x1 diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_6_ppsmc.h b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_6_ppsmc.h index 41f268313613..63a088ef7169 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_6_ppsmc.h +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_6_ppsmc.h @@ -94,9 +94,9 @@ #define PPSMC_MSG_RmaDueToBadPageThreshold 0x43 #define PPSMC_MSG_SetThrottlingPolicy 0x44 #define PPSMC_MSG_ResetSDMA 0x4D -#define PPSMC_MSG_ResetVCN 0x4E #define PPSMC_MSG_GetStaticMetricsTable 0x59 -#define PPSMC_Message_Count 0x5A +#define PPSMC_MSG_ResetVCN 0x5B +#define PPSMC_Message_Count 0x5C //PPSMC Reset Types for driver msg argument #define PPSMC_RESET_TYPE_DRIVER_MODE_1_RESET 0x1 diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h index eefdaa0b5df6..2256c77da636 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h @@ -278,7 +278,8 @@ __SMU_DUMMY_MAP(MALLPowerState), \ __SMU_DUMMY_MAP(ResetSDMA), \ __SMU_DUMMY_MAP(ResetVCN), \ - __SMU_DUMMY_MAP(GetStaticMetricsTable), + __SMU_DUMMY_MAP(GetStaticMetricsTable), \ + __SMU_DUMMY_MAP(GetSystemMetricsTable), #undef __SMU_DUMMY_MAP #define __SMU_DUMMY_MAP(type) SMU_MSG_##type @@ -305,6 +306,8 @@ enum smu_clk_type { SMU_MCLK, SMU_PCIE, SMU_LCLK, + SMU_ISPICLK, + SMU_ISPXCLK, SMU_OD_CCLK, SMU_OD_SCLK, SMU_OD_MCLK, @@ -467,6 +470,7 @@ enum smu_feature_mask { /* Message category flags */ #define SMU_MSG_VF_FLAG (1U << 0) #define SMU_MSG_RAS_PRI (1U << 1) +#define SMU_MSG_NO_PRECHECK (1U << 2) /* Firmware capability flags */ #define SMU_FW_CAP_RAS_PRI (1U << 0) diff --git 
a/drivers/gpu/drm/amd/pm/inc/smu_v13_0_0_pptable.h b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0_0_pptable.h index 251ed011b3b0..251ed011b3b0 100644 --- a/drivers/gpu/drm/amd/pm/inc/smu_v13_0_0_pptable.h +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0_0_pptable.h diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c index 9ad46f545d15..4fff78da81ff 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c @@ -1745,10 +1745,10 @@ static int arcturus_i2c_control_init(struct smu_context *smu) snprintf(control->name, sizeof(control->name), "AMDGPU SMU %d", i); i2c_set_adapdata(control, smu_i2c); - res = i2c_add_adapter(control); + res = devm_i2c_add_adapter(adev->dev, control); if (res) { DRM_ERROR("Failed to register hw i2c, err: %d\n", res); - goto Out_err; + return res; } } @@ -1756,27 +1756,12 @@ static int arcturus_i2c_control_init(struct smu_context *smu) adev->pm.fru_eeprom_i2c_bus = &adev->pm.smu_i2c[1].adapter; return 0; -Out_err: - for ( ; i >= 0; i--) { - struct amdgpu_smu_i2c_bus *smu_i2c = &adev->pm.smu_i2c[i]; - struct i2c_adapter *control = &smu_i2c->adapter; - - i2c_del_adapter(control); - } - return res; } static void arcturus_i2c_control_fini(struct smu_context *smu) { struct amdgpu_device *adev = smu->adev; - int i; - - for (i = 0; i < MAX_SMU_I2C_BUSES; i++) { - struct amdgpu_smu_i2c_bus *smu_i2c = &adev->pm.smu_i2c[i]; - struct i2c_adapter *control = &smu_i2c->adapter; - i2c_del_adapter(control); - } adev->pm.ras_eeprom_i2c_bus = NULL; adev->pm.fru_eeprom_i2c_bus = NULL; } @@ -1897,7 +1882,7 @@ static ssize_t arcturus_get_gpu_metrics(struct smu_context *smu, ret = smu_cmn_get_metrics_table(smu, &metrics, - true); + false); if (ret) return ret; diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c index 7fad5dfb39c4..0028f10ead42 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c @@ -2444,7 +2444,8 @@ static int navi10_update_pcie_parameters(struct smu_context *smu, struct smu_11_0_dpm_context *dpm_context = smu->smu_dpm.dpm_context; PPTable_t *pptable = smu->smu_table.driver_pptable; uint32_t smu_pcie_arg; - int ret, i; + int ret = 0; + int i; /* lclk dpm table setup */ for (i = 0; i < MAX_PCIE_CONF; i++) { @@ -2453,25 +2454,27 @@ static int navi10_update_pcie_parameters(struct smu_context *smu, } for (i = 0; i < NUM_LINK_LEVELS; i++) { - smu_pcie_arg = (i << 16) | - ((pptable->PcieGenSpeed[i] <= pcie_gen_cap) ? (pptable->PcieGenSpeed[i] << 8) : - (pcie_gen_cap << 8)) | ((pptable->PcieLaneCount[i] <= pcie_width_cap) ? - pptable->PcieLaneCount[i] : pcie_width_cap); - ret = smu_cmn_send_smc_msg_with_param(smu, - SMU_MSG_OverridePcieParameters, - smu_pcie_arg, - NULL); - - if (ret) - return ret; - - if (pptable->PcieGenSpeed[i] > pcie_gen_cap) - dpm_context->dpm_tables.pcie_table.pcie_gen[i] = pcie_gen_cap; - if (pptable->PcieLaneCount[i] > pcie_width_cap) - dpm_context->dpm_tables.pcie_table.pcie_lane[i] = pcie_width_cap; + if (pptable->PcieGenSpeed[i] > pcie_gen_cap || + pptable->PcieLaneCount[i] > pcie_width_cap) { + dpm_context->dpm_tables.pcie_table.pcie_gen[i] = + pptable->PcieGenSpeed[i] > pcie_gen_cap ? + pcie_gen_cap : pptable->PcieGenSpeed[i]; + dpm_context->dpm_tables.pcie_table.pcie_lane[i] = + pptable->PcieLaneCount[i] > pcie_width_cap ? 
+ pcie_width_cap : pptable->PcieLaneCount[i]; + smu_pcie_arg = i << 16; + smu_pcie_arg |= pcie_gen_cap << 8; + smu_pcie_arg |= pcie_width_cap; + ret = smu_cmn_send_smc_msg_with_param(smu, + SMU_MSG_OverridePcieParameters, + smu_pcie_arg, + NULL); + if (ret) + break; + } } - return 0; + return ret; } static inline void navi10_dump_od_table(struct smu_context *smu, @@ -3142,10 +3145,10 @@ static int navi10_i2c_control_init(struct smu_context *smu) control->quirks = &navi10_i2c_control_quirks; i2c_set_adapdata(control, smu_i2c); - res = i2c_add_adapter(control); + res = devm_i2c_add_adapter(adev->dev, control); if (res) { DRM_ERROR("Failed to register hw i2c, err: %d\n", res); - goto Out_err; + return res; } } @@ -3153,27 +3156,12 @@ static int navi10_i2c_control_init(struct smu_context *smu) adev->pm.fru_eeprom_i2c_bus = &adev->pm.smu_i2c[1].adapter; return 0; -Out_err: - for ( ; i >= 0; i--) { - struct amdgpu_smu_i2c_bus *smu_i2c = &adev->pm.smu_i2c[i]; - struct i2c_adapter *control = &smu_i2c->adapter; - - i2c_del_adapter(control); - } - return res; } static void navi10_i2c_control_fini(struct smu_context *smu) { struct amdgpu_device *adev = smu->adev; - int i; - for (i = 0; i < MAX_SMU_I2C_BUSES; i++) { - struct amdgpu_smu_i2c_bus *smu_i2c = &adev->pm.smu_i2c[i]; - struct i2c_adapter *control = &smu_i2c->adapter; - - i2c_del_adapter(control); - } adev->pm.ras_eeprom_i2c_bus = NULL; adev->pm.fru_eeprom_i2c_bus = NULL; } diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c index 115e3fa456bc..31c2c0386b1f 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c @@ -2145,7 +2145,8 @@ static int sienna_cichlid_update_pcie_parameters(struct smu_context *smu, uint8_t min_gen_speed, max_gen_speed; uint8_t min_lane_width, max_lane_width; uint32_t smu_pcie_arg; - int ret, i; + int ret = 0; + int i; GET_PPTABLE_MEMBER(PcieGenSpeed, &table_member1); GET_PPTABLE_MEMBER(PcieLaneCount, &table_member2); @@ -2170,19 +2171,22 @@ static int sienna_cichlid_update_pcie_parameters(struct smu_context *smu, pcie_table->pcie_lane[1] = max_lane_width; for (i = 0; i < NUM_LINK_LEVELS; i++) { - smu_pcie_arg = (i << 16 | + if (!(smu->adev->pm.pp_feature & PP_PCIE_DPM_MASK) || + table_member1[i] > pcie_gen_cap || table_member2[i] > pcie_width_cap) { + smu_pcie_arg = (i << 16 | pcie_table->pcie_gen[i] << 8 | pcie_table->pcie_lane[i]); - ret = smu_cmn_send_smc_msg_with_param(smu, - SMU_MSG_OverridePcieParameters, - smu_pcie_arg, - NULL); - if (ret) - return ret; + ret = smu_cmn_send_smc_msg_with_param(smu, + SMU_MSG_OverridePcieParameters, + smu_pcie_arg, + NULL); + if (ret) + break; + } } - return 0; + return ret; } static int sienna_cichlid_get_dpm_ultimate_freq(struct smu_context *smu, @@ -2644,10 +2648,10 @@ static int sienna_cichlid_i2c_control_init(struct smu_context *smu) control->quirks = &sienna_cichlid_i2c_control_quirks; i2c_set_adapdata(control, smu_i2c); - res = i2c_add_adapter(control); + res = devm_i2c_add_adapter(adev->dev, control); if (res) { DRM_ERROR("Failed to register hw i2c, err: %d\n", res); - goto Out_err; + return res; } } /* assign the buses used for the FRU EEPROM and RAS EEPROM */ @@ -2656,27 +2660,12 @@ static int sienna_cichlid_i2c_control_init(struct smu_context *smu) adev->pm.fru_eeprom_i2c_bus = &adev->pm.smu_i2c[0].adapter; return 0; -Out_err: - for ( ; i >= 0; i--) { - struct amdgpu_smu_i2c_bus *smu_i2c = &adev->pm.smu_i2c[i]; - struct 
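/*
 * Illustration only: the clamp-and-override flow used in the PCIe hunks above --
 * for each link level, send an override message only when the pptable value
 * exceeds the platform cap, clamping to the smaller of the two.  send_override()
 * is a hypothetical stand-in for SMU_MSG_OverridePcieParameters.
 */
#include <stdint.h>
#include <stdio.h>

#define NUM_LEVELS 2

static int send_override(uint32_t arg)
{
        printf("override arg: 0x%06x\n", arg);
        return 0;
}

static int clamp_pcie_levels(uint8_t gen[NUM_LEVELS], uint8_t lane[NUM_LEVELS],
                             uint8_t gen_cap, uint8_t lane_cap)
{
        int ret = 0;

        for (unsigned int i = 0; i < NUM_LEVELS; i++) {
                if (gen[i] <= gen_cap && lane[i] <= lane_cap)
                        continue;       /* level already within caps: no message */

                gen[i] = gen[i] > gen_cap ? gen_cap : gen[i];
                lane[i] = lane[i] > lane_cap ? lane_cap : lane[i];

                /* level in [23:16], gen in [15:8], lane code in [7:0], as above */
                ret = send_override((uint32_t)i << 16 | gen[i] << 8 | lane[i]);
                if (ret)
                        break;
        }
        return ret;
}

int main(void)
{
        uint8_t gen[NUM_LEVELS] = { 1, 4 };   /* e.g. boot level Gen2, top level Gen5 */
        uint8_t lane[NUM_LEVELS] = { 6, 6 };  /* lane-count codes, not raw lane counts */

        return clamp_pcie_levels(gen, lane, 3 /* Gen4 cap */, 6);
}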
i2c_adapter *control = &smu_i2c->adapter; - - i2c_del_adapter(control); - } - return res; } static void sienna_cichlid_i2c_control_fini(struct smu_context *smu) { struct amdgpu_device *adev = smu->adev; - int i; - for (i = 0; i < MAX_SMU_I2C_BUSES; i++) { - struct amdgpu_smu_i2c_bus *smu_i2c = &adev->pm.smu_i2c[i]; - struct i2c_adapter *control = &smu_i2c->adapter; - - i2c_del_adapter(control); - } adev->pm.ras_eeprom_i2c_bus = NULL; adev->pm.fru_eeprom_i2c_bus = NULL; } diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c index a55ea76d7399..2c9869feba61 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c @@ -666,7 +666,6 @@ static int vangogh_print_clk_levels(struct smu_context *smu, { DpmClocks_t *clk_table = smu->smu_table.clocks_table; SmuMetrics_t metrics; - struct smu_dpm_context *smu_dpm_ctx = &(smu->smu_dpm); int i, idx, size = 0, ret = 0; uint32_t cur_value = 0, value = 0, count = 0; bool cur_value_match_level = false; @@ -682,31 +681,25 @@ static int vangogh_print_clk_levels(struct smu_context *smu, switch (clk_type) { case SMU_OD_SCLK: - if (smu_dpm_ctx->dpm_level == AMD_DPM_FORCED_LEVEL_MANUAL) { - size += sysfs_emit_at(buf, size, "%s:\n", "OD_SCLK"); - size += sysfs_emit_at(buf, size, "0: %10uMhz\n", - (smu->gfx_actual_hard_min_freq > 0) ? smu->gfx_actual_hard_min_freq : smu->gfx_default_hard_min_freq); - size += sysfs_emit_at(buf, size, "1: %10uMhz\n", - (smu->gfx_actual_soft_max_freq > 0) ? smu->gfx_actual_soft_max_freq : smu->gfx_default_soft_max_freq); - } + size += sysfs_emit_at(buf, size, "%s:\n", "OD_SCLK"); + size += sysfs_emit_at(buf, size, "0: %10uMhz\n", + (smu->gfx_actual_hard_min_freq > 0) ? smu->gfx_actual_hard_min_freq : smu->gfx_default_hard_min_freq); + size += sysfs_emit_at(buf, size, "1: %10uMhz\n", + (smu->gfx_actual_soft_max_freq > 0) ? smu->gfx_actual_soft_max_freq : smu->gfx_default_soft_max_freq); break; case SMU_OD_CCLK: - if (smu_dpm_ctx->dpm_level == AMD_DPM_FORCED_LEVEL_MANUAL) { - size += sysfs_emit_at(buf, size, "CCLK_RANGE in Core%d:\n", smu->cpu_core_id_select); - size += sysfs_emit_at(buf, size, "0: %10uMhz\n", - (smu->cpu_actual_soft_min_freq > 0) ? smu->cpu_actual_soft_min_freq : smu->cpu_default_soft_min_freq); - size += sysfs_emit_at(buf, size, "1: %10uMhz\n", - (smu->cpu_actual_soft_max_freq > 0) ? smu->cpu_actual_soft_max_freq : smu->cpu_default_soft_max_freq); - } + size += sysfs_emit_at(buf, size, "CCLK_RANGE in Core%d:\n", smu->cpu_core_id_select); + size += sysfs_emit_at(buf, size, "0: %10uMhz\n", + (smu->cpu_actual_soft_min_freq > 0) ? smu->cpu_actual_soft_min_freq : smu->cpu_default_soft_min_freq); + size += sysfs_emit_at(buf, size, "1: %10uMhz\n", + (smu->cpu_actual_soft_max_freq > 0) ? 
smu->cpu_actual_soft_max_freq : smu->cpu_default_soft_max_freq); break; case SMU_OD_RANGE: - if (smu_dpm_ctx->dpm_level == AMD_DPM_FORCED_LEVEL_MANUAL) { - size += sysfs_emit_at(buf, size, "%s:\n", "OD_RANGE"); - size += sysfs_emit_at(buf, size, "SCLK: %7uMhz %10uMhz\n", - smu->gfx_default_hard_min_freq, smu->gfx_default_soft_max_freq); - size += sysfs_emit_at(buf, size, "CCLK: %7uMhz %10uMhz\n", - smu->cpu_default_soft_min_freq, smu->cpu_default_soft_max_freq); - } + size += sysfs_emit_at(buf, size, "%s:\n", "OD_RANGE"); + size += sysfs_emit_at(buf, size, "SCLK: %7uMhz %10uMhz\n", + smu->gfx_default_hard_min_freq, smu->gfx_default_soft_max_freq); + size += sysfs_emit_at(buf, size, "CCLK: %7uMhz %10uMhz\n", + smu->cpu_default_soft_min_freq, smu->cpu_default_soft_max_freq); break; case SMU_SOCCLK: /* the level 3 ~ 6 of socclk use the same frequency for vangogh */ diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu12/renoir_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu12/renoir_ppt.c index 9481f897432d..3baf20f4c373 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu12/renoir_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu12/renoir_ppt.c @@ -470,7 +470,7 @@ static int renoir_od_edit_dpm_table(struct smu_context *smu, static int renoir_set_fine_grain_gfx_freq_parameters(struct smu_context *smu) { uint32_t min = 0, max = 0; - uint32_t ret = 0; + int ret = 0; ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_GetMinGfxclkFrequency, @@ -497,7 +497,6 @@ static int renoir_print_clk_levels(struct smu_context *smu, int i, idx, size = 0, ret = 0; uint32_t cur_value = 0, value = 0, count = 0, min = 0, max = 0; SmuMetrics_t metrics; - struct smu_dpm_context *smu_dpm_ctx = &(smu->smu_dpm); bool cur_value_match_level = false; memset(&metrics, 0, sizeof(metrics)); @@ -510,28 +509,24 @@ static int renoir_print_clk_levels(struct smu_context *smu, switch (clk_type) { case SMU_OD_RANGE: - if (smu_dpm_ctx->dpm_level == AMD_DPM_FORCED_LEVEL_MANUAL) { - ret = smu_cmn_send_smc_msg_with_param(smu, - SMU_MSG_GetMinGfxclkFrequency, - 0, &min); - if (ret) - return ret; - ret = smu_cmn_send_smc_msg_with_param(smu, - SMU_MSG_GetMaxGfxclkFrequency, - 0, &max); - if (ret) - return ret; - size += sysfs_emit_at(buf, size, "OD_RANGE\nSCLK: %10uMhz %10uMhz\n", min, max); - } + ret = smu_cmn_send_smc_msg_with_param(smu, + SMU_MSG_GetMinGfxclkFrequency, + 0, &min); + if (ret) + return ret; + ret = smu_cmn_send_smc_msg_with_param(smu, + SMU_MSG_GetMaxGfxclkFrequency, + 0, &max); + if (ret) + return ret; + size += sysfs_emit_at(buf, size, "OD_RANGE\nSCLK: %10uMhz %10uMhz\n", min, max); break; case SMU_OD_SCLK: - if (smu_dpm_ctx->dpm_level == AMD_DPM_FORCED_LEVEL_MANUAL) { - min = (smu->gfx_actual_hard_min_freq > 0) ? smu->gfx_actual_hard_min_freq : smu->gfx_default_hard_min_freq; - max = (smu->gfx_actual_soft_max_freq > 0) ? smu->gfx_actual_soft_max_freq : smu->gfx_default_soft_max_freq; - size += sysfs_emit_at(buf, size, "OD_SCLK\n"); - size += sysfs_emit_at(buf, size, "0:%10uMhz\n", min); - size += sysfs_emit_at(buf, size, "1:%10uMhz\n", max); - } + min = (smu->gfx_actual_hard_min_freq > 0) ? smu->gfx_actual_hard_min_freq : smu->gfx_default_hard_min_freq; + max = (smu->gfx_actual_soft_max_freq > 0) ? 
smu->gfx_actual_soft_max_freq : smu->gfx_default_soft_max_freq; + size += sysfs_emit_at(buf, size, "OD_SCLK\n"); + size += sysfs_emit_at(buf, size, "0:%10uMhz\n", min); + size += sysfs_emit_at(buf, size, "1:%10uMhz\n", max); break; case SMU_GFXCLK: case SMU_SCLK: diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c index 6de653d2ed62..18d5d0704509 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c @@ -342,6 +342,61 @@ static int aldebaran_get_allowed_feature_mask(struct smu_context *smu, return 0; } +static int aldebaran_get_dpm_ultimate_freq(struct smu_context *smu, + enum smu_clk_type clk_type, + uint32_t *min, uint32_t *max) +{ + struct smu_13_0_dpm_context *dpm_context = smu->smu_dpm.dpm_context; + struct smu_13_0_dpm_table *dpm_table; + uint32_t min_clk, max_clk; + + if (amdgpu_sriov_vf(smu->adev)) { + switch (clk_type) { + case SMU_MCLK: + case SMU_UCLK: + dpm_table = &dpm_context->dpm_tables.uclk_table; + break; + case SMU_GFXCLK: + case SMU_SCLK: + dpm_table = &dpm_context->dpm_tables.gfx_table; + break; + case SMU_SOCCLK: + dpm_table = &dpm_context->dpm_tables.soc_table; + break; + case SMU_FCLK: + dpm_table = &dpm_context->dpm_tables.fclk_table; + break; + case SMU_VCLK: + dpm_table = &dpm_context->dpm_tables.vclk_table; + break; + case SMU_DCLK: + dpm_table = &dpm_context->dpm_tables.dclk_table; + break; + default: + return -EINVAL; + } + + min_clk = dpm_table->min; + max_clk = dpm_table->max; + + if (min) { + if (!min_clk) + return -ENODATA; + *min = min_clk; + } + if (max) { + if (!max_clk) + return -ENODATA; + *max = max_clk; + } + + } else { + return smu_v13_0_get_dpm_ultimate_freq(smu, clk_type, min, max); + } + + return 0; +} + static int aldebaran_set_default_dpm_table(struct smu_context *smu) { struct smu_13_0_dpm_context *dpm_context = smu->smu_dpm.dpm_context; @@ -1586,33 +1641,22 @@ static int aldebaran_i2c_control_init(struct smu_context *smu) control->quirks = &aldebaran_i2c_control_quirks; i2c_set_adapdata(control, smu_i2c); - res = i2c_add_adapter(control); + res = devm_i2c_add_adapter(adev->dev, control); if (res) { DRM_ERROR("Failed to register hw i2c, err: %d\n", res); - goto Out_err; + return res; } adev->pm.ras_eeprom_i2c_bus = &adev->pm.smu_i2c[0].adapter; adev->pm.fru_eeprom_i2c_bus = &adev->pm.smu_i2c[0].adapter; return 0; -Out_err: - i2c_del_adapter(control); - - return res; } static void aldebaran_i2c_control_fini(struct smu_context *smu) { struct amdgpu_device *adev = smu->adev; - int i; - for (i = 0; i < MAX_SMU_I2C_BUSES; i++) { - struct amdgpu_smu_i2c_bus *smu_i2c = &adev->pm.smu_i2c[i]; - struct i2c_adapter *control = &smu_i2c->adapter; - - i2c_del_adapter(control); - } adev->pm.ras_eeprom_i2c_bus = NULL; adev->pm.fru_eeprom_i2c_bus = NULL; } @@ -1726,7 +1770,7 @@ static ssize_t aldebaran_get_gpu_metrics(struct smu_context *smu, ret = smu_cmn_get_metrics_table(smu, &metrics, - true); + false); if (ret) return ret; @@ -2081,7 +2125,7 @@ static const struct pptable_funcs aldebaran_ppt_funcs = { .set_azalia_d3_pme = smu_v13_0_set_azalia_d3_pme, .get_max_sustainable_clocks_by_dc = smu_v13_0_get_max_sustainable_clocks_by_dc, .get_bamaco_support = aldebaran_get_bamaco_support, - .get_dpm_ultimate_freq = smu_v13_0_get_dpm_ultimate_freq, + .get_dpm_ultimate_freq = aldebaran_get_dpm_ultimate_freq, .set_soft_freq_limited_range = aldebaran_set_soft_freq_limited_range, .od_edit_dpm_table = aldebaran_usr_edit_dpm_table, 
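/*
 * Illustration only: the SR-IOV fallback pattern from
 * aldebaran_get_dpm_ultimate_freq() above -- answer min/max queries from a
 * cached dpm table and report "no data" when a bound was never populated.
 * The table layout and values below are simplified stand-ins.
 */
#include <errno.h>
#include <stdint.h>
#include <stdio.h>

struct dpm_table { uint32_t min; uint32_t max; };

static int get_ultimate_freq(const struct dpm_table *t,
                             uint32_t *min, uint32_t *max)
{
        if (min) {
                if (!t->min)
                        return -ENODATA;   /* cached table never filled a minimum */
                *min = t->min;
        }
        if (max) {
                if (!t->max)
                        return -ENODATA;
                *max = t->max;
        }
        return 0;
}

int main(void)
{
        struct dpm_table uclk = { .min = 500, .max = 1600 };  /* MHz, made-up values */
        uint32_t lo = 0, hi = 0;

        if (!get_ultimate_freq(&uclk, &lo, &hi))
                printf("uclk range: %u-%u MHz\n", lo, hi);
        return 0;
}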
.set_df_cstate = aldebaran_set_df_cstate, diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c index a7167668d189..a89075e25717 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c @@ -58,6 +58,7 @@ MODULE_FIRMWARE("amdgpu/aldebaran_smc.bin"); MODULE_FIRMWARE("amdgpu/smu_13_0_0.bin"); +MODULE_FIRMWARE("amdgpu/smu_13_0_0_kicker.bin"); MODULE_FIRMWARE("amdgpu/smu_13_0_7.bin"); MODULE_FIRMWARE("amdgpu/smu_13_0_10.bin"); @@ -92,7 +93,7 @@ const int pmfw_decoded_link_width[7] = {0, 1, 2, 4, 8, 12, 16}; int smu_v13_0_init_microcode(struct smu_context *smu) { struct amdgpu_device *adev = smu->adev; - char ucode_prefix[15]; + char ucode_prefix[30]; int err = 0; const struct smc_firmware_header_v1_0 *hdr; const struct common_firmware_header *header; @@ -103,8 +104,13 @@ int smu_v13_0_init_microcode(struct smu_context *smu) return 0; amdgpu_ucode_ip_version_decode(adev, MP1_HWIP, ucode_prefix, sizeof(ucode_prefix)); - err = amdgpu_ucode_request(adev, &adev->pm.fw, AMDGPU_UCODE_REQUIRED, - "amdgpu/%s.bin", ucode_prefix); + + if (amdgpu_is_kicker_fw(adev)) + err = amdgpu_ucode_request(adev, &adev->pm.fw, AMDGPU_UCODE_REQUIRED, + "amdgpu/%s_kicker.bin", ucode_prefix); + else + err = amdgpu_ucode_request(adev, &adev->pm.fw, AMDGPU_UCODE_REQUIRED, + "amdgpu/%s.bin", ucode_prefix); if (err) goto out; @@ -282,7 +288,8 @@ int smu_v13_0_check_fw_version(struct smu_context *smu) * Considering above, we just leave user a verbal message instead * of halt driver loading. */ - if (if_version != smu->smc_driver_if_version) { + if (smu->smc_driver_if_version != SMU_IGNORE_IF_VERSION && + if_version != smu->smc_driver_if_version) { dev_info(adev->dev, "smu driver if version = 0x%08x, smu fw if version = 0x%08x, " "smu fw program = %d, smu fw version = 0x%08x (%d.%d.%d)\n", smu->smc_driver_if_version, if_version, @@ -2380,7 +2387,8 @@ int smu_v13_0_update_pcie_parameters(struct smu_context *smu, &dpm_context->dpm_tables.pcie_table; int num_of_levels = pcie_table->num_of_link_levels; uint32_t smu_pcie_arg; - int ret, i; + int ret = 0; + int i; if (!num_of_levels) return 0; @@ -2396,30 +2404,38 @@ int smu_v13_0_update_pcie_parameters(struct smu_context *smu, for (i = 0; i < num_of_levels; i++) { pcie_table->pcie_gen[i] = pcie_gen_cap; pcie_table->pcie_lane[i] = pcie_width_cap; + smu_pcie_arg = i << 16; + smu_pcie_arg |= pcie_table->pcie_gen[i] << 8; + smu_pcie_arg |= pcie_table->pcie_lane[i]; + + ret = smu_cmn_send_smc_msg_with_param(smu, + SMU_MSG_OverridePcieParameters, + smu_pcie_arg, + NULL); + if (ret) + break; } } else { for (i = 0; i < num_of_levels; i++) { - if (pcie_table->pcie_gen[i] > pcie_gen_cap) + if (pcie_table->pcie_gen[i] > pcie_gen_cap || + pcie_table->pcie_lane[i] > pcie_width_cap) { pcie_table->pcie_gen[i] = pcie_gen_cap; - if (pcie_table->pcie_lane[i] > pcie_width_cap) pcie_table->pcie_lane[i] = pcie_width_cap; + smu_pcie_arg = i << 16; + smu_pcie_arg |= pcie_table->pcie_gen[i] << 8; + smu_pcie_arg |= pcie_table->pcie_lane[i]; + + ret = smu_cmn_send_smc_msg_with_param(smu, + SMU_MSG_OverridePcieParameters, + smu_pcie_arg, + NULL); + if (ret) + break; + } } } - for (i = 0; i < num_of_levels; i++) { - smu_pcie_arg = i << 16; - smu_pcie_arg |= pcie_table->pcie_gen[i] << 8; - smu_pcie_arg |= pcie_table->pcie_lane[i]; - - ret = smu_cmn_send_smc_msg_with_param(smu, - SMU_MSG_OverridePcieParameters, - smu_pcie_arg, - NULL); - if (ret) - return ret; - } - - return 0; + return ret; } int 
smu_v13_0_disable_pmfw_state(struct smu_context *smu) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c index 5a9711e8cf68..c1062e5f0393 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c @@ -572,8 +572,6 @@ static int smu_v13_0_0_set_default_dpm_table(struct smu_context *smu) PPTable_t *pptable = table_context->driver_pptable; SkuTable_t *skutable = &pptable->SkuTable; struct smu_13_0_dpm_table *dpm_table; - struct smu_13_0_pcie_table *pcie_table; - uint32_t link_level; int ret = 0; /* socclk dpm table setup */ @@ -689,24 +687,6 @@ static int smu_v13_0_0_set_default_dpm_table(struct smu_context *smu) dpm_table->max = dpm_table->dpm_levels[0].value; } - /* lclk dpm table setup */ - pcie_table = &dpm_context->dpm_tables.pcie_table; - pcie_table->num_of_link_levels = 0; - for (link_level = 0; link_level < NUM_LINK_LEVELS; link_level++) { - if (!skutable->PcieGenSpeed[link_level] && - !skutable->PcieLaneCount[link_level] && - !skutable->LclkFreq[link_level]) - continue; - - pcie_table->pcie_gen[pcie_table->num_of_link_levels] = - skutable->PcieGenSpeed[link_level]; - pcie_table->pcie_lane[pcie_table->num_of_link_levels] = - skutable->PcieLaneCount[link_level]; - pcie_table->clk_freq[pcie_table->num_of_link_levels] = - skutable->LclkFreq[link_level]; - pcie_table->num_of_link_levels++; - } - /* dcefclk dpm table setup */ dpm_table = &dpm_context->dpm_tables.dcef_table; if (smu_cmn_feature_is_enabled(smu, SMU_FEATURE_DPM_DCN_BIT)) { @@ -2845,10 +2825,10 @@ static int smu_v13_0_0_i2c_control_init(struct smu_context *smu) control->quirks = &smu_v13_0_0_i2c_control_quirks; i2c_set_adapdata(control, smu_i2c); - res = i2c_add_adapter(control); + res = devm_i2c_add_adapter(adev->dev, control); if (res) { DRM_ERROR("Failed to register hw i2c, err: %d\n", res); - goto Out_err; + return res; } } @@ -2858,27 +2838,12 @@ static int smu_v13_0_0_i2c_control_init(struct smu_context *smu) adev->pm.fru_eeprom_i2c_bus = &adev->pm.smu_i2c[0].adapter; return 0; -Out_err: - for ( ; i >= 0; i--) { - struct amdgpu_smu_i2c_bus *smu_i2c = &adev->pm.smu_i2c[i]; - struct i2c_adapter *control = &smu_i2c->adapter; - - i2c_del_adapter(control); - } - return res; } static void smu_v13_0_0_i2c_control_fini(struct smu_context *smu) { struct amdgpu_device *adev = smu->adev; - int i; - for (i = 0; i < MAX_SMU_I2C_BUSES; i++) { - struct amdgpu_smu_i2c_bus *smu_i2c = &adev->pm.smu_i2c[i]; - struct i2c_adapter *control = &smu_i2c->adapter; - - i2c_del_adapter(control); - } adev->pm.ras_eeprom_i2c_bus = NULL; adev->pm.fru_eeprom_i2c_bus = NULL; } @@ -3150,6 +3115,90 @@ static int smu_v13_0_0_set_power_limit(struct smu_context *smu, return 0; } +static int smu_v13_0_0_update_pcie_parameters(struct smu_context *smu, + uint8_t pcie_gen_cap, + uint8_t pcie_width_cap) +{ + struct smu_13_0_dpm_context *dpm_context = smu->smu_dpm.dpm_context; + struct smu_13_0_pcie_table *pcie_table = + &dpm_context->dpm_tables.pcie_table; + int num_of_levels; + uint32_t smu_pcie_arg; + uint32_t link_level; + struct smu_table_context *table_context = &smu->smu_table; + PPTable_t *pptable = table_context->driver_pptable; + SkuTable_t *skutable = &pptable->SkuTable; + int ret = 0; + int i; + + pcie_table->num_of_link_levels = 0; + + for (link_level = 0; link_level < NUM_LINK_LEVELS; link_level++) { + if (!skutable->PcieGenSpeed[link_level] && + !skutable->PcieLaneCount[link_level] && + 
!skutable->LclkFreq[link_level]) + continue; + + pcie_table->pcie_gen[pcie_table->num_of_link_levels] = + skutable->PcieGenSpeed[link_level]; + pcie_table->pcie_lane[pcie_table->num_of_link_levels] = + skutable->PcieLaneCount[link_level]; + pcie_table->clk_freq[pcie_table->num_of_link_levels] = + skutable->LclkFreq[link_level]; + pcie_table->num_of_link_levels++; + } + + num_of_levels = pcie_table->num_of_link_levels; + if (!num_of_levels) + return 0; + + if (!(smu->adev->pm.pp_feature & PP_PCIE_DPM_MASK)) { + if (pcie_table->pcie_gen[num_of_levels - 1] < pcie_gen_cap) + pcie_gen_cap = pcie_table->pcie_gen[num_of_levels - 1]; + + if (pcie_table->pcie_lane[num_of_levels - 1] < pcie_width_cap) + pcie_width_cap = pcie_table->pcie_lane[num_of_levels - 1]; + + /* Force all levels to use the same settings */ + for (i = 0; i < num_of_levels; i++) { + pcie_table->pcie_gen[i] = pcie_gen_cap; + pcie_table->pcie_lane[i] = pcie_width_cap; + smu_pcie_arg = i << 16; + smu_pcie_arg |= pcie_table->pcie_gen[i] << 8; + smu_pcie_arg |= pcie_table->pcie_lane[i]; + + ret = smu_cmn_send_smc_msg_with_param(smu, + SMU_MSG_OverridePcieParameters, + smu_pcie_arg, + NULL); + if (ret) + break; + } + } else { + for (i = 0; i < num_of_levels; i++) { + if (pcie_table->pcie_gen[i] > pcie_gen_cap || + pcie_table->pcie_lane[i] > pcie_width_cap) { + pcie_table->pcie_gen[i] = pcie_table->pcie_gen[i] > pcie_gen_cap ? + pcie_gen_cap : pcie_table->pcie_gen[i]; + pcie_table->pcie_lane[i] = pcie_table->pcie_lane[i] > pcie_width_cap ? + pcie_width_cap : pcie_table->pcie_lane[i]; + smu_pcie_arg = i << 16; + smu_pcie_arg |= pcie_table->pcie_gen[i] << 8; + smu_pcie_arg |= pcie_table->pcie_lane[i]; + + ret = smu_cmn_send_smc_msg_with_param(smu, + SMU_MSG_OverridePcieParameters, + smu_pcie_arg, + NULL); + if (ret) + break; + } + } + } + + return ret; +} + static const struct pptable_funcs smu_v13_0_0_ppt_funcs = { .get_allowed_feature_mask = smu_v13_0_0_get_allowed_feature_mask, .set_default_dpm_table = smu_v13_0_0_set_default_dpm_table, @@ -3179,7 +3228,7 @@ static const struct pptable_funcs smu_v13_0_0_ppt_funcs = { .feature_is_enabled = smu_cmn_feature_is_enabled, .print_clk_levels = smu_v13_0_0_print_clk_levels, .force_clk_levels = smu_v13_0_0_force_clk_levels, - .update_pcie_parameters = smu_v13_0_update_pcie_parameters, + .update_pcie_parameters = smu_v13_0_0_update_pcie_parameters, .get_thermal_temperature_range = smu_v13_0_0_get_thermal_temperature_range, .register_irq_handler = smu_v13_0_register_irq_handler, .enable_thermal_alert = smu_v13_0_enable_thermal_alert, diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_12_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_12_ppt.c index e0d356f93ab0..cb3fea9e8cf3 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_12_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_12_ppt.c @@ -83,7 +83,6 @@ const struct cmn2asic_mapping smu_v13_0_12_feature_mask_map[SMU_FEATURE_COUNT] = SMU_13_0_12_FEA_MAP(SMU_FEATURE_PIT_BIT, FEATURE_PIT), }; -// clang-format off const struct cmn2asic_msg_mapping smu_v13_0_12_message_map[SMU_MSG_MAX_COUNT] = { MSG_MAP(TestMessage, PPSMC_MSG_TestMessage, 0), MSG_MAP(GetSmuVersion, PPSMC_MSG_GetSmuVersion, 1), @@ -106,7 +105,7 @@ const struct cmn2asic_msg_mapping smu_v13_0_12_message_map[SMU_MSG_MAX_COUNT] = MSG_MAP(GetDpmFreqByIndex, PPSMC_MSG_GetDpmFreqByIndex, 1), MSG_MAP(SetPptLimit, PPSMC_MSG_SetPptLimit, 0), MSG_MAP(GetPptLimit, PPSMC_MSG_GetPptLimit, 1), - MSG_MAP(GfxDeviceDriverReset, PPSMC_MSG_GfxDriverReset, SMU_MSG_RAS_PRI), + 
MSG_MAP(GfxDeviceDriverReset, PPSMC_MSG_GfxDriverReset, SMU_MSG_RAS_PRI | SMU_MSG_NO_PRECHECK), MSG_MAP(DramLogSetDramAddrHigh, PPSMC_MSG_DramLogSetDramAddrHigh, 0), MSG_MAP(DramLogSetDramAddrLow, PPSMC_MSG_DramLogSetDramAddrLow, 0), MSG_MAP(DramLogSetDramSize, PPSMC_MSG_DramLogSetDramSize, 0), @@ -137,9 +136,57 @@ const struct cmn2asic_msg_mapping smu_v13_0_12_message_map[SMU_MSG_MAX_COUNT] = MSG_MAP(RmaDueToBadPageThreshold, PPSMC_MSG_RmaDueToBadPageThreshold, 0), MSG_MAP(SetThrottlingPolicy, PPSMC_MSG_SetThrottlingPolicy, 0), MSG_MAP(ResetSDMA, PPSMC_MSG_ResetSDMA, 0), + MSG_MAP(ResetVCN, PPSMC_MSG_ResetVCN, 0), MSG_MAP(GetStaticMetricsTable, PPSMC_MSG_GetStaticMetricsTable, 1), + MSG_MAP(GetSystemMetricsTable, PPSMC_MSG_GetSystemMetricsTable, 1), }; +int smu_v13_0_12_tables_init(struct smu_context *smu) +{ + struct amdgpu_baseboard_temp_metrics_v1_0 *baseboard_temp_metrics; + struct amdgpu_gpuboard_temp_metrics_v1_0 *gpuboard_temp_metrics; + struct smu_table_context *smu_table = &smu->smu_table; + struct smu_table *tables = smu_table->tables; + struct smu_table_cache *cache; + int ret; + + ret = smu_table_cache_init(smu, SMU_TABLE_PMFW_SYSTEM_METRICS, + smu_v13_0_12_get_system_metrics_size(), 5); + + if (ret) + return ret; + + ret = smu_table_cache_init(smu, SMU_TABLE_BASEBOARD_TEMP_METRICS, + sizeof(*baseboard_temp_metrics), 50); + if (ret) + return ret; + /* Initialize base board temperature metrics */ + cache = &(tables[SMU_TABLE_BASEBOARD_TEMP_METRICS].cache); + baseboard_temp_metrics = + (struct amdgpu_baseboard_temp_metrics_v1_0 *) cache->buffer; + smu_cmn_init_baseboard_temp_metrics(baseboard_temp_metrics, 1, 0); + /* Initialize GPU board temperature metrics */ + ret = smu_table_cache_init(smu, SMU_TABLE_GPUBOARD_TEMP_METRICS, + sizeof(*gpuboard_temp_metrics), 50); + if (ret) { + smu_table_cache_fini(smu, SMU_TABLE_PMFW_SYSTEM_METRICS); + smu_table_cache_fini(smu, SMU_TABLE_BASEBOARD_TEMP_METRICS); + return ret; + } + cache = &(tables[SMU_TABLE_GPUBOARD_TEMP_METRICS].cache); + gpuboard_temp_metrics = (struct amdgpu_gpuboard_temp_metrics_v1_0 *)cache->buffer; + smu_cmn_init_gpuboard_temp_metrics(gpuboard_temp_metrics, 1, 0); + + return 0; +} + +void smu_v13_0_12_tables_fini(struct smu_context *smu) +{ + smu_table_cache_fini(smu, SMU_TABLE_BASEBOARD_TEMP_METRICS); + smu_table_cache_fini(smu, SMU_TABLE_GPUBOARD_TEMP_METRICS); + smu_table_cache_fini(smu, SMU_TABLE_PMFW_SYSTEM_METRICS); +} + static int smu_v13_0_12_get_enabled_mask(struct smu_context *smu, uint64_t *feature_mask) { @@ -187,14 +234,45 @@ int smu_v13_0_12_get_max_metrics_size(void) return max(sizeof(StaticMetricsTable_t), sizeof(MetricsTable_t)); } +size_t smu_v13_0_12_get_system_metrics_size(void) +{ + return sizeof(SystemMetricsTable_t); +} + +static void smu_v13_0_12_init_xgmi_data(struct smu_context *smu, + StaticMetricsTable_t *static_metrics) +{ + struct smu_table_context *smu_table = &smu->smu_table; + uint16_t max_speed; + uint8_t max_width; + int ret; + + if (smu_table->tables[SMU_TABLE_SMU_METRICS].version >= 0x13) { + max_width = (uint8_t)static_metrics->MaxXgmiWidth; + max_speed = (uint16_t)static_metrics->MaxXgmiBitrate; + ret = 0; + } else { + MetricsTable_t *metrics = (MetricsTable_t *)smu_table->metrics_table; + + ret = smu_v13_0_6_get_metrics_table(smu, NULL, true); + if (!ret) { + max_width = (uint8_t)metrics->XgmiWidth; + max_speed = (uint16_t)metrics->XgmiBitrate; + } + } + if (!ret) + amgpu_xgmi_set_max_speed_width(smu->adev, max_speed, max_width); +} + int smu_v13_0_12_setup_driver_pptable(struct 
smu_context *smu) { + struct smu_13_0_dpm_context *dpm_context = smu->smu_dpm.dpm_context; struct smu_table_context *smu_table = &smu->smu_table; StaticMetricsTable_t *static_metrics = (StaticMetricsTable_t *)smu_table->metrics_table; struct PPTable_t *pptable = (struct PPTable_t *)smu_table->driver_pptable; uint32_t table_version; - int ret, i; + int ret, i, n; if (!pptable->Init) { ret = smu_v13_0_6_get_static_metrics_table(smu); @@ -233,10 +311,41 @@ int smu_v13_0_12_setup_driver_pptable(struct smu_context *smu) /* use AID0 serial number by default */ pptable->PublicSerialNumber_AID = static_metrics->PublicSerialNumber_AID[0]; + + amdgpu_device_set_uid(smu->adev->uid_info, AMDGPU_UID_TYPE_SOC, + 0, pptable->PublicSerialNumber_AID); + n = ARRAY_SIZE(static_metrics->PublicSerialNumber_AID); + for (i = 0; i < n; i++) { + amdgpu_device_set_uid( + smu->adev->uid_info, AMDGPU_UID_TYPE_AID, i, + static_metrics->PublicSerialNumber_AID[i]); + } + n = ARRAY_SIZE(static_metrics->PublicSerialNumber_XCD); + for (i = 0; i < n; i++) { + amdgpu_device_set_uid( + smu->adev->uid_info, AMDGPU_UID_TYPE_XCD, i, + static_metrics->PublicSerialNumber_XCD[i]); + } + ret = smu_v13_0_12_fru_get_product_info(smu, static_metrics); if (ret) return ret; + if (smu_v13_0_6_cap_supported(smu, SMU_CAP(BOARD_VOLTAGE))) { + if (!static_metrics->InputTelemetryVoltageInmV) { + dev_warn(smu->adev->dev, "Invalid board voltage %d\n", + static_metrics->InputTelemetryVoltageInmV); + } + dpm_context->board_volt = static_metrics->InputTelemetryVoltageInmV; + } + if (smu_v13_0_6_cap_supported(smu, SMU_CAP(PLDM_VERSION)) && + static_metrics->pldmVersion[0] != 0xFFFFFFFF) + smu->adev->firmware.pldm_version = + static_metrics->pldmVersion[0]; + if (smu_v13_0_6_cap_supported(smu, SMU_CAP(NPM_METRICS))) + pptable->MaxNodePowerLimit = + SMUQ10_ROUND(static_metrics->MaxNodePowerLimit); + smu_v13_0_12_init_xgmi_data(smu, static_metrics); pptable->Init = true; } @@ -263,7 +372,6 @@ int smu_v13_0_12_get_smu_metrics_data(struct smu_context *smu, struct smu_table_context *smu_table = &smu->smu_table; MetricsTable_t *metrics = (MetricsTable_t *)smu_table->metrics_table; struct amdgpu_device *adev = smu->adev; - int ret = 0; int xcc_id; /* For clocks with multiple instances, only report the first one */ @@ -319,9 +427,295 @@ int smu_v13_0_12_get_smu_metrics_data(struct smu_context *smu, break; } + return 0; +} + +static int smu_v13_0_12_get_system_metrics_table(struct smu_context *smu) +{ + struct smu_table_context *smu_table = &smu->smu_table; + struct smu_table *table = &smu_table->driver_table; + struct smu_table *tables = smu_table->tables; + struct smu_table *sys_table; + int ret; + + sys_table = &tables[SMU_TABLE_PMFW_SYSTEM_METRICS]; + if (smu_table_cache_is_valid(sys_table)) + return 0; + + ret = smu_cmn_send_smc_msg(smu, SMU_MSG_GetSystemMetricsTable, NULL); + if (ret) { + dev_info(smu->adev->dev, + "Failed to export system metrics table!\n"); + return ret; + } + + amdgpu_asic_invalidate_hdp(smu->adev, NULL); + smu_table_cache_update_time(sys_table, jiffies); + memcpy(sys_table->cache.buffer, table->cpu_addr, + smu_v13_0_12_get_system_metrics_size()); + + return 0; +} + +static enum amdgpu_node_temp smu_v13_0_12_get_node_sensor_type(NODE_TEMP_e type) +{ + switch (type) { + case NODE_TEMP_RETIMER: + return AMDGPU_RETIMER_X_TEMP; + case NODE_TEMP_IBC_TEMP: + return AMDGPU_OAM_X_IBC_TEMP; + case NODE_TEMP_IBC_2_TEMP: + return AMDGPU_OAM_X_IBC_2_TEMP; + case NODE_TEMP_VDD18_VR_TEMP: + return AMDGPU_OAM_X_VDD18_VR_TEMP; + case 
NODE_TEMP_04_HBM_B_VR_TEMP: + return AMDGPU_OAM_X_04_HBM_B_VR_TEMP; + case NODE_TEMP_04_HBM_D_VR_TEMP: + return AMDGPU_OAM_X_04_HBM_D_VR_TEMP; + default: + return -EINVAL; + } +} + +static enum amdgpu_vr_temp smu_v13_0_12_get_vr_sensor_type(SVI_TEMP_e type) +{ + switch (type) { + case SVI_VDDCR_VDD0_TEMP: + return AMDGPU_VDDCR_VDD0_TEMP; + case SVI_VDDCR_VDD1_TEMP: + return AMDGPU_VDDCR_VDD1_TEMP; + case SVI_VDDCR_VDD2_TEMP: + return AMDGPU_VDDCR_VDD2_TEMP; + case SVI_VDDCR_VDD3_TEMP: + return AMDGPU_VDDCR_VDD3_TEMP; + case SVI_VDDCR_SOC_A_TEMP: + return AMDGPU_VDDCR_SOC_A_TEMP; + case SVI_VDDCR_SOC_C_TEMP: + return AMDGPU_VDDCR_SOC_C_TEMP; + case SVI_VDDCR_SOCIO_A_TEMP: + return AMDGPU_VDDCR_SOCIO_A_TEMP; + case SVI_VDDCR_SOCIO_C_TEMP: + return AMDGPU_VDDCR_SOCIO_C_TEMP; + case SVI_VDD_085_HBM_TEMP: + return AMDGPU_VDD_085_HBM_TEMP; + case SVI_VDDCR_11_HBM_B_TEMP: + return AMDGPU_VDDCR_11_HBM_B_TEMP; + case SVI_VDDCR_11_HBM_D_TEMP: + return AMDGPU_VDDCR_11_HBM_D_TEMP; + case SVI_VDD_USR_TEMP: + return AMDGPU_VDD_USR_TEMP; + case SVI_VDDIO_11_E32_TEMP: + return AMDGPU_VDDIO_11_E32_TEMP; + default: + return -EINVAL; + } +} + +static enum amdgpu_system_temp smu_v13_0_12_get_system_sensor_type(SYSTEM_TEMP_e type) +{ + switch (type) { + case SYSTEM_TEMP_UBB_FPGA: + return AMDGPU_UBB_FPGA_TEMP; + case SYSTEM_TEMP_UBB_FRONT: + return AMDGPU_UBB_FRONT_TEMP; + case SYSTEM_TEMP_UBB_BACK: + return AMDGPU_UBB_BACK_TEMP; + case SYSTEM_TEMP_UBB_OAM7: + return AMDGPU_UBB_OAM7_TEMP; + case SYSTEM_TEMP_UBB_IBC: + return AMDGPU_UBB_IBC_TEMP; + case SYSTEM_TEMP_UBB_UFPGA: + return AMDGPU_UBB_UFPGA_TEMP; + case SYSTEM_TEMP_UBB_OAM1: + return AMDGPU_UBB_OAM1_TEMP; + case SYSTEM_TEMP_OAM_0_1_HSC: + return AMDGPU_OAM_0_1_HSC_TEMP; + case SYSTEM_TEMP_OAM_2_3_HSC: + return AMDGPU_OAM_2_3_HSC_TEMP; + case SYSTEM_TEMP_OAM_4_5_HSC: + return AMDGPU_OAM_4_5_HSC_TEMP; + case SYSTEM_TEMP_OAM_6_7_HSC: + return AMDGPU_OAM_6_7_HSC_TEMP; + case SYSTEM_TEMP_UBB_FPGA_0V72_VR: + return AMDGPU_UBB_FPGA_0V72_VR_TEMP; + case SYSTEM_TEMP_UBB_FPGA_3V3_VR: + return AMDGPU_UBB_FPGA_3V3_VR_TEMP; + case SYSTEM_TEMP_RETIMER_0_1_2_3_1V2_VR: + return AMDGPU_RETIMER_0_1_2_3_1V2_VR_TEMP; + case SYSTEM_TEMP_RETIMER_4_5_6_7_1V2_VR: + return AMDGPU_RETIMER_4_5_6_7_1V2_VR_TEMP; + case SYSTEM_TEMP_RETIMER_0_1_0V9_VR: + return AMDGPU_RETIMER_0_1_0V9_VR_TEMP; + case SYSTEM_TEMP_RETIMER_4_5_0V9_VR: + return AMDGPU_RETIMER_4_5_0V9_VR_TEMP; + case SYSTEM_TEMP_RETIMER_2_3_0V9_VR: + return AMDGPU_RETIMER_2_3_0V9_VR_TEMP; + case SYSTEM_TEMP_RETIMER_6_7_0V9_VR: + return AMDGPU_RETIMER_6_7_0V9_VR_TEMP; + case SYSTEM_TEMP_OAM_0_1_2_3_3V3_VR: + return AMDGPU_OAM_0_1_2_3_3V3_VR_TEMP; + case SYSTEM_TEMP_OAM_4_5_6_7_3V3_VR: + return AMDGPU_OAM_4_5_6_7_3V3_VR_TEMP; + case SYSTEM_TEMP_IBC_HSC: + return AMDGPU_IBC_HSC_TEMP; + case SYSTEM_TEMP_IBC: + return AMDGPU_IBC_TEMP; + default: + return -EINVAL; + } +} + +static bool smu_v13_0_12_is_temp_metrics_supported(struct smu_context *smu, + enum smu_temp_metric_type type) +{ + switch (type) { + case SMU_TEMP_METRIC_BASEBOARD: + if (smu->adev->gmc.xgmi.physical_node_id == 0 && + smu->adev->gmc.xgmi.num_physical_nodes > 1 && + smu_v13_0_6_cap_supported(smu, SMU_CAP(TEMP_METRICS))) + return true; + break; + case SMU_TEMP_METRIC_GPUBOARD: + return smu_v13_0_6_cap_supported(smu, SMU_CAP(TEMP_METRICS)); + default: + break; + } + + return false; +} + +int smu_v13_0_12_get_npm_data(struct smu_context *smu, + enum amd_pp_sensors sensor, + uint32_t *value) +{ + struct smu_table_context *smu_table = &smu->smu_table; + struct 
PPTable_t *pptable = + (struct PPTable_t *)smu_table->driver_pptable; + struct smu_table *tables = smu_table->tables; + SystemMetricsTable_t *metrics; + struct smu_table *sys_table; + int ret; + + if (!smu_v13_0_6_cap_supported(smu, SMU_CAP(NPM_METRICS))) + return -EOPNOTSUPP; + + if (sensor == AMDGPU_PP_SENSOR_MAXNODEPOWERLIMIT) { + *value = pptable->MaxNodePowerLimit; + return 0; + } + + ret = smu_v13_0_12_get_system_metrics_table(smu); + if (ret) + return ret; + + sys_table = &tables[SMU_TABLE_PMFW_SYSTEM_METRICS]; + metrics = (SystemMetricsTable_t *)sys_table->cache.buffer; + + switch (sensor) { + case AMDGPU_PP_SENSOR_NODEPOWERLIMIT: + *value = SMUQ10_ROUND(metrics->NodePowerLimit); + break; + case AMDGPU_PP_SENSOR_NODEPOWER: + *value = SMUQ10_ROUND(metrics->NodePower); + break; + case AMDGPU_PP_SENSOR_GPPTRESIDENCY: + *value = SMUQ10_ROUND(metrics->GlobalPPTResidencyAcc); + break; + default: + return -EINVAL; + } + return ret; } +static ssize_t smu_v13_0_12_get_temp_metrics(struct smu_context *smu, + enum smu_temp_metric_type type, void *table) +{ + struct amdgpu_baseboard_temp_metrics_v1_0 *baseboard_temp_metrics; + struct amdgpu_gpuboard_temp_metrics_v1_0 *gpuboard_temp_metrics; + struct smu_table_context *smu_table = &smu->smu_table; + struct smu_table *tables = smu_table->tables; + SystemMetricsTable_t *metrics; + struct smu_table *data_table; + struct smu_table *sys_table; + int ret, sensor_type; + u32 idx, sensors; + ssize_t size; + + if (type == SMU_TEMP_METRIC_BASEBOARD) { + /* Initialize base board temperature metrics */ + data_table = + &smu->smu_table.tables[SMU_TABLE_BASEBOARD_TEMP_METRICS]; + baseboard_temp_metrics = + (struct amdgpu_baseboard_temp_metrics_v1_0 *) + data_table->cache.buffer; + size = sizeof(*baseboard_temp_metrics); + } else { + data_table = + &smu->smu_table.tables[SMU_TABLE_GPUBOARD_TEMP_METRICS]; + gpuboard_temp_metrics = + (struct amdgpu_gpuboard_temp_metrics_v1_0 *) + data_table->cache.buffer; + size = sizeof(*gpuboard_temp_metrics); + } + + ret = smu_v13_0_12_get_system_metrics_table(smu); + if (ret) + return ret; + + sys_table = &tables[SMU_TABLE_PMFW_SYSTEM_METRICS]; + metrics = (SystemMetricsTable_t *)sys_table->cache.buffer; + smu_table_cache_update_time(data_table, jiffies); + + if (type == SMU_TEMP_METRIC_GPUBOARD) { + gpuboard_temp_metrics->accumulation_counter = metrics->AccumulationCounter; + gpuboard_temp_metrics->label_version = metrics->LabelVersion; + gpuboard_temp_metrics->node_id = metrics->NodeIdentifier; + + idx = 0; + for (sensors = 0; sensors < NODE_TEMP_MAX_TEMP_ENTRIES; sensors++) { + if (metrics->NodeTemperatures[sensors] != -1) { + sensor_type = smu_v13_0_12_get_node_sensor_type(sensors); + gpuboard_temp_metrics->node_temp[idx] = + ((int)metrics->NodeTemperatures[sensors]) & 0xFFFFFF; + gpuboard_temp_metrics->node_temp[idx] |= (sensor_type << 24); + idx++; + } + } + + idx = 0; + + for (sensors = 0; sensors < SVI_MAX_TEMP_ENTRIES; sensors++) { + if (metrics->VrTemperatures[sensors] != -1) { + sensor_type = smu_v13_0_12_get_vr_sensor_type(sensors); + gpuboard_temp_metrics->vr_temp[idx] = + ((int)metrics->VrTemperatures[sensors]) & 0xFFFFFF; + gpuboard_temp_metrics->vr_temp[idx] |= (sensor_type << 24); + idx++; + } + } + } else if (type == SMU_TEMP_METRIC_BASEBOARD) { + baseboard_temp_metrics->accumulation_counter = metrics->AccumulationCounter; + baseboard_temp_metrics->label_version = metrics->LabelVersion; + baseboard_temp_metrics->node_id = metrics->NodeIdentifier; + + idx = 0; + for (sensors = 0; sensors <
SYSTEM_TEMP_MAX_ENTRIES; sensors++) { + if (metrics->SystemTemperatures[sensors] != -1) { + sensor_type = smu_v13_0_12_get_system_sensor_type(sensors); + baseboard_temp_metrics->system_temp[idx] = + ((int)metrics->SystemTemperatures[sensors]) & 0xFFFFFF; + baseboard_temp_metrics->system_temp[idx] |= (sensor_type << 24); + idx++; + } + } + } + + memcpy(table, data_table->cache.buffer, size); + + return size; +} + ssize_t smu_v13_0_12_get_xcp_metrics(struct smu_context *smu, struct amdgpu_xcp *xcp, void *table, void *smu_metrics) { const u8 num_jpeg_rings = NUM_JPEG_RINGS_FW; @@ -535,3 +929,8 @@ ssize_t smu_v13_0_12_get_gpu_metrics(struct smu_context *smu, void **table, void return sizeof(*gpu_metrics); } + +const struct smu_temp_funcs smu_v13_0_12_temp_funcs = { + .temp_metrics_is_supported = smu_v13_0_12_is_temp_metrics_supported, + .get_temp_metrics = smu_v13_0_12_get_temp_metrics, +}; diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c index f00ef7f3f355..285cf7979693 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c @@ -143,9 +143,9 @@ static const struct cmn2asic_msg_mapping smu_v13_0_6_message_map[SMU_MSG_MAX_COU MSG_MAP(GetMinDpmFreq, PPSMC_MSG_GetMinDpmFreq, 1), MSG_MAP(GetMaxDpmFreq, PPSMC_MSG_GetMaxDpmFreq, 1), MSG_MAP(GetDpmFreqByIndex, PPSMC_MSG_GetDpmFreqByIndex, 1), - MSG_MAP(SetPptLimit, PPSMC_MSG_SetPptLimit, 0), + MSG_MAP(SetPptLimit, PPSMC_MSG_SetPptLimit, 1), MSG_MAP(GetPptLimit, PPSMC_MSG_GetPptLimit, 1), - MSG_MAP(GfxDeviceDriverReset, PPSMC_MSG_GfxDriverReset, SMU_MSG_RAS_PRI), + MSG_MAP(GfxDeviceDriverReset, PPSMC_MSG_GfxDriverReset, SMU_MSG_RAS_PRI | SMU_MSG_NO_PRECHECK), MSG_MAP(DramLogSetDramAddrHigh, PPSMC_MSG_DramLogSetDramAddrHigh, 0), MSG_MAP(DramLogSetDramAddrLow, PPSMC_MSG_DramLogSetDramAddrLow, 0), MSG_MAP(DramLogSetDramSize, PPSMC_MSG_DramLogSetDramSize, 0), @@ -177,7 +177,7 @@ static const struct cmn2asic_msg_mapping smu_v13_0_6_message_map[SMU_MSG_MAX_COU MSG_MAP(SetThrottlingPolicy, PPSMC_MSG_SetThrottlingPolicy, 0), MSG_MAP(ResetSDMA, PPSMC_MSG_ResetSDMA, 0), MSG_MAP(ResetVCN, PPSMC_MSG_ResetVCN, 0), - MSG_MAP(GetStaticMetricsTable, PPSMC_MSG_GetStaticMetricsTable, 0), + MSG_MAP(GetStaticMetricsTable, PPSMC_MSG_GetStaticMetricsTable, 1), }; // clang-format on @@ -312,6 +312,8 @@ static void smu_v13_0_14_init_caps(struct smu_context *smu) smu_v13_0_6_cap_set(smu, SMU_CAP(PER_INST_METRICS)); if (fw_ver >= 0x5551200) smu_v13_0_6_cap_set(smu, SMU_CAP(SDMA_RESET)); + if (fw_ver >= 0x5551800) + smu_v13_0_6_cap_set(smu, SMU_CAP(VCN_RESET)); if (fw_ver >= 0x5551600) { smu_v13_0_6_cap_set(smu, SMU_CAP(STATIC_METRICS)); smu_v13_0_6_cap_set(smu, SMU_CAP(BOARD_VOLTAGE)); @@ -345,6 +347,25 @@ static void smu_v13_0_12_init_caps(struct smu_context *smu) if (fw_ver >= 0x00562500) smu_v13_0_6_cap_set(smu, SMU_CAP(HST_LIMIT_METRICS)); + + if (fw_ver >= 0x04560100) { + smu_v13_0_6_cap_set(smu, SMU_CAP(BOARD_VOLTAGE)); + smu_v13_0_6_cap_set(smu, SMU_CAP(PLDM_VERSION)); + } + + if (fw_ver > 0x04560900) + smu_v13_0_6_cap_set(smu, SMU_CAP(VCN_RESET)); + + if (fw_ver >= 0x04560700) { + if (fw_ver >= 0x04560900) { + smu_v13_0_6_cap_set(smu, SMU_CAP(TEMP_METRICS)); + if (smu->adev->gmc.xgmi.physical_node_id == 0) + smu_v13_0_6_cap_set(smu, SMU_CAP(NPM_METRICS)); + } else if (!amdgpu_sriov_vf(smu->adev)) + smu_v13_0_6_cap_set(smu, SMU_CAP(TEMP_METRICS)); + } else { + smu_v13_0_12_tables_fini(smu); + } } static void 
smu_v13_0_6_init_caps(struct smu_context *smu) @@ -397,19 +418,40 @@ static void smu_v13_0_6_init_caps(struct smu_context *smu) if ((pgm == 7 && fw_ver >= 0x7550E00) || (pgm == 0 && fw_ver >= 0x00557E00)) smu_v13_0_6_cap_set(smu, SMU_CAP(HST_LIMIT_METRICS)); - if ((pgm == 0 && fw_ver >= 0x00557F01) || - (pgm == 7 && fw_ver >= 0x7551000)) { - smu_v13_0_6_cap_set(smu, SMU_CAP(STATIC_METRICS)); - smu_v13_0_6_cap_set(smu, SMU_CAP(BOARD_VOLTAGE)); + + if (amdgpu_sriov_vf(adev)) { + if (fw_ver >= 0x00558200) + amdgpu_virt_attr_set(&adev->virt.virt_caps, + AMDGPU_VIRT_CAP_POWER_LIMIT, + AMDGPU_CAP_ATTR_RW); + if ((pgm == 0 && fw_ver >= 0x00558000) || + (pgm == 7 && fw_ver >= 0x7551000)) { + smu_v13_0_6_cap_set(smu, + SMU_CAP(STATIC_METRICS)); + smu_v13_0_6_cap_set(smu, + SMU_CAP(BOARD_VOLTAGE)); + smu_v13_0_6_cap_set(smu, SMU_CAP(PLDM_VERSION)); + } + } else { + if ((pgm == 0 && fw_ver >= 0x00557F01) || + (pgm == 7 && fw_ver >= 0x7551000)) { + smu_v13_0_6_cap_set(smu, + SMU_CAP(STATIC_METRICS)); + smu_v13_0_6_cap_set(smu, + SMU_CAP(BOARD_VOLTAGE)); + } + if ((pgm == 0 && fw_ver >= 0x00558000) || + (pgm == 7 && fw_ver >= 0x7551000)) + smu_v13_0_6_cap_set(smu, SMU_CAP(PLDM_VERSION)); } - if ((pgm == 0 && fw_ver >= 0x00558000) || - (pgm == 7 && fw_ver >= 0x7551000)) - smu_v13_0_6_cap_set(smu, SMU_CAP(PLDM_VERSION)); } if (((pgm == 7) && (fw_ver >= 0x7550700)) || ((pgm == 0) && (fw_ver >= 0x00557900)) || ((pgm == 4) && (fw_ver >= 0x4557000))) smu_v13_0_6_cap_set(smu, SMU_CAP(SDMA_RESET)); + + if ((pgm == 0) && (fw_ver >= 0x00558200)) + smu_v13_0_6_cap_set(smu, SMU_CAP(VCN_RESET)); } static void smu_v13_0_x_init_caps(struct smu_context *smu) @@ -506,8 +548,12 @@ static int smu_v13_0_6_tables_init(struct smu_context *smu) { struct smu_table_context *smu_table = &smu->smu_table; struct smu_table *tables = smu_table->tables; + void *gpu_metrics_table __free(kfree) = NULL; + void *driver_pptable __free(kfree) = NULL; + void *metrics_table __free(kfree) = NULL; struct amdgpu_device *adev = smu->adev; int gpu_metrcs_size = METRICS_TABLE_SIZE; + int ret; if (!(adev->flags & AMD_IS_APU)) SMU_TABLE_INIT(tables, SMU_TABLE_PMSTATUSLOG, SMU13_TOOL_SIZE, @@ -523,27 +569,36 @@ static int smu_v13_0_6_tables_init(struct smu_context *smu) PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM | AMDGPU_GEM_DOMAIN_GTT); - smu_table->metrics_table = kzalloc(METRICS_TABLE_SIZE, GFP_KERNEL); - if (!smu_table->metrics_table) + SMU_TABLE_INIT(tables, SMU_TABLE_PMFW_SYSTEM_METRICS, + smu_v13_0_12_get_system_metrics_size(), PAGE_SIZE, + AMDGPU_GEM_DOMAIN_VRAM | AMDGPU_GEM_DOMAIN_GTT); + + metrics_table = kzalloc(METRICS_TABLE_SIZE, GFP_KERNEL); + if (!metrics_table) return -ENOMEM; smu_table->metrics_time = 0; smu_table->gpu_metrics_table_size = sizeof(struct gpu_metrics_v1_8); - smu_table->gpu_metrics_table = + gpu_metrics_table = kzalloc(smu_table->gpu_metrics_table_size, GFP_KERNEL); - if (!smu_table->gpu_metrics_table) { - kfree(smu_table->metrics_table); + if (!gpu_metrics_table) return -ENOMEM; - } - smu_table->driver_pptable = - kzalloc(sizeof(struct PPTable_t), GFP_KERNEL); - if (!smu_table->driver_pptable) { - kfree(smu_table->metrics_table); - kfree(smu_table->gpu_metrics_table); + driver_pptable = kzalloc(sizeof(struct PPTable_t), GFP_KERNEL); + if (!driver_pptable) return -ENOMEM; + + if (amdgpu_ip_version(smu->adev, MP1_HWIP, 0) == + IP_VERSION(13, 0, 12)) { + ret = smu_v13_0_12_tables_init(smu); + if (ret) + return ret; } + smu_table->gpu_metrics_table = no_free_ptr(gpu_metrics_table); + smu_table->metrics_table = 
no_free_ptr(metrics_table); + smu_table->driver_pptable = no_free_ptr(driver_pptable); + return 0; } @@ -672,6 +727,13 @@ static int smu_v13_0_6_init_smc_tables(struct smu_context *smu) return ret; } +static int smu_v13_0_6_fini_smc_tables(struct smu_context *smu) +{ + if (amdgpu_ip_version(smu->adev, MP1_HWIP, 0) == IP_VERSION(13, 0, 12)) + smu_v13_0_12_tables_fini(smu); + return smu_v13_0_fini_smc_tables(smu); +} + static int smu_v13_0_6_get_allowed_feature_mask(struct smu_context *smu, uint32_t *feature_mask, uint32_t num) @@ -685,8 +747,8 @@ static int smu_v13_0_6_get_allowed_feature_mask(struct smu_context *smu, return 0; } -static int smu_v13_0_6_get_metrics_table(struct smu_context *smu, - void *metrics_table, bool bypass_cache) +int smu_v13_0_6_get_metrics_table(struct smu_context *smu, void *metrics_table, + bool bypass_cache) { struct smu_table_context *smu_table = &smu->smu_table; uint32_t table_size = smu_table->tables[SMU_TABLE_SMU_METRICS].size; @@ -798,8 +860,10 @@ static int smu_v13_0_6_setup_driver_pptable(struct smu_context *smu) struct PPTable_t *pptable = (struct PPTable_t *)smu_table->driver_pptable; int version = smu_v13_0_6_get_metrics_version(smu); - int ret, i, retry = 100; + int ret, i, retry = 100, n; uint32_t table_version; + uint16_t max_speed; + uint8_t max_width; if (amdgpu_ip_version(smu->adev, MP1_HWIP, 0) == IP_VERSION(13, 0, 12) && smu_v13_0_6_cap_supported(smu, SMU_CAP(STATIC_METRICS))) @@ -835,6 +899,9 @@ static int smu_v13_0_6_setup_driver_pptable(struct smu_context *smu) SMUQ10_ROUND(GET_METRIC_FIELD(MaxGfxclkFrequency, version)); pptable->MinGfxclkFrequency = SMUQ10_ROUND(GET_METRIC_FIELD(MinGfxclkFrequency, version)); + max_width = (uint8_t)GET_METRIC_FIELD(XgmiWidth, version); + max_speed = (uint16_t)GET_METRIC_FIELD(XgmiBitrate, version); + amgpu_xgmi_set_max_speed_width(smu->adev, max_speed, max_width); for (i = 0; i < 4; ++i) { pptable->FclkFrequencyTable[i] = @@ -855,6 +922,23 @@ static int smu_v13_0_6_setup_driver_pptable(struct smu_context *smu) pptable->PublicSerialNumber_AID = GET_METRIC_FIELD(PublicSerialNumber_AID, version)[0]; + amdgpu_device_set_uid(smu->adev->uid_info, AMDGPU_UID_TYPE_SOC, + 0, pptable->PublicSerialNumber_AID); + n = ARRAY_SIZE(metrics_v0->PublicSerialNumber_AID); + for (i = 0; i < n; i++) { + amdgpu_device_set_uid( + smu->adev->uid_info, AMDGPU_UID_TYPE_AID, i, + GET_METRIC_FIELD(PublicSerialNumber_AID, + version)[i]); + } + n = ARRAY_SIZE(metrics_v0->PublicSerialNumber_XCD); + for (i = 0; i < n; i++) { + amdgpu_device_set_uid( + smu->adev->uid_info, AMDGPU_UID_TYPE_XCD, i, + GET_METRIC_FIELD(PublicSerialNumber_XCD, + version)[i]); + } + pptable->Init = true; if (smu_v13_0_6_cap_supported(smu, SMU_CAP(STATIC_METRICS))) { ret = smu_v13_0_6_get_static_metrics_table(smu); @@ -871,51 +955,51 @@ static int smu_v13_0_6_get_dpm_ultimate_freq(struct smu_context *smu, enum smu_clk_type clk_type, uint32_t *min, uint32_t *max) { + struct smu_13_0_dpm_context *dpm_context = smu->smu_dpm.dpm_context; struct smu_table_context *smu_table = &smu->smu_table; struct PPTable_t *pptable = (struct PPTable_t *)smu_table->driver_pptable; - uint32_t clock_limit = 0, param; + struct smu_13_0_dpm_table *dpm_table; + uint32_t min_clk, max_clk, param; int ret = 0, clk_id = 0; - if (!smu_cmn_clk_dpm_is_enabled(smu, clk_type)) { + /* Use dpm tables, if data is already fetched */ + if (pptable->Init) { switch (clk_type) { case SMU_MCLK: case SMU_UCLK: - if (pptable->Init) - clock_limit = pptable->UclkFrequencyTable[0]; + dpm_table = 
&dpm_context->dpm_tables.uclk_table; break; case SMU_GFXCLK: case SMU_SCLK: - if (pptable->Init) - clock_limit = pptable->MinGfxclkFrequency; + dpm_table = &dpm_context->dpm_tables.gfx_table; break; case SMU_SOCCLK: - if (pptable->Init) - clock_limit = pptable->SocclkFrequencyTable[0]; + dpm_table = &dpm_context->dpm_tables.soc_table; break; case SMU_FCLK: - if (pptable->Init) - clock_limit = pptable->FclkFrequencyTable[0]; + dpm_table = &dpm_context->dpm_tables.fclk_table; break; case SMU_VCLK: - if (pptable->Init) - clock_limit = pptable->VclkFrequencyTable[0]; + dpm_table = &dpm_context->dpm_tables.vclk_table; break; case SMU_DCLK: - if (pptable->Init) - clock_limit = pptable->DclkFrequencyTable[0]; + dpm_table = &dpm_context->dpm_tables.dclk_table; break; default: - break; + return -EINVAL; } - if (min) - *min = clock_limit; + min_clk = dpm_table->min; + max_clk = dpm_table->max; + if (min) + *min = min_clk; if (max) - *max = clock_limit; + *max = max_clk; - return 0; + if (min_clk && max_clk) + return 0; } if (!(clk_type == SMU_GFXCLK || clk_type == SMU_SCLK)) { @@ -1377,8 +1461,9 @@ static int smu_v13_0_6_print_clk_levels(struct smu_context *smu, return ret; } - min_clk = pstate_table->gfxclk_pstate.curr.min; - max_clk = pstate_table->gfxclk_pstate.curr.max; + single_dpm_table = &(dpm_context->dpm_tables.gfx_table); + min_clk = single_dpm_table->min; + max_clk = single_dpm_table->max; if (now < SMU_13_0_6_DSCLK_THRESHOLD) { size += sysfs_emit_at(buf, size, "S: %uMhz *\n", @@ -1720,6 +1805,15 @@ static int smu_v13_0_6_read_sensor(struct smu_context *smu, ret = -EOPNOTSUPP; break; } + case AMDGPU_PP_SENSOR_NODEPOWERLIMIT: + case AMDGPU_PP_SENSOR_NODEPOWER: + case AMDGPU_PP_SENSOR_GPPTRESIDENCY: + case AMDGPU_PP_SENSOR_MAXNODEPOWERLIMIT: + ret = smu_v13_0_12_get_npm_data(smu, sensor, (uint32_t *)data); + if (ret) + return ret; + *size = 4; + break; case AMDGPU_PP_SENSOR_GPU_AVG_POWER: default: ret = -EOPNOTSUPP; @@ -2415,10 +2509,10 @@ static int smu_v13_0_6_i2c_control_init(struct smu_context *smu) control->quirks = &smu_v13_0_6_i2c_control_quirks; i2c_set_adapdata(control, smu_i2c); - res = i2c_add_adapter(control); + res = devm_i2c_add_adapter(adev->dev, control); if (res) { DRM_ERROR("Failed to register hw i2c, err: %d\n", res); - goto Out_err; + return res; } } @@ -2426,27 +2520,12 @@ static int smu_v13_0_6_i2c_control_init(struct smu_context *smu) adev->pm.fru_eeprom_i2c_bus = &adev->pm.smu_i2c[0].adapter; return 0; -Out_err: - for ( ; i >= 0; i--) { - struct amdgpu_smu_i2c_bus *smu_i2c = &adev->pm.smu_i2c[i]; - struct i2c_adapter *control = &smu_i2c->adapter; - - i2c_del_adapter(control); - } - return res; } static void smu_v13_0_6_i2c_control_fini(struct smu_context *smu) { struct amdgpu_device *adev = smu->adev; - int i; - for (i = 0; i < MAX_SMU_I2C_BUSES; i++) { - struct amdgpu_smu_i2c_bus *smu_i2c = &adev->pm.smu_i2c[i]; - struct i2c_adapter *control = &smu_i2c->adapter; - - i2c_del_adapter(control); - } adev->pm.ras_eeprom_i2c_bus = NULL; adev->pm.fru_eeprom_i2c_bus = NULL; } @@ -2549,9 +2628,9 @@ static ssize_t smu_v13_0_6_get_xcp_metrics(struct smu_context *smu, int xcp_id, const u8 num_jpeg_rings = AMDGPU_MAX_JPEG_RINGS_4_0_3; int version = smu_v13_0_6_get_metrics_version(smu); struct amdgpu_partition_metrics_v1_0 *xcp_metrics; + MetricsTableV0_t *metrics_v0 __free(kfree) = NULL; struct amdgpu_device *adev = smu->adev; int ret, inst, i, j, k, idx; - MetricsTableV0_t *metrics_v0; MetricsTableV1_t *metrics_v1; MetricsTableV2_t *metrics_v2; struct amdgpu_xcp *xcp; @@ 
-2576,17 +2655,14 @@ static ssize_t smu_v13_0_6_get_xcp_metrics(struct smu_context *smu, int xcp_id, return -ENOMEM; ret = smu_v13_0_6_get_metrics_table(smu, metrics_v0, false); - if (ret) { - kfree(metrics_v0); + if (ret) return ret; - } if (amdgpu_ip_version(smu->adev, MP1_HWIP, 0) == IP_VERSION(13, 0, 12) && - smu_v13_0_6_cap_supported(smu, SMU_CAP(STATIC_METRICS))) { - ret = smu_v13_0_12_get_xcp_metrics(smu, xcp, table, metrics_v0); - goto out; - } + smu_v13_0_6_cap_supported(smu, SMU_CAP(STATIC_METRICS))) + return smu_v13_0_12_get_xcp_metrics(smu, xcp, table, + metrics_v0); metrics_v1 = (MetricsTableV1_t *)metrics_v0; metrics_v2 = (MetricsTableV2_t *)metrics_v0; @@ -2657,8 +2733,6 @@ static ssize_t smu_v13_0_6_get_xcp_metrics(struct smu_context *smu, int xcp_id, idx++; } } -out: - kfree(metrics_v0); return sizeof(*xcp_metrics); } @@ -2669,31 +2743,26 @@ static ssize_t smu_v13_0_6_get_gpu_metrics(struct smu_context *smu, void **table struct gpu_metrics_v1_8 *gpu_metrics = (struct gpu_metrics_v1_8 *)smu_table->gpu_metrics_table; int version = smu_v13_0_6_get_metrics_version(smu); + MetricsTableV0_t *metrics_v0 __free(kfree) = NULL; int ret = 0, xcc_id, inst, i, j, k, idx; struct amdgpu_device *adev = smu->adev; - MetricsTableV0_t *metrics_v0; MetricsTableV1_t *metrics_v1; MetricsTableV2_t *metrics_v2; struct amdgpu_xcp *xcp; u16 link_width_level; - ssize_t num_bytes; u8 num_jpeg_rings; u32 inst_mask; bool per_inst; metrics_v0 = kzalloc(METRICS_TABLE_SIZE, GFP_KERNEL); - ret = smu_v13_0_6_get_metrics_table(smu, metrics_v0, true); - if (ret) { - kfree(metrics_v0); + ret = smu_v13_0_6_get_metrics_table(smu, metrics_v0, false); + if (ret) return ret; - } - if (amdgpu_ip_version(smu->adev, MP1_HWIP, 0) == IP_VERSION(13, 0, 12) && - smu_v13_0_6_cap_supported(smu, SMU_CAP(STATIC_METRICS))) { - num_bytes = smu_v13_0_12_get_gpu_metrics(smu, table, metrics_v0); - kfree(metrics_v0); - return num_bytes; - } + if (amdgpu_ip_version(smu->adev, MP1_HWIP, 0) == + IP_VERSION(13, 0, 12) && + smu_v13_0_6_cap_supported(smu, SMU_CAP(STATIC_METRICS))) + return smu_v13_0_12_get_gpu_metrics(smu, table, metrics_v0); metrics_v1 = (MetricsTableV1_t *)metrics_v0; metrics_v2 = (MetricsTableV2_t *)metrics_v0; @@ -2879,7 +2948,6 @@ static ssize_t smu_v13_0_6_get_gpu_metrics(struct smu_context *smu, void **table gpu_metrics->firmware_timestamp = GET_METRIC_FIELD(Timestamp, version); *table = (void *)gpu_metrics; - kfree(metrics_v0); return sizeof(*gpu_metrics); } @@ -3065,7 +3133,7 @@ static inline bool smu_v13_0_6_is_link_reset_supported(struct smu_context *smu) struct amdgpu_device *adev = smu->adev; int var = (adev->pdev->device & 0xF); - if (var == 0x1) + if (var == 0x0 || var == 0x1 || var == 0x3) return true; return false; @@ -3141,6 +3209,11 @@ static int smu_v13_0_6_reset_sdma(struct smu_context *smu, uint32_t inst_mask) return ret; } +static bool smu_v13_0_6_reset_vcn_is_supported(struct smu_context *smu) +{ + return smu_v13_0_6_cap_supported(smu, SMU_CAP(VCN_RESET)); +} + static int smu_v13_0_6_reset_vcn(struct smu_context *smu, uint32_t inst_mask) { int ret = 0; @@ -3154,6 +3227,20 @@ static int smu_v13_0_6_reset_vcn(struct smu_context *smu, uint32_t inst_mask) } +static int smu_v13_0_6_post_init(struct smu_context *smu) +{ + if (smu_v13_0_6_is_link_reset_supported(smu)) + smu_feature_cap_set(smu, SMU_FEATURE_CAP_ID__LINK_RESET); + + if (smu_v13_0_6_reset_sdma_is_supported(smu)) + smu_feature_cap_set(smu, SMU_FEATURE_CAP_ID__SDMA_RESET); + + if (smu_v13_0_6_reset_vcn_is_supported(smu)) + 
smu_feature_cap_set(smu, SMU_FEATURE_CAP_ID__VCN_RESET); + + return 0; +} + static int mca_smu_set_debug_mode(struct amdgpu_device *adev, bool enable) { struct smu_context *smu = adev->powerplay.pp_handle; @@ -3770,6 +3857,12 @@ static const struct aca_smu_funcs smu_v13_0_6_aca_smu_funcs = { .parse_error_code = aca_smu_parse_error_code, }; +static void smu_v13_0_6_set_temp_funcs(struct smu_context *smu) +{ + smu->smu_temp.temp_funcs = (amdgpu_ip_version(smu->adev, MP1_HWIP, 0) + == IP_VERSION(13, 0, 12)) ? &smu_v13_0_12_temp_funcs : NULL; +} + static const struct pptable_funcs smu_v13_0_6_ppt_funcs = { /* init dpm */ .get_allowed_feature_mask = smu_v13_0_6_get_allowed_feature_mask, @@ -3786,7 +3879,7 @@ static const struct pptable_funcs smu_v13_0_6_ppt_funcs = { .init_microcode = smu_v13_0_6_init_microcode, .fini_microcode = smu_v13_0_fini_microcode, .init_smc_tables = smu_v13_0_6_init_smc_tables, - .fini_smc_tables = smu_v13_0_fini_smc_tables, + .fini_smc_tables = smu_v13_0_6_fini_smc_tables, .init_power = smu_v13_0_init_power, .fini_power = smu_v13_0_fini_power, .check_fw_status = smu_v13_0_6_check_fw_status, @@ -3817,7 +3910,6 @@ static const struct pptable_funcs smu_v13_0_6_ppt_funcs = { .get_xcp_metrics = smu_v13_0_6_get_xcp_metrics, .get_thermal_temperature_range = smu_v13_0_6_get_thermal_temperature_range, .mode1_reset_is_support = smu_v13_0_6_is_mode1_reset_supported, - .link_reset_is_support = smu_v13_0_6_is_link_reset_supported, .mode1_reset = smu_v13_0_6_mode1_reset, .mode2_reset = smu_v13_0_6_mode2_reset, .link_reset = smu_v13_0_6_link_reset, @@ -3827,8 +3919,8 @@ static const struct pptable_funcs smu_v13_0_6_ppt_funcs = { .send_hbm_bad_pages_num = smu_v13_0_6_smu_send_hbm_bad_page_num, .send_rma_reason = smu_v13_0_6_send_rma_reason, .reset_sdma = smu_v13_0_6_reset_sdma, - .reset_sdma_is_supported = smu_v13_0_6_reset_sdma_is_supported, .dpm_reset_vcn = smu_v13_0_6_reset_vcn, + .post_init = smu_v13_0_6_post_init, }; void smu_v13_0_6_set_ppt_funcs(struct smu_context *smu) @@ -3840,9 +3932,11 @@ void smu_v13_0_6_set_ppt_funcs(struct smu_context *smu) smu->feature_map = (amdgpu_ip_version(smu->adev, MP1_HWIP, 0) == IP_VERSION(13, 0, 12)) ? 
smu_v13_0_12_feature_mask_map : smu_v13_0_6_feature_mask_map; smu->table_map = smu_v13_0_6_table_map; - smu->smc_driver_if_version = SMU13_0_6_DRIVER_IF_VERSION; + smu->smc_driver_if_version = SMU_IGNORE_IF_VERSION; smu->smc_fw_caps |= SMU_FW_CAP_RAS_PRI; smu_v13_0_set_smu_mailbox_registers(smu); + smu_v13_0_6_set_temp_funcs(smu); amdgpu_mca_smu_init_funcs(smu->adev, &smu_v13_0_6_mca_smu_funcs); amdgpu_aca_set_smu_funcs(smu->adev, &smu_v13_0_6_aca_smu_funcs); } + diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.h b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.h index d38d6d76b1e7..7ef5f3e66c27 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.h +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.h @@ -49,6 +49,7 @@ struct PPTable_t { uint32_t MaxLclkDpmRange; uint32_t MinLclkDpmRange; uint64_t PublicSerialNumber_AID; + uint32_t MaxNodePowerLimit; bool Init; }; @@ -64,19 +65,25 @@ enum smu_v13_0_6_caps { SMU_CAP(RMA_MSG), SMU_CAP(ACA_SYND), SMU_CAP(SDMA_RESET), + SMU_CAP(VCN_RESET), SMU_CAP(STATIC_METRICS), SMU_CAP(HST_LIMIT_METRICS), SMU_CAP(BOARD_VOLTAGE), SMU_CAP(PLDM_VERSION), + SMU_CAP(TEMP_METRICS), + SMU_CAP(NPM_METRICS), SMU_CAP(ALL), }; extern void smu_v13_0_6_set_ppt_funcs(struct smu_context *smu); bool smu_v13_0_6_cap_supported(struct smu_context *smu, enum smu_v13_0_6_caps cap); int smu_v13_0_6_get_static_metrics_table(struct smu_context *smu); +int smu_v13_0_6_get_metrics_table(struct smu_context *smu, void *metrics_table, + bool bypass_cache); bool smu_v13_0_12_is_dpm_running(struct smu_context *smu); int smu_v13_0_12_get_max_metrics_size(void); +size_t smu_v13_0_12_get_system_metrics_size(void); int smu_v13_0_12_setup_driver_pptable(struct smu_context *smu); int smu_v13_0_12_get_smu_metrics_data(struct smu_context *smu, MetricsMember_t member, uint32_t *value); @@ -84,6 +91,12 @@ ssize_t smu_v13_0_12_get_gpu_metrics(struct smu_context *smu, void **table, void ssize_t smu_v13_0_12_get_xcp_metrics(struct smu_context *smu, struct amdgpu_xcp *xcp, void *table, void *smu_metrics); +int smu_v13_0_12_tables_init(struct smu_context *smu); +void smu_v13_0_12_tables_fini(struct smu_context *smu); +int smu_v13_0_12_get_npm_data(struct smu_context *smu, + enum amd_pp_sensors sensor, + uint32_t *value); extern const struct cmn2asic_mapping smu_v13_0_12_feature_mask_map[]; extern const struct cmn2asic_msg_mapping smu_v13_0_12_message_map[]; +extern const struct smu_temp_funcs smu_v13_0_12_temp_funcs; #endif diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c index c8f4f6fb4083..c96fa5e49ed6 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c @@ -579,8 +579,6 @@ static int smu_v13_0_7_set_default_dpm_table(struct smu_context *smu) PPTable_t *driver_ppt = smu->smu_table.driver_pptable; SkuTable_t *skutable = &driver_ppt->SkuTable; struct smu_13_0_dpm_table *dpm_table; - struct smu_13_0_pcie_table *pcie_table; - uint32_t link_level; int ret = 0; /* socclk dpm table setup */ @@ -687,24 +685,6 @@ static int smu_v13_0_7_set_default_dpm_table(struct smu_context *smu) dpm_table->max = dpm_table->dpm_levels[0].value; } - /* lclk dpm table setup */ - pcie_table = &dpm_context->dpm_tables.pcie_table; - pcie_table->num_of_link_levels = 0; - for (link_level = 0; link_level < NUM_LINK_LEVELS; link_level++) { - if (!skutable->PcieGenSpeed[link_level] && - !skutable->PcieLaneCount[link_level] && - 
!skutable->LclkFreq[link_level]) - continue; - - pcie_table->pcie_gen[pcie_table->num_of_link_levels] = - skutable->PcieGenSpeed[link_level]; - pcie_table->pcie_lane[pcie_table->num_of_link_levels] = - skutable->PcieLaneCount[link_level]; - pcie_table->clk_freq[pcie_table->num_of_link_levels] = - skutable->LclkFreq[link_level]; - pcie_table->num_of_link_levels++; - } - /* dcefclk dpm table setup */ dpm_table = &dpm_context->dpm_tables.dcef_table; if (smu_cmn_feature_is_enabled(smu, SMU_FEATURE_DPM_DCN_BIT)) { @@ -2739,6 +2719,89 @@ static int smu_v13_0_7_set_power_limit(struct smu_context *smu, return 0; } +static int smu_v13_0_7_update_pcie_parameters(struct smu_context *smu, + uint8_t pcie_gen_cap, + uint8_t pcie_width_cap) +{ + struct smu_13_0_dpm_context *dpm_context = smu->smu_dpm.dpm_context; + struct smu_13_0_pcie_table *pcie_table = + &dpm_context->dpm_tables.pcie_table; + int num_of_levels; + int link_level; + uint32_t smu_pcie_arg; + struct smu_table_context *table_context = &smu->smu_table; + PPTable_t *pptable = table_context->driver_pptable; + SkuTable_t *skutable = &pptable->SkuTable; + int ret = 0; + int i; + + pcie_table->num_of_link_levels = 0; + for (link_level = 0; link_level < NUM_LINK_LEVELS; link_level++) { + if (!skutable->PcieGenSpeed[link_level] && + !skutable->PcieLaneCount[link_level] && + !skutable->LclkFreq[link_level]) + continue; + + pcie_table->pcie_gen[pcie_table->num_of_link_levels] = + skutable->PcieGenSpeed[link_level]; + pcie_table->pcie_lane[pcie_table->num_of_link_levels] = + skutable->PcieLaneCount[link_level]; + pcie_table->clk_freq[pcie_table->num_of_link_levels] = + skutable->LclkFreq[link_level]; + pcie_table->num_of_link_levels++; + } + + num_of_levels = pcie_table->num_of_link_levels; + if (!num_of_levels) + return 0; + + if (!(smu->adev->pm.pp_feature & PP_PCIE_DPM_MASK)) { + if (pcie_table->pcie_gen[num_of_levels - 1] < pcie_gen_cap) + pcie_gen_cap = pcie_table->pcie_gen[num_of_levels - 1]; + + if (pcie_table->pcie_lane[num_of_levels - 1] < pcie_width_cap) + pcie_width_cap = pcie_table->pcie_lane[num_of_levels - 1]; + + /* Force all levels to use the same settings */ + for (i = 0; i < num_of_levels; i++) { + pcie_table->pcie_gen[i] = pcie_gen_cap; + pcie_table->pcie_lane[i] = pcie_width_cap; + smu_pcie_arg = i << 16; + smu_pcie_arg |= pcie_table->pcie_gen[i] << 8; + smu_pcie_arg |= pcie_table->pcie_lane[i]; + + ret = smu_cmn_send_smc_msg_with_param(smu, + SMU_MSG_OverridePcieParameters, + smu_pcie_arg, + NULL); + if (ret) + break; + } + } else { + for (i = 0; i < num_of_levels; i++) { + if (pcie_table->pcie_gen[i] > pcie_gen_cap || + pcie_table->pcie_lane[i] > pcie_width_cap) { + pcie_table->pcie_gen[i] = pcie_table->pcie_gen[i] > pcie_gen_cap ? + pcie_gen_cap : pcie_table->pcie_gen[i]; + pcie_table->pcie_lane[i] = pcie_table->pcie_lane[i] > pcie_width_cap ? 
+ pcie_width_cap : pcie_table->pcie_lane[i]; + smu_pcie_arg = i << 16; + smu_pcie_arg |= pcie_table->pcie_gen[i] << 8; + smu_pcie_arg |= pcie_table->pcie_lane[i]; + + ret = smu_cmn_send_smc_msg_with_param(smu, + SMU_MSG_OverridePcieParameters, + smu_pcie_arg, + NULL); + if (ret) + break; + } + } + } + + return ret; +} + static const struct pptable_funcs smu_v13_0_7_ppt_funcs = { .get_allowed_feature_mask = smu_v13_0_7_get_allowed_feature_mask, .set_default_dpm_table = smu_v13_0_7_set_default_dpm_table, @@ -2768,7 +2831,7 @@ static const struct pptable_funcs smu_v13_0_7_ppt_funcs = { .feature_is_enabled = smu_cmn_feature_is_enabled, .print_clk_levels = smu_v13_0_7_print_clk_levels, .force_clk_levels = smu_v13_0_7_force_clk_levels, - .update_pcie_parameters = smu_v13_0_update_pcie_parameters, + .update_pcie_parameters = smu_v13_0_7_update_pcie_parameters, .get_thermal_temperature_range = smu_v13_0_7_get_thermal_temperature_range, .register_irq_handler = smu_v13_0_register_irq_handler, .enable_thermal_alert = smu_v13_0_enable_thermal_alert, diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0.c b/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0.c index 76c1adda83db..f9b0938c57ea 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0.c @@ -62,13 +62,14 @@ const int decoded_link_width[8] = {0, 1, 2, 4, 8, 12, 16, 32}; MODULE_FIRMWARE("amdgpu/smu_14_0_2.bin"); MODULE_FIRMWARE("amdgpu/smu_14_0_3.bin"); +MODULE_FIRMWARE("amdgpu/smu_14_0_3_kicker.bin"); #define ENABLE_IMU_ARG_GFXOFF_ENABLE 1 int smu_v14_0_init_microcode(struct smu_context *smu) { struct amdgpu_device *adev = smu->adev; - char ucode_prefix[15]; + char ucode_prefix[30]; int err = 0; const struct smc_firmware_header_v1_0 *hdr; const struct common_firmware_header *header; @@ -79,8 +80,12 @@ int smu_v14_0_init_microcode(struct smu_context *smu) return 0; amdgpu_ucode_ip_version_decode(adev, MP1_HWIP, ucode_prefix, sizeof(ucode_prefix)); - err = amdgpu_ucode_request(adev, &adev->pm.fw, AMDGPU_UCODE_REQUIRED, - "amdgpu/%s.bin", ucode_prefix); + if (amdgpu_is_kicker_fw(adev)) + err = amdgpu_ucode_request(adev, &adev->pm.fw, AMDGPU_UCODE_REQUIRED, + "amdgpu/%s_kicker.bin", ucode_prefix); + else + err = amdgpu_ucode_request(adev, &adev->pm.fw, AMDGPU_UCODE_REQUIRED, + "amdgpu/%s.bin", ucode_prefix); if (err) goto out; diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_0_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_0_ppt.c index 84f9b007b59f..fe00c84b1cc6 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_0_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_0_ppt.c @@ -1207,11 +1207,13 @@ static int smu_v14_0_0_print_clk_levels(struct smu_context *smu, static int smu_v14_0_0_set_soft_freq_limited_range(struct smu_context *smu, enum smu_clk_type clk_type, - uint32_t min, - uint32_t max) + u32 min, + u32 max, + bool __always_unused automatic) { - enum smu_message_type msg_set_min, msg_set_max; - int ret = 0; + enum smu_message_type msg_set_min = SMU_MSG_MAX_COUNT; + enum smu_message_type msg_set_max = SMU_MSG_MAX_COUNT; + int ret = -EINVAL; if (!smu_v14_0_0_clk_dpm_is_enabled(smu, clk_type)) return -EINVAL; @@ -1240,16 +1242,23 @@ static int smu_v14_0_0_set_soft_freq_limited_range(struct smu_context *smu, msg_set_min = SMU_MSG_SetHardMinVcn1; msg_set_max = SMU_MSG_SetSoftMaxVcn1; break; + case SMU_ISPICLK: + msg_set_min = SMU_MSG_SetHardMinIspiclkByFreq; + break; + case SMU_ISPXCLK: + msg_set_min = SMU_MSG_SetHardMinIspxclkByFreq; + break; default: return 
-EINVAL; } - ret = smu_cmn_send_smc_msg_with_param(smu, msg_set_min, min, NULL); - if (ret) - return ret; + if (min && msg_set_min != SMU_MSG_MAX_COUNT) + ret = smu_cmn_send_smc_msg_with_param(smu, msg_set_min, min, NULL); + + if (max && msg_set_max != SMU_MSG_MAX_COUNT) + ret = smu_cmn_send_smc_msg_with_param(smu, msg_set_max, max, NULL); - return smu_cmn_send_smc_msg_with_param(smu, msg_set_max, - max, NULL); + return ret; } static int smu_v14_0_0_force_clk_levels(struct smu_context *smu, @@ -1278,7 +1287,7 @@ static int smu_v14_0_0_force_clk_levels(struct smu_context *smu, if (ret) break; - ret = smu_v14_0_0_set_soft_freq_limited_range(smu, clk_type, min_freq, max_freq); + ret = smu_v14_0_0_set_soft_freq_limited_range(smu, clk_type, min_freq, max_freq, false); break; default: ret = -EINVAL; @@ -1426,7 +1435,8 @@ static int smu_v14_0_common_set_performance_level(struct smu_context *smu, ret = smu_v14_0_0_set_soft_freq_limited_range(smu, SMU_SCLK, sclk_min, - sclk_max); + sclk_max, + false); if (ret) return ret; @@ -1438,7 +1448,8 @@ static int smu_v14_0_common_set_performance_level(struct smu_context *smu, ret = smu_v14_0_0_set_soft_freq_limited_range(smu, SMU_FCLK, fclk_min, - fclk_max); + fclk_max, + false); if (ret) return ret; } @@ -1447,7 +1458,8 @@ static int smu_v14_0_common_set_performance_level(struct smu_context *smu, ret = smu_v14_0_0_set_soft_freq_limited_range(smu, SMU_SOCCLK, socclk_min, - socclk_max); + socclk_max, + false); if (ret) return ret; } @@ -1456,7 +1468,8 @@ static int smu_v14_0_common_set_performance_level(struct smu_context *smu, ret = smu_v14_0_0_set_soft_freq_limited_range(smu, SMU_VCLK, vclk_min, - vclk_max); + vclk_max, + false); if (ret) return ret; } @@ -1465,7 +1478,8 @@ static int smu_v14_0_common_set_performance_level(struct smu_context *smu, ret = smu_v14_0_0_set_soft_freq_limited_range(smu, SMU_VCLK1, vclk1_min, - vclk1_max); + vclk1_max, + false); if (ret) return ret; } @@ -1474,7 +1488,8 @@ static int smu_v14_0_common_set_performance_level(struct smu_context *smu, ret = smu_v14_0_0_set_soft_freq_limited_range(smu, SMU_DCLK, dclk_min, - dclk_max); + dclk_max, + false); if (ret) return ret; } @@ -1483,7 +1498,8 @@ static int smu_v14_0_common_set_performance_level(struct smu_context *smu, ret = smu_v14_0_0_set_soft_freq_limited_range(smu, SMU_DCLK1, dclk1_min, - dclk1_max); + dclk1_max, + false); if (ret) return ret; } @@ -1533,6 +1549,14 @@ static int smu_v14_0_0_set_vpe_enable(struct smu_context *smu, 0, NULL); } +static int smu_v14_0_0_set_isp_enable(struct smu_context *smu, + bool enable) +{ + return smu_cmn_send_smc_msg_with_param(smu, enable ? 
+ SMU_MSG_PowerUpIspByTile : SMU_MSG_PowerDownIspByTile, + ISP_ALL_TILES_MASK, NULL); +} + static int smu_v14_0_0_set_umsch_mm_enable(struct smu_context *smu, bool enable) { @@ -1662,6 +1686,7 @@ static const struct pptable_funcs smu_v14_0_0_ppt_funcs = { .gfx_off_control = smu_v14_0_gfx_off_control, .mode2_reset = smu_v14_0_0_mode2_reset, .get_dpm_ultimate_freq = smu_v14_0_common_get_dpm_ultimate_freq, + .set_soft_freq_limited_range = smu_v14_0_0_set_soft_freq_limited_range, .od_edit_dpm_table = smu_v14_0_od_edit_dpm_table, .print_clk_levels = smu_v14_0_0_print_clk_levels, .force_clk_levels = smu_v14_0_0_force_clk_levels, @@ -1669,6 +1694,7 @@ static const struct pptable_funcs smu_v14_0_0_ppt_funcs = { .set_fine_grain_gfx_freq_parameters = smu_v14_0_common_set_fine_grain_gfx_freq_parameters, .set_gfx_power_up_by_imu = smu_v14_0_set_gfx_power_up_by_imu, .dpm_set_vpe_enable = smu_v14_0_0_set_vpe_enable, + .dpm_set_isp_enable = smu_v14_0_0_set_isp_enable, .dpm_set_umsch_mm_enable = smu_v14_0_0_set_umsch_mm_enable, .get_dpm_clock_table = smu_v14_0_common_get_dpm_table, .set_mall_enable = smu_v14_0_common_set_mall_enable, diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c index 82c2db972491..086501cc5213 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c @@ -502,8 +502,6 @@ static int smu_v14_0_2_set_default_dpm_table(struct smu_context *smu) PPTable_t *pptable = table_context->driver_pptable; SkuTable_t *skutable = &pptable->SkuTable; struct smu_14_0_dpm_table *dpm_table; - struct smu_14_0_pcie_table *pcie_table; - uint32_t link_level; int ret = 0; /* socclk dpm table setup */ @@ -619,27 +617,6 @@ static int smu_v14_0_2_set_default_dpm_table(struct smu_context *smu) dpm_table->max = dpm_table->dpm_levels[0].value; } - /* lclk dpm table setup */ - pcie_table = &dpm_context->dpm_tables.pcie_table; - pcie_table->num_of_link_levels = 0; - for (link_level = 0; link_level < NUM_LINK_LEVELS; link_level++) { - if (!skutable->PcieGenSpeed[link_level] && - !skutable->PcieLaneCount[link_level] && - !skutable->LclkFreq[link_level]) - continue; - - pcie_table->pcie_gen[pcie_table->num_of_link_levels] = - skutable->PcieGenSpeed[link_level]; - pcie_table->pcie_lane[pcie_table->num_of_link_levels] = - skutable->PcieLaneCount[link_level]; - pcie_table->clk_freq[pcie_table->num_of_link_levels] = - skutable->LclkFreq[link_level]; - pcie_table->num_of_link_levels++; - - if (link_level == 0) - link_level++; - } - /* dcefclk dpm table setup */ dpm_table = &dpm_context->dpm_tables.dcef_table; if (smu_cmn_feature_is_enabled(smu, SMU_FEATURE_DPM_DCN_BIT)) { @@ -1487,10 +1464,31 @@ static int smu_v14_0_2_update_pcie_parameters(struct smu_context *smu, struct smu_14_0_dpm_context *dpm_context = smu->smu_dpm.dpm_context; struct smu_14_0_pcie_table *pcie_table = &dpm_context->dpm_tables.pcie_table; - int num_of_levels = pcie_table->num_of_link_levels; + int num_of_levels; uint32_t smu_pcie_arg; - int ret, i; + uint32_t link_level; + struct smu_table_context *table_context = &smu->smu_table; + PPTable_t *pptable = table_context->driver_pptable; + SkuTable_t *skutable = &pptable->SkuTable; + int ret = 0; + int i; + pcie_table->num_of_link_levels = 0; + for (link_level = 0; link_level < NUM_LINK_LEVELS; link_level++) { + if (!skutable->PcieGenSpeed[link_level] && + !skutable->PcieLaneCount[link_level] && + !skutable->LclkFreq[link_level]) + continue; + + 
pcie_table->pcie_gen[pcie_table->num_of_link_levels] = + skutable->PcieGenSpeed[link_level]; + pcie_table->pcie_lane[pcie_table->num_of_link_levels] = + skutable->PcieLaneCount[link_level]; + pcie_table->clk_freq[pcie_table->num_of_link_levels] = + skutable->LclkFreq[link_level]; + pcie_table->num_of_link_levels++; + } + num_of_levels = pcie_table->num_of_link_levels; if (!num_of_levels) return 0; @@ -1505,30 +1503,40 @@ static int smu_v14_0_2_update_pcie_parameters(struct smu_context *smu, for (i = 0; i < num_of_levels; i++) { pcie_table->pcie_gen[i] = pcie_gen_cap; pcie_table->pcie_lane[i] = pcie_width_cap; + smu_pcie_arg = i << 16; + smu_pcie_arg |= pcie_table->pcie_gen[i] << 8; + smu_pcie_arg |= pcie_table->pcie_lane[i]; + + ret = smu_cmn_send_smc_msg_with_param(smu, + SMU_MSG_OverridePcieParameters, + smu_pcie_arg, + NULL); + if (ret) + break; } } else { for (i = 0; i < num_of_levels; i++) { - if (pcie_table->pcie_gen[i] > pcie_gen_cap) - pcie_table->pcie_gen[i] = pcie_gen_cap; - if (pcie_table->pcie_lane[i] > pcie_width_cap) - pcie_table->pcie_lane[i] = pcie_width_cap; - } - } - - for (i = 0; i < num_of_levels; i++) { - smu_pcie_arg = i << 16; - smu_pcie_arg |= pcie_table->pcie_gen[i] << 8; - smu_pcie_arg |= pcie_table->pcie_lane[i]; - - ret = smu_cmn_send_smc_msg_with_param(smu, + if (pcie_table->pcie_gen[i] > pcie_gen_cap || + pcie_table->pcie_lane[i] > pcie_width_cap) { + pcie_table->pcie_gen[i] = pcie_table->pcie_gen[i] > pcie_gen_cap ? + pcie_gen_cap : pcie_table->pcie_gen[i]; + pcie_table->pcie_lane[i] = pcie_table->pcie_lane[i] > pcie_width_cap ? + pcie_width_cap : pcie_table->pcie_lane[i]; + smu_pcie_arg = i << 16; + smu_pcie_arg |= pcie_table->pcie_gen[i] << 8; + smu_pcie_arg |= pcie_table->pcie_lane[i]; + + ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_OverridePcieParameters, smu_pcie_arg, NULL); - if (ret) - return ret; + if (ret) + break; + } + } } - return 0; + return ret; } static const struct smu_temperature_range smu14_thermal_policy[] = { @@ -1689,9 +1697,11 @@ static int smu_v14_0_2_get_power_limit(struct smu_context *smu, uint32_t *min_power_limit) { struct smu_table_context *table_context = &smu->smu_table; + struct smu_14_0_2_powerplay_table *powerplay_table = + table_context->power_play_table; PPTable_t *pptable = table_context->driver_pptable; CustomSkuTable_t *skutable = &pptable->CustomSkuTable; - uint32_t power_limit; + uint32_t power_limit, od_percent_upper = 0, od_percent_lower = 0; uint32_t msg_limit = pptable->SkuTable.MsgLimits.Power[PPT_THROTTLER_PPT0][POWER_SOURCE_AC]; if (smu_v14_0_get_current_power_limit(smu, &power_limit)) @@ -1704,11 +1714,29 @@ static int smu_v14_0_2_get_power_limit(struct smu_context *smu, if (default_power_limit) *default_power_limit = power_limit; - if (max_power_limit) - *max_power_limit = msg_limit; + if (powerplay_table) { + if (smu->od_enabled && + smu_v14_0_2_is_od_feature_supported(smu, PP_OD_FEATURE_PPT_BIT)) { + od_percent_upper = pptable->SkuTable.OverDriveLimitsBasicMax.Ppt; + od_percent_lower = pptable->SkuTable.OverDriveLimitsBasicMin.Ppt; + } else if (smu_v14_0_2_is_od_feature_supported(smu, PP_OD_FEATURE_PPT_BIT)) { + od_percent_upper = 0; + od_percent_lower = pptable->SkuTable.OverDriveLimitsBasicMin.Ppt; + } + } - if (min_power_limit) - *min_power_limit = 0; + dev_dbg(smu->adev->dev, "od percent upper:%d, od percent lower:%d (default power: %d)\n", + od_percent_upper, od_percent_lower, power_limit); + + if (max_power_limit) { + *max_power_limit = msg_limit * (100 + od_percent_upper); + *max_power_limit /= 
100; + } + + if (min_power_limit) { + *min_power_limit = power_limit * (100 + od_percent_lower); + *min_power_limit /= 100; + } return 0; } @@ -2059,10 +2087,10 @@ static int smu_v14_0_2_i2c_control_init(struct smu_context *smu) control->quirks = &smu_v14_0_2_i2c_control_quirks; i2c_set_adapdata(control, smu_i2c); - res = i2c_add_adapter(control); + res = devm_i2c_add_adapter(adev->dev, control); if (res) { DRM_ERROR("Failed to register hw i2c, err: %d\n", res); - goto Out_err; + return res; } } @@ -2072,27 +2100,12 @@ static int smu_v14_0_2_i2c_control_init(struct smu_context *smu) adev->pm.fru_eeprom_i2c_bus = &adev->pm.smu_i2c[0].adapter; return 0; -Out_err: - for ( ; i >= 0; i--) { - struct amdgpu_smu_i2c_bus *smu_i2c = &adev->pm.smu_i2c[i]; - struct i2c_adapter *control = &smu_i2c->adapter; - - i2c_del_adapter(control); - } - return res; } static void smu_v14_0_2_i2c_control_fini(struct smu_context *smu) { struct amdgpu_device *adev = smu->adev; - int i; - for (i = 0; i < MAX_SMU_I2C_BUSES; i++) { - struct amdgpu_smu_i2c_bus *smu_i2c = &adev->pm.smu_i2c[i]; - struct i2c_adapter *control = &smu_i2c->adapter; - - i2c_del_adapter(control); - } adev->pm.ras_eeprom_i2c_bus = NULL; adev->pm.fru_eeprom_i2c_bus = NULL; } diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c b/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c index 7eaf58fd7f9a..a8961a8f5c42 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c @@ -86,6 +86,7 @@ static void smu_cmn_read_arg(struct smu_context *smu, #define SMU_RESP_BUSY_OTHER 0xFC #define SMU_RESP_DEBUG_END 0xFB +#define SMU_RESP_UNEXP (~0U) /** * __smu_cmn_poll_stat -- poll for a status from the SMU * @smu: a pointer to SMU context @@ -171,6 +172,15 @@ static void __smu_cmn_reg_print_error(struct smu_context *smu, dev_err_ratelimited(adev->dev, "SMU: I'm debugging!"); break; + case SMU_RESP_UNEXP: + if (amdgpu_device_bus_status_check(smu->adev)) { + /* print error immediately if device is off the bus */ + dev_err(adev->dev, + "SMU: response:0x%08X for index:%d param:0x%08X message:%s?", + reg_c2pmsg_90, msg_index, param, message); + break; + } + fallthrough; default: dev_err_ratelimited(adev->dev, "SMU: response:0x%08X for index:%d param:0x%08X message:%s?", @@ -246,11 +256,12 @@ static int __smu_cmn_ras_filter_msg(struct smu_context *smu, { struct amdgpu_device *adev = smu->adev; uint32_t flags, resp; - bool fed_status; + bool fed_status, pri; flags = __smu_cmn_get_msg_flags(smu, msg); *poll = true; + pri = !!(flags & SMU_MSG_NO_PRECHECK); /* When there is RAS fatal error, FW won't process non-RAS priority * messages. Don't allow any messages other than RAS priority messages. */ @@ -262,15 +273,18 @@ static int __smu_cmn_ras_filter_msg(struct smu_context *smu, smu_get_message_name(smu, msg)); return -EACCES; } + } + if (pri || fed_status) { /* FW will ignore non-priority messages when a RAS fatal error - * is detected. Hence it is possible that a previous message - * wouldn't have got response. Allow to continue without polling - * for response status for priority messages. + * or reset condition is detected. Hence it is possible that a + * previous message wouldn't have got response. Allow to + * continue without polling for response status for priority + * messages. 
*/ resp = RREG32(smu->resp_reg); dev_dbg(adev->dev, - "Sending RAS priority message %s response status: %x", + "Sending priority message %s response status: %x", smu_get_message_name(smu, msg), resp); if (resp == 0) *poll = false; @@ -955,7 +969,7 @@ int smu_cmn_update_table(struct smu_context *smu, table_index); uint32_t table_size; int ret = 0; - if (!table_data || table_id >= SMU_TABLE_COUNT || table_id < 0) + if (!table_data || table_index >= SMU_TABLE_COUNT || table_id < 0) return -EINVAL; table_size = smu_table->tables[table_index].size; diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.h b/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.h index 7473672abd2a..0ae91c8b6d72 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.h +++ b/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.h @@ -40,28 +40,57 @@ #define SMU_IH_INTERRUPT_CONTEXT_ID_FAN_ABNORMAL 0x8 #define SMU_IH_INTERRUPT_CONTEXT_ID_FAN_RECOVERY 0x9 -#define smu_cmn_init_soft_gpu_metrics(ptr, frev, crev) \ - do { \ - typecheck(struct gpu_metrics_v##frev##_##crev, \ - typeof(*(ptr))); \ - struct metrics_table_header *header = \ - (struct metrics_table_header *)(ptr); \ - memset(header, 0xFF, sizeof(*(ptr))); \ - header->format_revision = frev; \ - header->content_revision = crev; \ - header->structure_size = sizeof(*(ptr)); \ +#define SMU_IGNORE_IF_VERSION 0xFFFFFFFF + +#define smu_cmn_init_soft_gpu_metrics(ptr, frev, crev) \ + do { \ + typecheck(struct gpu_metrics_v##frev##_##crev *, (ptr)); \ + struct gpu_metrics_v##frev##_##crev *tmp = (ptr); \ + struct metrics_table_header *header = \ + (struct metrics_table_header *)tmp; \ + memset(header, 0xFF, sizeof(*tmp)); \ + header->format_revision = frev; \ + header->content_revision = crev; \ + header->structure_size = sizeof(*tmp); \ } while (0) -#define smu_cmn_init_partition_metrics(ptr, frev, crev) \ - do { \ - typecheck(struct amdgpu_partition_metrics_v##frev##_##crev, \ - typeof(*(ptr))); \ - struct metrics_table_header *header = \ - (struct metrics_table_header *)(ptr); \ - memset(header, 0xFF, sizeof(*(ptr))); \ - header->format_revision = frev; \ - header->content_revision = crev; \ - header->structure_size = sizeof(*(ptr)); \ +#define smu_cmn_init_partition_metrics(ptr, fr, cr) \ + do { \ + typecheck(struct amdgpu_partition_metrics_v##fr##_##cr *, \ + (ptr)); \ + struct amdgpu_partition_metrics_v##fr##_##cr *tmp = (ptr); \ + struct metrics_table_header *header = \ + (struct metrics_table_header *)tmp; \ + memset(header, 0xFF, sizeof(*tmp)); \ + header->format_revision = fr; \ + header->content_revision = cr; \ + header->structure_size = sizeof(*tmp); \ + } while (0) + +#define smu_cmn_init_baseboard_temp_metrics(ptr, fr, cr) \ + do { \ + typecheck(struct amdgpu_baseboard_temp_metrics_v##fr##_##cr *, \ + (ptr)); \ + struct amdgpu_baseboard_temp_metrics_v##fr##_##cr *tmp = (ptr); \ + struct metrics_table_header *header = \ + (struct metrics_table_header *)tmp; \ + memset(header, 0xFF, sizeof(*tmp)); \ + header->format_revision = fr; \ + header->content_revision = cr; \ + header->structure_size = sizeof(*tmp); \ + } while (0) + +#define smu_cmn_init_gpuboard_temp_metrics(ptr, fr, cr) \ + do { \ + typecheck(struct amdgpu_gpuboard_temp_metrics_v##fr##_##cr *, \ + (ptr)); \ + struct amdgpu_gpuboard_temp_metrics_v##fr##_##cr *tmp = (ptr); \ + struct metrics_table_header *header = \ + (struct metrics_table_header *)tmp; \ + memset(header, 0xFF, sizeof(*tmp)); \ + header->format_revision = fr; \ + header->content_revision = cr; \ + header->structure_size = sizeof(*tmp); \ } while (0) extern const int 
link_speed[]; diff --git a/drivers/gpu/drm/amd/ras/rascore/Makefile b/drivers/gpu/drm/amd/ras/rascore/Makefile new file mode 100644 index 000000000000..e69de29bb2d1 --- /dev/null +++ b/drivers/gpu/drm/amd/ras/rascore/Makefile diff --git a/drivers/gpu/drm/amd/ras/rascore/ras_core_status.h b/drivers/gpu/drm/amd/ras/rascore/ras_core_status.h new file mode 100644 index 000000000000..144fbe4ceb9a --- /dev/null +++ b/drivers/gpu/drm/amd/ras/rascore/ras_core_status.h @@ -0,0 +1,37 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright 2025 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef __RAS_CORE_STATUS_H__ +#define __RAS_CORE_STATUS_H__ + +#define RAS_CORE_OK 0 +#define RAS_CORE_NOT_SUPPORTED 248 +#define RAS_CORE_FAIL_ERROR_QUERY 249 +#define RAS_CORE_FAIL_ERROR_INJECTION 250 +#define RAS_CORE_FAIL_FATAL_RECOVERY 251 +#define RAS_CORE_FAIL_POISON_CONSUMPTION 252 +#define RAS_CORE_FAIL_POISON_CREATION 253 +#define RAS_CORE_FAIL_NO_VALID_BANKS 254 +#define RAS_CORE_GPU_IN_MODE1_RESET 255 +#endif diff --git a/drivers/gpu/drm/arm/display/komeda/komeda_framebuffer.c b/drivers/gpu/drm/arm/display/komeda/komeda_framebuffer.c index df5da5a44755..901f938aefe0 100644 --- a/drivers/gpu/drm/arm/display/komeda/komeda_framebuffer.c +++ b/drivers/gpu/drm/arm/display/komeda/komeda_framebuffer.c @@ -157,6 +157,7 @@ komeda_fb_none_afbc_size_check(struct komeda_dev *mdev, struct komeda_fb *kfb, struct drm_framebuffer * komeda_fb_create(struct drm_device *dev, struct drm_file *file, + const struct drm_format_info *info, const struct drm_mode_fb_cmd2 *mode_cmd) { struct komeda_dev *mdev = dev->dev_private; @@ -177,7 +178,7 @@ komeda_fb_create(struct drm_device *dev, struct drm_file *file, return ERR_PTR(-EINVAL); } - drm_helper_mode_fill_fb_struct(dev, &kfb->base, mode_cmd); + drm_helper_mode_fill_fb_struct(dev, &kfb->base, info, mode_cmd); if (kfb->base.modifier) ret = komeda_fb_afbc_size_check(kfb, file, mode_cmd); diff --git a/drivers/gpu/drm/arm/display/komeda/komeda_framebuffer.h b/drivers/gpu/drm/arm/display/komeda/komeda_framebuffer.h index c61ca98a3a63..02b2b8ae482a 100644 --- a/drivers/gpu/drm/arm/display/komeda/komeda_framebuffer.h +++ b/drivers/gpu/drm/arm/display/komeda/komeda_framebuffer.h @@ -37,6 +37,7 @@ struct komeda_fb { struct drm_framebuffer * komeda_fb_create(struct drm_device *dev, struct drm_file *file, + const struct drm_format_info *info, const struct drm_mode_fb_cmd2 *mode_cmd); int 
komeda_fb_check_src_coords(const struct komeda_fb *kfb, u32 src_x, u32 src_y, u32 src_w, u32 src_h); diff --git a/drivers/gpu/drm/arm/malidp_drv.c b/drivers/gpu/drm/arm/malidp_drv.c index e083021e9e99..bc5f5e9798c3 100644 --- a/drivers/gpu/drm/arm/malidp_drv.c +++ b/drivers/gpu/drm/arm/malidp_drv.c @@ -306,10 +306,10 @@ malidp_verify_afbc_framebuffer_caps(struct drm_device *dev, static bool malidp_verify_afbc_framebuffer_size(struct drm_device *dev, struct drm_file *file, + const struct drm_format_info *info, const struct drm_mode_fb_cmd2 *mode_cmd) { int n_superblocks = 0; - const struct drm_format_info *info; struct drm_gem_object *objs = NULL; u32 afbc_superblock_size = 0, afbc_superblock_height = 0; u32 afbc_superblock_width = 0, afbc_size = 0; @@ -325,8 +325,6 @@ malidp_verify_afbc_framebuffer_size(struct drm_device *dev, return false; } - info = drm_get_format_info(dev, mode_cmd); - n_superblocks = (mode_cmd->width / afbc_superblock_width) * (mode_cmd->height / afbc_superblock_height); @@ -366,24 +364,26 @@ malidp_verify_afbc_framebuffer_size(struct drm_device *dev, static bool malidp_verify_afbc_framebuffer(struct drm_device *dev, struct drm_file *file, + const struct drm_format_info *info, const struct drm_mode_fb_cmd2 *mode_cmd) { if (malidp_verify_afbc_framebuffer_caps(dev, mode_cmd)) - return malidp_verify_afbc_framebuffer_size(dev, file, mode_cmd); + return malidp_verify_afbc_framebuffer_size(dev, file, info, mode_cmd); return false; } static struct drm_framebuffer * malidp_fb_create(struct drm_device *dev, struct drm_file *file, + const struct drm_format_info *info, const struct drm_mode_fb_cmd2 *mode_cmd) { if (mode_cmd->modifier[0]) { - if (!malidp_verify_afbc_framebuffer(dev, file, mode_cmd)) + if (!malidp_verify_afbc_framebuffer(dev, file, info, mode_cmd)) return ERR_PTR(-EINVAL); } - return drm_gem_fb_create(dev, file, mode_cmd); + return drm_gem_fb_create(dev, file, info, mode_cmd); } static const struct drm_mode_config_funcs malidp_mode_config_funcs = { diff --git a/drivers/gpu/drm/arm/malidp_planes.c b/drivers/gpu/drm/arm/malidp_planes.c index 34547edf1ee3..87f2e5ee8790 100644 --- a/drivers/gpu/drm/arm/malidp_planes.c +++ b/drivers/gpu/drm/arm/malidp_planes.c @@ -159,7 +159,7 @@ bool malidp_format_mod_supported(struct drm_device *drm, } if (!fourcc_mod_is_vendor(modifier, ARM)) { - DRM_ERROR("Unknown modifier (not Arm)\n"); + DRM_DEBUG_KMS("Unknown modifier (not Arm)\n"); return false; } diff --git a/drivers/gpu/drm/armada/armada_fb.c b/drivers/gpu/drm/armada/armada_fb.c index cf2e88218dc0..aa4289127086 100644 --- a/drivers/gpu/drm/armada/armada_fb.c +++ b/drivers/gpu/drm/armada/armada_fb.c @@ -18,7 +18,9 @@ static const struct drm_framebuffer_funcs armada_fb_funcs = { }; struct armada_framebuffer *armada_framebuffer_create(struct drm_device *dev, - const struct drm_mode_fb_cmd2 *mode, struct armada_gem_object *obj) + const struct drm_format_info *info, + const struct drm_mode_fb_cmd2 *mode, + struct armada_gem_object *obj) { struct armada_framebuffer *dfb; uint8_t format, config; @@ -64,7 +66,7 @@ struct armada_framebuffer *armada_framebuffer_create(struct drm_device *dev, dfb->mod = config; dfb->fb.obj[0] = &obj->obj; - drm_helper_mode_fill_fb_struct(dev, &dfb->fb, mode); + drm_helper_mode_fill_fb_struct(dev, &dfb->fb, info, mode); ret = drm_framebuffer_init(dev, &dfb->fb, &armada_fb_funcs); if (ret) { @@ -84,9 +86,9 @@ struct armada_framebuffer *armada_framebuffer_create(struct drm_device *dev, } struct drm_framebuffer *armada_fb_create(struct drm_device *dev, - 
struct drm_file *dfile, const struct drm_mode_fb_cmd2 *mode) + struct drm_file *dfile, const struct drm_format_info *info, + const struct drm_mode_fb_cmd2 *mode) { - const struct drm_format_info *info = drm_get_format_info(dev, mode); struct armada_gem_object *obj; struct armada_framebuffer *dfb; int ret; @@ -122,7 +124,7 @@ struct drm_framebuffer *armada_fb_create(struct drm_device *dev, goto err_unref; } - dfb = armada_framebuffer_create(dev, mode, obj); + dfb = armada_framebuffer_create(dev, info, mode, obj); if (IS_ERR(dfb)) { ret = PTR_ERR(dfb); goto err; diff --git a/drivers/gpu/drm/armada/armada_fb.h b/drivers/gpu/drm/armada/armada_fb.h index c5bc53d7e0c4..f2b990f055a2 100644 --- a/drivers/gpu/drm/armada/armada_fb.h +++ b/drivers/gpu/drm/armada/armada_fb.h @@ -17,7 +17,9 @@ struct armada_framebuffer { #define drm_fb_obj(fb) drm_to_armada_gem((fb)->obj[0]) struct armada_framebuffer *armada_framebuffer_create(struct drm_device *, + const struct drm_format_info *info, const struct drm_mode_fb_cmd2 *, struct armada_gem_object *); struct drm_framebuffer *armada_fb_create(struct drm_device *dev, - struct drm_file *dfile, const struct drm_mode_fb_cmd2 *mode); + struct drm_file *dfile, const struct drm_format_info *info, + const struct drm_mode_fb_cmd2 *mode); #endif diff --git a/drivers/gpu/drm/armada/armada_fbdev.c b/drivers/gpu/drm/armada/armada_fbdev.c index 6ee7ce04ee71..cb53cc91bafb 100644 --- a/drivers/gpu/drm/armada/armada_fbdev.c +++ b/drivers/gpu/drm/armada/armada_fbdev.c @@ -78,7 +78,10 @@ int armada_fbdev_driver_fbdev_probe(struct drm_fb_helper *fbh, return -ENOMEM; } - dfb = armada_framebuffer_create(dev, &mode, obj); + dfb = armada_framebuffer_create(dev, + drm_get_format_info(dev, mode.pixel_format, + mode.modifier[0]), + &mode, obj); /* * A reference is now held by the framebuffer object if diff --git a/drivers/gpu/drm/ast/Makefile b/drivers/gpu/drm/ast/Makefile index 8d09ba5d5889..2547613155da 100644 --- a/drivers/gpu/drm/ast/Makefile +++ b/drivers/gpu/drm/ast/Makefile @@ -4,6 +4,11 @@ # Direct Rendering Infrastructure (DRI) in XFree86 4.1.0 and higher. ast-y := \ + ast_2000.o \ + ast_2100.o \ + ast_2300.o \ + ast_2500.o \ + ast_2600.o \ ast_cursor.o \ ast_ddc.o \ ast_dp501.o \ diff --git a/drivers/gpu/drm/ast/ast_2000.c b/drivers/gpu/drm/ast/ast_2000.c new file mode 100644 index 000000000000..41c2aa1e425a --- /dev/null +++ b/drivers/gpu/drm/ast/ast_2000.c @@ -0,0 +1,149 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright 2012 Red Hat Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + */ +/* + * Authors: Dave Airlie <airlied@redhat.com> + */ + +#include <linux/delay.h> + +#include "ast_drv.h" +#include "ast_post.h" + +/* + * POST + */ + +void ast_2000_set_def_ext_reg(struct ast_device *ast) +{ + static const u8 extreginfo[] = { 0x0f, 0x04, 0x1c, 0xff }; + u8 i, index, reg; + const u8 *ext_reg_info; + + /* reset scratch */ + for (i = 0x81; i <= 0x9f; i++) + ast_set_index_reg(ast, AST_IO_VGACRI, i, 0x00); + + ext_reg_info = extreginfo; + index = 0xa0; + while (*ext_reg_info != 0xff) { + ast_set_index_reg_mask(ast, AST_IO_VGACRI, index, 0x00, *ext_reg_info); + index++; + ext_reg_info++; + } + + /* disable standard IO/MEM decode if secondary */ + /* ast_set_index_reg-mask(ast, AST_IO_VGACRI, 0xa1, 0xff, 0x3); */ + + /* Set Ext. Default */ + ast_set_index_reg_mask(ast, AST_IO_VGACRI, 0x8c, 0x00, 0x01); + ast_set_index_reg_mask(ast, AST_IO_VGACRI, 0xb7, 0x00, 0x00); + + /* Enable RAMDAC for A1 */ + reg = 0x04; + ast_set_index_reg_mask(ast, AST_IO_VGACRI, 0xb6, 0xff, reg); +} + +static const struct ast_dramstruct ast2000_dram_table_data[] = { + { 0x0108, 0x00000000 }, + { 0x0120, 0x00004a21 }, + AST_DRAMSTRUCT_UDELAY(67u), + { 0x0000, 0xFFFFFFFF }, + AST_DRAMSTRUCT_INIT(DRAM_TYPE, 0x00000089), + { 0x0008, 0x22331353 }, + { 0x000C, 0x0d07000b }, + { 0x0010, 0x11113333 }, + { 0x0020, 0x00110350 }, + { 0x0028, 0x1e0828f0 }, + { 0x0024, 0x00000001 }, + { 0x001C, 0x00000000 }, + { 0x0014, 0x00000003 }, + AST_DRAMSTRUCT_UDELAY(67u), + { 0x0018, 0x00000131 }, + { 0x0014, 0x00000001 }, + AST_DRAMSTRUCT_UDELAY(67u), + { 0x0018, 0x00000031 }, + { 0x0014, 0x00000001 }, + AST_DRAMSTRUCT_UDELAY(67u), + { 0x0028, 0x1e0828f1 }, + { 0x0024, 0x00000003 }, + { 0x002C, 0x1f0f28fb }, + { 0x0030, 0xFFFFFE01 }, + AST_DRAMSTRUCT_INVALID, +}; + +static void ast_post_chip_2000(struct ast_device *ast) +{ + u8 j; + u32 temp, i; + const struct ast_dramstruct *dram_reg_info; + + j = ast_get_index_reg_mask(ast, AST_IO_VGACRI, 0xd0, 0xff); + + if ((j & 0x80) == 0) { /* VGA only */ + dram_reg_info = ast2000_dram_table_data; + ast_write32(ast, 0xf004, 0x1e6e0000); + ast_write32(ast, 0xf000, 0x1); + ast_write32(ast, 0x10100, 0xa8); + + do { + ; + } while (ast_read32(ast, 0x10100) != 0xa8); + + while (!AST_DRAMSTRUCT_IS(dram_reg_info, INVALID)) { + if (AST_DRAMSTRUCT_IS(dram_reg_info, UDELAY)) { + for (i = 0; i < 15; i++) + udelay(dram_reg_info->data); + } else { + ast_write32(ast, 0x10000 + dram_reg_info->index, + dram_reg_info->data); + } + dram_reg_info++; + } + + temp = ast_read32(ast, 0x10140); + ast_write32(ast, 0x10140, temp | 0x40); + } + + /* wait ready */ + do { + j = ast_get_index_reg_mask(ast, AST_IO_VGACRI, 0xd0, 0xff); + } while ((j & 0x40) == 0); +} + +int ast_2000_post(struct ast_device *ast) +{ + ast_2000_set_def_ext_reg(ast); + + if (ast->config_mode == ast_use_p2a) { + ast_post_chip_2000(ast); + } else { + if (ast->tx_chip == AST_TX_SIL164) { + /* Enable DVO */ + ast_set_index_reg_mask(ast, AST_IO_VGACRI, 0xa3, 0xcf, 0x80); + } + } + + return 0; +} diff --git a/drivers/gpu/drm/ast/ast_2100.c b/drivers/gpu/drm/ast/ast_2100.c new file mode 100644 index 000000000000..829e3b8b0d19 --- /dev/null +++ b/drivers/gpu/drm/ast/ast_2100.c @@ -0,0 +1,388 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright 2012 Red Hat Inc. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + */ +/* + * Authors: Dave Airlie <airlied@redhat.com> + */ + +#include <linux/delay.h> + +#include "ast_drv.h" +#include "ast_post.h" + +/* + * DRAM type + */ + +static enum ast_dram_layout ast_2100_get_dram_layout_p2a(struct ast_device *ast) +{ + u32 mcr_cfg; + enum ast_dram_layout dram_layout; + + ast_write32(ast, 0xf004, 0x1e6e0000); + ast_write32(ast, 0xf000, 0x1); + mcr_cfg = ast_read32(ast, 0x10004); + + switch (mcr_cfg & 0x0c) { + case 0: + case 4: + default: + dram_layout = AST_DRAM_512Mx16; + break; + case 8: + if (mcr_cfg & 0x40) + dram_layout = AST_DRAM_1Gx16; + else + dram_layout = AST_DRAM_512Mx32; + break; + case 0xc: + dram_layout = AST_DRAM_1Gx32; + break; + } + + return dram_layout; +} + +/* + * POST + */ + +static const struct ast_dramstruct ast1100_dram_table_data[] = { + { 0x2000, 0x1688a8a8 }, + { 0x2020, 0x000041f0 }, + AST_DRAMSTRUCT_UDELAY(67u), + { 0x0000, 0xfc600309 }, + { 0x006C, 0x00909090 }, + { 0x0064, 0x00050000 }, + AST_DRAMSTRUCT_INIT(DRAM_TYPE, 0x00000585), + { 0x0008, 0x0011030f }, + { 0x0010, 0x22201724 }, + { 0x0018, 0x1e29011a }, + { 0x0020, 0x00c82222 }, + { 0x0014, 0x01001523 }, + { 0x001C, 0x1024010d }, + { 0x0024, 0x00cb2522 }, + { 0x0038, 0xffffff82 }, + { 0x003C, 0x00000000 }, + { 0x0040, 0x00000000 }, + { 0x0044, 0x00000000 }, + { 0x0048, 0x00000000 }, + { 0x004C, 0x00000000 }, + { 0x0050, 0x00000000 }, + { 0x0054, 0x00000000 }, + { 0x0058, 0x00000000 }, + { 0x005C, 0x00000000 }, + { 0x0060, 0x032aa02a }, + { 0x0064, 0x002d3000 }, + { 0x0068, 0x00000000 }, + { 0x0070, 0x00000000 }, + { 0x0074, 0x00000000 }, + { 0x0078, 0x00000000 }, + { 0x007C, 0x00000000 }, + { 0x0034, 0x00000001 }, + AST_DRAMSTRUCT_UDELAY(67u), + { 0x002C, 0x00000732 }, + { 0x0030, 0x00000040 }, + { 0x0028, 0x00000005 }, + { 0x0028, 0x00000007 }, + { 0x0028, 0x00000003 }, + { 0x0028, 0x00000001 }, + { 0x000C, 0x00005a08 }, + { 0x002C, 0x00000632 }, + { 0x0028, 0x00000001 }, + { 0x0030, 0x000003c0 }, + { 0x0028, 0x00000003 }, + { 0x0030, 0x00000040 }, + { 0x0028, 0x00000003 }, + { 0x000C, 0x00005a21 }, + { 0x0034, 0x00007c03 }, + { 0x0120, 0x00004c41 }, + AST_DRAMSTRUCT_INVALID, +}; + +static const struct ast_dramstruct ast2100_dram_table_data[] = { + { 0x2000, 0x1688a8a8 }, + { 0x2020, 0x00004120 }, + AST_DRAMSTRUCT_UDELAY(67u), + { 0x0000, 0xfc600309 }, + { 0x006C, 0x00909090 }, + { 0x0064, 0x00070000 }, + AST_DRAMSTRUCT_INIT(DRAM_TYPE, 
0x00000489), + { 0x0008, 0x0011030f }, + { 0x0010, 0x32302926 }, + { 0x0018, 0x274c0122 }, + { 0x0020, 0x00ce2222 }, + { 0x0014, 0x01001523 }, + { 0x001C, 0x1024010d }, + { 0x0024, 0x00cb2522 }, + { 0x0038, 0xffffff82 }, + { 0x003C, 0x00000000 }, + { 0x0040, 0x00000000 }, + { 0x0044, 0x00000000 }, + { 0x0048, 0x00000000 }, + { 0x004C, 0x00000000 }, + { 0x0050, 0x00000000 }, + { 0x0054, 0x00000000 }, + { 0x0058, 0x00000000 }, + { 0x005C, 0x00000000 }, + { 0x0060, 0x0f2aa02a }, + { 0x0064, 0x003f3005 }, + { 0x0068, 0x02020202 }, + { 0x0070, 0x00000000 }, + { 0x0074, 0x00000000 }, + { 0x0078, 0x00000000 }, + { 0x007C, 0x00000000 }, + { 0x0034, 0x00000001 }, + AST_DRAMSTRUCT_UDELAY(67u), + { 0x002C, 0x00000942 }, + { 0x0030, 0x00000040 }, + { 0x0028, 0x00000005 }, + { 0x0028, 0x00000007 }, + { 0x0028, 0x00000003 }, + { 0x0028, 0x00000001 }, + { 0x000C, 0x00005a08 }, + { 0x002C, 0x00000842 }, + { 0x0028, 0x00000001 }, + { 0x0030, 0x000003c0 }, + { 0x0028, 0x00000003 }, + { 0x0030, 0x00000040 }, + { 0x0028, 0x00000003 }, + { 0x000C, 0x00005a21 }, + { 0x0034, 0x00007c03 }, + { 0x0120, 0x00005061 }, + AST_DRAMSTRUCT_INVALID, +}; + +/* + * AST2100/2150 DLL CBR Setting + */ +#define CBR_SIZE_AST2150 ((16 << 10) - 1) +#define CBR_PASSNUM_AST2150 5 +#define CBR_THRESHOLD_AST2150 10 +#define CBR_THRESHOLD2_AST2150 10 +#define TIMEOUT_AST2150 5000000 + +#define CBR_PATNUM_AST2150 8 + +static const u32 pattern_AST2150[14] = { + 0xFF00FF00, + 0xCC33CC33, + 0xAA55AA55, + 0xFFFE0001, + 0x683501FE, + 0x0F1929B0, + 0x2D0B4346, + 0x60767F02, + 0x6FBE36A6, + 0x3A253035, + 0x3019686D, + 0x41C6167E, + 0x620152BF, + 0x20F050E0 +}; + +static u32 mmctestburst2_ast2150(struct ast_device *ast, u32 datagen) +{ + u32 data, timeout; + + ast_moutdwm(ast, 0x1e6e0070, 0x00000000); + ast_moutdwm(ast, 0x1e6e0070, 0x00000001 | (datagen << 3)); + timeout = 0; + do { + data = ast_mindwm(ast, 0x1e6e0070) & 0x40; + if (++timeout > TIMEOUT_AST2150) { + ast_moutdwm(ast, 0x1e6e0070, 0x00000000); + return 0xffffffff; + } + } while (!data); + ast_moutdwm(ast, 0x1e6e0070, 0x00000000); + ast_moutdwm(ast, 0x1e6e0070, 0x00000003 | (datagen << 3)); + timeout = 0; + do { + data = ast_mindwm(ast, 0x1e6e0070) & 0x40; + if (++timeout > TIMEOUT_AST2150) { + ast_moutdwm(ast, 0x1e6e0070, 0x00000000); + return 0xffffffff; + } + } while (!data); + data = (ast_mindwm(ast, 0x1e6e0070) & 0x80) >> 7; + ast_moutdwm(ast, 0x1e6e0070, 0x00000000); + return data; +} + +static int cbrtest_ast2150(struct ast_device *ast) +{ + int i; + + for (i = 0; i < 8; i++) + if (mmctestburst2_ast2150(ast, i)) + return 0; + return 1; +} + +static int cbrscan_ast2150(struct ast_device *ast, int busw) +{ + u32 patcnt, loop; + + for (patcnt = 0; patcnt < CBR_PATNUM_AST2150; patcnt++) { + ast_moutdwm(ast, 0x1e6e007c, pattern_AST2150[patcnt]); + for (loop = 0; loop < CBR_PASSNUM_AST2150; loop++) { + if (cbrtest_ast2150(ast)) + break; + } + if (loop == CBR_PASSNUM_AST2150) + return 0; + } + return 1; +} + +static void cbrdlli_ast2150(struct ast_device *ast, int busw) +{ + u32 dll_min[4], dll_max[4], dlli, data, passcnt; + +cbr_start: + dll_min[0] = 0xff; + dll_min[1] = 0xff; + dll_min[2] = 0xff; + dll_min[3] = 0xff; + dll_max[0] = 0x00; + dll_max[1] = 0x00; + dll_max[2] = 0x00; + dll_max[3] = 0x00; + passcnt = 0; + + for (dlli = 0; dlli < 100; dlli++) { + ast_moutdwm(ast, 0x1e6e0068, dlli | (dlli << 8) | (dlli << 16) | (dlli << 24)); + data = cbrscan_ast2150(ast, busw); + if (data != 0) { + if (data & 0x1) { + if (dll_min[0] > dlli) + dll_min[0] = dlli; + if (dll_max[0] < dlli) + 
dll_max[0] = dlli; + } + passcnt++; + } else if (passcnt >= CBR_THRESHOLD_AST2150) { + goto cbr_start; + } + } + if (dll_max[0] == 0 || (dll_max[0] - dll_min[0]) < CBR_THRESHOLD_AST2150) + goto cbr_start; + + dlli = dll_min[0] + (((dll_max[0] - dll_min[0]) * 7) >> 4); + ast_moutdwm(ast, 0x1e6e0068, dlli | (dlli << 8) | (dlli << 16) | (dlli << 24)); +} + +static void ast_post_chip_2100(struct ast_device *ast) +{ + u8 j; + u32 data, temp, i; + const struct ast_dramstruct *dram_reg_info; + enum ast_dram_layout dram_layout = ast_2100_get_dram_layout_p2a(ast); + + j = ast_get_index_reg_mask(ast, AST_IO_VGACRI, 0xd0, 0xff); + + if ((j & 0x80) == 0) { /* VGA only */ + if (ast->chip == AST2100 || ast->chip == AST2200) + dram_reg_info = ast2100_dram_table_data; + else + dram_reg_info = ast1100_dram_table_data; + + ast_write32(ast, 0xf004, 0x1e6e0000); + ast_write32(ast, 0xf000, 0x1); + ast_write32(ast, 0x12000, 0x1688A8A8); + do { + ; + } while (ast_read32(ast, 0x12000) != 0x01); + + ast_write32(ast, 0x10000, 0xfc600309); + do { + ; + } while (ast_read32(ast, 0x10000) != 0x01); + + while (!AST_DRAMSTRUCT_IS(dram_reg_info, INVALID)) { + if (AST_DRAMSTRUCT_IS(dram_reg_info, UDELAY)) { + for (i = 0; i < 15; i++) + udelay(dram_reg_info->data); + } else if (AST_DRAMSTRUCT_IS(dram_reg_info, DRAM_TYPE)) { + switch (dram_layout) { + case AST_DRAM_1Gx16: + data = 0x00000d89; + break; + case AST_DRAM_1Gx32: + data = 0x00000c8d; + break; + default: + data = dram_reg_info->data; + break; + } + + temp = ast_read32(ast, 0x12070); + temp &= 0xc; + temp <<= 2; + ast_write32(ast, 0x10000 + dram_reg_info->index, data | temp); + } else { + ast_write32(ast, 0x10000 + dram_reg_info->index, + dram_reg_info->data); + } + dram_reg_info++; + } + + /* AST 2100/2150 DRAM calibration */ + data = ast_read32(ast, 0x10120); + if (data == 0x5061) { /* 266Mhz */ + data = ast_read32(ast, 0x10004); + if (data & 0x40) + cbrdlli_ast2150(ast, 16); /* 16 bits */ + else + cbrdlli_ast2150(ast, 32); /* 32 bits */ + } + + temp = ast_read32(ast, 0x1200c); + ast_write32(ast, 0x1200c, temp & 0xfffffffd); + temp = ast_read32(ast, 0x12040); + ast_write32(ast, 0x12040, temp | 0x40); + } + + /* wait ready */ + do { + j = ast_get_index_reg_mask(ast, AST_IO_VGACRI, 0xd0, 0xff); + } while ((j & 0x40) == 0); +} + +int ast_2100_post(struct ast_device *ast) +{ + ast_2000_set_def_ext_reg(ast); + + if (ast->config_mode == ast_use_p2a) { + ast_post_chip_2100(ast); + } else { + if (ast->tx_chip == AST_TX_SIL164) { + /* Enable DVO */ + ast_set_index_reg_mask(ast, AST_IO_VGACRI, 0xa3, 0xcf, 0x80); + } + } + + return 0; +} diff --git a/drivers/gpu/drm/ast/ast_2300.c b/drivers/gpu/drm/ast/ast_2300.c new file mode 100644 index 000000000000..dc2a32244689 --- /dev/null +++ b/drivers/gpu/drm/ast/ast_2300.c @@ -0,0 +1,1328 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright 2012 Red Hat Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
IN NO EVENT SHALL + * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + */ +/* + * Authors: Dave Airlie <airlied@redhat.com> + */ + +#include <linux/delay.h> + +#include "ast_drv.h" +#include "ast_post.h" + +/* + * POST + */ + +void ast_2300_set_def_ext_reg(struct ast_device *ast) +{ + static const u8 extreginfo[] = { 0x0f, 0x04, 0x1f, 0xff }; + u8 i, index, reg; + const u8 *ext_reg_info; + + /* reset scratch */ + for (i = 0x81; i <= 0x9f; i++) + ast_set_index_reg(ast, AST_IO_VGACRI, i, 0x00); + + ext_reg_info = extreginfo; + index = 0xa0; + while (*ext_reg_info != 0xff) { + ast_set_index_reg_mask(ast, AST_IO_VGACRI, index, 0x00, *ext_reg_info); + index++; + ext_reg_info++; + } + + /* disable standard IO/MEM decode if secondary */ + /* ast_set_index_reg-mask(ast, AST_IO_VGACRI, 0xa1, 0xff, 0x3); */ + + /* Set Ext. Default */ + ast_set_index_reg_mask(ast, AST_IO_VGACRI, 0x8c, 0x00, 0x01); + ast_set_index_reg_mask(ast, AST_IO_VGACRI, 0xb7, 0x00, 0x00); + + /* Enable RAMDAC for A1 */ + reg = 0x04; + reg |= 0x20; + ast_set_index_reg_mask(ast, AST_IO_VGACRI, 0xb6, 0xff, reg); +} + +/* AST 2300 DRAM settings */ +#define AST_DDR3 0 +#define AST_DDR2 1 + +struct ast2300_dram_param { + u32 dram_type; + u32 dram_chipid; + u32 dram_freq; + u32 vram_size; + u32 odt; + u32 wodt; + u32 rodt; + u32 dram_config; + u32 reg_PERIOD; + u32 reg_MADJ; + u32 reg_SADJ; + u32 reg_MRS; + u32 reg_EMRS; + u32 reg_AC1; + u32 reg_AC2; + u32 reg_DQSIC; + u32 reg_DRV; + u32 reg_IOZ; + u32 reg_DQIDLY; + u32 reg_FREQ; + u32 madj_max; + u32 dll2_finetune_step; +}; + +/* + * DQSI DLL CBR Setting + */ +#define CBR_SIZE0 ((1 << 10) - 1) +#define CBR_SIZE1 ((4 << 10) - 1) +#define CBR_SIZE2 ((64 << 10) - 1) +#define CBR_PASSNUM 5 +#define CBR_PASSNUM2 5 +#define CBR_THRESHOLD 10 +#define CBR_THRESHOLD2 10 +#define TIMEOUT 5000000 +#define CBR_PATNUM 8 + +static const u32 pattern[8] = { + 0xFF00FF00, + 0xCC33CC33, + 0xAA55AA55, + 0x88778877, + 0x92CC4D6E, + 0x543D3CDE, + 0xF1E843C7, + 0x7C61D253 +}; + +static u32 mmc_test2(struct ast_device *ast, u32 datagen, u8 test_ctl) +{ + u32 data, timeout; + + ast_moutdwm(ast, 0x1e6e0070, 0x00000000); + ast_moutdwm(ast, 0x1e6e0070, (datagen << 3) | test_ctl); + timeout = 0; + do { + data = ast_mindwm(ast, 0x1e6e0070) & 0x1000; + if (++timeout > TIMEOUT) { + ast_moutdwm(ast, 0x1e6e0070, 0x0); + return 0xffffffff; + } + } while (!data); + data = ast_mindwm(ast, 0x1e6e0078); + data = (data | (data >> 16)) & 0xffff; + ast_moutdwm(ast, 0x1e6e0070, 0x00000000); + return data; +} + +static u32 mmc_test_burst2(struct ast_device *ast, u32 datagen) +{ + return mmc_test2(ast, datagen, 0x41); +} + +static bool mmc_test_single(struct ast_device *ast, u32 datagen) +{ + return mmc_test(ast, datagen, 0xc5); +} + +static u32 mmc_test_single2(struct ast_device *ast, u32 datagen) +{ + return mmc_test2(ast, datagen, 0x05); +} + +static int cbr_test(struct ast_device *ast) +{ + u32 data; + int i; + + data = mmc_test_single2(ast, 0); + if ((data & 0xff) && (data & 0xff00)) + return 0; + for (i = 0; i < 8; i++) { + data = mmc_test_burst2(ast, i); + if ((data & 0xff) && (data & 0xff00)) + return 0; + } + if (!data) + 
return 3; + else if (data & 0xff) + return 2; + return 1; +} + +static int cbr_scan(struct ast_device *ast) +{ + u32 data, data2, patcnt, loop; + + data2 = 3; + for (patcnt = 0; patcnt < CBR_PATNUM; patcnt++) { + ast_moutdwm(ast, 0x1e6e007c, pattern[patcnt]); + for (loop = 0; loop < CBR_PASSNUM2; loop++) { + data = cbr_test(ast); + if (data != 0) { + data2 &= data; + if (!data2) + return 0; + break; + } + } + if (loop == CBR_PASSNUM2) + return 0; + } + return data2; +} + +static u32 cbr_test2(struct ast_device *ast) +{ + u32 data; + + data = mmc_test_burst2(ast, 0); + if (data == 0xffff) + return 0; + data |= mmc_test_single2(ast, 0); + if (data == 0xffff) + return 0; + + return ~data & 0xffff; +} + +static u32 cbr_scan2(struct ast_device *ast) +{ + u32 data, data2, patcnt, loop; + + data2 = 0xffff; + for (patcnt = 0; patcnt < CBR_PATNUM; patcnt++) { + ast_moutdwm(ast, 0x1e6e007c, pattern[patcnt]); + for (loop = 0; loop < CBR_PASSNUM2; loop++) { + data = cbr_test2(ast); + if (data != 0) { + data2 &= data; + if (!data2) + return 0; + break; + } + } + if (loop == CBR_PASSNUM2) + return 0; + } + return data2; +} + +static bool cbr_test3(struct ast_device *ast) +{ + if (!mmc_test_burst(ast, 0)) + return false; + if (!mmc_test_single(ast, 0)) + return false; + return true; +} + +static bool cbr_scan3(struct ast_device *ast) +{ + u32 patcnt, loop; + + for (patcnt = 0; patcnt < CBR_PATNUM; patcnt++) { + ast_moutdwm(ast, 0x1e6e007c, pattern[patcnt]); + for (loop = 0; loop < 2; loop++) { + if (cbr_test3(ast)) + break; + } + if (loop == 2) + return false; + } + return true; +} + +static bool finetuneDQI_L(struct ast_device *ast, struct ast2300_dram_param *param) +{ + u32 gold_sadj[2], dllmin[16], dllmax[16], dlli, data, cnt, mask, passcnt, retry = 0; + bool status = false; +FINETUNE_START: + for (cnt = 0; cnt < 16; cnt++) { + dllmin[cnt] = 0xff; + dllmax[cnt] = 0x0; + } + passcnt = 0; + for (dlli = 0; dlli < 76; dlli++) { + ast_moutdwm(ast, 0x1E6E0068, 0x00001400 | (dlli << 16) | (dlli << 24)); + ast_moutdwm(ast, 0x1E6E0074, CBR_SIZE1); + data = cbr_scan2(ast); + if (data != 0) { + mask = 0x00010001; + for (cnt = 0; cnt < 16; cnt++) { + if (data & mask) { + if (dllmin[cnt] > dlli) + dllmin[cnt] = dlli; + if (dllmax[cnt] < dlli) + dllmax[cnt] = dlli; + } + mask <<= 1; + } + passcnt++; + } else if (passcnt >= CBR_THRESHOLD2) { + break; + } + } + gold_sadj[0] = 0x0; + passcnt = 0; + for (cnt = 0; cnt < 16; cnt++) { + if ((dllmax[cnt] > dllmin[cnt]) && + ((dllmax[cnt] - dllmin[cnt]) >= CBR_THRESHOLD2)) { + gold_sadj[0] += dllmin[cnt]; + passcnt++; + } + } + if (retry++ > 10) + goto FINETUNE_DONE; + if (passcnt != 16) + goto FINETUNE_START; + status = true; +FINETUNE_DONE: + gold_sadj[0] = gold_sadj[0] >> 4; + gold_sadj[1] = gold_sadj[0]; + + data = 0; + for (cnt = 0; cnt < 8; cnt++) { + data >>= 3; + if ((dllmax[cnt] > dllmin[cnt]) && + ((dllmax[cnt] - dllmin[cnt]) >= CBR_THRESHOLD2)) { + dlli = dllmin[cnt]; + if (gold_sadj[0] >= dlli) { + dlli = ((gold_sadj[0] - dlli) * 19) >> 5; + if (dlli > 3) + dlli = 3; + } else { + dlli = ((dlli - gold_sadj[0]) * 19) >> 5; + if (dlli > 4) + dlli = 4; + dlli = (8 - dlli) & 0x7; + } + data |= dlli << 21; + } + } + ast_moutdwm(ast, 0x1E6E0080, data); + + data = 0; + for (cnt = 8; cnt < 16; cnt++) { + data >>= 3; + if ((dllmax[cnt] > dllmin[cnt]) && + ((dllmax[cnt] - dllmin[cnt]) >= CBR_THRESHOLD2)) { + dlli = dllmin[cnt]; + if (gold_sadj[1] >= dlli) { + dlli = ((gold_sadj[1] - dlli) * 19) >> 5; + if (dlli > 3) + dlli = 3; + else + dlli = (dlli - 1) & 0x7; + } else { 
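+				/* window minimum is past the golden average: scale the gap by 19/32, add one, clamp to 4, and store it as a wrapped 3-bit fine-tune step (opposite direction to the branch above) */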
+ dlli = ((dlli - gold_sadj[1]) * 19) >> 5; + dlli += 1; + if (dlli > 4) + dlli = 4; + dlli = (8 - dlli) & 0x7; + } + data |= dlli << 21; + } + } + ast_moutdwm(ast, 0x1E6E0084, data); + return status; +} /* finetuneDQI_L */ + +static void finetuneDQSI(struct ast_device *ast) +{ + u32 dlli, dqsip, dqidly; + u32 reg_mcr18, reg_mcr0c, passcnt[2], diff; + u32 g_dqidly, g_dqsip, g_margin, g_side; + u16 pass[32][2][2]; + char tag[2][76]; + + /* Disable DQI CBR */ + reg_mcr0c = ast_mindwm(ast, 0x1E6E000C); + reg_mcr18 = ast_mindwm(ast, 0x1E6E0018); + reg_mcr18 &= 0x0000ffff; + ast_moutdwm(ast, 0x1E6E0018, reg_mcr18); + + for (dlli = 0; dlli < 76; dlli++) { + tag[0][dlli] = 0x0; + tag[1][dlli] = 0x0; + } + for (dqidly = 0; dqidly < 32; dqidly++) { + pass[dqidly][0][0] = 0xff; + pass[dqidly][0][1] = 0x0; + pass[dqidly][1][0] = 0xff; + pass[dqidly][1][1] = 0x0; + } + for (dqidly = 0; dqidly < 32; dqidly++) { + passcnt[0] = 0; + passcnt[1] = 0; + for (dqsip = 0; dqsip < 2; dqsip++) { + ast_moutdwm(ast, 0x1E6E000C, 0); + ast_moutdwm(ast, 0x1E6E0018, reg_mcr18 | (dqidly << 16) | (dqsip << 23)); + ast_moutdwm(ast, 0x1E6E000C, reg_mcr0c); + for (dlli = 0; dlli < 76; dlli++) { + ast_moutdwm(ast, 0x1E6E0068, + 0x00001300 | (dlli << 16) | (dlli << 24)); + ast_moutdwm(ast, 0x1E6E0070, 0); + ast_moutdwm(ast, 0x1E6E0074, CBR_SIZE0); + if (cbr_scan3(ast)) { + if (dlli == 0) + break; + passcnt[dqsip]++; + tag[dqsip][dlli] = 'P'; + if (dlli < pass[dqidly][dqsip][0]) + pass[dqidly][dqsip][0] = (u16)dlli; + if (dlli > pass[dqidly][dqsip][1]) + pass[dqidly][dqsip][1] = (u16)dlli; + } else if (passcnt[dqsip] >= 5) { + break; + } else { + pass[dqidly][dqsip][0] = 0xff; + pass[dqidly][dqsip][1] = 0x0; + } + } + } + if (passcnt[0] == 0 && passcnt[1] == 0) + dqidly++; + } + /* Search margin */ + g_dqidly = 0; + g_dqsip = 0; + g_margin = 0; + g_side = 0; + + for (dqidly = 0; dqidly < 32; dqidly++) { + for (dqsip = 0; dqsip < 2; dqsip++) { + if (pass[dqidly][dqsip][0] > pass[dqidly][dqsip][1]) + continue; + diff = pass[dqidly][dqsip][1] - pass[dqidly][dqsip][0]; + if ((diff + 2) < g_margin) + continue; + passcnt[0] = 0; + passcnt[1] = 0; + for (dlli = pass[dqidly][dqsip][0]; + dlli > 0 && tag[dqsip][dlli] != 0; + dlli--, passcnt[0]++) { + } + for (dlli = pass[dqidly][dqsip][1]; + dlli < 76 && tag[dqsip][dlli] != 0; + dlli++, passcnt[1]++) { + } + if (passcnt[0] > passcnt[1]) + passcnt[0] = passcnt[1]; + passcnt[1] = 0; + if (passcnt[0] > g_side) + passcnt[1] = passcnt[0] - g_side; + if (diff > (g_margin + 1) && (passcnt[1] > 0 || passcnt[0] > 8)) { + g_margin = diff; + g_dqidly = dqidly; + g_dqsip = dqsip; + g_side = passcnt[0]; + } else if (passcnt[1] > 1 && g_side < 8) { + if (diff > g_margin) + g_margin = diff; + g_dqidly = dqidly; + g_dqsip = dqsip; + g_side = passcnt[0]; + } + } + } + reg_mcr18 = reg_mcr18 | (g_dqidly << 16) | (g_dqsip << 23); + ast_moutdwm(ast, 0x1E6E0018, reg_mcr18); +} + +static bool cbr_dll2(struct ast_device *ast, struct ast2300_dram_param *param) +{ + u32 dllmin[2], dllmax[2], dlli, data, passcnt, retry = 0; + bool status = false; + + finetuneDQSI(ast); + if (finetuneDQI_L(ast, param) == false) + return status; + +CBR_START2: + dllmin[0] = 0xff; + dllmin[1] = 0xff; + dllmax[0] = 0x0; + dllmax[1] = 0x0; + passcnt = 0; + for (dlli = 0; dlli < 76; dlli++) { + ast_moutdwm(ast, 0x1E6E0068, 0x00001300 | (dlli << 16) | (dlli << 24)); + ast_moutdwm(ast, 0x1E6E0074, CBR_SIZE2); + data = cbr_scan(ast); + if (data != 0) { + if (data & 0x1) { + if (dllmin[0] > dlli) + dllmin[0] = dlli; + if (dllmax[0] < 
dlli) + dllmax[0] = dlli; + } + if (data & 0x2) { + if (dllmin[1] > dlli) + dllmin[1] = dlli; + if (dllmax[1] < dlli) + dllmax[1] = dlli; + } + passcnt++; + } else if (passcnt >= CBR_THRESHOLD) { + break; + } + } + if (retry++ > 10) + goto CBR_DONE2; + if (dllmax[0] == 0 || (dllmax[0] - dllmin[0]) < CBR_THRESHOLD) + goto CBR_START2; + if (dllmax[1] == 0 || (dllmax[1] - dllmin[1]) < CBR_THRESHOLD) + goto CBR_START2; + status = true; +CBR_DONE2: + dlli = (dllmin[1] + dllmax[1]) >> 1; + dlli <<= 8; + dlli += (dllmin[0] + dllmax[0]) >> 1; + ast_moutdwm(ast, 0x1E6E0068, ast_mindwm(ast, 0x1E720058) | (dlli << 16)); + return status; +} /* CBRDLL2 */ + +static void get_ddr3_info(struct ast_device *ast, struct ast2300_dram_param *param) +{ + u32 trap, trap_AC2, trap_MRS; + + ast_moutdwm(ast, 0x1E6E2000, 0x1688A8A8); + + /* Ger trap info */ + trap = (ast_mindwm(ast, 0x1E6E2070) >> 25) & 0x3; + trap_AC2 = 0x00020000 + (trap << 16); + trap_AC2 |= 0x00300000 + ((trap & 0x2) << 19); + trap_MRS = 0x00000010 + (trap << 4); + trap_MRS |= ((trap & 0x2) << 18); + + param->reg_MADJ = 0x00034C4C; + param->reg_SADJ = 0x00001800; + param->reg_DRV = 0x000000F0; + param->reg_PERIOD = param->dram_freq; + param->rodt = 0; + + switch (param->dram_freq) { + case 336: + ast_moutdwm(ast, 0x1E6E2020, 0x0190); + param->wodt = 0; + param->reg_AC1 = 0x22202725; + param->reg_AC2 = 0xAA007613 | trap_AC2; + param->reg_DQSIC = 0x000000BA; + param->reg_MRS = 0x04001400 | trap_MRS; + param->reg_EMRS = 0x00000000; + param->reg_IOZ = 0x00000023; + param->reg_DQIDLY = 0x00000074; + param->reg_FREQ = 0x00004DC0; + param->madj_max = 96; + param->dll2_finetune_step = 3; + switch (param->dram_chipid) { + default: + case AST_DRAM_512Mx16: + case AST_DRAM_1Gx16: + param->reg_AC2 = 0xAA007613 | trap_AC2; + break; + case AST_DRAM_2Gx16: + param->reg_AC2 = 0xAA00761C | trap_AC2; + break; + case AST_DRAM_4Gx16: + param->reg_AC2 = 0xAA007636 | trap_AC2; + break; + } + break; + default: + case 396: + ast_moutdwm(ast, 0x1E6E2020, 0x03F1); + param->wodt = 1; + param->reg_AC1 = 0x33302825; + param->reg_AC2 = 0xCC009617 | trap_AC2; + param->reg_DQSIC = 0x000000E2; + param->reg_MRS = 0x04001600 | trap_MRS; + param->reg_EMRS = 0x00000000; + param->reg_IOZ = 0x00000034; + param->reg_DRV = 0x000000FA; + param->reg_DQIDLY = 0x00000089; + param->reg_FREQ = 0x00005040; + param->madj_max = 96; + param->dll2_finetune_step = 4; + + switch (param->dram_chipid) { + default: + case AST_DRAM_512Mx16: + case AST_DRAM_1Gx16: + param->reg_AC2 = 0xCC009617 | trap_AC2; + break; + case AST_DRAM_2Gx16: + param->reg_AC2 = 0xCC009622 | trap_AC2; + break; + case AST_DRAM_4Gx16: + param->reg_AC2 = 0xCC00963F | trap_AC2; + break; + } + break; + + case 408: + ast_moutdwm(ast, 0x1E6E2020, 0x01F0); + param->wodt = 1; + param->reg_AC1 = 0x33302825; + param->reg_AC2 = 0xCC009617 | trap_AC2; + param->reg_DQSIC = 0x000000E2; + param->reg_MRS = 0x04001600 | trap_MRS; + param->reg_EMRS = 0x00000000; + param->reg_IOZ = 0x00000023; + param->reg_DRV = 0x000000FA; + param->reg_DQIDLY = 0x00000089; + param->reg_FREQ = 0x000050C0; + param->madj_max = 96; + param->dll2_finetune_step = 4; + + switch (param->dram_chipid) { + default: + case AST_DRAM_512Mx16: + case AST_DRAM_1Gx16: + param->reg_AC2 = 0xCC009617 | trap_AC2; + break; + case AST_DRAM_2Gx16: + param->reg_AC2 = 0xCC009622 | trap_AC2; + break; + case AST_DRAM_4Gx16: + param->reg_AC2 = 0xCC00963F | trap_AC2; + break; + } + + break; + case 456: + ast_moutdwm(ast, 0x1E6E2020, 0x0230); + param->wodt = 0; + param->reg_AC1 = 0x33302926; 
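+		/* from 456 MHz upward the AC2 timing is a fixed constant; only the 336/396/408 MHz bins fold in the trap-derived trap_AC2 adjustment */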
+ param->reg_AC2 = 0xCD44961A; + param->reg_DQSIC = 0x000000FC; + param->reg_MRS = 0x00081830; + param->reg_EMRS = 0x00000000; + param->reg_IOZ = 0x00000045; + param->reg_DQIDLY = 0x00000097; + param->reg_FREQ = 0x000052C0; + param->madj_max = 88; + param->dll2_finetune_step = 4; + break; + case 504: + ast_moutdwm(ast, 0x1E6E2020, 0x0270); + param->wodt = 1; + param->reg_AC1 = 0x33302926; + param->reg_AC2 = 0xDE44A61D; + param->reg_DQSIC = 0x00000117; + param->reg_MRS = 0x00081A30; + param->reg_EMRS = 0x00000000; + param->reg_IOZ = 0x070000BB; + param->reg_DQIDLY = 0x000000A0; + param->reg_FREQ = 0x000054C0; + param->madj_max = 79; + param->dll2_finetune_step = 4; + break; + case 528: + ast_moutdwm(ast, 0x1E6E2020, 0x0290); + param->wodt = 1; + param->rodt = 1; + param->reg_AC1 = 0x33302926; + param->reg_AC2 = 0xEF44B61E; + param->reg_DQSIC = 0x00000125; + param->reg_MRS = 0x00081A30; + param->reg_EMRS = 0x00000040; + param->reg_DRV = 0x000000F5; + param->reg_IOZ = 0x00000023; + param->reg_DQIDLY = 0x00000088; + param->reg_FREQ = 0x000055C0; + param->madj_max = 76; + param->dll2_finetune_step = 3; + break; + case 576: + ast_moutdwm(ast, 0x1E6E2020, 0x0140); + param->reg_MADJ = 0x00136868; + param->reg_SADJ = 0x00004534; + param->wodt = 1; + param->rodt = 1; + param->reg_AC1 = 0x33302A37; + param->reg_AC2 = 0xEF56B61E; + param->reg_DQSIC = 0x0000013F; + param->reg_MRS = 0x00101A50; + param->reg_EMRS = 0x00000040; + param->reg_DRV = 0x000000FA; + param->reg_IOZ = 0x00000023; + param->reg_DQIDLY = 0x00000078; + param->reg_FREQ = 0x000057C0; + param->madj_max = 136; + param->dll2_finetune_step = 3; + break; + case 600: + ast_moutdwm(ast, 0x1E6E2020, 0x02E1); + param->reg_MADJ = 0x00136868; + param->reg_SADJ = 0x00004534; + param->wodt = 1; + param->rodt = 1; + param->reg_AC1 = 0x32302A37; + param->reg_AC2 = 0xDF56B61F; + param->reg_DQSIC = 0x0000014D; + param->reg_MRS = 0x00101A50; + param->reg_EMRS = 0x00000004; + param->reg_DRV = 0x000000F5; + param->reg_IOZ = 0x00000023; + param->reg_DQIDLY = 0x00000078; + param->reg_FREQ = 0x000058C0; + param->madj_max = 132; + param->dll2_finetune_step = 3; + break; + case 624: + ast_moutdwm(ast, 0x1E6E2020, 0x0160); + param->reg_MADJ = 0x00136868; + param->reg_SADJ = 0x00004534; + param->wodt = 1; + param->rodt = 1; + param->reg_AC1 = 0x32302A37; + param->reg_AC2 = 0xEF56B621; + param->reg_DQSIC = 0x0000015A; + param->reg_MRS = 0x02101A50; + param->reg_EMRS = 0x00000004; + param->reg_DRV = 0x000000F5; + param->reg_IOZ = 0x00000034; + param->reg_DQIDLY = 0x00000078; + param->reg_FREQ = 0x000059C0; + param->madj_max = 128; + param->dll2_finetune_step = 3; + break; + } /* switch freq */ + + switch (param->dram_chipid) { + case AST_DRAM_512Mx16: + param->dram_config = 0x130; + break; + default: + case AST_DRAM_1Gx16: + param->dram_config = 0x131; + break; + case AST_DRAM_2Gx16: + param->dram_config = 0x132; + break; + case AST_DRAM_4Gx16: + param->dram_config = 0x133; + break; + } /* switch size */ + + switch (param->vram_size) { + default: + case SZ_8M: + param->dram_config |= 0x00; + break; + case SZ_16M: + param->dram_config |= 0x04; + break; + case SZ_32M: + param->dram_config |= 0x08; + break; + case SZ_64M: + param->dram_config |= 0x0c; + break; + } +} + +static void ddr3_init(struct ast_device *ast, struct ast2300_dram_param *param) +{ + u32 data, data2, retry = 0; + +ddr3_init_start: + ast_moutdwm(ast, 0x1E6E0000, 0xFC600309); + ast_moutdwm(ast, 0x1E6E0018, 0x00000100); + ast_moutdwm(ast, 0x1E6E0024, 0x00000000); + ast_moutdwm(ast, 0x1E6E0034, 
0x00000000); + udelay(10); + ast_moutdwm(ast, 0x1E6E0064, param->reg_MADJ); + ast_moutdwm(ast, 0x1E6E0068, param->reg_SADJ); + udelay(10); + ast_moutdwm(ast, 0x1E6E0064, param->reg_MADJ | 0xC0000); + udelay(10); + + ast_moutdwm(ast, 0x1E6E0004, param->dram_config); + ast_moutdwm(ast, 0x1E6E0008, 0x90040f); + ast_moutdwm(ast, 0x1E6E0010, param->reg_AC1); + ast_moutdwm(ast, 0x1E6E0014, param->reg_AC2); + ast_moutdwm(ast, 0x1E6E0020, param->reg_DQSIC); + ast_moutdwm(ast, 0x1E6E0080, 0x00000000); + ast_moutdwm(ast, 0x1E6E0084, 0x00000000); + ast_moutdwm(ast, 0x1E6E0088, param->reg_DQIDLY); + ast_moutdwm(ast, 0x1E6E0018, 0x4000A170); + ast_moutdwm(ast, 0x1E6E0018, 0x00002370); + ast_moutdwm(ast, 0x1E6E0038, 0x00000000); + ast_moutdwm(ast, 0x1E6E0040, 0xFF444444); + ast_moutdwm(ast, 0x1E6E0044, 0x22222222); + ast_moutdwm(ast, 0x1E6E0048, 0x22222222); + ast_moutdwm(ast, 0x1E6E004C, 0x00000002); + ast_moutdwm(ast, 0x1E6E0050, 0x80000000); + ast_moutdwm(ast, 0x1E6E0050, 0x00000000); + ast_moutdwm(ast, 0x1E6E0054, 0); + ast_moutdwm(ast, 0x1E6E0060, param->reg_DRV); + ast_moutdwm(ast, 0x1E6E006C, param->reg_IOZ); + ast_moutdwm(ast, 0x1E6E0070, 0x00000000); + ast_moutdwm(ast, 0x1E6E0074, 0x00000000); + ast_moutdwm(ast, 0x1E6E0078, 0x00000000); + ast_moutdwm(ast, 0x1E6E007C, 0x00000000); + /* Wait MCLK2X lock to MCLK */ + do { + data = ast_mindwm(ast, 0x1E6E001C); + } while (!(data & 0x08000000)); + data = ast_mindwm(ast, 0x1E6E001C); + data = (data >> 8) & 0xff; + while ((data & 0x08) || ((data & 0x7) < 2) || (data < 4)) { + data2 = (ast_mindwm(ast, 0x1E6E0064) & 0xfff3ffff) + 4; + if ((data2 & 0xff) > param->madj_max) + break; + ast_moutdwm(ast, 0x1E6E0064, data2); + if (data2 & 0x00100000) + data2 = ((data2 & 0xff) >> 3) + 3; + else + data2 = ((data2 & 0xff) >> 2) + 5; + data = ast_mindwm(ast, 0x1E6E0068) & 0xffff00ff; + data2 += data & 0xff; + data = data | (data2 << 8); + ast_moutdwm(ast, 0x1E6E0068, data); + udelay(10); + ast_moutdwm(ast, 0x1E6E0064, ast_mindwm(ast, 0x1E6E0064) | 0xC0000); + udelay(10); + data = ast_mindwm(ast, 0x1E6E0018) & 0xfffff1ff; + ast_moutdwm(ast, 0x1E6E0018, data); + data = data | 0x200; + ast_moutdwm(ast, 0x1E6E0018, data); + do { + data = ast_mindwm(ast, 0x1E6E001C); + } while (!(data & 0x08000000)); + + data = ast_mindwm(ast, 0x1E6E001C); + data = (data >> 8) & 0xff; + } + ast_moutdwm(ast, 0x1E720058, ast_mindwm(ast, 0x1E6E0068) & 0xffff); + data = ast_mindwm(ast, 0x1E6E0018) | 0xC00; + ast_moutdwm(ast, 0x1E6E0018, data); + + ast_moutdwm(ast, 0x1E6E0034, 0x00000001); + ast_moutdwm(ast, 0x1E6E000C, 0x00000040); + udelay(50); + /* Mode Register Setting */ + ast_moutdwm(ast, 0x1E6E002C, param->reg_MRS | 0x100); + ast_moutdwm(ast, 0x1E6E0030, param->reg_EMRS); + ast_moutdwm(ast, 0x1E6E0028, 0x00000005); + ast_moutdwm(ast, 0x1E6E0028, 0x00000007); + ast_moutdwm(ast, 0x1E6E0028, 0x00000003); + ast_moutdwm(ast, 0x1E6E0028, 0x00000001); + ast_moutdwm(ast, 0x1E6E002C, param->reg_MRS); + ast_moutdwm(ast, 0x1E6E000C, 0x00005C08); + ast_moutdwm(ast, 0x1E6E0028, 0x00000001); + + ast_moutdwm(ast, 0x1E6E000C, 0x00005C01); + data = 0; + if (param->wodt) + data = 0x300; + if (param->rodt) + data = data | 0x3000 | ((param->reg_AC2 & 0x60000) >> 3); + ast_moutdwm(ast, 0x1E6E0034, data | 0x3); + + /* Calibrate the DQSI delay */ + if ((cbr_dll2(ast, param) == false) && (retry++ < 10)) + goto ddr3_init_start; + + ast_moutdwm(ast, 0x1E6E0120, param->reg_FREQ); + /* ECC Memory Initialization */ +#ifdef ECC + ast_moutdwm(ast, 0x1E6E007C, 0x00000000); + ast_moutdwm(ast, 0x1E6E0070, 0x221); + 
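+	/* wait for the MCR70 test engine to flag the ECC init pass complete (bit 12) */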
do { + data = ast_mindwm(ast, 0x1E6E0070); + } while (!(data & 0x00001000)); + ast_moutdwm(ast, 0x1E6E0070, 0x00000000); + ast_moutdwm(ast, 0x1E6E0050, 0x80000000); + ast_moutdwm(ast, 0x1E6E0050, 0x00000000); +#endif +} + +static void get_ddr2_info(struct ast_device *ast, struct ast2300_dram_param *param) +{ + u32 trap, trap_AC2, trap_MRS; + + ast_moutdwm(ast, 0x1E6E2000, 0x1688A8A8); + + /* Ger trap info */ + trap = (ast_mindwm(ast, 0x1E6E2070) >> 25) & 0x3; + trap_AC2 = (trap << 20) | (trap << 16); + trap_AC2 += 0x00110000; + trap_MRS = 0x00000040 | (trap << 4); + + param->reg_MADJ = 0x00034C4C; + param->reg_SADJ = 0x00001800; + param->reg_DRV = 0x000000F0; + param->reg_PERIOD = param->dram_freq; + param->rodt = 0; + + switch (param->dram_freq) { + case 264: + ast_moutdwm(ast, 0x1E6E2020, 0x0130); + param->wodt = 0; + param->reg_AC1 = 0x11101513; + param->reg_AC2 = 0x78117011; + param->reg_DQSIC = 0x00000092; + param->reg_MRS = 0x00000842; + param->reg_EMRS = 0x00000000; + param->reg_DRV = 0x000000F0; + param->reg_IOZ = 0x00000034; + param->reg_DQIDLY = 0x0000005A; + param->reg_FREQ = 0x00004AC0; + param->madj_max = 138; + param->dll2_finetune_step = 3; + break; + case 336: + ast_moutdwm(ast, 0x1E6E2020, 0x0190); + param->wodt = 1; + param->reg_AC1 = 0x22202613; + param->reg_AC2 = 0xAA009016 | trap_AC2; + param->reg_DQSIC = 0x000000BA; + param->reg_MRS = 0x00000A02 | trap_MRS; + param->reg_EMRS = 0x00000040; + param->reg_DRV = 0x000000FA; + param->reg_IOZ = 0x00000034; + param->reg_DQIDLY = 0x00000074; + param->reg_FREQ = 0x00004DC0; + param->madj_max = 96; + param->dll2_finetune_step = 3; + switch (param->dram_chipid) { + default: + case AST_DRAM_512Mx16: + param->reg_AC2 = 0xAA009012 | trap_AC2; + break; + case AST_DRAM_1Gx16: + param->reg_AC2 = 0xAA009016 | trap_AC2; + break; + case AST_DRAM_2Gx16: + param->reg_AC2 = 0xAA009023 | trap_AC2; + break; + case AST_DRAM_4Gx16: + param->reg_AC2 = 0xAA00903B | trap_AC2; + break; + } + break; + default: + case 396: + ast_moutdwm(ast, 0x1E6E2020, 0x03F1); + param->wodt = 1; + param->rodt = 0; + param->reg_AC1 = 0x33302714; + param->reg_AC2 = 0xCC00B01B | trap_AC2; + param->reg_DQSIC = 0x000000E2; + param->reg_MRS = 0x00000C02 | trap_MRS; + param->reg_EMRS = 0x00000040; + param->reg_DRV = 0x000000FA; + param->reg_IOZ = 0x00000034; + param->reg_DQIDLY = 0x00000089; + param->reg_FREQ = 0x00005040; + param->madj_max = 96; + param->dll2_finetune_step = 4; + + switch (param->dram_chipid) { + case AST_DRAM_512Mx16: + param->reg_AC2 = 0xCC00B016 | trap_AC2; + break; + default: + case AST_DRAM_1Gx16: + param->reg_AC2 = 0xCC00B01B | trap_AC2; + break; + case AST_DRAM_2Gx16: + param->reg_AC2 = 0xCC00B02B | trap_AC2; + break; + case AST_DRAM_4Gx16: + param->reg_AC2 = 0xCC00B03F | trap_AC2; + break; + } + + break; + + case 408: + ast_moutdwm(ast, 0x1E6E2020, 0x01F0); + param->wodt = 1; + param->rodt = 0; + param->reg_AC1 = 0x33302714; + param->reg_AC2 = 0xCC00B01B | trap_AC2; + param->reg_DQSIC = 0x000000E2; + param->reg_MRS = 0x00000C02 | trap_MRS; + param->reg_EMRS = 0x00000040; + param->reg_DRV = 0x000000FA; + param->reg_IOZ = 0x00000034; + param->reg_DQIDLY = 0x00000089; + param->reg_FREQ = 0x000050C0; + param->madj_max = 96; + param->dll2_finetune_step = 4; + + switch (param->dram_chipid) { + case AST_DRAM_512Mx16: + param->reg_AC2 = 0xCC00B016 | trap_AC2; + break; + default: + case AST_DRAM_1Gx16: + param->reg_AC2 = 0xCC00B01B | trap_AC2; + break; + case AST_DRAM_2Gx16: + param->reg_AC2 = 0xCC00B02B | trap_AC2; + break; + case AST_DRAM_4Gx16: + 
param->reg_AC2 = 0xCC00B03F | trap_AC2; + break; + } + + break; + case 456: + ast_moutdwm(ast, 0x1E6E2020, 0x0230); + param->wodt = 0; + param->reg_AC1 = 0x33302815; + param->reg_AC2 = 0xCD44B01E; + param->reg_DQSIC = 0x000000FC; + param->reg_MRS = 0x00000E72; + param->reg_EMRS = 0x00000000; + param->reg_DRV = 0x00000000; + param->reg_IOZ = 0x00000034; + param->reg_DQIDLY = 0x00000097; + param->reg_FREQ = 0x000052C0; + param->madj_max = 88; + param->dll2_finetune_step = 3; + break; + case 504: + ast_moutdwm(ast, 0x1E6E2020, 0x0261); + param->wodt = 1; + param->rodt = 1; + param->reg_AC1 = 0x33302815; + param->reg_AC2 = 0xDE44C022; + param->reg_DQSIC = 0x00000117; + param->reg_MRS = 0x00000E72; + param->reg_EMRS = 0x00000040; + param->reg_DRV = 0x0000000A; + param->reg_IOZ = 0x00000045; + param->reg_DQIDLY = 0x000000A0; + param->reg_FREQ = 0x000054C0; + param->madj_max = 79; + param->dll2_finetune_step = 3; + break; + case 528: + ast_moutdwm(ast, 0x1E6E2020, 0x0120); + param->wodt = 1; + param->rodt = 1; + param->reg_AC1 = 0x33302815; + param->reg_AC2 = 0xEF44D024; + param->reg_DQSIC = 0x00000125; + param->reg_MRS = 0x00000E72; + param->reg_EMRS = 0x00000004; + param->reg_DRV = 0x000000F9; + param->reg_IOZ = 0x00000045; + param->reg_DQIDLY = 0x000000A7; + param->reg_FREQ = 0x000055C0; + param->madj_max = 76; + param->dll2_finetune_step = 3; + break; + case 552: + ast_moutdwm(ast, 0x1E6E2020, 0x02A1); + param->wodt = 1; + param->rodt = 1; + param->reg_AC1 = 0x43402915; + param->reg_AC2 = 0xFF44E025; + param->reg_DQSIC = 0x00000132; + param->reg_MRS = 0x00000E72; + param->reg_EMRS = 0x00000040; + param->reg_DRV = 0x0000000A; + param->reg_IOZ = 0x00000045; + param->reg_DQIDLY = 0x000000AD; + param->reg_FREQ = 0x000056C0; + param->madj_max = 76; + param->dll2_finetune_step = 3; + break; + case 576: + ast_moutdwm(ast, 0x1E6E2020, 0x0140); + param->wodt = 1; + param->rodt = 1; + param->reg_AC1 = 0x43402915; + param->reg_AC2 = 0xFF44E027; + param->reg_DQSIC = 0x0000013F; + param->reg_MRS = 0x00000E72; + param->reg_EMRS = 0x00000004; + param->reg_DRV = 0x000000F5; + param->reg_IOZ = 0x00000045; + param->reg_DQIDLY = 0x000000B3; + param->reg_FREQ = 0x000057C0; + param->madj_max = 76; + param->dll2_finetune_step = 3; + break; + } + + switch (param->dram_chipid) { + case AST_DRAM_512Mx16: + param->dram_config = 0x100; + break; + default: + case AST_DRAM_1Gx16: + param->dram_config = 0x121; + break; + case AST_DRAM_2Gx16: + param->dram_config = 0x122; + break; + case AST_DRAM_4Gx16: + param->dram_config = 0x123; + break; + } /* switch size */ + + switch (param->vram_size) { + default: + case SZ_8M: + param->dram_config |= 0x00; + break; + case SZ_16M: + param->dram_config |= 0x04; + break; + case SZ_32M: + param->dram_config |= 0x08; + break; + case SZ_64M: + param->dram_config |= 0x0c; + break; + } +} + +static void ddr2_init(struct ast_device *ast, struct ast2300_dram_param *param) +{ + u32 data, data2, retry = 0; + +ddr2_init_start: + ast_moutdwm(ast, 0x1E6E0000, 0xFC600309); + ast_moutdwm(ast, 0x1E6E0018, 0x00000100); + ast_moutdwm(ast, 0x1E6E0024, 0x00000000); + ast_moutdwm(ast, 0x1E6E0064, param->reg_MADJ); + ast_moutdwm(ast, 0x1E6E0068, param->reg_SADJ); + udelay(10); + ast_moutdwm(ast, 0x1E6E0064, param->reg_MADJ | 0xC0000); + udelay(10); + + ast_moutdwm(ast, 0x1E6E0004, param->dram_config); + ast_moutdwm(ast, 0x1E6E0008, 0x90040f); + ast_moutdwm(ast, 0x1E6E0010, param->reg_AC1); + ast_moutdwm(ast, 0x1E6E0014, param->reg_AC2); + ast_moutdwm(ast, 0x1E6E0020, param->reg_DQSIC); + ast_moutdwm(ast, 
0x1E6E0080, 0x00000000); + ast_moutdwm(ast, 0x1E6E0084, 0x00000000); + ast_moutdwm(ast, 0x1E6E0088, param->reg_DQIDLY); + ast_moutdwm(ast, 0x1E6E0018, 0x4000A130); + ast_moutdwm(ast, 0x1E6E0018, 0x00002330); + ast_moutdwm(ast, 0x1E6E0038, 0x00000000); + ast_moutdwm(ast, 0x1E6E0040, 0xFF808000); + ast_moutdwm(ast, 0x1E6E0044, 0x88848466); + ast_moutdwm(ast, 0x1E6E0048, 0x44440008); + ast_moutdwm(ast, 0x1E6E004C, 0x00000000); + ast_moutdwm(ast, 0x1E6E0050, 0x80000000); + ast_moutdwm(ast, 0x1E6E0050, 0x00000000); + ast_moutdwm(ast, 0x1E6E0054, 0); + ast_moutdwm(ast, 0x1E6E0060, param->reg_DRV); + ast_moutdwm(ast, 0x1E6E006C, param->reg_IOZ); + ast_moutdwm(ast, 0x1E6E0070, 0x00000000); + ast_moutdwm(ast, 0x1E6E0074, 0x00000000); + ast_moutdwm(ast, 0x1E6E0078, 0x00000000); + ast_moutdwm(ast, 0x1E6E007C, 0x00000000); + + /* Wait MCLK2X lock to MCLK */ + do { + data = ast_mindwm(ast, 0x1E6E001C); + } while (!(data & 0x08000000)); + data = ast_mindwm(ast, 0x1E6E001C); + data = (data >> 8) & 0xff; + while ((data & 0x08) || ((data & 0x7) < 2) || (data < 4)) { + data2 = (ast_mindwm(ast, 0x1E6E0064) & 0xfff3ffff) + 4; + if ((data2 & 0xff) > param->madj_max) + break; + ast_moutdwm(ast, 0x1E6E0064, data2); + if (data2 & 0x00100000) + data2 = ((data2 & 0xff) >> 3) + 3; + else + data2 = ((data2 & 0xff) >> 2) + 5; + data = ast_mindwm(ast, 0x1E6E0068) & 0xffff00ff; + data2 += data & 0xff; + data = data | (data2 << 8); + ast_moutdwm(ast, 0x1E6E0068, data); + udelay(10); + ast_moutdwm(ast, 0x1E6E0064, ast_mindwm(ast, 0x1E6E0064) | 0xC0000); + udelay(10); + data = ast_mindwm(ast, 0x1E6E0018) & 0xfffff1ff; + ast_moutdwm(ast, 0x1E6E0018, data); + data = data | 0x200; + ast_moutdwm(ast, 0x1E6E0018, data); + do { + data = ast_mindwm(ast, 0x1E6E001C); + } while (!(data & 0x08000000)); + + data = ast_mindwm(ast, 0x1E6E001C); + data = (data >> 8) & 0xff; + } + ast_moutdwm(ast, 0x1E720058, ast_mindwm(ast, 0x1E6E0008) & 0xffff); + data = ast_mindwm(ast, 0x1E6E0018) | 0xC00; + ast_moutdwm(ast, 0x1E6E0018, data); + + ast_moutdwm(ast, 0x1E6E0034, 0x00000001); + ast_moutdwm(ast, 0x1E6E000C, 0x00000000); + udelay(50); + /* Mode Register Setting */ + ast_moutdwm(ast, 0x1E6E002C, param->reg_MRS | 0x100); + ast_moutdwm(ast, 0x1E6E0030, param->reg_EMRS); + ast_moutdwm(ast, 0x1E6E0028, 0x00000005); + ast_moutdwm(ast, 0x1E6E0028, 0x00000007); + ast_moutdwm(ast, 0x1E6E0028, 0x00000003); + ast_moutdwm(ast, 0x1E6E0028, 0x00000001); + + ast_moutdwm(ast, 0x1E6E000C, 0x00005C08); + ast_moutdwm(ast, 0x1E6E002C, param->reg_MRS); + ast_moutdwm(ast, 0x1E6E0028, 0x00000001); + ast_moutdwm(ast, 0x1E6E0030, param->reg_EMRS | 0x380); + ast_moutdwm(ast, 0x1E6E0028, 0x00000003); + ast_moutdwm(ast, 0x1E6E0030, param->reg_EMRS); + ast_moutdwm(ast, 0x1E6E0028, 0x00000003); + + ast_moutdwm(ast, 0x1E6E000C, 0x7FFF5C01); + data = 0; + if (param->wodt) + data = 0x500; + if (param->rodt) + data = data | 0x3000 | ((param->reg_AC2 & 0x60000) >> 3); + ast_moutdwm(ast, 0x1E6E0034, data | 0x3); + ast_moutdwm(ast, 0x1E6E0120, param->reg_FREQ); + + /* Calibrate the DQSI delay */ + if ((cbr_dll2(ast, param) == false) && (retry++ < 10)) + goto ddr2_init_start; + + /* ECC Memory Initialization */ +#ifdef ECC + ast_moutdwm(ast, 0x1E6E007C, 0x00000000); + ast_moutdwm(ast, 0x1E6E0070, 0x221); + do { + data = ast_mindwm(ast, 0x1E6E0070); + } while (!(data & 0x00001000)); + ast_moutdwm(ast, 0x1E6E0070, 0x00000000); + ast_moutdwm(ast, 0x1E6E0050, 0x80000000); + ast_moutdwm(ast, 0x1E6E0050, 0x00000000); +#endif +} + +static void ast_post_chip_2300(struct ast_device 
*ast) +{ + struct ast2300_dram_param param; + u32 temp; + u8 reg; + + reg = ast_get_index_reg_mask(ast, AST_IO_VGACRI, 0xd0, 0xff); + if ((reg & 0x80) == 0) {/* vga only */ + ast_write32(ast, 0xf004, 0x1e6e0000); + ast_write32(ast, 0xf000, 0x1); + ast_write32(ast, 0x12000, 0x1688a8a8); + do { + ; + } while (ast_read32(ast, 0x12000) != 0x1); + + ast_write32(ast, 0x10000, 0xfc600309); + do { + ; + } while (ast_read32(ast, 0x10000) != 0x1); + + /* Slow down CPU/AHB CLK in VGA only mode */ + temp = ast_read32(ast, 0x12008); + temp |= 0x73; + ast_write32(ast, 0x12008, temp); + + param.dram_freq = 396; + param.dram_type = AST_DDR3; + temp = ast_mindwm(ast, 0x1e6e2070); + if (temp & 0x01000000) + param.dram_type = AST_DDR2; + switch (temp & 0x18000000) { + case 0: + param.dram_chipid = AST_DRAM_512Mx16; + break; + default: + case 0x08000000: + param.dram_chipid = AST_DRAM_1Gx16; + break; + case 0x10000000: + param.dram_chipid = AST_DRAM_2Gx16; + break; + case 0x18000000: + param.dram_chipid = AST_DRAM_4Gx16; + break; + } + switch (temp & 0x0c) { + default: + case 0x00: + param.vram_size = SZ_8M; + break; + case 0x04: + param.vram_size = SZ_16M; + break; + case 0x08: + param.vram_size = SZ_32M; + break; + case 0x0c: + param.vram_size = SZ_64M; + break; + } + + if (param.dram_type == AST_DDR3) { + get_ddr3_info(ast, &param); + ddr3_init(ast, &param); + } else { + get_ddr2_info(ast, &param); + ddr2_init(ast, &param); + } + + temp = ast_mindwm(ast, 0x1e6e2040); + ast_moutdwm(ast, 0x1e6e2040, temp | 0x40); + } + + /* wait ready */ + do { + reg = ast_get_index_reg_mask(ast, AST_IO_VGACRI, 0xd0, 0xff); + } while ((reg & 0x40) == 0); +} + +int ast_2300_post(struct ast_device *ast) +{ + ast_2300_set_def_ext_reg(ast); + + if (ast->config_mode == ast_use_p2a) { + ast_post_chip_2300(ast); + ast_init_3rdtx(ast); + } else { + if (ast->tx_chip == AST_TX_SIL164) { + /* Enable DVO */ + ast_set_index_reg_mask(ast, AST_IO_VGACRI, 0xa3, 0xcf, 0x80); + } + } + + return 0; +} diff --git a/drivers/gpu/drm/ast/ast_2500.c b/drivers/gpu/drm/ast/ast_2500.c new file mode 100644 index 000000000000..1e541498ea67 --- /dev/null +++ b/drivers/gpu/drm/ast/ast_2500.c @@ -0,0 +1,569 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright 2012 Red Hat Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software.
+ */ +/* + * Authors: Dave Airlie <airlied@redhat.com> + */ + +#include <linux/delay.h> + +#include <drm/drm_print.h> + +#include "ast_drv.h" +#include "ast_post.h" + +/* + * POST + */ + +/* + * AST2500 DRAM settings modules + */ + +#define REGTBL_NUM 17 +#define REGIDX_010 0 +#define REGIDX_014 1 +#define REGIDX_018 2 +#define REGIDX_020 3 +#define REGIDX_024 4 +#define REGIDX_02C 5 +#define REGIDX_030 6 +#define REGIDX_214 7 +#define REGIDX_2E0 8 +#define REGIDX_2E4 9 +#define REGIDX_2E8 10 +#define REGIDX_2EC 11 +#define REGIDX_2F0 12 +#define REGIDX_2F4 13 +#define REGIDX_2F8 14 +#define REGIDX_RFC 15 +#define REGIDX_PLL 16 + +static const u32 ast2500_ddr3_1600_timing_table[REGTBL_NUM] = { + 0x64604D38, /* 0x010 */ + 0x29690599, /* 0x014 */ + 0x00000300, /* 0x018 */ + 0x00000000, /* 0x020 */ + 0x00000000, /* 0x024 */ + 0x02181E70, /* 0x02C */ + 0x00000040, /* 0x030 */ + 0x00000024, /* 0x214 */ + 0x02001300, /* 0x2E0 */ + 0x0E0000A0, /* 0x2E4 */ + 0x000E001B, /* 0x2E8 */ + 0x35B8C105, /* 0x2EC */ + 0x08090408, /* 0x2F0 */ + 0x9B000800, /* 0x2F4 */ + 0x0E400A00, /* 0x2F8 */ + 0x9971452F, /* tRFC */ + 0x000071C1 /* PLL */ +}; + +static const u32 ast2500_ddr4_1600_timing_table[REGTBL_NUM] = { + 0x63604E37, /* 0x010 */ + 0xE97AFA99, /* 0x014 */ + 0x00019000, /* 0x018 */ + 0x08000000, /* 0x020 */ + 0x00000400, /* 0x024 */ + 0x00000410, /* 0x02C */ + 0x00000101, /* 0x030 */ + 0x00000024, /* 0x214 */ + 0x03002900, /* 0x2E0 */ + 0x0E0000A0, /* 0x2E4 */ + 0x000E001C, /* 0x2E8 */ + 0x35B8C106, /* 0x2EC */ + 0x08080607, /* 0x2F0 */ + 0x9B000900, /* 0x2F4 */ + 0x0E400A00, /* 0x2F8 */ + 0x99714545, /* tRFC */ + 0x000071C1 /* PLL */ +}; + +#define TIMEOUT 5000000 + +void ast_2500_patch_ahb(void __iomem *regs) +{ + u32 data; + + /* Clear bus lock condition */ + __ast_moutdwm(regs, 0x1e600000, 0xAEED1A03); + __ast_moutdwm(regs, 0x1e600084, 0x00010000); + __ast_moutdwm(regs, 0x1e600088, 0x00000000); + __ast_moutdwm(regs, 0x1e6e2000, 0x1688A8A8); + + data = __ast_mindwm(regs, 0x1e6e2070); + if (data & 0x08000000) { /* check fast reset */ + /* + * If "Fast restet" is enabled for ARM-ICE debugger, + * then WDT needs to enable, that + * WDT04 is WDT#1 Reload reg. 
+ * WDT08 is WDT#1 counter restart reg to avoid system deadlock + * WDT0C is WDT#1 control reg + * [6:5]:= 01:Full chip + * [4]:= 1:1MHz clock source + * [1]:= 1:WDT will be cleeared and disabled after timeout occurs + * [0]:= 1:WDT enable + */ + __ast_moutdwm(regs, 0x1E785004, 0x00000010); + __ast_moutdwm(regs, 0x1E785008, 0x00004755); + __ast_moutdwm(regs, 0x1E78500c, 0x00000033); + udelay(1000); + } + + do { + __ast_moutdwm(regs, 0x1e6e2000, 0x1688A8A8); + data = __ast_mindwm(regs, 0x1e6e2000); + } while (data != 1); + + __ast_moutdwm(regs, 0x1e6e207c, 0x08000000); /* clear fast reset */ +} + +static bool mmc_test_single_2500(struct ast_device *ast, u32 datagen) +{ + return mmc_test(ast, datagen, 0x85); +} + +static bool cbr_test_2500(struct ast_device *ast) +{ + ast_moutdwm(ast, 0x1E6E0074, 0x0000FFFF); + ast_moutdwm(ast, 0x1E6E007C, 0xFF00FF00); + if (!mmc_test_burst(ast, 0)) + return false; + if (!mmc_test_single_2500(ast, 0)) + return false; + return true; +} + +static bool ddr_test_2500(struct ast_device *ast) +{ + ast_moutdwm(ast, 0x1E6E0074, 0x0000FFFF); + ast_moutdwm(ast, 0x1E6E007C, 0xFF00FF00); + if (!mmc_test_burst(ast, 0)) + return false; + if (!mmc_test_burst(ast, 1)) + return false; + if (!mmc_test_burst(ast, 2)) + return false; + if (!mmc_test_burst(ast, 3)) + return false; + if (!mmc_test_single_2500(ast, 0)) + return false; + return true; +} + +static void ddr_init_common_2500(struct ast_device *ast) +{ + ast_moutdwm(ast, 0x1E6E0034, 0x00020080); + ast_moutdwm(ast, 0x1E6E0008, 0x2003000F); + ast_moutdwm(ast, 0x1E6E0038, 0x00000FFF); + ast_moutdwm(ast, 0x1E6E0040, 0x88448844); + ast_moutdwm(ast, 0x1E6E0044, 0x24422288); + ast_moutdwm(ast, 0x1E6E0048, 0x22222222); + ast_moutdwm(ast, 0x1E6E004C, 0x22222222); + ast_moutdwm(ast, 0x1E6E0050, 0x80000000); + ast_moutdwm(ast, 0x1E6E0208, 0x00000000); + ast_moutdwm(ast, 0x1E6E0218, 0x00000000); + ast_moutdwm(ast, 0x1E6E0220, 0x00000000); + ast_moutdwm(ast, 0x1E6E0228, 0x00000000); + ast_moutdwm(ast, 0x1E6E0230, 0x00000000); + ast_moutdwm(ast, 0x1E6E02A8, 0x00000000); + ast_moutdwm(ast, 0x1E6E02B0, 0x00000000); + ast_moutdwm(ast, 0x1E6E0240, 0x86000000); + ast_moutdwm(ast, 0x1E6E0244, 0x00008600); + ast_moutdwm(ast, 0x1E6E0248, 0x80000000); + ast_moutdwm(ast, 0x1E6E024C, 0x80808080); +} + +static void ddr_phy_init_2500(struct ast_device *ast) +{ + u32 data, pass, timecnt; + + pass = 0; + ast_moutdwm(ast, 0x1E6E0060, 0x00000005); + while (!pass) { + for (timecnt = 0; timecnt < TIMEOUT; timecnt++) { + data = ast_mindwm(ast, 0x1E6E0060) & 0x1; + if (!data) + break; + } + if (timecnt != TIMEOUT) { + data = ast_mindwm(ast, 0x1E6E0300) & 0x000A0000; + if (!data) + pass = 1; + } + if (!pass) { + ast_moutdwm(ast, 0x1E6E0060, 0x00000000); + udelay(10); /* delay 10 us */ + ast_moutdwm(ast, 0x1E6E0060, 0x00000005); + } + } + + ast_moutdwm(ast, 0x1E6E0060, 0x00000006); +} + +/* + * Check DRAM Size + * 1Gb : 0x80000000 ~ 0x87FFFFFF + * 2Gb : 0x80000000 ~ 0x8FFFFFFF + * 4Gb : 0x80000000 ~ 0x9FFFFFFF + * 8Gb : 0x80000000 ~ 0xBFFFFFFF + */ +static void check_dram_size_2500(struct ast_device *ast, u32 tRFC) +{ + u32 reg_04, reg_14; + + reg_04 = ast_mindwm(ast, 0x1E6E0004) & 0xfffffffc; + reg_14 = ast_mindwm(ast, 0x1E6E0014) & 0xffffff00; + + ast_moutdwm(ast, 0xA0100000, 0x41424344); + ast_moutdwm(ast, 0x90100000, 0x35363738); + ast_moutdwm(ast, 0x88100000, 0x292A2B2C); + ast_moutdwm(ast, 0x80100000, 0x1D1E1F10); + + /* Check 8Gbit */ + if (ast_mindwm(ast, 0xA0100000) == 0x41424344) { + reg_04 |= 0x03; + reg_14 |= (tRFC >> 24) & 0xFF; + /* Check 
4Gbit */ + } else if (ast_mindwm(ast, 0x90100000) == 0x35363738) { + reg_04 |= 0x02; + reg_14 |= (tRFC >> 16) & 0xFF; + /* Check 2Gbit */ + } else if (ast_mindwm(ast, 0x88100000) == 0x292A2B2C) { + reg_04 |= 0x01; + reg_14 |= (tRFC >> 8) & 0xFF; + } else { + reg_14 |= tRFC & 0xFF; + } + ast_moutdwm(ast, 0x1E6E0004, reg_04); + ast_moutdwm(ast, 0x1E6E0014, reg_14); +} + +static void enable_cache_2500(struct ast_device *ast) +{ + u32 reg_04, data; + + reg_04 = ast_mindwm(ast, 0x1E6E0004); + ast_moutdwm(ast, 0x1E6E0004, reg_04 | 0x1000); + + do + data = ast_mindwm(ast, 0x1E6E0004); + while (!(data & 0x80000)); + ast_moutdwm(ast, 0x1E6E0004, reg_04 | 0x400); +} + +static void set_mpll_2500(struct ast_device *ast) +{ + u32 addr, data, param; + + /* Reset MMC */ + ast_moutdwm(ast, 0x1E6E0000, 0xFC600309); + ast_moutdwm(ast, 0x1E6E0034, 0x00020080); + for (addr = 0x1e6e0004; addr < 0x1e6e0090;) { + ast_moutdwm(ast, addr, 0x0); + addr += 4; + } + ast_moutdwm(ast, 0x1E6E0034, 0x00020000); + + ast_moutdwm(ast, 0x1E6E2000, 0x1688A8A8); + data = ast_mindwm(ast, 0x1E6E2070) & 0x00800000; + if (data) { + /* CLKIN = 25MHz */ + param = 0x930023E0; + ast_moutdwm(ast, 0x1E6E2160, 0x00011320); + } else { + /* CLKIN = 24MHz */ + param = 0x93002400; + } + ast_moutdwm(ast, 0x1E6E2020, param); + udelay(100); +} + +static void reset_mmc_2500(struct ast_device *ast) +{ + ast_moutdwm(ast, 0x1E78505C, 0x00000004); + ast_moutdwm(ast, 0x1E785044, 0x00000001); + ast_moutdwm(ast, 0x1E785048, 0x00004755); + ast_moutdwm(ast, 0x1E78504C, 0x00000013); + mdelay(100); + ast_moutdwm(ast, 0x1E785054, 0x00000077); + ast_moutdwm(ast, 0x1E6E0000, 0xFC600309); +} + +static void ddr3_init_2500(struct ast_device *ast, const u32 *ddr_table) +{ + ast_moutdwm(ast, 0x1E6E0004, 0x00000303); + ast_moutdwm(ast, 0x1E6E0010, ddr_table[REGIDX_010]); + ast_moutdwm(ast, 0x1E6E0014, ddr_table[REGIDX_014]); + ast_moutdwm(ast, 0x1E6E0018, ddr_table[REGIDX_018]); + ast_moutdwm(ast, 0x1E6E0020, ddr_table[REGIDX_020]); /* MODEREG4/6 */ + ast_moutdwm(ast, 0x1E6E0024, ddr_table[REGIDX_024]); /* MODEREG5 */ + ast_moutdwm(ast, 0x1E6E002C, ddr_table[REGIDX_02C] | 0x100); /* MODEREG0/2 */ + ast_moutdwm(ast, 0x1E6E0030, ddr_table[REGIDX_030]); /* MODEREG1/3 */ + + /* DDR PHY Setting */ + ast_moutdwm(ast, 0x1E6E0200, 0x02492AAE); + ast_moutdwm(ast, 0x1E6E0204, 0x00001001); + ast_moutdwm(ast, 0x1E6E020C, 0x55E00B0B); + ast_moutdwm(ast, 0x1E6E0210, 0x20000000); + ast_moutdwm(ast, 0x1E6E0214, ddr_table[REGIDX_214]); + ast_moutdwm(ast, 0x1E6E02E0, ddr_table[REGIDX_2E0]); + ast_moutdwm(ast, 0x1E6E02E4, ddr_table[REGIDX_2E4]); + ast_moutdwm(ast, 0x1E6E02E8, ddr_table[REGIDX_2E8]); + ast_moutdwm(ast, 0x1E6E02EC, ddr_table[REGIDX_2EC]); + ast_moutdwm(ast, 0x1E6E02F0, ddr_table[REGIDX_2F0]); + ast_moutdwm(ast, 0x1E6E02F4, ddr_table[REGIDX_2F4]); + ast_moutdwm(ast, 0x1E6E02F8, ddr_table[REGIDX_2F8]); + ast_moutdwm(ast, 0x1E6E0290, 0x00100008); + ast_moutdwm(ast, 0x1E6E02C0, 0x00000006); + + /* Controller Setting */ + ast_moutdwm(ast, 0x1E6E0034, 0x00020091); + + /* Wait DDR PHY init done */ + ddr_phy_init_2500(ast); + + ast_moutdwm(ast, 0x1E6E0120, ddr_table[REGIDX_PLL]); + ast_moutdwm(ast, 0x1E6E000C, 0x42AA5C81); + ast_moutdwm(ast, 0x1E6E0034, 0x0001AF93); + + check_dram_size_2500(ast, ddr_table[REGIDX_RFC]); + enable_cache_2500(ast); + ast_moutdwm(ast, 0x1E6E001C, 0x00000008); + ast_moutdwm(ast, 0x1E6E0038, 0xFFFFFF00); +} + +static void ddr4_init_2500(struct ast_device *ast, const u32 *ddr_table) +{ + u32 data, data2, pass, retrycnt; + u32 ddr_vref, phy_vref; + u32 
min_ddr_vref = 0, min_phy_vref = 0; + u32 max_ddr_vref = 0, max_phy_vref = 0; + + ast_moutdwm(ast, 0x1E6E0004, 0x00000313); + ast_moutdwm(ast, 0x1E6E0010, ddr_table[REGIDX_010]); + ast_moutdwm(ast, 0x1E6E0014, ddr_table[REGIDX_014]); + ast_moutdwm(ast, 0x1E6E0018, ddr_table[REGIDX_018]); + ast_moutdwm(ast, 0x1E6E0020, ddr_table[REGIDX_020]); /* MODEREG4/6 */ + ast_moutdwm(ast, 0x1E6E0024, ddr_table[REGIDX_024]); /* MODEREG5 */ + ast_moutdwm(ast, 0x1E6E002C, ddr_table[REGIDX_02C] | 0x100); /* MODEREG0/2 */ + ast_moutdwm(ast, 0x1E6E0030, ddr_table[REGIDX_030]); /* MODEREG1/3 */ + + /* DDR PHY Setting */ + ast_moutdwm(ast, 0x1E6E0200, 0x42492AAE); + ast_moutdwm(ast, 0x1E6E0204, 0x09002000); + ast_moutdwm(ast, 0x1E6E020C, 0x55E00B0B); + ast_moutdwm(ast, 0x1E6E0210, 0x20000000); + ast_moutdwm(ast, 0x1E6E0214, ddr_table[REGIDX_214]); + ast_moutdwm(ast, 0x1E6E02E0, ddr_table[REGIDX_2E0]); + ast_moutdwm(ast, 0x1E6E02E4, ddr_table[REGIDX_2E4]); + ast_moutdwm(ast, 0x1E6E02E8, ddr_table[REGIDX_2E8]); + ast_moutdwm(ast, 0x1E6E02EC, ddr_table[REGIDX_2EC]); + ast_moutdwm(ast, 0x1E6E02F0, ddr_table[REGIDX_2F0]); + ast_moutdwm(ast, 0x1E6E02F4, ddr_table[REGIDX_2F4]); + ast_moutdwm(ast, 0x1E6E02F8, ddr_table[REGIDX_2F8]); + ast_moutdwm(ast, 0x1E6E0290, 0x00100008); + ast_moutdwm(ast, 0x1E6E02C4, 0x3C183C3C); + ast_moutdwm(ast, 0x1E6E02C8, 0x00631E0E); + + /* Controller Setting */ + ast_moutdwm(ast, 0x1E6E0034, 0x0001A991); + + /* Train PHY Vref first */ + pass = 0; + + for (retrycnt = 0; retrycnt < 4 && pass == 0; retrycnt++) { + max_phy_vref = 0x0; + pass = 0; + ast_moutdwm(ast, 0x1E6E02C0, 0x00001C06); + for (phy_vref = 0x40; phy_vref < 0x80; phy_vref++) { + ast_moutdwm(ast, 0x1E6E000C, 0x00000000); + ast_moutdwm(ast, 0x1E6E0060, 0x00000000); + ast_moutdwm(ast, 0x1E6E02CC, phy_vref | (phy_vref << 8)); + /* Fire DFI Init */ + ddr_phy_init_2500(ast); + ast_moutdwm(ast, 0x1E6E000C, 0x00005C01); + if (cbr_test_2500(ast)) { + pass++; + data = ast_mindwm(ast, 0x1E6E03D0); + data2 = data >> 8; + data = data & 0xff; + if (data > data2) + data = data2; + if (max_phy_vref < data) { + max_phy_vref = data; + min_phy_vref = phy_vref; + } + } else if (pass > 0) { + break; + } + } + } + ast_moutdwm(ast, 0x1E6E02CC, min_phy_vref | (min_phy_vref << 8)); + + /* Train DDR Vref next */ + pass = 0; + + for (retrycnt = 0; retrycnt < 4 && pass == 0; retrycnt++) { + min_ddr_vref = 0xFF; + max_ddr_vref = 0x0; + pass = 0; + for (ddr_vref = 0x00; ddr_vref < 0x40; ddr_vref++) { + ast_moutdwm(ast, 0x1E6E000C, 0x00000000); + ast_moutdwm(ast, 0x1E6E0060, 0x00000000); + ast_moutdwm(ast, 0x1E6E02C0, 0x00000006 | (ddr_vref << 8)); + /* Fire DFI Init */ + ddr_phy_init_2500(ast); + ast_moutdwm(ast, 0x1E6E000C, 0x00005C01); + if (cbr_test_2500(ast)) { + pass++; + if (min_ddr_vref > ddr_vref) + min_ddr_vref = ddr_vref; + if (max_ddr_vref < ddr_vref) + max_ddr_vref = ddr_vref; + } else if (pass != 0) { + break; + } + } + } + + ast_moutdwm(ast, 0x1E6E000C, 0x00000000); + ast_moutdwm(ast, 0x1E6E0060, 0x00000000); + ddr_vref = (min_ddr_vref + max_ddr_vref + 1) >> 1; + ast_moutdwm(ast, 0x1E6E02C0, 0x00000006 | (ddr_vref << 8)); + + /* Wait DDR PHY init done */ + ddr_phy_init_2500(ast); + + ast_moutdwm(ast, 0x1E6E0120, ddr_table[REGIDX_PLL]); + ast_moutdwm(ast, 0x1E6E000C, 0x42AA5C81); + ast_moutdwm(ast, 0x1E6E0034, 0x0001AF93); + + check_dram_size_2500(ast, ddr_table[REGIDX_RFC]); + enable_cache_2500(ast); + ast_moutdwm(ast, 0x1E6E001C, 0x00000008); + ast_moutdwm(ast, 0x1E6E0038, 0xFFFFFF00); +} + +static bool ast_dram_init_2500(struct ast_device 
*ast) +{ + u32 data; + u32 max_tries = 5; + + do { + if (max_tries-- == 0) + return false; + set_mpll_2500(ast); + reset_mmc_2500(ast); + ddr_init_common_2500(ast); + + data = ast_mindwm(ast, 0x1E6E2070); + if (data & 0x01000000) + ddr4_init_2500(ast, ast2500_ddr4_1600_timing_table); + else + ddr3_init_2500(ast, ast2500_ddr3_1600_timing_table); + } while (!ddr_test_2500(ast)); + + ast_moutdwm(ast, 0x1E6E2040, ast_mindwm(ast, 0x1E6E2040) | 0x41); + + /* Patch code */ + data = ast_mindwm(ast, 0x1E6E200C) & 0xF9FFFFFF; + ast_moutdwm(ast, 0x1E6E200C, data | 0x10000000); + + return true; +} + +static void ast_post_chip_2500(struct ast_device *ast) +{ + struct drm_device *dev = &ast->base; + u32 temp; + u8 reg; + + reg = ast_get_index_reg_mask(ast, AST_IO_VGACRI, 0xd0, 0xff); + if ((reg & AST_IO_VGACRD0_VRAM_INIT_STATUS_MASK) == 0) {/* vga only */ + /* Clear bus lock condition */ + ast_2500_patch_ahb(ast->regs); + + /* Disable watchdog */ + ast_moutdwm(ast, 0x1E78502C, 0x00000000); + ast_moutdwm(ast, 0x1E78504C, 0x00000000); + + /* + * Reset USB port to patch USB unknown device issue + * SCU90 is Multi-function Pin Control #5 + * [29]:= 1:Enable USB2.0 Host port#1 (that the mutually shared USB2.0 Hub + * port). + * SCU94 is Multi-function Pin Control #6 + * [14:13]:= 1x:USB2.0 Host2 controller + * SCU70 is Hardware Strap reg + * [23]:= 1:CLKIN is 25MHz and USBCK1 = 24/48 MHz (determined by + * [18]: 0(24)/1(48) MHz) + * SCU7C is Write clear reg to SCU70 + * [23]:= write 1 and then SCU70[23] will be clear as 0b. + */ + ast_moutdwm(ast, 0x1E6E2090, 0x20000000); + ast_moutdwm(ast, 0x1E6E2094, 0x00004000); + if (ast_mindwm(ast, 0x1E6E2070) & 0x00800000) { + ast_moutdwm(ast, 0x1E6E207C, 0x00800000); + mdelay(100); + ast_moutdwm(ast, 0x1E6E2070, 0x00800000); + } + /* Modify eSPI reset pin */ + temp = ast_mindwm(ast, 0x1E6E2070); + if (temp & 0x02000000) + ast_moutdwm(ast, 0x1E6E207C, 0x00004000); + + /* Slow down CPU/AHB CLK in VGA only mode */ + temp = ast_read32(ast, 0x12008); + temp |= 0x73; + ast_write32(ast, 0x12008, temp); + + if (!ast_dram_init_2500(ast)) + drm_err(dev, "DRAM init failed !\n"); + + temp = ast_mindwm(ast, 0x1e6e2040); + ast_moutdwm(ast, 0x1e6e2040, temp | 0x40); + } + + /* wait ready */ + do { + reg = ast_get_index_reg_mask(ast, AST_IO_VGACRI, 0xd0, 0xff); + } while ((reg & 0x40) == 0); +} + +int ast_2500_post(struct ast_device *ast) +{ + ast_2300_set_def_ext_reg(ast); + + if (ast->config_mode == ast_use_p2a) { + ast_post_chip_2500(ast); + } else { + if (ast->tx_chip == AST_TX_SIL164) { + /* Enable DVO */ + ast_set_index_reg_mask(ast, AST_IO_VGACRI, 0xa3, 0xcf, 0x80); + } + } + + return 0; +} diff --git a/drivers/gpu/drm/ast/ast_2600.c b/drivers/gpu/drm/ast/ast_2600.c new file mode 100644 index 000000000000..8d75a47444f5 --- /dev/null +++ b/drivers/gpu/drm/ast/ast_2600.c @@ -0,0 +1,44 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright 2012 Red Hat Inc. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + */ +/* + * Authors: Dave Airlie <airlied@redhat.com> + */ + +#include "ast_drv.h" +#include "ast_post.h" + +/* + * POST + */ + +int ast_2600_post(struct ast_device *ast) +{ + ast_2300_set_def_ext_reg(ast); + + if (ast->tx_chip == AST_TX_ASTDP) + return ast_dp_launch(ast); + + return 0; +} diff --git a/drivers/gpu/drm/ast/ast_dp.c b/drivers/gpu/drm/ast/ast_dp.c index 19c04687b0fe..8e650a02c528 100644 --- a/drivers/gpu/drm/ast/ast_dp.c +++ b/drivers/gpu/drm/ast/ast_dp.c @@ -134,7 +134,7 @@ static int ast_astdp_read_edid_block(void *data, u8 *buf, unsigned int block, si * 3. The Delays are often longer a lot when system resume from S3/S4. 
*/ if (j) - mdelay(j + 1); + msleep(j + 1); /* Wait for EDID offset to show up in mirror register */ vgacrd7 = ast_get_index_reg(ast, AST_IO_VGACRI, 0xd7); diff --git a/drivers/gpu/drm/ast/ast_dram_tables.h b/drivers/gpu/drm/ast/ast_dram_tables.h deleted file mode 100644 index 1e9ac9d6d26c..000000000000 --- a/drivers/gpu/drm/ast/ast_dram_tables.h +++ /dev/null @@ -1,207 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef AST_DRAM_TABLES_H -#define AST_DRAM_TABLES_H - -/* DRAM timing tables */ -struct ast_dramstruct { - u16 index; - u32 data; -}; - -static const struct ast_dramstruct ast2000_dram_table_data[] = { - { 0x0108, 0x00000000 }, - { 0x0120, 0x00004a21 }, - { 0xFF00, 0x00000043 }, - { 0x0000, 0xFFFFFFFF }, - { 0x0004, 0x00000089 }, - { 0x0008, 0x22331353 }, - { 0x000C, 0x0d07000b }, - { 0x0010, 0x11113333 }, - { 0x0020, 0x00110350 }, - { 0x0028, 0x1e0828f0 }, - { 0x0024, 0x00000001 }, - { 0x001C, 0x00000000 }, - { 0x0014, 0x00000003 }, - { 0xFF00, 0x00000043 }, - { 0x0018, 0x00000131 }, - { 0x0014, 0x00000001 }, - { 0xFF00, 0x00000043 }, - { 0x0018, 0x00000031 }, - { 0x0014, 0x00000001 }, - { 0xFF00, 0x00000043 }, - { 0x0028, 0x1e0828f1 }, - { 0x0024, 0x00000003 }, - { 0x002C, 0x1f0f28fb }, - { 0x0030, 0xFFFFFE01 }, - { 0xFFFF, 0xFFFFFFFF } -}; - -static const struct ast_dramstruct ast1100_dram_table_data[] = { - { 0x2000, 0x1688a8a8 }, - { 0x2020, 0x000041f0 }, - { 0xFF00, 0x00000043 }, - { 0x0000, 0xfc600309 }, - { 0x006C, 0x00909090 }, - { 0x0064, 0x00050000 }, - { 0x0004, 0x00000585 }, - { 0x0008, 0x0011030f }, - { 0x0010, 0x22201724 }, - { 0x0018, 0x1e29011a }, - { 0x0020, 0x00c82222 }, - { 0x0014, 0x01001523 }, - { 0x001C, 0x1024010d }, - { 0x0024, 0x00cb2522 }, - { 0x0038, 0xffffff82 }, - { 0x003C, 0x00000000 }, - { 0x0040, 0x00000000 }, - { 0x0044, 0x00000000 }, - { 0x0048, 0x00000000 }, - { 0x004C, 0x00000000 }, - { 0x0050, 0x00000000 }, - { 0x0054, 0x00000000 }, - { 0x0058, 0x00000000 }, - { 0x005C, 0x00000000 }, - { 0x0060, 0x032aa02a }, - { 0x0064, 0x002d3000 }, - { 0x0068, 0x00000000 }, - { 0x0070, 0x00000000 }, - { 0x0074, 0x00000000 }, - { 0x0078, 0x00000000 }, - { 0x007C, 0x00000000 }, - { 0x0034, 0x00000001 }, - { 0xFF00, 0x00000043 }, - { 0x002C, 0x00000732 }, - { 0x0030, 0x00000040 }, - { 0x0028, 0x00000005 }, - { 0x0028, 0x00000007 }, - { 0x0028, 0x00000003 }, - { 0x0028, 0x00000001 }, - { 0x000C, 0x00005a08 }, - { 0x002C, 0x00000632 }, - { 0x0028, 0x00000001 }, - { 0x0030, 0x000003c0 }, - { 0x0028, 0x00000003 }, - { 0x0030, 0x00000040 }, - { 0x0028, 0x00000003 }, - { 0x000C, 0x00005a21 }, - { 0x0034, 0x00007c03 }, - { 0x0120, 0x00004c41 }, - { 0xffff, 0xffffffff }, -}; - -static const struct ast_dramstruct ast2100_dram_table_data[] = { - { 0x2000, 0x1688a8a8 }, - { 0x2020, 0x00004120 }, - { 0xFF00, 0x00000043 }, - { 0x0000, 0xfc600309 }, - { 0x006C, 0x00909090 }, - { 0x0064, 0x00070000 }, - { 0x0004, 0x00000489 }, - { 0x0008, 0x0011030f }, - { 0x0010, 0x32302926 }, - { 0x0018, 0x274c0122 }, - { 0x0020, 0x00ce2222 }, - { 0x0014, 0x01001523 }, - { 0x001C, 0x1024010d }, - { 0x0024, 0x00cb2522 }, - { 0x0038, 0xffffff82 }, - { 0x003C, 0x00000000 }, - { 0x0040, 0x00000000 }, - { 0x0044, 0x00000000 }, - { 0x0048, 0x00000000 }, - { 0x004C, 0x00000000 }, - { 0x0050, 0x00000000 }, - { 0x0054, 0x00000000 }, - { 0x0058, 0x00000000 }, - { 0x005C, 0x00000000 }, - { 0x0060, 0x0f2aa02a }, - { 0x0064, 0x003f3005 }, - { 0x0068, 0x02020202 }, - { 0x0070, 0x00000000 }, - { 0x0074, 0x00000000 }, - { 0x0078, 0x00000000 }, - { 0x007C, 0x00000000 }, - { 0x0034, 0x00000001 }, - { 
0xFF00, 0x00000043 }, - { 0x002C, 0x00000942 }, - { 0x0030, 0x00000040 }, - { 0x0028, 0x00000005 }, - { 0x0028, 0x00000007 }, - { 0x0028, 0x00000003 }, - { 0x0028, 0x00000001 }, - { 0x000C, 0x00005a08 }, - { 0x002C, 0x00000842 }, - { 0x0028, 0x00000001 }, - { 0x0030, 0x000003c0 }, - { 0x0028, 0x00000003 }, - { 0x0030, 0x00000040 }, - { 0x0028, 0x00000003 }, - { 0x000C, 0x00005a21 }, - { 0x0034, 0x00007c03 }, - { 0x0120, 0x00005061 }, - { 0xffff, 0xffffffff }, -}; - -/* - * AST2500 DRAM settings modules - */ -#define REGTBL_NUM 17 -#define REGIDX_010 0 -#define REGIDX_014 1 -#define REGIDX_018 2 -#define REGIDX_020 3 -#define REGIDX_024 4 -#define REGIDX_02C 5 -#define REGIDX_030 6 -#define REGIDX_214 7 -#define REGIDX_2E0 8 -#define REGIDX_2E4 9 -#define REGIDX_2E8 10 -#define REGIDX_2EC 11 -#define REGIDX_2F0 12 -#define REGIDX_2F4 13 -#define REGIDX_2F8 14 -#define REGIDX_RFC 15 -#define REGIDX_PLL 16 - -static const u32 ast2500_ddr3_1600_timing_table[REGTBL_NUM] = { - 0x64604D38, /* 0x010 */ - 0x29690599, /* 0x014 */ - 0x00000300, /* 0x018 */ - 0x00000000, /* 0x020 */ - 0x00000000, /* 0x024 */ - 0x02181E70, /* 0x02C */ - 0x00000040, /* 0x030 */ - 0x00000024, /* 0x214 */ - 0x02001300, /* 0x2E0 */ - 0x0E0000A0, /* 0x2E4 */ - 0x000E001B, /* 0x2E8 */ - 0x35B8C105, /* 0x2EC */ - 0x08090408, /* 0x2F0 */ - 0x9B000800, /* 0x2F4 */ - 0x0E400A00, /* 0x2F8 */ - 0x9971452F, /* tRFC */ - 0x000071C1 /* PLL */ -}; - -static const u32 ast2500_ddr4_1600_timing_table[REGTBL_NUM] = { - 0x63604E37, /* 0x010 */ - 0xE97AFA99, /* 0x014 */ - 0x00019000, /* 0x018 */ - 0x08000000, /* 0x020 */ - 0x00000400, /* 0x024 */ - 0x00000410, /* 0x02C */ - 0x00000101, /* 0x030 */ - 0x00000024, /* 0x214 */ - 0x03002900, /* 0x2E0 */ - 0x0E0000A0, /* 0x2E4 */ - 0x000E001C, /* 0x2E8 */ - 0x35B8C106, /* 0x2EC */ - 0x08080607, /* 0x2F0 */ - 0x9B000900, /* 0x2F4 */ - 0x0E400A00, /* 0x2F8 */ - 0x99714545, /* tRFC */ - 0x000071C1 /* PLL */ -}; - -#endif diff --git a/drivers/gpu/drm/ast/ast_drv.c b/drivers/gpu/drm/ast/ast_drv.c index 054acda41909..473faa92d08c 100644 --- a/drivers/gpu/drm/ast/ast_drv.c +++ b/drivers/gpu/drm/ast/ast_drv.c @@ -64,7 +64,7 @@ static const struct drm_driver ast_driver = { .minor = DRIVER_MINOR, .patchlevel = DRIVER_PATCHLEVEL, - DRM_GEM_SHMEM_DRIVER_OPS_NO_MAP_SGT, + DRM_GEM_SHMEM_DRIVER_OPS, DRM_FBDEV_SHMEM_DRIVER_OPS, }; @@ -171,7 +171,7 @@ static int ast_detect_chip(struct pci_dev *pdev, /* Patch AST2500/AST2510 */ if ((pdev->revision & 0xf0) == 0x40) { if (!(vgacrd0 & AST_IO_VGACRD0_VRAM_INIT_STATUS_MASK)) - ast_patch_ahb_2500(regs); + ast_2500_patch_ahb(regs); } /* Double check that it's actually working */ diff --git a/drivers/gpu/drm/ast/ast_drv.h b/drivers/gpu/drm/ast/ast_drv.h index 2ee402096cd9..c15aef014f69 100644 --- a/drivers/gpu/drm/ast/ast_drv.h +++ b/drivers/gpu/drm/ast/ast_drv.h @@ -98,13 +98,15 @@ enum ast_config_mode { ast_use_defaults }; -#define AST_DRAM_512Mx16 0 -#define AST_DRAM_1Gx16 1 -#define AST_DRAM_512Mx32 2 -#define AST_DRAM_1Gx32 3 -#define AST_DRAM_2Gx16 6 -#define AST_DRAM_4Gx16 7 -#define AST_DRAM_8Gx16 8 +enum ast_dram_layout { + AST_DRAM_512Mx16 = 0, + AST_DRAM_1Gx16 = 1, + AST_DRAM_512Mx32 = 2, + AST_DRAM_1Gx32 = 3, + AST_DRAM_2Gx16 = 6, + AST_DRAM_4Gx16 = 7, + AST_DRAM_8Gx16 = 8, +}; /* * Hardware cursor @@ -172,10 +174,6 @@ struct ast_device { enum ast_config_mode config_mode; enum ast_chip chip; - uint32_t dram_bus_width; - uint32_t dram_type; - uint32_t mclk; - void __iomem *vram; unsigned long vram_base; unsigned long vram_size; @@ -417,11 +415,26 @@ struct 
ast_crtc_state { int ast_mm_init(struct ast_device *ast); +/* ast_2000.c */ +int ast_2000_post(struct ast_device *ast); + +/* ast_2100.c */ +int ast_2100_post(struct ast_device *ast); + +/* ast_2300.c */ +int ast_2300_post(struct ast_device *ast); + +/* ast_2500.c */ +void ast_2500_patch_ahb(void __iomem *regs); +int ast_2500_post(struct ast_device *ast); + +/* ast_2600.c */ +int ast_2600_post(struct ast_device *ast); + /* ast post */ int ast_post_gpu(struct ast_device *ast); u32 ast_mindwm(struct ast_device *ast, u32 r); void ast_moutdwm(struct ast_device *ast, u32 r, u32 v); -void ast_patch_ahb_2500(void __iomem *regs); int ast_vga_output_init(struct ast_device *ast); int ast_sil164_output_init(struct ast_device *ast); diff --git a/drivers/gpu/drm/ast/ast_main.c b/drivers/gpu/drm/ast/ast_main.c index 44b9b5f659fc..3eea6a6cdacd 100644 --- a/drivers/gpu/drm/ast/ast_main.c +++ b/drivers/gpu/drm/ast/ast_main.c @@ -210,126 +210,6 @@ static void ast_detect_tx_chip(struct ast_device *ast, bool need_post) drm_info(dev, "Using %s\n", info_str[ast->tx_chip]); } -static int ast_get_dram_info(struct ast_device *ast) -{ - struct drm_device *dev = &ast->base; - struct device_node *np = dev->dev->of_node; - uint32_t mcr_cfg, mcr_scu_mpll, mcr_scu_strap; - uint32_t denum, num, div, ref_pll, dsel; - - switch (ast->config_mode) { - case ast_use_dt: - /* - * If some properties are missing, use reasonable - * defaults for GEN5 - */ - if (of_property_read_u32(np, "aspeed,mcr-configuration", - &mcr_cfg)) - mcr_cfg = 0x00000577; - if (of_property_read_u32(np, "aspeed,mcr-scu-mpll", - &mcr_scu_mpll)) - mcr_scu_mpll = 0x000050C0; - if (of_property_read_u32(np, "aspeed,mcr-scu-strap", - &mcr_scu_strap)) - mcr_scu_strap = 0; - break; - case ast_use_p2a: - ast_write32(ast, 0xf004, 0x1e6e0000); - ast_write32(ast, 0xf000, 0x1); - mcr_cfg = ast_read32(ast, 0x10004); - mcr_scu_mpll = ast_read32(ast, 0x10120); - mcr_scu_strap = ast_read32(ast, 0x10170); - break; - case ast_use_defaults: - default: - ast->dram_bus_width = 16; - ast->dram_type = AST_DRAM_1Gx16; - if (IS_AST_GEN6(ast)) - ast->mclk = 800; - else - ast->mclk = 396; - return 0; - } - - if (mcr_cfg & 0x40) - ast->dram_bus_width = 16; - else - ast->dram_bus_width = 32; - - if (IS_AST_GEN6(ast)) { - switch (mcr_cfg & 0x03) { - case 0: - ast->dram_type = AST_DRAM_1Gx16; - break; - default: - case 1: - ast->dram_type = AST_DRAM_2Gx16; - break; - case 2: - ast->dram_type = AST_DRAM_4Gx16; - break; - case 3: - ast->dram_type = AST_DRAM_8Gx16; - break; - } - } else if (IS_AST_GEN4(ast) || IS_AST_GEN5(ast)) { - switch (mcr_cfg & 0x03) { - case 0: - ast->dram_type = AST_DRAM_512Mx16; - break; - default: - case 1: - ast->dram_type = AST_DRAM_1Gx16; - break; - case 2: - ast->dram_type = AST_DRAM_2Gx16; - break; - case 3: - ast->dram_type = AST_DRAM_4Gx16; - break; - } - } else { - switch (mcr_cfg & 0x0c) { - case 0: - case 4: - ast->dram_type = AST_DRAM_512Mx16; - break; - case 8: - if (mcr_cfg & 0x40) - ast->dram_type = AST_DRAM_1Gx16; - else - ast->dram_type = AST_DRAM_512Mx32; - break; - case 0xc: - ast->dram_type = AST_DRAM_1Gx32; - break; - } - } - - if (mcr_scu_strap & 0x2000) - ref_pll = 14318; - else - ref_pll = 12000; - - denum = mcr_scu_mpll & 0x1f; - num = (mcr_scu_mpll & 0x3fe0) >> 5; - dsel = (mcr_scu_mpll & 0xc000) >> 14; - switch (dsel) { - case 3: - div = 0x4; - break; - case 2: - case 1: - div = 0x2; - break; - default: - div = 0x1; - break; - } - ast->mclk = ref_pll * (num + 2) / ((denum + 2) * (div * 1000)); - return 0; -} - struct drm_device 
*ast_device_create(struct pci_dev *pdev, const struct drm_driver *drv, enum ast_chip chip, @@ -352,12 +232,6 @@ struct drm_device *ast_device_create(struct pci_dev *pdev, ast->regs = regs; ast->ioregs = ioregs; - ret = ast_get_dram_info(ast); - if (ret) - return ERR_PTR(ret); - drm_info(dev, "dram MCLK=%u Mhz type=%d bus_width=%d\n", - ast->mclk, ast->dram_type, ast->dram_bus_width); - ast_detect_tx_chip(ast, need_post); switch (ast->tx_chip) { case AST_TX_ASTDP: diff --git a/drivers/gpu/drm/ast/ast_mode.c b/drivers/gpu/drm/ast/ast_mode.c index 7908087bcb5a..30b011ed0a05 100644 --- a/drivers/gpu/drm/ast/ast_mode.c +++ b/drivers/gpu/drm/ast/ast_mode.c @@ -29,7 +29,6 @@ */ #include <linux/delay.h> -#include <linux/export.h> #include <linux/pci.h> #include <drm/drm_atomic.h> @@ -837,22 +836,24 @@ ast_crtc_helper_atomic_flush(struct drm_crtc *crtc, static void ast_crtc_helper_atomic_enable(struct drm_crtc *crtc, struct drm_atomic_state *state) { struct ast_device *ast = to_ast_device(crtc->dev); + u8 vgacr17 = 0x00; + u8 vgacrb6 = 0xff; - ast_set_index_reg_mask(ast, AST_IO_VGACRI, 0xb6, 0xfc, 0x00); - ast_set_index_reg_mask(ast, AST_IO_VGASRI, 0x01, 0xdf, 0x00); + vgacr17 |= AST_IO_VGACR17_SYNC_ENABLE; + vgacrb6 &= ~(AST_IO_VGACRB6_VSYNC_OFF | AST_IO_VGACRB6_HSYNC_OFF); + + ast_set_index_reg_mask(ast, AST_IO_VGACRI, 0x17, 0x7f, vgacr17); + ast_set_index_reg_mask(ast, AST_IO_VGACRI, 0xb6, 0xfc, vgacrb6); } static void ast_crtc_helper_atomic_disable(struct drm_crtc *crtc, struct drm_atomic_state *state) { struct drm_crtc_state *old_crtc_state = drm_atomic_get_old_crtc_state(state, crtc); struct ast_device *ast = to_ast_device(crtc->dev); - u8 vgacrb6; + u8 vgacr17 = 0xff; - ast_set_index_reg_mask(ast, AST_IO_VGASRI, 0x01, 0xdf, AST_IO_VGASR1_SD); - - vgacrb6 = AST_IO_VGACRB6_VSYNC_OFF | - AST_IO_VGACRB6_HSYNC_OFF; - ast_set_index_reg_mask(ast, AST_IO_VGACRI, 0xb6, 0xfc, vgacrb6); + vgacr17 &= ~AST_IO_VGACR17_SYNC_ENABLE; + ast_set_index_reg_mask(ast, AST_IO_VGACRI, 0x17, 0x7f, vgacr17); /* * HW cursors require the underlying primary plane and CRTC to diff --git a/drivers/gpu/drm/ast/ast_post.c b/drivers/gpu/drm/ast/ast_post.c index 37568cf3822c..b72914dbed38 100644 --- a/drivers/gpu/drm/ast/ast_post.c +++ b/drivers/gpu/drm/ast/ast_post.c @@ -31,51 +31,10 @@ #include <drm/drm_print.h> -#include "ast_dram_tables.h" #include "ast_drv.h" +#include "ast_post.h" -static void ast_post_chip_2300(struct ast_device *ast); -static void ast_post_chip_2500(struct ast_device *ast); - -static const u8 extreginfo[] = { 0x0f, 0x04, 0x1c, 0xff }; -static const u8 extreginfo_ast2300[] = { 0x0f, 0x04, 0x1f, 0xff }; - -static void ast_set_def_ext_reg(struct ast_device *ast) -{ - u8 i, index, reg; - const u8 *ext_reg_info; - - /* reset scratch */ - for (i = 0x81; i <= 0x9f; i++) - ast_set_index_reg(ast, AST_IO_VGACRI, i, 0x00); - - if (IS_AST_GEN4(ast) || IS_AST_GEN5(ast) || IS_AST_GEN6(ast)) - ext_reg_info = extreginfo_ast2300; - else - ext_reg_info = extreginfo; - - index = 0xa0; - while (*ext_reg_info != 0xff) { - ast_set_index_reg_mask(ast, AST_IO_VGACRI, index, 0x00, *ext_reg_info); - index++; - ext_reg_info++; - } - - /* disable standard IO/MEM decode if secondary */ - /* ast_set_index_reg-mask(ast, AST_IO_VGACRI, 0xa1, 0xff, 0x3); */ - - /* Set Ext. 
Default */ - ast_set_index_reg_mask(ast, AST_IO_VGACRI, 0x8c, 0x00, 0x01); - ast_set_index_reg_mask(ast, AST_IO_VGACRI, 0xb7, 0x00, 0x00); - - /* Enable RAMDAC for A1 */ - reg = 0x04; - if (IS_AST_GEN4(ast) || IS_AST_GEN5(ast) || IS_AST_GEN6(ast)) - reg |= 0x20; - ast_set_index_reg_mask(ast, AST_IO_VGACRI, 0xb6, 0xff, reg); -} - -static u32 __ast_mindwm(void __iomem *regs, u32 r) +u32 __ast_mindwm(void __iomem *regs, u32 r) { u32 data; @@ -89,7 +48,7 @@ static u32 __ast_mindwm(void __iomem *regs, u32 r) return __ast_read32(regs, 0x10000 + (r & 0x0000ffff)); } -static void __ast_moutdwm(void __iomem *regs, u32 r, u32 v) +void __ast_moutdwm(void __iomem *regs, u32 r, u32 v) { u32 data; @@ -113,332 +72,38 @@ void ast_moutdwm(struct ast_device *ast, u32 r, u32 v) __ast_moutdwm(ast->regs, r, v); } -/* - * AST2100/2150 DLL CBR Setting - */ -#define CBR_SIZE_AST2150 ((16 << 10) - 1) -#define CBR_PASSNUM_AST2150 5 -#define CBR_THRESHOLD_AST2150 10 -#define CBR_THRESHOLD2_AST2150 10 -#define TIMEOUT_AST2150 5000000 - -#define CBR_PATNUM_AST2150 8 - -static const u32 pattern_AST2150[14] = { - 0xFF00FF00, - 0xCC33CC33, - 0xAA55AA55, - 0xFFFE0001, - 0x683501FE, - 0x0F1929B0, - 0x2D0B4346, - 0x60767F02, - 0x6FBE36A6, - 0x3A253035, - 0x3019686D, - 0x41C6167E, - 0x620152BF, - 0x20F050E0 -}; - -static u32 mmctestburst2_ast2150(struct ast_device *ast, u32 datagen) -{ - u32 data, timeout; - - ast_moutdwm(ast, 0x1e6e0070, 0x00000000); - ast_moutdwm(ast, 0x1e6e0070, 0x00000001 | (datagen << 3)); - timeout = 0; - do { - data = ast_mindwm(ast, 0x1e6e0070) & 0x40; - if (++timeout > TIMEOUT_AST2150) { - ast_moutdwm(ast, 0x1e6e0070, 0x00000000); - return 0xffffffff; - } - } while (!data); - ast_moutdwm(ast, 0x1e6e0070, 0x00000000); - ast_moutdwm(ast, 0x1e6e0070, 0x00000003 | (datagen << 3)); - timeout = 0; - do { - data = ast_mindwm(ast, 0x1e6e0070) & 0x40; - if (++timeout > TIMEOUT_AST2150) { - ast_moutdwm(ast, 0x1e6e0070, 0x00000000); - return 0xffffffff; - } - } while (!data); - data = (ast_mindwm(ast, 0x1e6e0070) & 0x80) >> 7; - ast_moutdwm(ast, 0x1e6e0070, 0x00000000); - return data; -} - -#if 0 /* unused in DDX driver - here for completeness */ -static u32 mmctestsingle2_ast2150(struct ast_device *ast, u32 datagen) -{ - u32 data, timeout; - - ast_moutdwm(ast, 0x1e6e0070, 0x00000000); - ast_moutdwm(ast, 0x1e6e0070, 0x00000005 | (datagen << 3)); - timeout = 0; - do { - data = ast_mindwm(ast, 0x1e6e0070) & 0x40; - if (++timeout > TIMEOUT_AST2150) { - ast_moutdwm(ast, 0x1e6e0070, 0x00000000); - return 0xffffffff; - } - } while (!data); - data = (ast_mindwm(ast, 0x1e6e0070) & 0x80) >> 7; - ast_moutdwm(ast, 0x1e6e0070, 0x00000000); - return data; -} -#endif - -static int cbrtest_ast2150(struct ast_device *ast) -{ - int i; - - for (i = 0; i < 8; i++) - if (mmctestburst2_ast2150(ast, i)) - return 0; - return 1; -} - -static int cbrscan_ast2150(struct ast_device *ast, int busw) -{ - u32 patcnt, loop; - - for (patcnt = 0; patcnt < CBR_PATNUM_AST2150; patcnt++) { - ast_moutdwm(ast, 0x1e6e007c, pattern_AST2150[patcnt]); - for (loop = 0; loop < CBR_PASSNUM_AST2150; loop++) { - if (cbrtest_ast2150(ast)) - break; - } - if (loop == CBR_PASSNUM_AST2150) - return 0; - } - return 1; -} - - -static void cbrdlli_ast2150(struct ast_device *ast, int busw) -{ - u32 dll_min[4], dll_max[4], dlli, data, passcnt; - -cbr_start: - dll_min[0] = dll_min[1] = dll_min[2] = dll_min[3] = 0xff; - dll_max[0] = dll_max[1] = dll_max[2] = dll_max[3] = 0x0; - passcnt = 0; - - for (dlli = 0; dlli < 100; dlli++) { - ast_moutdwm(ast, 0x1e6e0068, dlli | 
(dlli << 8) | (dlli << 16) | (dlli << 24)); - data = cbrscan_ast2150(ast, busw); - if (data != 0) { - if (data & 0x1) { - if (dll_min[0] > dlli) - dll_min[0] = dlli; - if (dll_max[0] < dlli) - dll_max[0] = dlli; - } - passcnt++; - } else if (passcnt >= CBR_THRESHOLD_AST2150) - goto cbr_start; - } - if (dll_max[0] == 0 || (dll_max[0]-dll_min[0]) < CBR_THRESHOLD_AST2150) - goto cbr_start; - - dlli = dll_min[0] + (((dll_max[0] - dll_min[0]) * 7) >> 4); - ast_moutdwm(ast, 0x1e6e0068, dlli | (dlli << 8) | (dlli << 16) | (dlli << 24)); -} - - - -static void ast_init_dram_reg(struct ast_device *ast) -{ - u8 j; - u32 data, temp, i; - const struct ast_dramstruct *dram_reg_info; - - j = ast_get_index_reg_mask(ast, AST_IO_VGACRI, 0xd0, 0xff); - - if ((j & 0x80) == 0) { /* VGA only */ - if (IS_AST_GEN1(ast)) { - dram_reg_info = ast2000_dram_table_data; - ast_write32(ast, 0xf004, 0x1e6e0000); - ast_write32(ast, 0xf000, 0x1); - ast_write32(ast, 0x10100, 0xa8); - - do { - ; - } while (ast_read32(ast, 0x10100) != 0xa8); - } else { /* GEN2/GEN3 */ - if (ast->chip == AST2100 || ast->chip == AST2200) - dram_reg_info = ast2100_dram_table_data; - else - dram_reg_info = ast1100_dram_table_data; - - ast_write32(ast, 0xf004, 0x1e6e0000); - ast_write32(ast, 0xf000, 0x1); - ast_write32(ast, 0x12000, 0x1688A8A8); - do { - ; - } while (ast_read32(ast, 0x12000) != 0x01); - - ast_write32(ast, 0x10000, 0xfc600309); - do { - ; - } while (ast_read32(ast, 0x10000) != 0x01); - } - - while (dram_reg_info->index != 0xffff) { - if (dram_reg_info->index == 0xff00) {/* delay fn */ - for (i = 0; i < 15; i++) - udelay(dram_reg_info->data); - } else if (dram_reg_info->index == 0x4 && !IS_AST_GEN1(ast)) { - data = dram_reg_info->data; - if (ast->dram_type == AST_DRAM_1Gx16) - data = 0x00000d89; - else if (ast->dram_type == AST_DRAM_1Gx32) - data = 0x00000c8d; - - temp = ast_read32(ast, 0x12070); - temp &= 0xc; - temp <<= 2; - ast_write32(ast, 0x10000 + dram_reg_info->index, data | temp); - } else - ast_write32(ast, 0x10000 + dram_reg_info->index, dram_reg_info->data); - dram_reg_info++; - } - - /* AST 2100/2150 DRAM calibration */ - data = ast_read32(ast, 0x10120); - if (data == 0x5061) { /* 266Mhz */ - data = ast_read32(ast, 0x10004); - if (data & 0x40) - cbrdlli_ast2150(ast, 16); /* 16 bits */ - else - cbrdlli_ast2150(ast, 32); /* 32 bits */ - } - - switch (AST_GEN(ast)) { - case 1: - temp = ast_read32(ast, 0x10140); - ast_write32(ast, 0x10140, temp | 0x40); - break; - case 2: - case 3: - temp = ast_read32(ast, 0x1200c); - ast_write32(ast, 0x1200c, temp & 0xfffffffd); - temp = ast_read32(ast, 0x12040); - ast_write32(ast, 0x12040, temp | 0x40); - break; - default: - break; - } - } - - /* wait ready */ - do { - j = ast_get_index_reg_mask(ast, AST_IO_VGACRI, 0xd0, 0xff); - } while ((j & 0x40) == 0); -} - int ast_post_gpu(struct ast_device *ast) { int ret; - ast_set_def_ext_reg(ast); - if (AST_GEN(ast) >= 7) { - if (ast->tx_chip == AST_TX_ASTDP) { - ret = ast_dp_launch(ast); - if (ret) - return ret; - } + ret = ast_2600_post(ast); + if (ret) + return ret; } else if (AST_GEN(ast) >= 6) { - if (ast->config_mode == ast_use_p2a) { - ast_post_chip_2500(ast); - } else { - if (ast->tx_chip == AST_TX_SIL164) { - /* Enable DVO */ - ast_set_index_reg_mask(ast, AST_IO_VGACRI, 0xa3, 0xcf, 0x80); - } - } + ret = ast_2500_post(ast); + if (ret) + return ret; } else if (AST_GEN(ast) >= 4) { - if (ast->config_mode == ast_use_p2a) { - ast_post_chip_2300(ast); - ast_init_3rdtx(ast); - } else { - if (ast->tx_chip == AST_TX_SIL164) { - /* Enable DVO */ - 
ast_set_index_reg_mask(ast, AST_IO_VGACRI, 0xa3, 0xcf, 0x80); - } - } + ret = ast_2300_post(ast); + if (ret) + return ret; + } else if (AST_GEN(ast) >= 2) { + ret = ast_2100_post(ast); + if (ret) + return ret; } else { - if (ast->config_mode == ast_use_p2a) { - ast_init_dram_reg(ast); - } else { - if (ast->tx_chip == AST_TX_SIL164) { - /* Enable DVO */ - ast_set_index_reg_mask(ast, AST_IO_VGACRI, 0xa3, 0xcf, 0x80); - } - } + ret = ast_2000_post(ast); + if (ret) + return ret; } return 0; } -/* AST 2300 DRAM settings */ -#define AST_DDR3 0 -#define AST_DDR2 1 - -struct ast2300_dram_param { - u32 dram_type; - u32 dram_chipid; - u32 dram_freq; - u32 vram_size; - u32 odt; - u32 wodt; - u32 rodt; - u32 dram_config; - u32 reg_PERIOD; - u32 reg_MADJ; - u32 reg_SADJ; - u32 reg_MRS; - u32 reg_EMRS; - u32 reg_AC1; - u32 reg_AC2; - u32 reg_DQSIC; - u32 reg_DRV; - u32 reg_IOZ; - u32 reg_DQIDLY; - u32 reg_FREQ; - u32 madj_max; - u32 dll2_finetune_step; -}; - -/* - * DQSI DLL CBR Setting - */ -#define CBR_SIZE0 ((1 << 10) - 1) -#define CBR_SIZE1 ((4 << 10) - 1) -#define CBR_SIZE2 ((64 << 10) - 1) -#define CBR_PASSNUM 5 -#define CBR_PASSNUM2 5 -#define CBR_THRESHOLD 10 -#define CBR_THRESHOLD2 10 #define TIMEOUT 5000000 -#define CBR_PATNUM 8 -static const u32 pattern[8] = { - 0xFF00FF00, - 0xCC33CC33, - 0xAA55AA55, - 0x88778877, - 0x92CC4D6E, - 0x543D3CDE, - 0xF1E843C7, - 0x7C61D253 -}; - -static bool mmc_test(struct ast_device *ast, u32 datagen, u8 test_ctl) +bool mmc_test(struct ast_device *ast, u32 datagen, u8 test_ctl) { u32 data, timeout; @@ -458,1657 +123,7 @@ static bool mmc_test(struct ast_device *ast, u32 datagen, u8 test_ctl) return true; } -static u32 mmc_test2(struct ast_device *ast, u32 datagen, u8 test_ctl) -{ - u32 data, timeout; - - ast_moutdwm(ast, 0x1e6e0070, 0x00000000); - ast_moutdwm(ast, 0x1e6e0070, (datagen << 3) | test_ctl); - timeout = 0; - do { - data = ast_mindwm(ast, 0x1e6e0070) & 0x1000; - if (++timeout > TIMEOUT) { - ast_moutdwm(ast, 0x1e6e0070, 0x0); - return 0xffffffff; - } - } while (!data); - data = ast_mindwm(ast, 0x1e6e0078); - data = (data | (data >> 16)) & 0xffff; - ast_moutdwm(ast, 0x1e6e0070, 0x00000000); - return data; -} - - -static bool mmc_test_burst(struct ast_device *ast, u32 datagen) +bool mmc_test_burst(struct ast_device *ast, u32 datagen) { return mmc_test(ast, datagen, 0xc1); } - -static u32 mmc_test_burst2(struct ast_device *ast, u32 datagen) -{ - return mmc_test2(ast, datagen, 0x41); -} - -static bool mmc_test_single(struct ast_device *ast, u32 datagen) -{ - return mmc_test(ast, datagen, 0xc5); -} - -static u32 mmc_test_single2(struct ast_device *ast, u32 datagen) -{ - return mmc_test2(ast, datagen, 0x05); -} - -static bool mmc_test_single_2500(struct ast_device *ast, u32 datagen) -{ - return mmc_test(ast, datagen, 0x85); -} - -static int cbr_test(struct ast_device *ast) -{ - u32 data; - int i; - data = mmc_test_single2(ast, 0); - if ((data & 0xff) && (data & 0xff00)) - return 0; - for (i = 0; i < 8; i++) { - data = mmc_test_burst2(ast, i); - if ((data & 0xff) && (data & 0xff00)) - return 0; - } - if (!data) - return 3; - else if (data & 0xff) - return 2; - return 1; -} - -static int cbr_scan(struct ast_device *ast) -{ - u32 data, data2, patcnt, loop; - - data2 = 3; - for (patcnt = 0; patcnt < CBR_PATNUM; patcnt++) { - ast_moutdwm(ast, 0x1e6e007c, pattern[patcnt]); - for (loop = 0; loop < CBR_PASSNUM2; loop++) { - if ((data = cbr_test(ast)) != 0) { - data2 &= data; - if (!data2) - return 0; - break; - } - } - if (loop == CBR_PASSNUM2) - return 0; - } - 
return data2; -} - -static u32 cbr_test2(struct ast_device *ast) -{ - u32 data; - - data = mmc_test_burst2(ast, 0); - if (data == 0xffff) - return 0; - data |= mmc_test_single2(ast, 0); - if (data == 0xffff) - return 0; - - return ~data & 0xffff; -} - -static u32 cbr_scan2(struct ast_device *ast) -{ - u32 data, data2, patcnt, loop; - - data2 = 0xffff; - for (patcnt = 0; patcnt < CBR_PATNUM; patcnt++) { - ast_moutdwm(ast, 0x1e6e007c, pattern[patcnt]); - for (loop = 0; loop < CBR_PASSNUM2; loop++) { - if ((data = cbr_test2(ast)) != 0) { - data2 &= data; - if (!data2) - return 0; - break; - } - } - if (loop == CBR_PASSNUM2) - return 0; - } - return data2; -} - -static bool cbr_test3(struct ast_device *ast) -{ - if (!mmc_test_burst(ast, 0)) - return false; - if (!mmc_test_single(ast, 0)) - return false; - return true; -} - -static bool cbr_scan3(struct ast_device *ast) -{ - u32 patcnt, loop; - - for (patcnt = 0; patcnt < CBR_PATNUM; patcnt++) { - ast_moutdwm(ast, 0x1e6e007c, pattern[patcnt]); - for (loop = 0; loop < 2; loop++) { - if (cbr_test3(ast)) - break; - } - if (loop == 2) - return false; - } - return true; -} - -static bool finetuneDQI_L(struct ast_device *ast, struct ast2300_dram_param *param) -{ - u32 gold_sadj[2], dllmin[16], dllmax[16], dlli, data, cnt, mask, passcnt, retry = 0; - bool status = false; -FINETUNE_START: - for (cnt = 0; cnt < 16; cnt++) { - dllmin[cnt] = 0xff; - dllmax[cnt] = 0x0; - } - passcnt = 0; - for (dlli = 0; dlli < 76; dlli++) { - ast_moutdwm(ast, 0x1E6E0068, 0x00001400 | (dlli << 16) | (dlli << 24)); - ast_moutdwm(ast, 0x1E6E0074, CBR_SIZE1); - data = cbr_scan2(ast); - if (data != 0) { - mask = 0x00010001; - for (cnt = 0; cnt < 16; cnt++) { - if (data & mask) { - if (dllmin[cnt] > dlli) { - dllmin[cnt] = dlli; - } - if (dllmax[cnt] < dlli) { - dllmax[cnt] = dlli; - } - } - mask <<= 1; - } - passcnt++; - } else if (passcnt >= CBR_THRESHOLD2) { - break; - } - } - gold_sadj[0] = 0x0; - passcnt = 0; - for (cnt = 0; cnt < 16; cnt++) { - if ((dllmax[cnt] > dllmin[cnt]) && ((dllmax[cnt] - dllmin[cnt]) >= CBR_THRESHOLD2)) { - gold_sadj[0] += dllmin[cnt]; - passcnt++; - } - } - if (retry++ > 10) - goto FINETUNE_DONE; - if (passcnt != 16) { - goto FINETUNE_START; - } - status = true; -FINETUNE_DONE: - gold_sadj[0] = gold_sadj[0] >> 4; - gold_sadj[1] = gold_sadj[0]; - - data = 0; - for (cnt = 0; cnt < 8; cnt++) { - data >>= 3; - if ((dllmax[cnt] > dllmin[cnt]) && ((dllmax[cnt] - dllmin[cnt]) >= CBR_THRESHOLD2)) { - dlli = dllmin[cnt]; - if (gold_sadj[0] >= dlli) { - dlli = ((gold_sadj[0] - dlli) * 19) >> 5; - if (dlli > 3) { - dlli = 3; - } - } else { - dlli = ((dlli - gold_sadj[0]) * 19) >> 5; - if (dlli > 4) { - dlli = 4; - } - dlli = (8 - dlli) & 0x7; - } - data |= dlli << 21; - } - } - ast_moutdwm(ast, 0x1E6E0080, data); - - data = 0; - for (cnt = 8; cnt < 16; cnt++) { - data >>= 3; - if ((dllmax[cnt] > dllmin[cnt]) && ((dllmax[cnt] - dllmin[cnt]) >= CBR_THRESHOLD2)) { - dlli = dllmin[cnt]; - if (gold_sadj[1] >= dlli) { - dlli = ((gold_sadj[1] - dlli) * 19) >> 5; - if (dlli > 3) { - dlli = 3; - } else { - dlli = (dlli - 1) & 0x7; - } - } else { - dlli = ((dlli - gold_sadj[1]) * 19) >> 5; - dlli += 1; - if (dlli > 4) { - dlli = 4; - } - dlli = (8 - dlli) & 0x7; - } - data |= dlli << 21; - } - } - ast_moutdwm(ast, 0x1E6E0084, data); - return status; -} /* finetuneDQI_L */ - -static void finetuneDQSI(struct ast_device *ast) -{ - u32 dlli, dqsip, dqidly; - u32 reg_mcr18, reg_mcr0c, passcnt[2], diff; - u32 g_dqidly, g_dqsip, g_margin, g_side; - u16 pass[32][2][2]; - char 
tag[2][76]; - - /* Disable DQI CBR */ - reg_mcr0c = ast_mindwm(ast, 0x1E6E000C); - reg_mcr18 = ast_mindwm(ast, 0x1E6E0018); - reg_mcr18 &= 0x0000ffff; - ast_moutdwm(ast, 0x1E6E0018, reg_mcr18); - - for (dlli = 0; dlli < 76; dlli++) { - tag[0][dlli] = 0x0; - tag[1][dlli] = 0x0; - } - for (dqidly = 0; dqidly < 32; dqidly++) { - pass[dqidly][0][0] = 0xff; - pass[dqidly][0][1] = 0x0; - pass[dqidly][1][0] = 0xff; - pass[dqidly][1][1] = 0x0; - } - for (dqidly = 0; dqidly < 32; dqidly++) { - passcnt[0] = passcnt[1] = 0; - for (dqsip = 0; dqsip < 2; dqsip++) { - ast_moutdwm(ast, 0x1E6E000C, 0); - ast_moutdwm(ast, 0x1E6E0018, reg_mcr18 | (dqidly << 16) | (dqsip << 23)); - ast_moutdwm(ast, 0x1E6E000C, reg_mcr0c); - for (dlli = 0; dlli < 76; dlli++) { - ast_moutdwm(ast, 0x1E6E0068, 0x00001300 | (dlli << 16) | (dlli << 24)); - ast_moutdwm(ast, 0x1E6E0070, 0); - ast_moutdwm(ast, 0x1E6E0074, CBR_SIZE0); - if (cbr_scan3(ast)) { - if (dlli == 0) - break; - passcnt[dqsip]++; - tag[dqsip][dlli] = 'P'; - if (dlli < pass[dqidly][dqsip][0]) - pass[dqidly][dqsip][0] = (u16) dlli; - if (dlli > pass[dqidly][dqsip][1]) - pass[dqidly][dqsip][1] = (u16) dlli; - } else if (passcnt[dqsip] >= 5) - break; - else { - pass[dqidly][dqsip][0] = 0xff; - pass[dqidly][dqsip][1] = 0x0; - } - } - } - if (passcnt[0] == 0 && passcnt[1] == 0) - dqidly++; - } - /* Search margin */ - g_dqidly = g_dqsip = g_margin = g_side = 0; - - for (dqidly = 0; dqidly < 32; dqidly++) { - for (dqsip = 0; dqsip < 2; dqsip++) { - if (pass[dqidly][dqsip][0] > pass[dqidly][dqsip][1]) - continue; - diff = pass[dqidly][dqsip][1] - pass[dqidly][dqsip][0]; - if ((diff+2) < g_margin) - continue; - passcnt[0] = passcnt[1] = 0; - for (dlli = pass[dqidly][dqsip][0]; dlli > 0 && tag[dqsip][dlli] != 0; dlli--, passcnt[0]++); - for (dlli = pass[dqidly][dqsip][1]; dlli < 76 && tag[dqsip][dlli] != 0; dlli++, passcnt[1]++); - if (passcnt[0] > passcnt[1]) - passcnt[0] = passcnt[1]; - passcnt[1] = 0; - if (passcnt[0] > g_side) - passcnt[1] = passcnt[0] - g_side; - if (diff > (g_margin+1) && (passcnt[1] > 0 || passcnt[0] > 8)) { - g_margin = diff; - g_dqidly = dqidly; - g_dqsip = dqsip; - g_side = passcnt[0]; - } else if (passcnt[1] > 1 && g_side < 8) { - if (diff > g_margin) - g_margin = diff; - g_dqidly = dqidly; - g_dqsip = dqsip; - g_side = passcnt[0]; - } - } - } - reg_mcr18 = reg_mcr18 | (g_dqidly << 16) | (g_dqsip << 23); - ast_moutdwm(ast, 0x1E6E0018, reg_mcr18); - -} -static bool cbr_dll2(struct ast_device *ast, struct ast2300_dram_param *param) -{ - u32 dllmin[2], dllmax[2], dlli, data, passcnt, retry = 0; - bool status = false; - - finetuneDQSI(ast); - if (finetuneDQI_L(ast, param) == false) - return status; - -CBR_START2: - dllmin[0] = dllmin[1] = 0xff; - dllmax[0] = dllmax[1] = 0x0; - passcnt = 0; - for (dlli = 0; dlli < 76; dlli++) { - ast_moutdwm(ast, 0x1E6E0068, 0x00001300 | (dlli << 16) | (dlli << 24)); - ast_moutdwm(ast, 0x1E6E0074, CBR_SIZE2); - data = cbr_scan(ast); - if (data != 0) { - if (data & 0x1) { - if (dllmin[0] > dlli) { - dllmin[0] = dlli; - } - if (dllmax[0] < dlli) { - dllmax[0] = dlli; - } - } - if (data & 0x2) { - if (dllmin[1] > dlli) { - dllmin[1] = dlli; - } - if (dllmax[1] < dlli) { - dllmax[1] = dlli; - } - } - passcnt++; - } else if (passcnt >= CBR_THRESHOLD) { - break; - } - } - if (retry++ > 10) - goto CBR_DONE2; - if (dllmax[0] == 0 || (dllmax[0]-dllmin[0]) < CBR_THRESHOLD) { - goto CBR_START2; - } - if (dllmax[1] == 0 || (dllmax[1]-dllmin[1]) < CBR_THRESHOLD) { - goto CBR_START2; - } - status = true; -CBR_DONE2: - dlli = 
(dllmin[1] + dllmax[1]) >> 1; - dlli <<= 8; - dlli += (dllmin[0] + dllmax[0]) >> 1; - ast_moutdwm(ast, 0x1E6E0068, ast_mindwm(ast, 0x1E720058) | (dlli << 16)); - return status; -} /* CBRDLL2 */ - -static void get_ddr3_info(struct ast_device *ast, struct ast2300_dram_param *param) -{ - u32 trap, trap_AC2, trap_MRS; - - ast_moutdwm(ast, 0x1E6E2000, 0x1688A8A8); - - /* Ger trap info */ - trap = (ast_mindwm(ast, 0x1E6E2070) >> 25) & 0x3; - trap_AC2 = 0x00020000 + (trap << 16); - trap_AC2 |= 0x00300000 + ((trap & 0x2) << 19); - trap_MRS = 0x00000010 + (trap << 4); - trap_MRS |= ((trap & 0x2) << 18); - - param->reg_MADJ = 0x00034C4C; - param->reg_SADJ = 0x00001800; - param->reg_DRV = 0x000000F0; - param->reg_PERIOD = param->dram_freq; - param->rodt = 0; - - switch (param->dram_freq) { - case 336: - ast_moutdwm(ast, 0x1E6E2020, 0x0190); - param->wodt = 0; - param->reg_AC1 = 0x22202725; - param->reg_AC2 = 0xAA007613 | trap_AC2; - param->reg_DQSIC = 0x000000BA; - param->reg_MRS = 0x04001400 | trap_MRS; - param->reg_EMRS = 0x00000000; - param->reg_IOZ = 0x00000023; - param->reg_DQIDLY = 0x00000074; - param->reg_FREQ = 0x00004DC0; - param->madj_max = 96; - param->dll2_finetune_step = 3; - switch (param->dram_chipid) { - default: - case AST_DRAM_512Mx16: - case AST_DRAM_1Gx16: - param->reg_AC2 = 0xAA007613 | trap_AC2; - break; - case AST_DRAM_2Gx16: - param->reg_AC2 = 0xAA00761C | trap_AC2; - break; - case AST_DRAM_4Gx16: - param->reg_AC2 = 0xAA007636 | trap_AC2; - break; - } - break; - default: - case 396: - ast_moutdwm(ast, 0x1E6E2020, 0x03F1); - param->wodt = 1; - param->reg_AC1 = 0x33302825; - param->reg_AC2 = 0xCC009617 | trap_AC2; - param->reg_DQSIC = 0x000000E2; - param->reg_MRS = 0x04001600 | trap_MRS; - param->reg_EMRS = 0x00000000; - param->reg_IOZ = 0x00000034; - param->reg_DRV = 0x000000FA; - param->reg_DQIDLY = 0x00000089; - param->reg_FREQ = 0x00005040; - param->madj_max = 96; - param->dll2_finetune_step = 4; - - switch (param->dram_chipid) { - default: - case AST_DRAM_512Mx16: - case AST_DRAM_1Gx16: - param->reg_AC2 = 0xCC009617 | trap_AC2; - break; - case AST_DRAM_2Gx16: - param->reg_AC2 = 0xCC009622 | trap_AC2; - break; - case AST_DRAM_4Gx16: - param->reg_AC2 = 0xCC00963F | trap_AC2; - break; - } - break; - - case 408: - ast_moutdwm(ast, 0x1E6E2020, 0x01F0); - param->wodt = 1; - param->reg_AC1 = 0x33302825; - param->reg_AC2 = 0xCC009617 | trap_AC2; - param->reg_DQSIC = 0x000000E2; - param->reg_MRS = 0x04001600 | trap_MRS; - param->reg_EMRS = 0x00000000; - param->reg_IOZ = 0x00000023; - param->reg_DRV = 0x000000FA; - param->reg_DQIDLY = 0x00000089; - param->reg_FREQ = 0x000050C0; - param->madj_max = 96; - param->dll2_finetune_step = 4; - - switch (param->dram_chipid) { - default: - case AST_DRAM_512Mx16: - case AST_DRAM_1Gx16: - param->reg_AC2 = 0xCC009617 | trap_AC2; - break; - case AST_DRAM_2Gx16: - param->reg_AC2 = 0xCC009622 | trap_AC2; - break; - case AST_DRAM_4Gx16: - param->reg_AC2 = 0xCC00963F | trap_AC2; - break; - } - - break; - case 456: - ast_moutdwm(ast, 0x1E6E2020, 0x0230); - param->wodt = 0; - param->reg_AC1 = 0x33302926; - param->reg_AC2 = 0xCD44961A; - param->reg_DQSIC = 0x000000FC; - param->reg_MRS = 0x00081830; - param->reg_EMRS = 0x00000000; - param->reg_IOZ = 0x00000045; - param->reg_DQIDLY = 0x00000097; - param->reg_FREQ = 0x000052C0; - param->madj_max = 88; - param->dll2_finetune_step = 4; - break; - case 504: - ast_moutdwm(ast, 0x1E6E2020, 0x0270); - param->wodt = 1; - param->reg_AC1 = 0x33302926; - param->reg_AC2 = 0xDE44A61D; - param->reg_DQSIC = 0x00000117; - 
param->reg_MRS = 0x00081A30; - param->reg_EMRS = 0x00000000; - param->reg_IOZ = 0x070000BB; - param->reg_DQIDLY = 0x000000A0; - param->reg_FREQ = 0x000054C0; - param->madj_max = 79; - param->dll2_finetune_step = 4; - break; - case 528: - ast_moutdwm(ast, 0x1E6E2020, 0x0290); - param->wodt = 1; - param->rodt = 1; - param->reg_AC1 = 0x33302926; - param->reg_AC2 = 0xEF44B61E; - param->reg_DQSIC = 0x00000125; - param->reg_MRS = 0x00081A30; - param->reg_EMRS = 0x00000040; - param->reg_DRV = 0x000000F5; - param->reg_IOZ = 0x00000023; - param->reg_DQIDLY = 0x00000088; - param->reg_FREQ = 0x000055C0; - param->madj_max = 76; - param->dll2_finetune_step = 3; - break; - case 576: - ast_moutdwm(ast, 0x1E6E2020, 0x0140); - param->reg_MADJ = 0x00136868; - param->reg_SADJ = 0x00004534; - param->wodt = 1; - param->rodt = 1; - param->reg_AC1 = 0x33302A37; - param->reg_AC2 = 0xEF56B61E; - param->reg_DQSIC = 0x0000013F; - param->reg_MRS = 0x00101A50; - param->reg_EMRS = 0x00000040; - param->reg_DRV = 0x000000FA; - param->reg_IOZ = 0x00000023; - param->reg_DQIDLY = 0x00000078; - param->reg_FREQ = 0x000057C0; - param->madj_max = 136; - param->dll2_finetune_step = 3; - break; - case 600: - ast_moutdwm(ast, 0x1E6E2020, 0x02E1); - param->reg_MADJ = 0x00136868; - param->reg_SADJ = 0x00004534; - param->wodt = 1; - param->rodt = 1; - param->reg_AC1 = 0x32302A37; - param->reg_AC2 = 0xDF56B61F; - param->reg_DQSIC = 0x0000014D; - param->reg_MRS = 0x00101A50; - param->reg_EMRS = 0x00000004; - param->reg_DRV = 0x000000F5; - param->reg_IOZ = 0x00000023; - param->reg_DQIDLY = 0x00000078; - param->reg_FREQ = 0x000058C0; - param->madj_max = 132; - param->dll2_finetune_step = 3; - break; - case 624: - ast_moutdwm(ast, 0x1E6E2020, 0x0160); - param->reg_MADJ = 0x00136868; - param->reg_SADJ = 0x00004534; - param->wodt = 1; - param->rodt = 1; - param->reg_AC1 = 0x32302A37; - param->reg_AC2 = 0xEF56B621; - param->reg_DQSIC = 0x0000015A; - param->reg_MRS = 0x02101A50; - param->reg_EMRS = 0x00000004; - param->reg_DRV = 0x000000F5; - param->reg_IOZ = 0x00000034; - param->reg_DQIDLY = 0x00000078; - param->reg_FREQ = 0x000059C0; - param->madj_max = 128; - param->dll2_finetune_step = 3; - break; - } /* switch freq */ - - switch (param->dram_chipid) { - case AST_DRAM_512Mx16: - param->dram_config = 0x130; - break; - default: - case AST_DRAM_1Gx16: - param->dram_config = 0x131; - break; - case AST_DRAM_2Gx16: - param->dram_config = 0x132; - break; - case AST_DRAM_4Gx16: - param->dram_config = 0x133; - break; - } /* switch size */ - - switch (param->vram_size) { - default: - case SZ_8M: - param->dram_config |= 0x00; - break; - case SZ_16M: - param->dram_config |= 0x04; - break; - case SZ_32M: - param->dram_config |= 0x08; - break; - case SZ_64M: - param->dram_config |= 0x0c; - break; - } - -} - -static void ddr3_init(struct ast_device *ast, struct ast2300_dram_param *param) -{ - u32 data, data2, retry = 0; - -ddr3_init_start: - ast_moutdwm(ast, 0x1E6E0000, 0xFC600309); - ast_moutdwm(ast, 0x1E6E0018, 0x00000100); - ast_moutdwm(ast, 0x1E6E0024, 0x00000000); - ast_moutdwm(ast, 0x1E6E0034, 0x00000000); - udelay(10); - ast_moutdwm(ast, 0x1E6E0064, param->reg_MADJ); - ast_moutdwm(ast, 0x1E6E0068, param->reg_SADJ); - udelay(10); - ast_moutdwm(ast, 0x1E6E0064, param->reg_MADJ | 0xC0000); - udelay(10); - - ast_moutdwm(ast, 0x1E6E0004, param->dram_config); - ast_moutdwm(ast, 0x1E6E0008, 0x90040f); - ast_moutdwm(ast, 0x1E6E0010, param->reg_AC1); - ast_moutdwm(ast, 0x1E6E0014, param->reg_AC2); - ast_moutdwm(ast, 0x1E6E0020, param->reg_DQSIC); - 
ast_moutdwm(ast, 0x1E6E0080, 0x00000000); - ast_moutdwm(ast, 0x1E6E0084, 0x00000000); - ast_moutdwm(ast, 0x1E6E0088, param->reg_DQIDLY); - ast_moutdwm(ast, 0x1E6E0018, 0x4000A170); - ast_moutdwm(ast, 0x1E6E0018, 0x00002370); - ast_moutdwm(ast, 0x1E6E0038, 0x00000000); - ast_moutdwm(ast, 0x1E6E0040, 0xFF444444); - ast_moutdwm(ast, 0x1E6E0044, 0x22222222); - ast_moutdwm(ast, 0x1E6E0048, 0x22222222); - ast_moutdwm(ast, 0x1E6E004C, 0x00000002); - ast_moutdwm(ast, 0x1E6E0050, 0x80000000); - ast_moutdwm(ast, 0x1E6E0050, 0x00000000); - ast_moutdwm(ast, 0x1E6E0054, 0); - ast_moutdwm(ast, 0x1E6E0060, param->reg_DRV); - ast_moutdwm(ast, 0x1E6E006C, param->reg_IOZ); - ast_moutdwm(ast, 0x1E6E0070, 0x00000000); - ast_moutdwm(ast, 0x1E6E0074, 0x00000000); - ast_moutdwm(ast, 0x1E6E0078, 0x00000000); - ast_moutdwm(ast, 0x1E6E007C, 0x00000000); - /* Wait MCLK2X lock to MCLK */ - do { - data = ast_mindwm(ast, 0x1E6E001C); - } while (!(data & 0x08000000)); - data = ast_mindwm(ast, 0x1E6E001C); - data = (data >> 8) & 0xff; - while ((data & 0x08) || ((data & 0x7) < 2) || (data < 4)) { - data2 = (ast_mindwm(ast, 0x1E6E0064) & 0xfff3ffff) + 4; - if ((data2 & 0xff) > param->madj_max) { - break; - } - ast_moutdwm(ast, 0x1E6E0064, data2); - if (data2 & 0x00100000) { - data2 = ((data2 & 0xff) >> 3) + 3; - } else { - data2 = ((data2 & 0xff) >> 2) + 5; - } - data = ast_mindwm(ast, 0x1E6E0068) & 0xffff00ff; - data2 += data & 0xff; - data = data | (data2 << 8); - ast_moutdwm(ast, 0x1E6E0068, data); - udelay(10); - ast_moutdwm(ast, 0x1E6E0064, ast_mindwm(ast, 0x1E6E0064) | 0xC0000); - udelay(10); - data = ast_mindwm(ast, 0x1E6E0018) & 0xfffff1ff; - ast_moutdwm(ast, 0x1E6E0018, data); - data = data | 0x200; - ast_moutdwm(ast, 0x1E6E0018, data); - do { - data = ast_mindwm(ast, 0x1E6E001C); - } while (!(data & 0x08000000)); - - data = ast_mindwm(ast, 0x1E6E001C); - data = (data >> 8) & 0xff; - } - ast_moutdwm(ast, 0x1E720058, ast_mindwm(ast, 0x1E6E0068) & 0xffff); - data = ast_mindwm(ast, 0x1E6E0018) | 0xC00; - ast_moutdwm(ast, 0x1E6E0018, data); - - ast_moutdwm(ast, 0x1E6E0034, 0x00000001); - ast_moutdwm(ast, 0x1E6E000C, 0x00000040); - udelay(50); - /* Mode Register Setting */ - ast_moutdwm(ast, 0x1E6E002C, param->reg_MRS | 0x100); - ast_moutdwm(ast, 0x1E6E0030, param->reg_EMRS); - ast_moutdwm(ast, 0x1E6E0028, 0x00000005); - ast_moutdwm(ast, 0x1E6E0028, 0x00000007); - ast_moutdwm(ast, 0x1E6E0028, 0x00000003); - ast_moutdwm(ast, 0x1E6E0028, 0x00000001); - ast_moutdwm(ast, 0x1E6E002C, param->reg_MRS); - ast_moutdwm(ast, 0x1E6E000C, 0x00005C08); - ast_moutdwm(ast, 0x1E6E0028, 0x00000001); - - ast_moutdwm(ast, 0x1E6E000C, 0x00005C01); - data = 0; - if (param->wodt) { - data = 0x300; - } - if (param->rodt) { - data = data | 0x3000 | ((param->reg_AC2 & 0x60000) >> 3); - } - ast_moutdwm(ast, 0x1E6E0034, data | 0x3); - - /* Calibrate the DQSI delay */ - if ((cbr_dll2(ast, param) == false) && (retry++ < 10)) - goto ddr3_init_start; - - ast_moutdwm(ast, 0x1E6E0120, param->reg_FREQ); - /* ECC Memory Initialization */ -#ifdef ECC - ast_moutdwm(ast, 0x1E6E007C, 0x00000000); - ast_moutdwm(ast, 0x1E6E0070, 0x221); - do { - data = ast_mindwm(ast, 0x1E6E0070); - } while (!(data & 0x00001000)); - ast_moutdwm(ast, 0x1E6E0070, 0x00000000); - ast_moutdwm(ast, 0x1E6E0050, 0x80000000); - ast_moutdwm(ast, 0x1E6E0050, 0x00000000); -#endif - - -} - -static void get_ddr2_info(struct ast_device *ast, struct ast2300_dram_param *param) -{ - u32 trap, trap_AC2, trap_MRS; - - ast_moutdwm(ast, 0x1E6E2000, 0x1688A8A8); - - /* Ger trap info */ - trap = 
(ast_mindwm(ast, 0x1E6E2070) >> 25) & 0x3; - trap_AC2 = (trap << 20) | (trap << 16); - trap_AC2 += 0x00110000; - trap_MRS = 0x00000040 | (trap << 4); - - - param->reg_MADJ = 0x00034C4C; - param->reg_SADJ = 0x00001800; - param->reg_DRV = 0x000000F0; - param->reg_PERIOD = param->dram_freq; - param->rodt = 0; - - switch (param->dram_freq) { - case 264: - ast_moutdwm(ast, 0x1E6E2020, 0x0130); - param->wodt = 0; - param->reg_AC1 = 0x11101513; - param->reg_AC2 = 0x78117011; - param->reg_DQSIC = 0x00000092; - param->reg_MRS = 0x00000842; - param->reg_EMRS = 0x00000000; - param->reg_DRV = 0x000000F0; - param->reg_IOZ = 0x00000034; - param->reg_DQIDLY = 0x0000005A; - param->reg_FREQ = 0x00004AC0; - param->madj_max = 138; - param->dll2_finetune_step = 3; - break; - case 336: - ast_moutdwm(ast, 0x1E6E2020, 0x0190); - param->wodt = 1; - param->reg_AC1 = 0x22202613; - param->reg_AC2 = 0xAA009016 | trap_AC2; - param->reg_DQSIC = 0x000000BA; - param->reg_MRS = 0x00000A02 | trap_MRS; - param->reg_EMRS = 0x00000040; - param->reg_DRV = 0x000000FA; - param->reg_IOZ = 0x00000034; - param->reg_DQIDLY = 0x00000074; - param->reg_FREQ = 0x00004DC0; - param->madj_max = 96; - param->dll2_finetune_step = 3; - switch (param->dram_chipid) { - default: - case AST_DRAM_512Mx16: - param->reg_AC2 = 0xAA009012 | trap_AC2; - break; - case AST_DRAM_1Gx16: - param->reg_AC2 = 0xAA009016 | trap_AC2; - break; - case AST_DRAM_2Gx16: - param->reg_AC2 = 0xAA009023 | trap_AC2; - break; - case AST_DRAM_4Gx16: - param->reg_AC2 = 0xAA00903B | trap_AC2; - break; - } - break; - default: - case 396: - ast_moutdwm(ast, 0x1E6E2020, 0x03F1); - param->wodt = 1; - param->rodt = 0; - param->reg_AC1 = 0x33302714; - param->reg_AC2 = 0xCC00B01B | trap_AC2; - param->reg_DQSIC = 0x000000E2; - param->reg_MRS = 0x00000C02 | trap_MRS; - param->reg_EMRS = 0x00000040; - param->reg_DRV = 0x000000FA; - param->reg_IOZ = 0x00000034; - param->reg_DQIDLY = 0x00000089; - param->reg_FREQ = 0x00005040; - param->madj_max = 96; - param->dll2_finetune_step = 4; - - switch (param->dram_chipid) { - case AST_DRAM_512Mx16: - param->reg_AC2 = 0xCC00B016 | trap_AC2; - break; - default: - case AST_DRAM_1Gx16: - param->reg_AC2 = 0xCC00B01B | trap_AC2; - break; - case AST_DRAM_2Gx16: - param->reg_AC2 = 0xCC00B02B | trap_AC2; - break; - case AST_DRAM_4Gx16: - param->reg_AC2 = 0xCC00B03F | trap_AC2; - break; - } - - break; - - case 408: - ast_moutdwm(ast, 0x1E6E2020, 0x01F0); - param->wodt = 1; - param->rodt = 0; - param->reg_AC1 = 0x33302714; - param->reg_AC2 = 0xCC00B01B | trap_AC2; - param->reg_DQSIC = 0x000000E2; - param->reg_MRS = 0x00000C02 | trap_MRS; - param->reg_EMRS = 0x00000040; - param->reg_DRV = 0x000000FA; - param->reg_IOZ = 0x00000034; - param->reg_DQIDLY = 0x00000089; - param->reg_FREQ = 0x000050C0; - param->madj_max = 96; - param->dll2_finetune_step = 4; - - switch (param->dram_chipid) { - case AST_DRAM_512Mx16: - param->reg_AC2 = 0xCC00B016 | trap_AC2; - break; - default: - case AST_DRAM_1Gx16: - param->reg_AC2 = 0xCC00B01B | trap_AC2; - break; - case AST_DRAM_2Gx16: - param->reg_AC2 = 0xCC00B02B | trap_AC2; - break; - case AST_DRAM_4Gx16: - param->reg_AC2 = 0xCC00B03F | trap_AC2; - break; - } - - break; - case 456: - ast_moutdwm(ast, 0x1E6E2020, 0x0230); - param->wodt = 0; - param->reg_AC1 = 0x33302815; - param->reg_AC2 = 0xCD44B01E; - param->reg_DQSIC = 0x000000FC; - param->reg_MRS = 0x00000E72; - param->reg_EMRS = 0x00000000; - param->reg_DRV = 0x00000000; - param->reg_IOZ = 0x00000034; - param->reg_DQIDLY = 0x00000097; - param->reg_FREQ = 0x000052C0; - 
param->madj_max = 88; - param->dll2_finetune_step = 3; - break; - case 504: - ast_moutdwm(ast, 0x1E6E2020, 0x0261); - param->wodt = 1; - param->rodt = 1; - param->reg_AC1 = 0x33302815; - param->reg_AC2 = 0xDE44C022; - param->reg_DQSIC = 0x00000117; - param->reg_MRS = 0x00000E72; - param->reg_EMRS = 0x00000040; - param->reg_DRV = 0x0000000A; - param->reg_IOZ = 0x00000045; - param->reg_DQIDLY = 0x000000A0; - param->reg_FREQ = 0x000054C0; - param->madj_max = 79; - param->dll2_finetune_step = 3; - break; - case 528: - ast_moutdwm(ast, 0x1E6E2020, 0x0120); - param->wodt = 1; - param->rodt = 1; - param->reg_AC1 = 0x33302815; - param->reg_AC2 = 0xEF44D024; - param->reg_DQSIC = 0x00000125; - param->reg_MRS = 0x00000E72; - param->reg_EMRS = 0x00000004; - param->reg_DRV = 0x000000F9; - param->reg_IOZ = 0x00000045; - param->reg_DQIDLY = 0x000000A7; - param->reg_FREQ = 0x000055C0; - param->madj_max = 76; - param->dll2_finetune_step = 3; - break; - case 552: - ast_moutdwm(ast, 0x1E6E2020, 0x02A1); - param->wodt = 1; - param->rodt = 1; - param->reg_AC1 = 0x43402915; - param->reg_AC2 = 0xFF44E025; - param->reg_DQSIC = 0x00000132; - param->reg_MRS = 0x00000E72; - param->reg_EMRS = 0x00000040; - param->reg_DRV = 0x0000000A; - param->reg_IOZ = 0x00000045; - param->reg_DQIDLY = 0x000000AD; - param->reg_FREQ = 0x000056C0; - param->madj_max = 76; - param->dll2_finetune_step = 3; - break; - case 576: - ast_moutdwm(ast, 0x1E6E2020, 0x0140); - param->wodt = 1; - param->rodt = 1; - param->reg_AC1 = 0x43402915; - param->reg_AC2 = 0xFF44E027; - param->reg_DQSIC = 0x0000013F; - param->reg_MRS = 0x00000E72; - param->reg_EMRS = 0x00000004; - param->reg_DRV = 0x000000F5; - param->reg_IOZ = 0x00000045; - param->reg_DQIDLY = 0x000000B3; - param->reg_FREQ = 0x000057C0; - param->madj_max = 76; - param->dll2_finetune_step = 3; - break; - } - - switch (param->dram_chipid) { - case AST_DRAM_512Mx16: - param->dram_config = 0x100; - break; - default: - case AST_DRAM_1Gx16: - param->dram_config = 0x121; - break; - case AST_DRAM_2Gx16: - param->dram_config = 0x122; - break; - case AST_DRAM_4Gx16: - param->dram_config = 0x123; - break; - } /* switch size */ - - switch (param->vram_size) { - default: - case SZ_8M: - param->dram_config |= 0x00; - break; - case SZ_16M: - param->dram_config |= 0x04; - break; - case SZ_32M: - param->dram_config |= 0x08; - break; - case SZ_64M: - param->dram_config |= 0x0c; - break; - } -} - -static void ddr2_init(struct ast_device *ast, struct ast2300_dram_param *param) -{ - u32 data, data2, retry = 0; - -ddr2_init_start: - ast_moutdwm(ast, 0x1E6E0000, 0xFC600309); - ast_moutdwm(ast, 0x1E6E0018, 0x00000100); - ast_moutdwm(ast, 0x1E6E0024, 0x00000000); - ast_moutdwm(ast, 0x1E6E0064, param->reg_MADJ); - ast_moutdwm(ast, 0x1E6E0068, param->reg_SADJ); - udelay(10); - ast_moutdwm(ast, 0x1E6E0064, param->reg_MADJ | 0xC0000); - udelay(10); - - ast_moutdwm(ast, 0x1E6E0004, param->dram_config); - ast_moutdwm(ast, 0x1E6E0008, 0x90040f); - ast_moutdwm(ast, 0x1E6E0010, param->reg_AC1); - ast_moutdwm(ast, 0x1E6E0014, param->reg_AC2); - ast_moutdwm(ast, 0x1E6E0020, param->reg_DQSIC); - ast_moutdwm(ast, 0x1E6E0080, 0x00000000); - ast_moutdwm(ast, 0x1E6E0084, 0x00000000); - ast_moutdwm(ast, 0x1E6E0088, param->reg_DQIDLY); - ast_moutdwm(ast, 0x1E6E0018, 0x4000A130); - ast_moutdwm(ast, 0x1E6E0018, 0x00002330); - ast_moutdwm(ast, 0x1E6E0038, 0x00000000); - ast_moutdwm(ast, 0x1E6E0040, 0xFF808000); - ast_moutdwm(ast, 0x1E6E0044, 0x88848466); - ast_moutdwm(ast, 0x1E6E0048, 0x44440008); - ast_moutdwm(ast, 0x1E6E004C, 
0x00000000); - ast_moutdwm(ast, 0x1E6E0050, 0x80000000); - ast_moutdwm(ast, 0x1E6E0050, 0x00000000); - ast_moutdwm(ast, 0x1E6E0054, 0); - ast_moutdwm(ast, 0x1E6E0060, param->reg_DRV); - ast_moutdwm(ast, 0x1E6E006C, param->reg_IOZ); - ast_moutdwm(ast, 0x1E6E0070, 0x00000000); - ast_moutdwm(ast, 0x1E6E0074, 0x00000000); - ast_moutdwm(ast, 0x1E6E0078, 0x00000000); - ast_moutdwm(ast, 0x1E6E007C, 0x00000000); - - /* Wait MCLK2X lock to MCLK */ - do { - data = ast_mindwm(ast, 0x1E6E001C); - } while (!(data & 0x08000000)); - data = ast_mindwm(ast, 0x1E6E001C); - data = (data >> 8) & 0xff; - while ((data & 0x08) || ((data & 0x7) < 2) || (data < 4)) { - data2 = (ast_mindwm(ast, 0x1E6E0064) & 0xfff3ffff) + 4; - if ((data2 & 0xff) > param->madj_max) { - break; - } - ast_moutdwm(ast, 0x1E6E0064, data2); - if (data2 & 0x00100000) { - data2 = ((data2 & 0xff) >> 3) + 3; - } else { - data2 = ((data2 & 0xff) >> 2) + 5; - } - data = ast_mindwm(ast, 0x1E6E0068) & 0xffff00ff; - data2 += data & 0xff; - data = data | (data2 << 8); - ast_moutdwm(ast, 0x1E6E0068, data); - udelay(10); - ast_moutdwm(ast, 0x1E6E0064, ast_mindwm(ast, 0x1E6E0064) | 0xC0000); - udelay(10); - data = ast_mindwm(ast, 0x1E6E0018) & 0xfffff1ff; - ast_moutdwm(ast, 0x1E6E0018, data); - data = data | 0x200; - ast_moutdwm(ast, 0x1E6E0018, data); - do { - data = ast_mindwm(ast, 0x1E6E001C); - } while (!(data & 0x08000000)); - - data = ast_mindwm(ast, 0x1E6E001C); - data = (data >> 8) & 0xff; - } - ast_moutdwm(ast, 0x1E720058, ast_mindwm(ast, 0x1E6E0008) & 0xffff); - data = ast_mindwm(ast, 0x1E6E0018) | 0xC00; - ast_moutdwm(ast, 0x1E6E0018, data); - - ast_moutdwm(ast, 0x1E6E0034, 0x00000001); - ast_moutdwm(ast, 0x1E6E000C, 0x00000000); - udelay(50); - /* Mode Register Setting */ - ast_moutdwm(ast, 0x1E6E002C, param->reg_MRS | 0x100); - ast_moutdwm(ast, 0x1E6E0030, param->reg_EMRS); - ast_moutdwm(ast, 0x1E6E0028, 0x00000005); - ast_moutdwm(ast, 0x1E6E0028, 0x00000007); - ast_moutdwm(ast, 0x1E6E0028, 0x00000003); - ast_moutdwm(ast, 0x1E6E0028, 0x00000001); - - ast_moutdwm(ast, 0x1E6E000C, 0x00005C08); - ast_moutdwm(ast, 0x1E6E002C, param->reg_MRS); - ast_moutdwm(ast, 0x1E6E0028, 0x00000001); - ast_moutdwm(ast, 0x1E6E0030, param->reg_EMRS | 0x380); - ast_moutdwm(ast, 0x1E6E0028, 0x00000003); - ast_moutdwm(ast, 0x1E6E0030, param->reg_EMRS); - ast_moutdwm(ast, 0x1E6E0028, 0x00000003); - - ast_moutdwm(ast, 0x1E6E000C, 0x7FFF5C01); - data = 0; - if (param->wodt) { - data = 0x500; - } - if (param->rodt) { - data = data | 0x3000 | ((param->reg_AC2 & 0x60000) >> 3); - } - ast_moutdwm(ast, 0x1E6E0034, data | 0x3); - ast_moutdwm(ast, 0x1E6E0120, param->reg_FREQ); - - /* Calibrate the DQSI delay */ - if ((cbr_dll2(ast, param) == false) && (retry++ < 10)) - goto ddr2_init_start; - - /* ECC Memory Initialization */ -#ifdef ECC - ast_moutdwm(ast, 0x1E6E007C, 0x00000000); - ast_moutdwm(ast, 0x1E6E0070, 0x221); - do { - data = ast_mindwm(ast, 0x1E6E0070); - } while (!(data & 0x00001000)); - ast_moutdwm(ast, 0x1E6E0070, 0x00000000); - ast_moutdwm(ast, 0x1E6E0050, 0x80000000); - ast_moutdwm(ast, 0x1E6E0050, 0x00000000); -#endif - -} - -static void ast_post_chip_2300(struct ast_device *ast) -{ - struct ast2300_dram_param param; - u32 temp; - u8 reg; - - reg = ast_get_index_reg_mask(ast, AST_IO_VGACRI, 0xd0, 0xff); - if ((reg & 0x80) == 0) {/* vga only */ - ast_write32(ast, 0xf004, 0x1e6e0000); - ast_write32(ast, 0xf000, 0x1); - ast_write32(ast, 0x12000, 0x1688a8a8); - do { - ; - } while (ast_read32(ast, 0x12000) != 0x1); - - ast_write32(ast, 0x10000, 0xfc600309); - 
do { - ; - } while (ast_read32(ast, 0x10000) != 0x1); - - /* Slow down CPU/AHB CLK in VGA only mode */ - temp = ast_read32(ast, 0x12008); - temp |= 0x73; - ast_write32(ast, 0x12008, temp); - - param.dram_freq = 396; - param.dram_type = AST_DDR3; - temp = ast_mindwm(ast, 0x1e6e2070); - if (temp & 0x01000000) - param.dram_type = AST_DDR2; - switch (temp & 0x18000000) { - case 0: - param.dram_chipid = AST_DRAM_512Mx16; - break; - default: - case 0x08000000: - param.dram_chipid = AST_DRAM_1Gx16; - break; - case 0x10000000: - param.dram_chipid = AST_DRAM_2Gx16; - break; - case 0x18000000: - param.dram_chipid = AST_DRAM_4Gx16; - break; - } - switch (temp & 0x0c) { - default: - case 0x00: - param.vram_size = SZ_8M; - break; - - case 0x04: - param.vram_size = SZ_16M; - break; - - case 0x08: - param.vram_size = SZ_32M; - break; - - case 0x0c: - param.vram_size = SZ_64M; - break; - } - - if (param.dram_type == AST_DDR3) { - get_ddr3_info(ast, ¶m); - ddr3_init(ast, ¶m); - } else { - get_ddr2_info(ast, ¶m); - ddr2_init(ast, ¶m); - } - - temp = ast_mindwm(ast, 0x1e6e2040); - ast_moutdwm(ast, 0x1e6e2040, temp | 0x40); - } - - /* wait ready */ - do { - reg = ast_get_index_reg_mask(ast, AST_IO_VGACRI, 0xd0, 0xff); - } while ((reg & 0x40) == 0); -} - -static bool cbr_test_2500(struct ast_device *ast) -{ - ast_moutdwm(ast, 0x1E6E0074, 0x0000FFFF); - ast_moutdwm(ast, 0x1E6E007C, 0xFF00FF00); - if (!mmc_test_burst(ast, 0)) - return false; - if (!mmc_test_single_2500(ast, 0)) - return false; - return true; -} - -static bool ddr_test_2500(struct ast_device *ast) -{ - ast_moutdwm(ast, 0x1E6E0074, 0x0000FFFF); - ast_moutdwm(ast, 0x1E6E007C, 0xFF00FF00); - if (!mmc_test_burst(ast, 0)) - return false; - if (!mmc_test_burst(ast, 1)) - return false; - if (!mmc_test_burst(ast, 2)) - return false; - if (!mmc_test_burst(ast, 3)) - return false; - if (!mmc_test_single_2500(ast, 0)) - return false; - return true; -} - -static void ddr_init_common_2500(struct ast_device *ast) -{ - ast_moutdwm(ast, 0x1E6E0034, 0x00020080); - ast_moutdwm(ast, 0x1E6E0008, 0x2003000F); - ast_moutdwm(ast, 0x1E6E0038, 0x00000FFF); - ast_moutdwm(ast, 0x1E6E0040, 0x88448844); - ast_moutdwm(ast, 0x1E6E0044, 0x24422288); - ast_moutdwm(ast, 0x1E6E0048, 0x22222222); - ast_moutdwm(ast, 0x1E6E004C, 0x22222222); - ast_moutdwm(ast, 0x1E6E0050, 0x80000000); - ast_moutdwm(ast, 0x1E6E0208, 0x00000000); - ast_moutdwm(ast, 0x1E6E0218, 0x00000000); - ast_moutdwm(ast, 0x1E6E0220, 0x00000000); - ast_moutdwm(ast, 0x1E6E0228, 0x00000000); - ast_moutdwm(ast, 0x1E6E0230, 0x00000000); - ast_moutdwm(ast, 0x1E6E02A8, 0x00000000); - ast_moutdwm(ast, 0x1E6E02B0, 0x00000000); - ast_moutdwm(ast, 0x1E6E0240, 0x86000000); - ast_moutdwm(ast, 0x1E6E0244, 0x00008600); - ast_moutdwm(ast, 0x1E6E0248, 0x80000000); - ast_moutdwm(ast, 0x1E6E024C, 0x80808080); -} - -static void ddr_phy_init_2500(struct ast_device *ast) -{ - u32 data, pass, timecnt; - - pass = 0; - ast_moutdwm(ast, 0x1E6E0060, 0x00000005); - while (!pass) { - for (timecnt = 0; timecnt < TIMEOUT; timecnt++) { - data = ast_mindwm(ast, 0x1E6E0060) & 0x1; - if (!data) - break; - } - if (timecnt != TIMEOUT) { - data = ast_mindwm(ast, 0x1E6E0300) & 0x000A0000; - if (!data) - pass = 1; - } - if (!pass) { - ast_moutdwm(ast, 0x1E6E0060, 0x00000000); - udelay(10); /* delay 10 us */ - ast_moutdwm(ast, 0x1E6E0060, 0x00000005); - } - } - - ast_moutdwm(ast, 0x1E6E0060, 0x00000006); -} - -/* - * Check DRAM Size - * 1Gb : 0x80000000 ~ 0x87FFFFFF - * 2Gb : 0x80000000 ~ 0x8FFFFFFF - * 4Gb : 0x80000000 ~ 0x9FFFFFFF - * 8Gb : 0x80000000 ~ 
0xBFFFFFFF - */ -static void check_dram_size_2500(struct ast_device *ast, u32 tRFC) -{ - u32 reg_04, reg_14; - - reg_04 = ast_mindwm(ast, 0x1E6E0004) & 0xfffffffc; - reg_14 = ast_mindwm(ast, 0x1E6E0014) & 0xffffff00; - - ast_moutdwm(ast, 0xA0100000, 0x41424344); - ast_moutdwm(ast, 0x90100000, 0x35363738); - ast_moutdwm(ast, 0x88100000, 0x292A2B2C); - ast_moutdwm(ast, 0x80100000, 0x1D1E1F10); - - /* Check 8Gbit */ - if (ast_mindwm(ast, 0xA0100000) == 0x41424344) { - reg_04 |= 0x03; - reg_14 |= (tRFC >> 24) & 0xFF; - /* Check 4Gbit */ - } else if (ast_mindwm(ast, 0x90100000) == 0x35363738) { - reg_04 |= 0x02; - reg_14 |= (tRFC >> 16) & 0xFF; - /* Check 2Gbit */ - } else if (ast_mindwm(ast, 0x88100000) == 0x292A2B2C) { - reg_04 |= 0x01; - reg_14 |= (tRFC >> 8) & 0xFF; - } else { - reg_14 |= tRFC & 0xFF; - } - ast_moutdwm(ast, 0x1E6E0004, reg_04); - ast_moutdwm(ast, 0x1E6E0014, reg_14); -} - -static void enable_cache_2500(struct ast_device *ast) -{ - u32 reg_04, data; - - reg_04 = ast_mindwm(ast, 0x1E6E0004); - ast_moutdwm(ast, 0x1E6E0004, reg_04 | 0x1000); - - do - data = ast_mindwm(ast, 0x1E6E0004); - while (!(data & 0x80000)); - ast_moutdwm(ast, 0x1E6E0004, reg_04 | 0x400); -} - -static void set_mpll_2500(struct ast_device *ast) -{ - u32 addr, data, param; - - /* Reset MMC */ - ast_moutdwm(ast, 0x1E6E0000, 0xFC600309); - ast_moutdwm(ast, 0x1E6E0034, 0x00020080); - for (addr = 0x1e6e0004; addr < 0x1e6e0090;) { - ast_moutdwm(ast, addr, 0x0); - addr += 4; - } - ast_moutdwm(ast, 0x1E6E0034, 0x00020000); - - ast_moutdwm(ast, 0x1E6E2000, 0x1688A8A8); - data = ast_mindwm(ast, 0x1E6E2070) & 0x00800000; - if (data) { - /* CLKIN = 25MHz */ - param = 0x930023E0; - ast_moutdwm(ast, 0x1E6E2160, 0x00011320); - } else { - /* CLKIN = 24MHz */ - param = 0x93002400; - } - ast_moutdwm(ast, 0x1E6E2020, param); - udelay(100); -} - -static void reset_mmc_2500(struct ast_device *ast) -{ - ast_moutdwm(ast, 0x1E78505C, 0x00000004); - ast_moutdwm(ast, 0x1E785044, 0x00000001); - ast_moutdwm(ast, 0x1E785048, 0x00004755); - ast_moutdwm(ast, 0x1E78504C, 0x00000013); - mdelay(100); - ast_moutdwm(ast, 0x1E785054, 0x00000077); - ast_moutdwm(ast, 0x1E6E0000, 0xFC600309); -} - -static void ddr3_init_2500(struct ast_device *ast, const u32 *ddr_table) -{ - - ast_moutdwm(ast, 0x1E6E0004, 0x00000303); - ast_moutdwm(ast, 0x1E6E0010, ddr_table[REGIDX_010]); - ast_moutdwm(ast, 0x1E6E0014, ddr_table[REGIDX_014]); - ast_moutdwm(ast, 0x1E6E0018, ddr_table[REGIDX_018]); - ast_moutdwm(ast, 0x1E6E0020, ddr_table[REGIDX_020]); /* MODEREG4/6 */ - ast_moutdwm(ast, 0x1E6E0024, ddr_table[REGIDX_024]); /* MODEREG5 */ - ast_moutdwm(ast, 0x1E6E002C, ddr_table[REGIDX_02C] | 0x100); /* MODEREG0/2 */ - ast_moutdwm(ast, 0x1E6E0030, ddr_table[REGIDX_030]); /* MODEREG1/3 */ - - /* DDR PHY Setting */ - ast_moutdwm(ast, 0x1E6E0200, 0x02492AAE); - ast_moutdwm(ast, 0x1E6E0204, 0x00001001); - ast_moutdwm(ast, 0x1E6E020C, 0x55E00B0B); - ast_moutdwm(ast, 0x1E6E0210, 0x20000000); - ast_moutdwm(ast, 0x1E6E0214, ddr_table[REGIDX_214]); - ast_moutdwm(ast, 0x1E6E02E0, ddr_table[REGIDX_2E0]); - ast_moutdwm(ast, 0x1E6E02E4, ddr_table[REGIDX_2E4]); - ast_moutdwm(ast, 0x1E6E02E8, ddr_table[REGIDX_2E8]); - ast_moutdwm(ast, 0x1E6E02EC, ddr_table[REGIDX_2EC]); - ast_moutdwm(ast, 0x1E6E02F0, ddr_table[REGIDX_2F0]); - ast_moutdwm(ast, 0x1E6E02F4, ddr_table[REGIDX_2F4]); - ast_moutdwm(ast, 0x1E6E02F8, ddr_table[REGIDX_2F8]); - ast_moutdwm(ast, 0x1E6E0290, 0x00100008); - ast_moutdwm(ast, 0x1E6E02C0, 0x00000006); - - /* Controller Setting */ - ast_moutdwm(ast, 0x1E6E0034, 
0x00020091); - - /* Wait DDR PHY init done */ - ddr_phy_init_2500(ast); - - ast_moutdwm(ast, 0x1E6E0120, ddr_table[REGIDX_PLL]); - ast_moutdwm(ast, 0x1E6E000C, 0x42AA5C81); - ast_moutdwm(ast, 0x1E6E0034, 0x0001AF93); - - check_dram_size_2500(ast, ddr_table[REGIDX_RFC]); - enable_cache_2500(ast); - ast_moutdwm(ast, 0x1E6E001C, 0x00000008); - ast_moutdwm(ast, 0x1E6E0038, 0xFFFFFF00); -} - -static void ddr4_init_2500(struct ast_device *ast, const u32 *ddr_table) -{ - u32 data, data2, pass, retrycnt; - u32 ddr_vref, phy_vref; - u32 min_ddr_vref = 0, min_phy_vref = 0; - u32 max_ddr_vref = 0, max_phy_vref = 0; - - ast_moutdwm(ast, 0x1E6E0004, 0x00000313); - ast_moutdwm(ast, 0x1E6E0010, ddr_table[REGIDX_010]); - ast_moutdwm(ast, 0x1E6E0014, ddr_table[REGIDX_014]); - ast_moutdwm(ast, 0x1E6E0018, ddr_table[REGIDX_018]); - ast_moutdwm(ast, 0x1E6E0020, ddr_table[REGIDX_020]); /* MODEREG4/6 */ - ast_moutdwm(ast, 0x1E6E0024, ddr_table[REGIDX_024]); /* MODEREG5 */ - ast_moutdwm(ast, 0x1E6E002C, ddr_table[REGIDX_02C] | 0x100); /* MODEREG0/2 */ - ast_moutdwm(ast, 0x1E6E0030, ddr_table[REGIDX_030]); /* MODEREG1/3 */ - - /* DDR PHY Setting */ - ast_moutdwm(ast, 0x1E6E0200, 0x42492AAE); - ast_moutdwm(ast, 0x1E6E0204, 0x09002000); - ast_moutdwm(ast, 0x1E6E020C, 0x55E00B0B); - ast_moutdwm(ast, 0x1E6E0210, 0x20000000); - ast_moutdwm(ast, 0x1E6E0214, ddr_table[REGIDX_214]); - ast_moutdwm(ast, 0x1E6E02E0, ddr_table[REGIDX_2E0]); - ast_moutdwm(ast, 0x1E6E02E4, ddr_table[REGIDX_2E4]); - ast_moutdwm(ast, 0x1E6E02E8, ddr_table[REGIDX_2E8]); - ast_moutdwm(ast, 0x1E6E02EC, ddr_table[REGIDX_2EC]); - ast_moutdwm(ast, 0x1E6E02F0, ddr_table[REGIDX_2F0]); - ast_moutdwm(ast, 0x1E6E02F4, ddr_table[REGIDX_2F4]); - ast_moutdwm(ast, 0x1E6E02F8, ddr_table[REGIDX_2F8]); - ast_moutdwm(ast, 0x1E6E0290, 0x00100008); - ast_moutdwm(ast, 0x1E6E02C4, 0x3C183C3C); - ast_moutdwm(ast, 0x1E6E02C8, 0x00631E0E); - - /* Controller Setting */ - ast_moutdwm(ast, 0x1E6E0034, 0x0001A991); - - /* Train PHY Vref first */ - pass = 0; - - for (retrycnt = 0; retrycnt < 4 && pass == 0; retrycnt++) { - max_phy_vref = 0x0; - pass = 0; - ast_moutdwm(ast, 0x1E6E02C0, 0x00001C06); - for (phy_vref = 0x40; phy_vref < 0x80; phy_vref++) { - ast_moutdwm(ast, 0x1E6E000C, 0x00000000); - ast_moutdwm(ast, 0x1E6E0060, 0x00000000); - ast_moutdwm(ast, 0x1E6E02CC, phy_vref | (phy_vref << 8)); - /* Fire DFI Init */ - ddr_phy_init_2500(ast); - ast_moutdwm(ast, 0x1E6E000C, 0x00005C01); - if (cbr_test_2500(ast)) { - pass++; - data = ast_mindwm(ast, 0x1E6E03D0); - data2 = data >> 8; - data = data & 0xff; - if (data > data2) - data = data2; - if (max_phy_vref < data) { - max_phy_vref = data; - min_phy_vref = phy_vref; - } - } else if (pass > 0) - break; - } - } - ast_moutdwm(ast, 0x1E6E02CC, min_phy_vref | (min_phy_vref << 8)); - - /* Train DDR Vref next */ - pass = 0; - - for (retrycnt = 0; retrycnt < 4 && pass == 0; retrycnt++) { - min_ddr_vref = 0xFF; - max_ddr_vref = 0x0; - pass = 0; - for (ddr_vref = 0x00; ddr_vref < 0x40; ddr_vref++) { - ast_moutdwm(ast, 0x1E6E000C, 0x00000000); - ast_moutdwm(ast, 0x1E6E0060, 0x00000000); - ast_moutdwm(ast, 0x1E6E02C0, 0x00000006 | (ddr_vref << 8)); - /* Fire DFI Init */ - ddr_phy_init_2500(ast); - ast_moutdwm(ast, 0x1E6E000C, 0x00005C01); - if (cbr_test_2500(ast)) { - pass++; - if (min_ddr_vref > ddr_vref) - min_ddr_vref = ddr_vref; - if (max_ddr_vref < ddr_vref) - max_ddr_vref = ddr_vref; - } else if (pass != 0) - break; - } - } - - ast_moutdwm(ast, 0x1E6E000C, 0x00000000); - ast_moutdwm(ast, 0x1E6E0060, 0x00000000); - ddr_vref = 
(min_ddr_vref + max_ddr_vref + 1) >> 1; - ast_moutdwm(ast, 0x1E6E02C0, 0x00000006 | (ddr_vref << 8)); - - /* Wait DDR PHY init done */ - ddr_phy_init_2500(ast); - - ast_moutdwm(ast, 0x1E6E0120, ddr_table[REGIDX_PLL]); - ast_moutdwm(ast, 0x1E6E000C, 0x42AA5C81); - ast_moutdwm(ast, 0x1E6E0034, 0x0001AF93); - - check_dram_size_2500(ast, ddr_table[REGIDX_RFC]); - enable_cache_2500(ast); - ast_moutdwm(ast, 0x1E6E001C, 0x00000008); - ast_moutdwm(ast, 0x1E6E0038, 0xFFFFFF00); -} - -static bool ast_dram_init_2500(struct ast_device *ast) -{ - u32 data; - u32 max_tries = 5; - - do { - if (max_tries-- == 0) - return false; - set_mpll_2500(ast); - reset_mmc_2500(ast); - ddr_init_common_2500(ast); - - data = ast_mindwm(ast, 0x1E6E2070); - if (data & 0x01000000) - ddr4_init_2500(ast, ast2500_ddr4_1600_timing_table); - else - ddr3_init_2500(ast, ast2500_ddr3_1600_timing_table); - } while (!ddr_test_2500(ast)); - - ast_moutdwm(ast, 0x1E6E2040, ast_mindwm(ast, 0x1E6E2040) | 0x41); - - /* Patch code */ - data = ast_mindwm(ast, 0x1E6E200C) & 0xF9FFFFFF; - ast_moutdwm(ast, 0x1E6E200C, data | 0x10000000); - - return true; -} - -void ast_patch_ahb_2500(void __iomem *regs) -{ - u32 data; - - /* Clear bus lock condition */ - __ast_moutdwm(regs, 0x1e600000, 0xAEED1A03); - __ast_moutdwm(regs, 0x1e600084, 0x00010000); - __ast_moutdwm(regs, 0x1e600088, 0x00000000); - __ast_moutdwm(regs, 0x1e6e2000, 0x1688A8A8); - - data = __ast_mindwm(regs, 0x1e6e2070); - if (data & 0x08000000) { /* check fast reset */ - /* - * If "Fast restet" is enabled for ARM-ICE debugger, - * then WDT needs to enable, that - * WDT04 is WDT#1 Reload reg. - * WDT08 is WDT#1 counter restart reg to avoid system deadlock - * WDT0C is WDT#1 control reg - * [6:5]:= 01:Full chip - * [4]:= 1:1MHz clock source - * [1]:= 1:WDT will be cleeared and disabled after timeout occurs - * [0]:= 1:WDT enable - */ - __ast_moutdwm(regs, 0x1E785004, 0x00000010); - __ast_moutdwm(regs, 0x1E785008, 0x00004755); - __ast_moutdwm(regs, 0x1E78500c, 0x00000033); - udelay(1000); - } - - do { - __ast_moutdwm(regs, 0x1e6e2000, 0x1688A8A8); - data = __ast_mindwm(regs, 0x1e6e2000); - } while (data != 1); - - __ast_moutdwm(regs, 0x1e6e207c, 0x08000000); /* clear fast reset */ -} - -void ast_post_chip_2500(struct ast_device *ast) -{ - struct drm_device *dev = &ast->base; - u32 temp; - u8 reg; - - reg = ast_get_index_reg_mask(ast, AST_IO_VGACRI, 0xd0, 0xff); - if ((reg & AST_IO_VGACRD0_VRAM_INIT_STATUS_MASK) == 0) {/* vga only */ - /* Clear bus lock condition */ - ast_patch_ahb_2500(ast->regs); - - /* Disable watchdog */ - ast_moutdwm(ast, 0x1E78502C, 0x00000000); - ast_moutdwm(ast, 0x1E78504C, 0x00000000); - - /* - * Reset USB port to patch USB unknown device issue - * SCU90 is Multi-function Pin Control #5 - * [29]:= 1:Enable USB2.0 Host port#1 (that the mutually shared USB2.0 Hub - * port). - * SCU94 is Multi-function Pin Control #6 - * [14:13]:= 1x:USB2.0 Host2 controller - * SCU70 is Hardware Strap reg - * [23]:= 1:CLKIN is 25MHz and USBCK1 = 24/48 MHz (determined by - * [18]: 0(24)/1(48) MHz) - * SCU7C is Write clear reg to SCU70 - * [23]:= write 1 and then SCU70[23] will be clear as 0b. 
- */ - ast_moutdwm(ast, 0x1E6E2090, 0x20000000); - ast_moutdwm(ast, 0x1E6E2094, 0x00004000); - if (ast_mindwm(ast, 0x1E6E2070) & 0x00800000) { - ast_moutdwm(ast, 0x1E6E207C, 0x00800000); - mdelay(100); - ast_moutdwm(ast, 0x1E6E2070, 0x00800000); - } - /* Modify eSPI reset pin */ - temp = ast_mindwm(ast, 0x1E6E2070); - if (temp & 0x02000000) - ast_moutdwm(ast, 0x1E6E207C, 0x00004000); - - /* Slow down CPU/AHB CLK in VGA only mode */ - temp = ast_read32(ast, 0x12008); - temp |= 0x73; - ast_write32(ast, 0x12008, temp); - - if (!ast_dram_init_2500(ast)) - drm_err(dev, "DRAM init failed !\n"); - - temp = ast_mindwm(ast, 0x1e6e2040); - ast_moutdwm(ast, 0x1e6e2040, temp | 0x40); - } - - /* wait ready */ - do { - reg = ast_get_index_reg_mask(ast, AST_IO_VGACRI, 0xd0, 0xff); - } while ((reg & 0x40) == 0); -} diff --git a/drivers/gpu/drm/ast/ast_post.h b/drivers/gpu/drm/ast/ast_post.h new file mode 100644 index 000000000000..aa5d247bebe8 --- /dev/null +++ b/drivers/gpu/drm/ast/ast_post.h @@ -0,0 +1,50 @@ +/* SPDX-License-Identifier: MIT */ + +#ifndef AST_POST_H +#define AST_POST_H + +#include <linux/limits.h> +#include <linux/types.h> + +struct ast_device; + +/* DRAM timing tables */ +struct ast_dramstruct { + u16 index; + u32 data; +}; + +/* hardware fields */ +#define __AST_DRAMSTRUCT_DRAM_TYPE 0x0004 + +/* control commands */ +#define __AST_DRAMSTRUCT_UDELAY 0xff00 +#define __AST_DRAMSTRUCT_INVALID 0xffff + +#define __AST_DRAMSTRUCT_INDEX(_name) \ + (__AST_DRAMSTRUCT_ ## _name) + +#define AST_DRAMSTRUCT_INIT(_name, _value) \ + { __AST_DRAMSTRUCT_INDEX(_name), (_value) } + +#define AST_DRAMSTRUCT_UDELAY(_usecs) \ + AST_DRAMSTRUCT_INIT(UDELAY, _usecs) +#define AST_DRAMSTRUCT_INVALID \ + AST_DRAMSTRUCT_INIT(INVALID, U32_MAX) + +#define AST_DRAMSTRUCT_IS(_entry, _name) \ + ((_entry)->index == __AST_DRAMSTRUCT_INDEX(_name)) + +u32 __ast_mindwm(void __iomem *regs, u32 r); +void __ast_moutdwm(void __iomem *regs, u32 r, u32 v); + +bool mmc_test(struct ast_device *ast, u32 datagen, u8 test_ctl); +bool mmc_test_burst(struct ast_device *ast, u32 datagen); + +/* ast_2000.c */ +void ast_2000_set_def_ext_reg(struct ast_device *ast); + +/* ast_2300.c */ +void ast_2300_set_def_ext_reg(struct ast_device *ast); + +#endif diff --git a/drivers/gpu/drm/ast/ast_reg.h b/drivers/gpu/drm/ast/ast_reg.h index e15adaf3a80e..30578e3b07e4 100644 --- a/drivers/gpu/drm/ast/ast_reg.h +++ b/drivers/gpu/drm/ast/ast_reg.h @@ -29,6 +29,7 @@ #define AST_IO_VGAGRI (0x4E) #define AST_IO_VGACRI (0x54) +#define AST_IO_VGACR17_SYNC_ENABLE BIT(7) /* called "Hardware reset" in docs */ #define AST_IO_VGACR80_PASSWORD (0xa8) #define AST_IO_VGACR99_VGAMEM_RSRV_MASK GENMASK(1, 0) #define AST_IO_VGACRA1_VGAIO_DISABLED BIT(1) diff --git a/drivers/gpu/drm/bridge/Kconfig b/drivers/gpu/drm/bridge/Kconfig index b9e0ca85226a..a250afd8d662 100644 --- a/drivers/gpu/drm/bridge/Kconfig +++ b/drivers/gpu/drm/bridge/Kconfig @@ -120,8 +120,8 @@ config DRM_ITE_IT6505 select DRM_DISPLAY_DP_AUX_BUS select DRM_KMS_HELPER select EXTCON - select CRYPTO - select CRYPTO_HASH + select CRYPTO_LIB_SHA1 + select REGMAP_I2C help ITE IT6505 DisplayPort bridge chip driver. @@ -316,6 +316,19 @@ config DRM_SIMPLE_BRIDGE Support for non-programmable DRM bridges, such as ADI ADV7123, TI THS8134 and THS8135 or passive resistor ladder DACs. 
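Editor's note: the AST_DRAMSTRUCT_*() helpers introduced in the new ast_post.h above describe DRAM init tables as sentinel-terminated {index, data} pairs, with reserved index values acting as control commands (UDELAY, INVALID). The following is a minimal, hypothetical sketch of how such a table could be declared and replayed using only those macros; the register offset, data values and the ast_write_dram() helper are invented for illustration and are not taken from the actual ast_2000.c/ast_2300.c users of the header.

#include <linux/delay.h>
#include "ast_post.h"

static const struct ast_dramstruct example_dram_table[] = {
	AST_DRAMSTRUCT_INIT(DRAM_TYPE, 0x00000585),	/* hypothetical DRAM-type value */
	{ 0x0008, 0x0011030f },				/* ordinary index/value pair */
	AST_DRAMSTRUCT_UDELAY(100),			/* control command: wait 100 us */
	AST_DRAMSTRUCT_INVALID,				/* table terminator */
};

static void example_replay_dram_table(struct ast_device *ast)
{
	const struct ast_dramstruct *entry;

	for (entry = example_dram_table; !AST_DRAMSTRUCT_IS(entry, INVALID); entry++) {
		if (AST_DRAMSTRUCT_IS(entry, UDELAY))
			udelay(entry->data);
		else
			ast_write_dram(ast, entry->index, entry->data);	/* hypothetical register-write helper */
	}
}

Keeping the terminator and delay commands inside the table lets per-chip timing data stay out of the init code paths, which only need the generic replay loop sketched above.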
+config DRM_SOLOMON_SSD2825 + tristate "SSD2825 RGB/DSI bridge" + depends on SPI_MASTER && OF + select DRM_MIPI_DSI + select DRM_KMS_HELPER + select DRM_PANEL + help + Say Y here if you want support for the Solomon SSD2825 RGB/DSI + SPI bridge driver. + + Say M here if you want to support this hardware as a module. + The module will be named "ssd2825". + config DRM_THINE_THC63LVD1024 tristate "Thine THC63LVD1024 LVDS decoder bridge" depends on OF @@ -438,6 +451,18 @@ config DRM_TI_TPD12S015 Texas Instruments TPD12S015 HDMI level shifter and ESD protection driver. +config DRM_WAVESHARE_BRIDGE + tristate "Waveshare DSI bridge" + depends on OF + depends on BACKLIGHT_CLASS_DEVICE + select DRM_PANEL_BRIDGE + select DRM_KMS_HELPER + select DRM_MIPI_DSI + select REGMAP_I2C + help + Driver for waveshare DSI to DPI bridge board. + Please say Y if you have such hardware + source "drivers/gpu/drm/bridge/analogix/Kconfig" source "drivers/gpu/drm/bridge/adv7511/Kconfig" diff --git a/drivers/gpu/drm/bridge/Makefile b/drivers/gpu/drm/bridge/Makefile index 245e8a27e3fc..c7dc03182e59 100644 --- a/drivers/gpu/drm/bridge/Makefile +++ b/drivers/gpu/drm/bridge/Makefile @@ -27,6 +27,7 @@ obj-$(CONFIG_DRM_SIL_SII8620) += sil-sii8620.o obj-$(CONFIG_DRM_SII902X) += sii902x.o obj-$(CONFIG_DRM_SII9234) += sii9234.o obj-$(CONFIG_DRM_SIMPLE_BRIDGE) += simple-bridge.o +obj-$(CONFIG_DRM_SOLOMON_SSD2825) += ssd2825.o obj-$(CONFIG_DRM_THINE_THC63LVD1024) += thc63lvd1024.o obj-$(CONFIG_DRM_TOSHIBA_TC358762) += tc358762.o obj-$(CONFIG_DRM_TOSHIBA_TC358764) += tc358764.o @@ -40,6 +41,7 @@ obj-$(CONFIG_DRM_TI_SN65DSI86) += ti-sn65dsi86.o obj-$(CONFIG_DRM_TI_TDP158) += ti-tdp158.o obj-$(CONFIG_DRM_TI_TFP410) += ti-tfp410.o obj-$(CONFIG_DRM_TI_TPD12S015) += ti-tpd12s015.o +obj-$(CONFIG_DRM_WAVESHARE_BRIDGE) += waveshare-dsi.o obj-$(CONFIG_DRM_NWL_MIPI_DSI) += nwl-dsi.o obj-$(CONFIG_DRM_ITE_IT66121) += ite-it66121.o diff --git a/drivers/gpu/drm/bridge/adv7511/adv7511.h b/drivers/gpu/drm/bridge/adv7511/adv7511.h index 71bb64e5f481..8be7266fd4f4 100644 --- a/drivers/gpu/drm/bridge/adv7511/adv7511.h +++ b/drivers/gpu/drm/bridge/adv7511/adv7511.h @@ -195,13 +195,14 @@ #define ADV7511_I2S_IEC958_DIRECT 3 #define ADV7511_PACKET(p, x) ((p) * 0x20 + (x)) -#define ADV7511_PACKET_SDP(x) ADV7511_PACKET(0, x) +#define ADV7511_PACKET_SPD(x) ADV7511_PACKET(0, x) #define ADV7511_PACKET_MPEG(x) ADV7511_PACKET(1, x) #define ADV7511_PACKET_ACP(x) ADV7511_PACKET(2, x) #define ADV7511_PACKET_ISRC1(x) ADV7511_PACKET(3, x) #define ADV7511_PACKET_ISRC2(x) ADV7511_PACKET(4, x) #define ADV7511_PACKET_GM(x) ADV7511_PACKET(5, x) -#define ADV7511_PACKET_SPARE(x) ADV7511_PACKET(6, x) +#define ADV7511_PACKET_SPARE1(x) ADV7511_PACKET(6, x) +#define ADV7511_PACKET_SPARE2(x) ADV7511_PACKET(7, x) #define ADV7511_REG_CEC_TX_FRAME_HDR 0x00 #define ADV7511_REG_CEC_TX_FRAME_DATA0 0x01 @@ -348,6 +349,7 @@ struct adv7511 { struct i2c_client *i2c_cec; struct regmap *regmap; + struct regmap *regmap_packet; struct regmap *regmap_cec; enum drm_connector_status status; bool powered; @@ -399,8 +401,8 @@ static inline struct adv7511 *bridge_to_adv7511(struct drm_bridge *bridge) } #ifdef CONFIG_DRM_I2C_ADV7511_CEC -int adv7511_cec_init(struct drm_connector *connector, - struct drm_bridge *bridge); +int adv7511_cec_init(struct drm_bridge *bridge, + struct drm_connector *connector); int adv7511_cec_enable(struct drm_bridge *bridge, bool enable); int adv7511_cec_log_addr(struct drm_bridge *bridge, u8 addr); int adv7511_cec_transmit(struct drm_bridge *bridge, u8 attempts, @@ 
-424,12 +426,12 @@ int adv7533_attach_dsi(struct adv7511 *adv); int adv7533_parse_dt(struct device_node *np, struct adv7511 *adv); #ifdef CONFIG_DRM_I2C_ADV7511_AUDIO -int adv7511_hdmi_audio_startup(struct drm_connector *connector, - struct drm_bridge *bridge); -void adv7511_hdmi_audio_shutdown(struct drm_connector *connector, - struct drm_bridge *bridge); -int adv7511_hdmi_audio_prepare(struct drm_connector *connector, - struct drm_bridge *bridge, +int adv7511_hdmi_audio_startup(struct drm_bridge *bridge, + struct drm_connector *connector); +void adv7511_hdmi_audio_shutdown(struct drm_bridge *bridge, + struct drm_connector *connector); +int adv7511_hdmi_audio_prepare(struct drm_bridge *bridge, + struct drm_connector *connector, struct hdmi_codec_daifmt *fmt, struct hdmi_codec_params *hparms); #else /*CONFIG_DRM_I2C_ADV7511_AUDIO */ diff --git a/drivers/gpu/drm/bridge/adv7511/adv7511_audio.c b/drivers/gpu/drm/bridge/adv7511/adv7511_audio.c index 915c3b967216..87e7e820810a 100644 --- a/drivers/gpu/drm/bridge/adv7511/adv7511_audio.c +++ b/drivers/gpu/drm/bridge/adv7511/adv7511_audio.c @@ -12,6 +12,8 @@ #include <sound/soc.h> #include <linux/of_graph.h> +#include <drm/display/drm_hdmi_state_helper.h> + #include "adv7511.h" static void adv7511_calc_cts_n(unsigned int f_tmds, unsigned int fs, @@ -55,8 +57,8 @@ static int adv7511_update_cts_n(struct adv7511 *adv7511) return 0; } -int adv7511_hdmi_audio_prepare(struct drm_connector *connector, - struct drm_bridge *bridge, +int adv7511_hdmi_audio_prepare(struct drm_bridge *bridge, + struct drm_connector *connector, struct hdmi_codec_daifmt *fmt, struct hdmi_codec_params *hparms) { @@ -155,21 +157,12 @@ int adv7511_hdmi_audio_prepare(struct drm_connector *connector, regmap_update_bits(adv7511->regmap, ADV7511_REG_I2C_FREQ_ID_CFG, ADV7511_I2C_FREQ_ID_CFG_RATE_MASK, rate << 4); - /* send current Audio infoframe values while updating */ - regmap_update_bits(adv7511->regmap, ADV7511_REG_INFOFRAME_UPDATE, - BIT(5), BIT(5)); - - regmap_write(adv7511->regmap, ADV7511_REG_AUDIO_INFOFRAME(0), 0x1); - - /* use Audio infoframe updated info */ - regmap_update_bits(adv7511->regmap, ADV7511_REG_INFOFRAME_UPDATE, - BIT(5), 0); - - return 0; + return drm_atomic_helper_connector_hdmi_update_audio_infoframe(connector, + &hparms->cea); } -int adv7511_hdmi_audio_startup(struct drm_connector *connector, - struct drm_bridge *bridge) +int adv7511_hdmi_audio_startup(struct drm_bridge *bridge, + struct drm_connector *connector) { struct adv7511 *adv7511 = bridge_to_adv7511(bridge); @@ -188,15 +181,9 @@ int adv7511_hdmi_audio_startup(struct drm_connector *connector, /* not copyrighted */ regmap_update_bits(adv7511->regmap, ADV7511_REG_AUDIO_CFG1, BIT(5), BIT(5)); - /* enable audio infoframes */ - regmap_update_bits(adv7511->regmap, ADV7511_REG_PACKET_ENABLE1, - BIT(3), BIT(3)); /* AV mute disable */ regmap_update_bits(adv7511->regmap, ADV7511_REG_GC(0), BIT(7) | BIT(6), BIT(7)); - /* use Audio infoframe updated info */ - regmap_update_bits(adv7511->regmap, ADV7511_REG_INFOFRAME_UPDATE, - BIT(5), 0); /* enable SPDIF receiver */ if (adv7511->audio_source == ADV7511_AUDIO_SOURCE_SPDIF) @@ -206,12 +193,14 @@ int adv7511_hdmi_audio_startup(struct drm_connector *connector, return 0; } -void adv7511_hdmi_audio_shutdown(struct drm_connector *connector, - struct drm_bridge *bridge) +void adv7511_hdmi_audio_shutdown(struct drm_bridge *bridge, + struct drm_connector *connector) { struct adv7511 *adv7511 = bridge_to_adv7511(bridge); if (adv7511->audio_source == 
ADV7511_AUDIO_SOURCE_SPDIF) regmap_update_bits(adv7511->regmap, ADV7511_REG_AUDIO_CONFIG, BIT(7), 0); + + drm_atomic_helper_connector_hdmi_clear_audio_infoframe(connector); } diff --git a/drivers/gpu/drm/bridge/adv7511/adv7511_cec.c b/drivers/gpu/drm/bridge/adv7511/adv7511_cec.c index 822265426f58..8ecbc25dc647 100644 --- a/drivers/gpu/drm/bridge/adv7511/adv7511_cec.c +++ b/drivers/gpu/drm/bridge/adv7511/adv7511_cec.c @@ -346,8 +346,8 @@ static int adv7511_cec_parse_dt(struct device *dev, struct adv7511 *adv7511) return 0; } -int adv7511_cec_init(struct drm_connector *connector, - struct drm_bridge *bridge) +int adv7511_cec_init(struct drm_bridge *bridge, + struct drm_connector *connector) { struct adv7511 *adv7511 = bridge_to_adv7511(bridge); struct device *dev = &adv7511->i2c_main->dev; diff --git a/drivers/gpu/drm/bridge/adv7511/adv7511_drv.c b/drivers/gpu/drm/bridge/adv7511/adv7511_drv.c index 9df18a8f2e37..b9be86541307 100644 --- a/drivers/gpu/drm/bridge/adv7511/adv7511_drv.c +++ b/drivers/gpu/drm/bridge/adv7511/adv7511_drv.c @@ -132,6 +132,13 @@ static const struct regmap_config adv7511_regmap_config = { .volatile_reg = adv7511_register_volatile, }; +static const struct regmap_config adv7511_packet_config = { + .reg_bits = 8, + .val_bits = 8, + + .max_register = 0xff, +}; + /* ----------------------------------------------------------------------------- * Hardware configuration */ @@ -864,7 +871,8 @@ static int adv7511_bridge_attach(struct drm_bridge *bridge, return ret; } -static enum drm_connector_status adv7511_bridge_detect(struct drm_bridge *bridge) +static enum drm_connector_status +adv7511_bridge_detect(struct drm_bridge *bridge, struct drm_connector *connector) { struct adv7511 *adv = bridge_to_adv7511(bridge); @@ -885,9 +893,18 @@ static int adv7511_bridge_hdmi_clear_infoframe(struct drm_bridge *bridge, struct adv7511 *adv7511 = bridge_to_adv7511(bridge); switch (type) { + case HDMI_INFOFRAME_TYPE_AUDIO: + adv7511_packet_disable(adv7511, ADV7511_PACKET_ENABLE_AUDIO_INFOFRAME); + break; case HDMI_INFOFRAME_TYPE_AVI: adv7511_packet_disable(adv7511, ADV7511_PACKET_ENABLE_AVI_INFOFRAME); break; + case HDMI_INFOFRAME_TYPE_SPD: + adv7511_packet_disable(adv7511, ADV7511_PACKET_ENABLE_SPD); + break; + case HDMI_INFOFRAME_TYPE_VENDOR: + adv7511_packet_disable(adv7511, ADV7511_PACKET_ENABLE_SPARE1); + break; default: drm_dbg_driver(adv7511->bridge.dev, "Unsupported HDMI InfoFrame %x\n", type); break; @@ -902,16 +919,52 @@ static int adv7511_bridge_hdmi_write_infoframe(struct drm_bridge *bridge, { struct adv7511 *adv7511 = bridge_to_adv7511(bridge); - adv7511_bridge_hdmi_clear_infoframe(bridge, type); - switch (type) { + case HDMI_INFOFRAME_TYPE_AUDIO: + /* send current Audio infoframe values while updating */ + regmap_update_bits(adv7511->regmap, ADV7511_REG_INFOFRAME_UPDATE, + BIT(5), BIT(5)); + + /* The Audio infoframe id is not configurable */ + regmap_bulk_write(adv7511->regmap, ADV7511_REG_AUDIO_INFOFRAME_VERSION, + buffer + 1, len - 1); + + /* use Audio infoframe updated info */ + regmap_update_bits(adv7511->regmap, ADV7511_REG_INFOFRAME_UPDATE, + BIT(5), 0); + + adv7511_packet_enable(adv7511, ADV7511_PACKET_ENABLE_AUDIO_INFOFRAME); + break; case HDMI_INFOFRAME_TYPE_AVI: + /* send current AVI infoframe values while updating */ + regmap_update_bits(adv7511->regmap, ADV7511_REG_INFOFRAME_UPDATE, + BIT(6), BIT(6)); + /* The AVI infoframe id is not configurable */ regmap_bulk_write(adv7511->regmap, ADV7511_REG_AVI_INFOFRAME_VERSION, buffer + 1, len - 1); + 
regmap_write(adv7511->regmap, ADV7511_REG_AUDIO_INFOFRAME_LENGTH, 0x2); + regmap_write(adv7511->regmap, ADV7511_REG_AUDIO_INFOFRAME(1), 0x1); + + /* use AVI infoframe updated info */ + regmap_update_bits(adv7511->regmap, ADV7511_REG_INFOFRAME_UPDATE, + BIT(6), 0); + adv7511_packet_enable(adv7511, ADV7511_PACKET_ENABLE_AVI_INFOFRAME); break; + case HDMI_INFOFRAME_TYPE_SPD: + adv7511_packet_disable(adv7511, ADV7511_PACKET_ENABLE_SPD); + regmap_bulk_write(adv7511->regmap_packet, ADV7511_PACKET_SPD(0), + buffer, len); + adv7511_packet_enable(adv7511, ADV7511_PACKET_ENABLE_SPD); + break; + case HDMI_INFOFRAME_TYPE_VENDOR: + adv7511_packet_disable(adv7511, ADV7511_PACKET_ENABLE_SPARE1); + regmap_bulk_write(adv7511->regmap_packet, ADV7511_PACKET_SPARE1(0), + buffer, len); + adv7511_packet_enable(adv7511, ADV7511_PACKET_ENABLE_SPARE1); + break; default: drm_dbg_driver(adv7511->bridge.dev, "Unsupported HDMI InfoFrame %x\n", type); break; @@ -1241,6 +1294,13 @@ static int adv7511_probe(struct i2c_client *i2c) goto err_i2c_unregister_edid; } + adv7511->regmap_packet = devm_regmap_init_i2c(adv7511->i2c_packet, + &adv7511_packet_config); + if (IS_ERR(adv7511->regmap_packet)) { + ret = PTR_ERR(adv7511->regmap_packet); + goto err_i2c_unregister_packet; + } + regmap_write(adv7511->regmap, ADV7511_REG_PACKET_I2C_ADDR, adv7511->i2c_packet->addr << 1); @@ -1262,9 +1322,7 @@ static int adv7511_probe(struct i2c_client *i2c) adv7511->bridge.ops = DRM_BRIDGE_OP_DETECT | DRM_BRIDGE_OP_EDID | - DRM_BRIDGE_OP_HDMI | - DRM_BRIDGE_OP_HDMI_AUDIO | - DRM_BRIDGE_OP_HDMI_CEC_ADAPTER; + DRM_BRIDGE_OP_HDMI; if (adv7511->i2c_main->irq) adv7511->bridge.ops |= DRM_BRIDGE_OP_HPD; @@ -1272,6 +1330,7 @@ static int adv7511_probe(struct i2c_client *i2c) adv7511->bridge.product = adv7511->info->name; #ifdef CONFIG_DRM_I2C_ADV7511_AUDIO + adv7511->bridge.ops |= DRM_BRIDGE_OP_HDMI_AUDIO; adv7511->bridge.hdmi_audio_dev = dev; adv7511->bridge.hdmi_audio_max_i2s_playback_channels = 2; adv7511->bridge.hdmi_audio_i2s_formats = (SNDRV_PCM_FMTBIT_S16_LE | @@ -1284,6 +1343,7 @@ static int adv7511_probe(struct i2c_client *i2c) #endif #ifdef CONFIG_DRM_I2C_ADV7511_CEC + adv7511->bridge.ops |= DRM_BRIDGE_OP_HDMI_CEC_ADAPTER; adv7511->bridge.hdmi_cec_dev = dev; adv7511->bridge.hdmi_cec_adapter_name = dev_name(dev); adv7511->bridge.hdmi_cec_available_las = ADV7511_MAX_ADDRS; diff --git a/drivers/gpu/drm/bridge/analogix/analogix_dp_core.c b/drivers/gpu/drm/bridge/analogix/analogix_dp_core.c index a1bc3e96dd35..efe534977d12 100644 --- a/drivers/gpu/drm/bridge/analogix/analogix_dp_core.c +++ b/drivers/gpu/drm/bridge/analogix/analogix_dp_core.c @@ -1041,7 +1041,7 @@ static int analogix_dp_bridge_attach(struct drm_bridge *bridge, struct drm_encoder *encoder, enum drm_bridge_attach_flags flags) { - struct analogix_dp_device *dp = bridge->driver_private; + struct analogix_dp_device *dp = to_dp(bridge); struct drm_connector *connector = NULL; int ret = 0; @@ -1125,7 +1125,7 @@ struct drm_crtc *analogix_dp_get_new_crtc(struct analogix_dp_device *dp, static void analogix_dp_bridge_atomic_pre_enable(struct drm_bridge *bridge, struct drm_atomic_state *old_state) { - struct analogix_dp_device *dp = bridge->driver_private; + struct analogix_dp_device *dp = to_dp(bridge); struct drm_crtc *crtc; struct drm_crtc_state *old_crtc_state; @@ -1180,7 +1180,7 @@ out_dp_init: static void analogix_dp_bridge_atomic_enable(struct drm_bridge *bridge, struct drm_atomic_state *old_state) { - struct analogix_dp_device *dp = bridge->driver_private; + struct analogix_dp_device 
*dp = to_dp(bridge); struct drm_crtc *crtc; struct drm_crtc_state *old_crtc_state; int timeout_loop = 0; @@ -1217,7 +1217,7 @@ static void analogix_dp_bridge_atomic_enable(struct drm_bridge *bridge, static void analogix_dp_bridge_disable(struct drm_bridge *bridge) { - struct analogix_dp_device *dp = bridge->driver_private; + struct analogix_dp_device *dp = to_dp(bridge); if (dp->dpms_mode != DRM_MODE_DPMS_ON) return; @@ -1240,7 +1240,7 @@ static void analogix_dp_bridge_disable(struct drm_bridge *bridge) static void analogix_dp_bridge_atomic_disable(struct drm_bridge *bridge, struct drm_atomic_state *old_state) { - struct analogix_dp_device *dp = bridge->driver_private; + struct analogix_dp_device *dp = to_dp(bridge); struct drm_crtc *old_crtc, *new_crtc; struct drm_crtc_state *old_crtc_state = NULL; struct drm_crtc_state *new_crtc_state = NULL; @@ -1278,7 +1278,7 @@ out: static void analogix_dp_bridge_atomic_post_disable(struct drm_bridge *bridge, struct drm_atomic_state *old_state) { - struct analogix_dp_device *dp = bridge->driver_private; + struct analogix_dp_device *dp = to_dp(bridge); struct drm_crtc *crtc; struct drm_crtc_state *new_crtc_state; int ret; @@ -1300,7 +1300,7 @@ static void analogix_dp_bridge_mode_set(struct drm_bridge *bridge, const struct drm_display_mode *orig_mode, const struct drm_display_mode *mode) { - struct analogix_dp_device *dp = bridge->driver_private; + struct analogix_dp_device *dp = to_dp(bridge); struct drm_display_info *display_info = &dp->connector.display_info; struct video_info *video = &dp->video_info; struct device_node *dp_node = dp->dev->of_node; @@ -1385,25 +1385,6 @@ static const struct drm_bridge_funcs analogix_dp_bridge_funcs = { .attach = analogix_dp_bridge_attach, }; -static int analogix_dp_create_bridge(struct drm_device *drm_dev, - struct analogix_dp_device *dp) -{ - struct drm_bridge *bridge; - - bridge = devm_kzalloc(drm_dev->dev, sizeof(*bridge), GFP_KERNEL); - if (!bridge) { - DRM_ERROR("failed to allocate for drm bridge\n"); - return -ENOMEM; - } - - dp->bridge = bridge; - - bridge->driver_private = dp; - bridge->funcs = &analogix_dp_bridge_funcs; - - return drm_bridge_attach(dp->encoder, bridge, NULL, 0); -} - static int analogix_dp_dt_parse_pdata(struct analogix_dp_device *dp) { struct device_node *dp_node = dp->dev->of_node; @@ -1491,9 +1472,10 @@ analogix_dp_probe(struct device *dev, struct analogix_dp_plat_data *plat_data) return ERR_PTR(-EINVAL); } - dp = devm_kzalloc(dev, sizeof(struct analogix_dp_device), GFP_KERNEL); - if (!dp) - return ERR_PTR(-ENOMEM); + dp = devm_drm_bridge_alloc(dev, struct analogix_dp_device, bridge, + &analogix_dp_bridge_funcs); + if (IS_ERR(dp)) + return ERR_CAST(dp); dp->dev = &pdev->dev; dp->dpms_mode = DRM_MODE_DPMS_OFF; @@ -1643,7 +1625,7 @@ int analogix_dp_bind(struct analogix_dp_device *dp, struct drm_device *drm_dev) return ret; } - ret = analogix_dp_create_bridge(drm_dev, dp); + ret = drm_bridge_attach(dp->encoder, &dp->bridge, NULL, 0); if (ret) { DRM_ERROR("failed to create bridge (%d)\n", ret); goto err_unregister_aux; @@ -1660,7 +1642,7 @@ EXPORT_SYMBOL_GPL(analogix_dp_bind); void analogix_dp_unbind(struct analogix_dp_device *dp) { - analogix_dp_bridge_disable(dp->bridge); + analogix_dp_bridge_disable(&dp->bridge); dp->connector.funcs->destroy(&dp->connector); drm_panel_unprepare(dp->plat_data->panel); diff --git a/drivers/gpu/drm/bridge/analogix/analogix_dp_core.h b/drivers/gpu/drm/bridge/analogix/analogix_dp_core.h index 2b54120ba4a3..b86e93f30ed6 100644 --- 
a/drivers/gpu/drm/bridge/analogix/analogix_dp_core.h +++ b/drivers/gpu/drm/bridge/analogix/analogix_dp_core.h @@ -11,6 +11,7 @@ #include <drm/display/drm_dp_helper.h> #include <drm/drm_crtc.h> +#include <drm/drm_bridge.h> #define DP_TIMEOUT_LOOP_COUNT 100 #define MAX_CR_LOOP 5 @@ -154,7 +155,7 @@ struct analogix_dp_device { struct device *dev; struct drm_device *drm_dev; struct drm_connector connector; - struct drm_bridge *bridge; + struct drm_bridge bridge; struct drm_dp_aux aux; struct clk *clock; unsigned int irq; diff --git a/drivers/gpu/drm/bridge/analogix/anx7625.c b/drivers/gpu/drm/bridge/analogix/anx7625.c index 0ac4a82c5a6e..6f3fdcb6afdb 100644 --- a/drivers/gpu/drm/bridge/analogix/anx7625.c +++ b/drivers/gpu/drm/bridge/analogix/anx7625.c @@ -2448,7 +2448,7 @@ anx7625_audio_update_connector_status(struct anx7625_data *ctx, enum drm_connector_status status); static enum drm_connector_status -anx7625_bridge_detect(struct drm_bridge *bridge) +anx7625_bridge_detect(struct drm_bridge *bridge, struct drm_connector *connector) { struct anx7625_data *ctx = bridge_to_anx7625(bridge); struct device *dev = ctx->dev; @@ -2604,6 +2604,7 @@ static int anx7625_link_bridge(struct drm_dp_aux *aux) platform->bridge.type = platform->pdata.panel_bridge ? DRM_MODE_CONNECTOR_eDP : DRM_MODE_CONNECTOR_DisplayPort; + platform->bridge.support_hdcp = true; drm_bridge_add(&platform->bridge); @@ -2677,7 +2678,7 @@ static int anx7625_i2c_probe(struct i2c_client *client) ret = devm_request_threaded_irq(dev, platform->pdata.intp_irq, NULL, anx7625_intr_hpd_isr, IRQF_TRIGGER_FALLING | - IRQF_ONESHOT, + IRQF_ONESHOT | IRQF_NO_AUTOEN, "anx7625-intp", platform); if (ret) { DRM_DEV_ERROR(dev, "fail to request irq\n"); @@ -2746,8 +2747,10 @@ static int anx7625_i2c_probe(struct i2c_client *client) } /* Add work function */ - if (platform->pdata.intp_irq) + if (platform->pdata.intp_irq) { + enable_irq(platform->pdata.intp_irq); queue_work(platform->workqueue, &platform->work); + } if (platform->pdata.audio_en) anx7625_register_audio(dev, platform); diff --git a/drivers/gpu/drm/bridge/aux-bridge.c b/drivers/gpu/drm/bridge/aux-bridge.c index b63304d3a80f..b3e4cdff61d6 100644 --- a/drivers/gpu/drm/bridge/aux-bridge.c +++ b/drivers/gpu/drm/bridge/aux-bridge.c @@ -18,6 +18,7 @@ static void drm_aux_bridge_release(struct device *dev) { struct auxiliary_device *adev = to_auxiliary_dev(dev); + of_node_put(dev->of_node); ida_free(&drm_aux_bridge_ida, adev->id); kfree(adev); @@ -65,6 +66,7 @@ int drm_aux_bridge_register(struct device *parent) ret = auxiliary_device_init(adev); if (ret) { + of_node_put(adev->dev.of_node); ida_free(&drm_aux_bridge_ida, adev->id); kfree(adev); return ret; diff --git a/drivers/gpu/drm/bridge/aux-hpd-bridge.c b/drivers/gpu/drm/bridge/aux-hpd-bridge.c index e579f947e15b..2e9c702c7087 100644 --- a/drivers/gpu/drm/bridge/aux-hpd-bridge.c +++ b/drivers/gpu/drm/bridge/aux-hpd-bridge.c @@ -65,10 +65,11 @@ struct auxiliary_device *devm_drm_dp_hpd_bridge_alloc(struct device *parent, str adev->id = ret; adev->name = "dp_hpd_bridge"; adev->dev.parent = parent; - adev->dev.of_node = of_node_get(parent->of_node); adev->dev.release = drm_aux_hpd_bridge_release; adev->dev.platform_data = of_node_get(np); + device_set_of_node_from_dev(&adev->dev, parent); + ret = auxiliary_device_init(adev); if (ret) { of_node_put(adev->dev.platform_data); diff --git a/drivers/gpu/drm/bridge/cadence/Kconfig b/drivers/gpu/drm/bridge/cadence/Kconfig index cced81633ddc..f1d8a8a151d8 100644 --- a/drivers/gpu/drm/bridge/cadence/Kconfig 
+++ b/drivers/gpu/drm/bridge/cadence/Kconfig @@ -6,6 +6,7 @@ config DRM_CDNS_DSI select DRM_PANEL_BRIDGE select GENERIC_PHY select GENERIC_PHY_MIPI_DPHY + select VIDEOMODE_HELPERS depends on OF help Support Cadence DPI to DSI bridge. This is an internal diff --git a/drivers/gpu/drm/bridge/cadence/cdns-dsi-core.c b/drivers/gpu/drm/bridge/cadence/cdns-dsi-core.c index a57ca8c3bdae..09b289f0fcbf 100644 --- a/drivers/gpu/drm/bridge/cadence/cdns-dsi-core.c +++ b/drivers/gpu/drm/bridge/cadence/cdns-dsi-core.c @@ -9,6 +9,7 @@ #include <drm/drm_drv.h> #include <drm/drm_probe_helper.h> #include <video/mipi_display.h> +#include <video/videomode.h> #include <linux/clk.h> #include <linux/interrupt.h> @@ -417,7 +418,8 @@ #define DSI_OUTPUT_PORT 0 #define DSI_INPUT_PORT(inputid) (1 + (inputid)) -#define DSI_HBP_FRAME_OVERHEAD 12 +#define DSI_HBP_FRAME_PULSE_OVERHEAD 12 +#define DSI_HBP_FRAME_EVENT_OVERHEAD 16 #define DSI_HSA_FRAME_OVERHEAD 14 #define DSI_HFP_FRAME_OVERHEAD 6 #define DSI_HSS_VSS_VSE_FRAME_OVERHEAD 4 @@ -452,15 +454,6 @@ bridge_to_cdns_dsi_input(struct drm_bridge *bridge) return container_of(bridge, struct cdns_dsi_input, bridge); } -static unsigned int mode_to_dpi_hfp(const struct drm_display_mode *mode, - bool mode_valid_check) -{ - if (mode_valid_check) - return mode->hsync_start - mode->hdisplay; - - return mode->crtc_hsync_start - mode->crtc_hdisplay; -} - static unsigned int dpi_to_dsi_timing(unsigned int dpi_timing, unsigned int dpi_bpp, unsigned int dsi_pkt_overhead) @@ -476,145 +469,77 @@ static unsigned int dpi_to_dsi_timing(unsigned int dpi_timing, } static int cdns_dsi_mode2cfg(struct cdns_dsi *dsi, - const struct drm_display_mode *mode, - struct cdns_dsi_cfg *dsi_cfg, - bool mode_valid_check) + const struct videomode *vm, + struct cdns_dsi_cfg *dsi_cfg) { struct cdns_dsi_output *output = &dsi->output; - unsigned int tmp; - bool sync_pulse = false; + u32 dpi_hsa, dpi_hbp, dpi_hfp, dpi_hact; + bool sync_pulse; int bpp; + dpi_hsa = vm->hsync_len; + dpi_hbp = vm->hback_porch; + dpi_hfp = vm->hfront_porch; + dpi_hact = vm->hactive; + memset(dsi_cfg, 0, sizeof(*dsi_cfg)); - if (output->dev->mode_flags & MIPI_DSI_MODE_VIDEO_SYNC_PULSE) - sync_pulse = true; + sync_pulse = output->dev->mode_flags & MIPI_DSI_MODE_VIDEO_SYNC_PULSE; bpp = mipi_dsi_pixel_format_to_bpp(output->dev->format); - if (mode_valid_check) - tmp = mode->htotal - - (sync_pulse ? mode->hsync_end : mode->hsync_start); - else - tmp = mode->crtc_htotal - - (sync_pulse ? - mode->crtc_hsync_end : mode->crtc_hsync_start); - - dsi_cfg->hbp = dpi_to_dsi_timing(tmp, bpp, DSI_HBP_FRAME_OVERHEAD); - if (sync_pulse) { - if (mode_valid_check) - tmp = mode->hsync_end - mode->hsync_start; - else - tmp = mode->crtc_hsync_end - mode->crtc_hsync_start; + dsi_cfg->hbp = dpi_to_dsi_timing(dpi_hbp, bpp, + DSI_HBP_FRAME_PULSE_OVERHEAD); - dsi_cfg->hsa = dpi_to_dsi_timing(tmp, bpp, + dsi_cfg->hsa = dpi_to_dsi_timing(dpi_hsa, bpp, DSI_HSA_FRAME_OVERHEAD); - } - - dsi_cfg->hact = dpi_to_dsi_timing(mode_valid_check ? 
- mode->hdisplay : mode->crtc_hdisplay, - bpp, 0); - dsi_cfg->hfp = dpi_to_dsi_timing(mode_to_dpi_hfp(mode, mode_valid_check), - bpp, DSI_HFP_FRAME_OVERHEAD); - - return 0; -} - -static int cdns_dsi_adjust_phy_config(struct cdns_dsi *dsi, - struct cdns_dsi_cfg *dsi_cfg, - struct phy_configure_opts_mipi_dphy *phy_cfg, - const struct drm_display_mode *mode, - bool mode_valid_check) -{ - struct cdns_dsi_output *output = &dsi->output; - unsigned long long dlane_bps; - unsigned long adj_dsi_htotal; - unsigned long dsi_htotal; - unsigned long dpi_htotal; - unsigned long dpi_hz; - unsigned int dsi_hfp_ext; - unsigned int lanes = output->dev->lanes; - - dsi_htotal = dsi_cfg->hbp + DSI_HBP_FRAME_OVERHEAD; - if (output->dev->mode_flags & MIPI_DSI_MODE_VIDEO_SYNC_PULSE) - dsi_htotal += dsi_cfg->hsa + DSI_HSA_FRAME_OVERHEAD; + } else { + dsi_cfg->hbp = dpi_to_dsi_timing(dpi_hbp + dpi_hsa, bpp, + DSI_HBP_FRAME_EVENT_OVERHEAD); - dsi_htotal += dsi_cfg->hact; - dsi_htotal += dsi_cfg->hfp + DSI_HFP_FRAME_OVERHEAD; - - /* - * Make sure DSI htotal is aligned on a lane boundary when calculating - * the expected data rate. This is done by extending HFP in case of - * misalignment. - */ - adj_dsi_htotal = dsi_htotal; - if (dsi_htotal % lanes) - adj_dsi_htotal += lanes - (dsi_htotal % lanes); + dsi_cfg->hsa = 0; + } - dpi_hz = (mode_valid_check ? mode->clock : mode->crtc_clock) * 1000; - dlane_bps = (unsigned long long)dpi_hz * adj_dsi_htotal; + dsi_cfg->hact = dpi_to_dsi_timing(dpi_hact, bpp, 0); - /* data rate in bytes/sec is not an integer, refuse the mode. */ - dpi_htotal = mode_valid_check ? mode->htotal : mode->crtc_htotal; - if (do_div(dlane_bps, lanes * dpi_htotal)) - return -EINVAL; + dsi_cfg->hfp = dpi_to_dsi_timing(dpi_hfp, bpp, DSI_HFP_FRAME_OVERHEAD); - /* data rate was in bytes/sec, convert to bits/sec. */ - phy_cfg->hs_clk_rate = dlane_bps * 8; + dsi_cfg->htotal = dsi_cfg->hact + dsi_cfg->hfp + DSI_HFP_FRAME_OVERHEAD; - dsi_hfp_ext = adj_dsi_htotal - dsi_htotal; - dsi_cfg->hfp += dsi_hfp_ext; - dsi_cfg->htotal = dsi_htotal + dsi_hfp_ext; + if (sync_pulse) { + dsi_cfg->htotal += dsi_cfg->hbp + DSI_HBP_FRAME_PULSE_OVERHEAD; + dsi_cfg->htotal += dsi_cfg->hsa + DSI_HSA_FRAME_OVERHEAD; + } else { + dsi_cfg->htotal += dsi_cfg->hbp + DSI_HBP_FRAME_EVENT_OVERHEAD; + } return 0; } static int cdns_dsi_check_conf(struct cdns_dsi *dsi, - const struct drm_display_mode *mode, - struct cdns_dsi_cfg *dsi_cfg, - bool mode_valid_check) + const struct videomode *vm, + struct cdns_dsi_cfg *dsi_cfg) { struct cdns_dsi_output *output = &dsi->output; struct phy_configure_opts_mipi_dphy *phy_cfg = &output->phy_opts.mipi_dphy; - unsigned long dsi_hss_hsa_hse_hbp; unsigned int nlanes = output->dev->lanes; - int mode_clock = (mode_valid_check ? 
mode->clock : mode->crtc_clock); int ret; - ret = cdns_dsi_mode2cfg(dsi, mode, dsi_cfg, mode_valid_check); + ret = cdns_dsi_mode2cfg(dsi, vm, dsi_cfg); if (ret) return ret; - ret = phy_mipi_dphy_get_default_config(mode_clock * 1000, + ret = phy_mipi_dphy_get_default_config(vm->pixelclock, mipi_dsi_pixel_format_to_bpp(output->dev->format), nlanes, phy_cfg); if (ret) return ret; - ret = cdns_dsi_adjust_phy_config(dsi, dsi_cfg, phy_cfg, mode, mode_valid_check); - if (ret) - return ret; - ret = phy_validate(dsi->dphy, PHY_MODE_MIPI_DPHY, 0, &output->phy_opts); if (ret) return ret; - dsi_hss_hsa_hse_hbp = dsi_cfg->hbp + DSI_HBP_FRAME_OVERHEAD; - if (output->dev->mode_flags & MIPI_DSI_MODE_VIDEO_SYNC_PULSE) - dsi_hss_hsa_hse_hbp += dsi_cfg->hsa + DSI_HSA_FRAME_OVERHEAD; - - /* - * Make sure DPI(HFP) > DSI(HSS+HSA+HSE+HBP) to guarantee that the FIFO - * is empty before we start a receiving a new line on the DPI - * interface. - */ - if ((u64)phy_cfg->hs_clk_rate * - mode_to_dpi_hfp(mode, mode_valid_check) * nlanes < - (u64)dsi_hss_hsa_hse_hbp * - (mode_valid_check ? mode->clock : mode->crtc_clock) * 1000) - return -EINVAL; - return 0; } @@ -644,8 +569,7 @@ cdns_dsi_bridge_mode_valid(struct drm_bridge *bridge, struct cdns_dsi_input *input = bridge_to_cdns_dsi_input(bridge); struct cdns_dsi *dsi = input_to_dsi(input); struct cdns_dsi_output *output = &dsi->output; - struct cdns_dsi_cfg dsi_cfg; - int bpp, ret; + int bpp; /* * VFP_DSI should be less than VFP_DPI and VFP_DSI should be at @@ -663,10 +587,6 @@ cdns_dsi_bridge_mode_valid(struct drm_bridge *bridge, if ((mode->hdisplay * bpp) % 32) return MODE_H_ILLEGAL; - ret = cdns_dsi_check_conf(dsi, mode, &dsi_cfg, true); - if (ret) - return MODE_BAD; - return MODE_OK; } @@ -882,7 +802,13 @@ static void cdns_dsi_bridge_atomic_pre_enable(struct drm_bridge *bridge, tx_byte_period = DIV_ROUND_DOWN_ULL((u64)NSEC_PER_SEC * 8, phy_cfg->hs_clk_rate); - reg_wakeup = (phy_cfg->hs_prepare + phy_cfg->hs_zero) / tx_byte_period; + + /* + * Estimated time [in clock cycles] to perform LP->HS on D-PHY. + * It is not clear how to calculate this, so for now, + * set it to 1/10 of the total number of clocks in a line. 
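+	 *
+	 * As a rough illustration with assumed numbers (not taken from this
+	 * patch): for dsi_cfg.htotal = 2200 byte clocks per line and
+	 * nlanes = 4, this heuristic yields reg_wakeup = 2200 / 4 / 10 = 55
+	 * byte-clock cycles.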
+ */ + reg_wakeup = dsi_cfg.htotal / nlanes / 10; writel(REG_WAKEUP_TIME(reg_wakeup) | REG_LINE_DURATION(tmp), dsi->regs + VID_DPHY_TIME); @@ -989,6 +915,28 @@ static u32 *cdns_dsi_bridge_get_input_bus_fmts(struct drm_bridge *bridge, return input_fmts; } +static long cdns_dsi_round_pclk(struct cdns_dsi *dsi, unsigned long pclk) +{ + struct cdns_dsi_output *output = &dsi->output; + unsigned int nlanes = output->dev->lanes; + union phy_configure_opts phy_opts = { 0 }; + u32 bitspp; + int ret; + + bitspp = mipi_dsi_pixel_format_to_bpp(output->dev->format); + + ret = phy_mipi_dphy_get_default_config(pclk, bitspp, nlanes, + &phy_opts.mipi_dphy); + if (ret) + return ret; + + ret = phy_validate(dsi->dphy, PHY_MODE_MIPI_DPHY, 0, &phy_opts); + if (ret) + return ret; + + return div_u64((u64)phy_opts.mipi_dphy.hs_clk_rate * nlanes, bitspp); +} + static int cdns_dsi_bridge_atomic_check(struct drm_bridge *bridge, struct drm_bridge_state *bridge_state, struct drm_crtc_state *crtc_state, @@ -997,10 +945,32 @@ static int cdns_dsi_bridge_atomic_check(struct drm_bridge *bridge, struct cdns_dsi_input *input = bridge_to_cdns_dsi_input(bridge); struct cdns_dsi *dsi = input_to_dsi(input); struct cdns_dsi_bridge_state *dsi_state = to_cdns_dsi_bridge_state(bridge_state); - const struct drm_display_mode *mode = &crtc_state->mode; + struct drm_display_mode *adjusted_mode = &crtc_state->adjusted_mode; struct cdns_dsi_cfg *dsi_cfg = &dsi_state->dsi_cfg; + struct videomode vm; + long pclk; + + /* cdns-dsi requires negative syncs */ + adjusted_mode->flags &= ~(DRM_MODE_FLAG_PHSYNC | DRM_MODE_FLAG_PVSYNC); + adjusted_mode->flags |= DRM_MODE_FLAG_NHSYNC | DRM_MODE_FLAG_NVSYNC; - return cdns_dsi_check_conf(dsi, mode, dsi_cfg, false); + /* + * The DPHY PLL has quite a coarsely grained clock rate options. See + * what hsclk rate we can achieve based on the pixel clock, convert it + * back to pixel clock, set that to the adjusted_mode->clock. This is + * all in hopes that the CRTC will be able to provide us the requested + * clock, as otherwise the DPI and DSI clocks will be out of sync. + */ + + pclk = cdns_dsi_round_pclk(dsi, adjusted_mode->clock * 1000); + if (pclk < 0) + return (int)pclk; + + adjusted_mode->clock = pclk / 1000; + + drm_display_mode_to_videomode(adjusted_mode, &vm); + + return cdns_dsi_check_conf(dsi, &vm, dsi_cfg); } static struct drm_bridge_state * @@ -1082,10 +1052,6 @@ static int cdns_dsi_attach(struct mipi_dsi_host *host, if (output->dev) return -EBUSY; - /* We do not support burst mode yet. 
*/ - if (dev->mode_flags & MIPI_DSI_MODE_VIDEO_BURST) - return -ENOTSUPP; - /* * The host <-> device link might be described using an OF-graph * representation, in this case we extract the device of_node from @@ -1442,4 +1408,3 @@ MODULE_AUTHOR("Boris Brezillon <boris.brezillon@bootlin.com>"); MODULE_DESCRIPTION("Cadence DSI driver"); MODULE_LICENSE("GPL"); MODULE_ALIAS("platform:cdns-dsi"); - diff --git a/drivers/gpu/drm/bridge/cadence/cdns-mhdp8546-core.c b/drivers/gpu/drm/bridge/cadence/cdns-mhdp8546-core.c index cb5f5a8c539a..38726ae1bf15 100644 --- a/drivers/gpu/drm/bridge/cadence/cdns-mhdp8546-core.c +++ b/drivers/gpu/drm/bridge/cadence/cdns-mhdp8546-core.c @@ -1984,8 +1984,10 @@ static void cdns_mhdp_atomic_enable(struct drm_bridge *bridge, mhdp_state = to_cdns_mhdp_bridge_state(new_state); mhdp_state->current_mode = drm_mode_duplicate(bridge->dev, mode); - if (!mhdp_state->current_mode) - return; + if (!mhdp_state->current_mode) { + ret = -EINVAL; + goto out; + } drm_mode_set_name(mhdp_state->current_mode); @@ -2143,7 +2145,8 @@ static int cdns_mhdp_atomic_check(struct drm_bridge *bridge, return 0; } -static enum drm_connector_status cdns_mhdp_bridge_detect(struct drm_bridge *bridge) +static enum drm_connector_status +cdns_mhdp_bridge_detect(struct drm_bridge *bridge, struct drm_connector *connector) { struct cdns_mhdp_device *mhdp = bridge_to_mhdp(bridge); diff --git a/drivers/gpu/drm/bridge/chrontel-ch7033.c b/drivers/gpu/drm/bridge/chrontel-ch7033.c index ab9274793356..54d49d4882c8 100644 --- a/drivers/gpu/drm/bridge/chrontel-ch7033.c +++ b/drivers/gpu/drm/bridge/chrontel-ch7033.c @@ -215,7 +215,7 @@ static enum drm_connector_status ch7033_connector_detect( { struct ch7033_priv *priv = conn_to_ch7033_priv(connector); - return drm_bridge_detect(priv->next_bridge); + return drm_bridge_detect(priv->next_bridge, connector); } static const struct drm_connector_funcs ch7033_connector_funcs = { diff --git a/drivers/gpu/drm/bridge/display-connector.c b/drivers/gpu/drm/bridge/display-connector.c index badd2c7f91a1..e9f16dbc9535 100644 --- a/drivers/gpu/drm/bridge/display-connector.c +++ b/drivers/gpu/drm/bridge/display-connector.c @@ -40,8 +40,7 @@ static int display_connector_attach(struct drm_bridge *bridge, return flags & DRM_BRIDGE_ATTACH_NO_CONNECTOR ? 
0 : -EINVAL; } -static enum drm_connector_status -display_connector_detect(struct drm_bridge *bridge) +static enum drm_connector_status display_connector_detect(struct drm_bridge *bridge) { struct display_connector *conn = to_display_connector(bridge); @@ -82,6 +81,12 @@ display_connector_detect(struct drm_bridge *bridge) } } +static enum drm_connector_status +display_connector_bridge_detect(struct drm_bridge *bridge, struct drm_connector *connector) +{ + return display_connector_detect(bridge); +} + static const struct drm_edid *display_connector_edid_read(struct drm_bridge *bridge, struct drm_connector *connector) { @@ -103,7 +108,7 @@ static u32 *display_connector_get_output_bus_fmts(struct drm_bridge *bridge, struct drm_connector_state *conn_state, unsigned int *num_output_fmts) { - struct drm_bridge *prev_bridge = drm_bridge_get_prev_bridge(bridge); + struct drm_bridge *prev_bridge __free(drm_bridge_put) = drm_bridge_get_prev_bridge(bridge); struct drm_bridge_state *prev_bridge_state; if (!prev_bridge || !prev_bridge->funcs->atomic_get_output_bus_fmts) { @@ -146,7 +151,7 @@ static u32 *display_connector_get_input_bus_fmts(struct drm_bridge *bridge, u32 output_fmt, unsigned int *num_input_fmts) { - struct drm_bridge *prev_bridge = drm_bridge_get_prev_bridge(bridge); + struct drm_bridge *prev_bridge __free(drm_bridge_put) = drm_bridge_get_prev_bridge(bridge); struct drm_bridge_state *prev_bridge_state; if (!prev_bridge || !prev_bridge->funcs->atomic_get_input_bus_fmts) { @@ -172,7 +177,7 @@ static u32 *display_connector_get_input_bus_fmts(struct drm_bridge *bridge, static const struct drm_bridge_funcs display_connector_bridge_funcs = { .attach = display_connector_attach, - .detect = display_connector_detect, + .detect = display_connector_bridge_detect, .edid_read = display_connector_edid_read, .atomic_get_output_bus_fmts = display_connector_get_output_bus_fmts, .atomic_get_input_bus_fmts = display_connector_get_input_bus_fmts, @@ -368,7 +373,8 @@ static int display_connector_probe(struct platform_device *pdev) if (conn->bridge.ddc) conn->bridge.ops |= DRM_BRIDGE_OP_EDID | DRM_BRIDGE_OP_DETECT; - if (conn->hpd_gpio) + /* Detecting the monitor requires reading DPCD */ + if (conn->hpd_gpio && type != DRM_MODE_CONNECTOR_DisplayPort) conn->bridge.ops |= DRM_BRIDGE_OP_DETECT; if (conn->hpd_irq >= 0) conn->bridge.ops |= DRM_BRIDGE_OP_HPD; diff --git a/drivers/gpu/drm/bridge/imx/imx93-mipi-dsi.c b/drivers/gpu/drm/bridge/imx/imx93-mipi-dsi.c index bea8346515b8..8f7a0d46601a 100644 --- a/drivers/gpu/drm/bridge/imx/imx93-mipi-dsi.c +++ b/drivers/gpu/drm/bridge/imx/imx93-mipi-dsi.c @@ -492,14 +492,12 @@ static int imx93_dsi_get_phy_configure_opts(struct imx93_dsi *dsi, static enum drm_mode_status imx93_dsi_validate_mode(struct imx93_dsi *dsi, const struct drm_display_mode *mode) { - struct drm_bridge *bridge = dw_mipi_dsi_get_bridge(dsi->dmd); + struct drm_bridge *dmd_bridge = dw_mipi_dsi_get_bridge(dsi->dmd); + struct drm_bridge *last_bridge __free(drm_bridge_put) = + drm_bridge_chain_get_last_bridge(dmd_bridge->encoder); - /* Get the last bridge */ - while (drm_bridge_get_next_bridge(bridge)) - bridge = drm_bridge_get_next_bridge(bridge); - - if ((bridge->ops & DRM_BRIDGE_OP_DETECT) && - (bridge->ops & DRM_BRIDGE_OP_EDID)) { + if ((last_bridge->ops & DRM_BRIDGE_OP_DETECT) && + (last_bridge->ops & DRM_BRIDGE_OP_EDID)) { unsigned long pixel_clock_rate = mode->clock * 1000; unsigned long rounded_rate; diff --git a/drivers/gpu/drm/bridge/ite-it6263.c b/drivers/gpu/drm/bridge/ite-it6263.c index 
c4eedf643f39..2eb8fba7016c 100644 --- a/drivers/gpu/drm/bridge/ite-it6263.c +++ b/drivers/gpu/drm/bridge/ite-it6263.c @@ -146,6 +146,7 @@ #define HDMI_COLOR_DEPTH_24 FIELD_PREP(HDMI_COLOR_DEPTH, 4) #define HDMI_REG_PKT_GENERAL_CTRL 0xc6 +#define HDMI_REG_PKT_NULL_CTRL 0xc9 #define HDMI_REG_AVI_INFOFRM_CTRL 0xcd #define ENABLE_PKT BIT(0) #define REPEAT_PKT BIT(1) @@ -154,6 +155,12 @@ * 3) HDMI register bank1: 0x130 ~ 0x1ff (HDMI packet registers) */ +/* NULL packet registers */ +/* Header Byte(HB): n = 0 ~ 2 */ +#define HDMI_REG_PKT_HB(n) (0x138 + (n)) +/* Packet Byte(PB): n = 0 ~ 27(HDMI_MAX_INFOFRAME_SIZE), n = 0 for checksum */ +#define HDMI_REG_PKT_PB(n) (0x13b + (n)) + /* AVI packet registers */ #define HDMI_REG_AVI_DB1 0x158 #define HDMI_REG_AVI_DB2 0x159 @@ -224,7 +231,9 @@ static bool it6263_hdmi_writeable_reg(struct device *dev, unsigned int reg) case HDMI_REG_HDMI_MODE: case HDMI_REG_GCP: case HDMI_REG_PKT_GENERAL_CTRL: + case HDMI_REG_PKT_NULL_CTRL: case HDMI_REG_AVI_INFOFRM_CTRL: + case HDMI_REG_PKT_HB(0) ... HDMI_REG_PKT_PB(HDMI_MAX_INFOFRAME_SIZE): case HDMI_REG_AVI_DB1: case HDMI_REG_AVI_DB2: case HDMI_REG_AVI_DB3: @@ -693,7 +702,8 @@ static int it6263_bridge_attach(struct drm_bridge *bridge, return 0; } -static enum drm_connector_status it6263_bridge_detect(struct drm_bridge *bridge) +static enum drm_connector_status +it6263_bridge_detect(struct drm_bridge *bridge, struct drm_connector *connector) { struct it6263 *it = bridge_to_it6263(bridge); @@ -754,10 +764,16 @@ static int it6263_hdmi_clear_infoframe(struct drm_bridge *bridge, { struct it6263 *it = bridge_to_it6263(bridge); - if (type == HDMI_INFOFRAME_TYPE_AVI) + switch (type) { + case HDMI_INFOFRAME_TYPE_AVI: regmap_write(it->hdmi_regmap, HDMI_REG_AVI_INFOFRM_CTRL, 0); - else + break; + case HDMI_INFOFRAME_TYPE_VENDOR: + regmap_write(it->hdmi_regmap, HDMI_REG_PKT_NULL_CTRL, 0); + break; + default: dev_dbg(it->dev, "unsupported HDMI infoframe 0x%x\n", type); + } return 0; } @@ -769,27 +785,36 @@ static int it6263_hdmi_write_infoframe(struct drm_bridge *bridge, struct it6263 *it = bridge_to_it6263(bridge); struct regmap *regmap = it->hdmi_regmap; - if (type != HDMI_INFOFRAME_TYPE_AVI) { + switch (type) { + case HDMI_INFOFRAME_TYPE_AVI: + /* write the first AVI infoframe data byte chunk(DB1-DB5) */ + regmap_bulk_write(regmap, HDMI_REG_AVI_DB1, + &buffer[HDMI_INFOFRAME_HEADER_SIZE], + HDMI_AVI_DB_CHUNK1_SIZE); + + /* write the second AVI infoframe data byte chunk(DB6-DB13) */ + regmap_bulk_write(regmap, HDMI_REG_AVI_DB6, + &buffer[HDMI_INFOFRAME_HEADER_SIZE + + HDMI_AVI_DB_CHUNK1_SIZE], + HDMI_AVI_DB_CHUNK2_SIZE); + + /* write checksum */ + regmap_write(regmap, HDMI_REG_AVI_CSUM, buffer[3]); + + regmap_write(regmap, HDMI_REG_AVI_INFOFRM_CTRL, + ENABLE_PKT | REPEAT_PKT); + break; + case HDMI_INFOFRAME_TYPE_VENDOR: + /* write header and payload */ + regmap_bulk_write(regmap, HDMI_REG_PKT_HB(0), buffer, len); + + regmap_write(regmap, HDMI_REG_PKT_NULL_CTRL, + ENABLE_PKT | REPEAT_PKT); + break; + default: dev_dbg(it->dev, "unsupported HDMI infoframe 0x%x\n", type); - return 0; } - /* write the first AVI infoframe data byte chunk(DB1-DB5) */ - regmap_bulk_write(regmap, HDMI_REG_AVI_DB1, - &buffer[HDMI_INFOFRAME_HEADER_SIZE], - HDMI_AVI_DB_CHUNK1_SIZE); - - /* write the second AVI infoframe data byte chunk(DB6-DB13) */ - regmap_bulk_write(regmap, HDMI_REG_AVI_DB6, - &buffer[HDMI_INFOFRAME_HEADER_SIZE + - HDMI_AVI_DB_CHUNK1_SIZE], - HDMI_AVI_DB_CHUNK2_SIZE); - - /* write checksum */ - regmap_write(regmap, HDMI_REG_AVI_CSUM, 
buffer[3]); - - regmap_write(regmap, HDMI_REG_AVI_INFOFRM_CTRL, ENABLE_PKT | REPEAT_PKT); - return 0; } diff --git a/drivers/gpu/drm/bridge/ite-it6505.c b/drivers/gpu/drm/bridge/ite-it6505.c index b0dc9280d870..a094803ba7aa 100644 --- a/drivers/gpu/drm/bridge/ite-it6505.c +++ b/drivers/gpu/drm/bridge/ite-it6505.c @@ -21,7 +21,7 @@ #include <linux/wait.h> #include <linux/bitfield.h> -#include <crypto/hash.h> +#include <crypto/sha1.h> #include <drm/display/drm_dp_helper.h> #include <drm/display/drm_hdcp_helper.h> @@ -2107,35 +2107,6 @@ static void it6505_hdcp_part1_auth(struct it6505 *it6505) it6505->hdcp_status = HDCP_AUTH_GOING; } -static int it6505_sha1_digest(struct it6505 *it6505, u8 *sha1_input, - unsigned int size, u8 *output_av) -{ - struct shash_desc *desc; - struct crypto_shash *tfm; - int err; - struct device *dev = it6505->dev; - - tfm = crypto_alloc_shash("sha1", 0, 0); - if (IS_ERR(tfm)) { - dev_err(dev, "crypto_alloc_shash sha1 failed"); - return PTR_ERR(tfm); - } - desc = kzalloc(sizeof(*desc) + crypto_shash_descsize(tfm), GFP_KERNEL); - if (!desc) { - crypto_free_shash(tfm); - return -ENOMEM; - } - - desc->tfm = tfm; - err = crypto_shash_digest(desc, sha1_input, size, output_av); - if (err) - dev_err(dev, "crypto_shash_digest sha1 failed"); - - crypto_free_shash(tfm); - kfree(desc); - return err; -} - static int it6505_setup_sha1_input(struct it6505 *it6505, u8 *sha1_input) { struct device *dev = it6505->dev; @@ -2205,7 +2176,7 @@ static bool it6505_hdcp_part2_ksvlist_check(struct it6505 *it6505) return false; } - it6505_sha1_digest(it6505, it6505->sha1_input, i, (u8 *)av); + sha1(it6505->sha1_input, i, (u8 *)av); /*1B-05 V' must retry 3 times */ for (retry = 0; retry < 3; retry++) { err = it6505_get_dpcd(it6505, DP_AUX_HDCP_V_PRIME(0), (u8 *)bv, @@ -3238,7 +3209,7 @@ static void it6505_bridge_atomic_post_disable(struct drm_bridge *bridge, } static enum drm_connector_status -it6505_bridge_detect(struct drm_bridge *bridge) +it6505_bridge_detect(struct drm_bridge *bridge, struct drm_connector *connector) { struct it6505 *it6505 = bridge_to_it6505(bridge); diff --git a/drivers/gpu/drm/bridge/ite-it66121.c b/drivers/gpu/drm/bridge/ite-it66121.c index 6494f0842793..aa7b1dcc5d70 100644 --- a/drivers/gpu/drm/bridge/ite-it66121.c +++ b/drivers/gpu/drm/bridge/ite-it66121.c @@ -843,7 +843,8 @@ static enum drm_mode_status it66121_bridge_mode_valid(struct drm_bridge *bridge, return MODE_OK; } -static enum drm_connector_status it66121_bridge_detect(struct drm_bridge *bridge) +static enum drm_connector_status +it66121_bridge_detect(struct drm_bridge *bridge, struct drm_connector *connector) { struct it66121_ctx *ctx = container_of(bridge, struct it66121_ctx, bridge); diff --git a/drivers/gpu/drm/bridge/lontium-lt8912b.c b/drivers/gpu/drm/bridge/lontium-lt8912b.c index bd83228b0f0e..342374cb8fc6 100644 --- a/drivers/gpu/drm/bridge/lontium-lt8912b.c +++ b/drivers/gpu/drm/bridge/lontium-lt8912b.c @@ -408,7 +408,7 @@ lt8912_connector_detect(struct drm_connector *connector, bool force) struct lt8912 *lt = connector_to_lt8912(connector); if (lt->hdmi_port->ops & DRM_BRIDGE_OP_DETECT) - return drm_bridge_detect(lt->hdmi_port); + return drm_bridge_detect(lt->hdmi_port, connector); return lt8912_check_cable_status(lt); } @@ -607,12 +607,12 @@ lt8912_bridge_mode_valid(struct drm_bridge *bridge, } static enum drm_connector_status -lt8912_bridge_detect(struct drm_bridge *bridge) +lt8912_bridge_detect(struct drm_bridge *bridge, struct drm_connector *connector) { struct lt8912 *lt = 
bridge_to_lt8912(bridge); if (lt->hdmi_port->ops & DRM_BRIDGE_OP_DETECT) - return drm_bridge_detect(lt->hdmi_port); + return drm_bridge_detect(lt->hdmi_port, connector); return lt8912_check_cable_status(lt); } diff --git a/drivers/gpu/drm/bridge/lontium-lt9211.c b/drivers/gpu/drm/bridge/lontium-lt9211.c index 399fa7eebd49..03fc8fd10f20 100644 --- a/drivers/gpu/drm/bridge/lontium-lt9211.c +++ b/drivers/gpu/drm/bridge/lontium-lt9211.c @@ -121,8 +121,7 @@ static int lt9211_read_chipid(struct lt9211 *ctx) } /* Test for known Chip ID. */ - if (chipid[0] != REG_CHIPID0_VALUE || chipid[1] != REG_CHIPID1_VALUE || - chipid[2] != REG_CHIPID2_VALUE) { + if (chipid[0] != REG_CHIPID0_VALUE || chipid[1] != REG_CHIPID1_VALUE) { dev_err(ctx->dev, "Unknown Chip ID: 0x%02x 0x%02x 0x%02x\n", chipid[0], chipid[1], chipid[2]); return -EINVAL; diff --git a/drivers/gpu/drm/bridge/lontium-lt9611.c b/drivers/gpu/drm/bridge/lontium-lt9611.c index d6ee79c1e427..a2d032ee4744 100644 --- a/drivers/gpu/drm/bridge/lontium-lt9611.c +++ b/drivers/gpu/drm/bridge/lontium-lt9611.c @@ -543,7 +543,8 @@ static int lt9611_regulator_enable(struct lt9611 *lt9611) return 0; } -static enum drm_connector_status lt9611_bridge_detect(struct drm_bridge *bridge) +static enum drm_connector_status +lt9611_bridge_detect(struct drm_bridge *bridge, struct drm_connector *connector) { struct lt9611 *lt9611 = bridge_to_lt9611(bridge); unsigned int reg_val = 0; @@ -936,8 +937,8 @@ lt9611_hdmi_tmds_char_rate_valid(const struct drm_bridge *bridge, return MODE_OK; } -static int lt9611_hdmi_audio_startup(struct drm_connector *connector, - struct drm_bridge *bridge) +static int lt9611_hdmi_audio_startup(struct drm_bridge *bridge, + struct drm_connector *connector) { struct lt9611 *lt9611 = bridge_to_lt9611(bridge); @@ -952,8 +953,8 @@ static int lt9611_hdmi_audio_startup(struct drm_connector *connector, return 0; } -static int lt9611_hdmi_audio_prepare(struct drm_connector *connector, - struct drm_bridge *bridge, +static int lt9611_hdmi_audio_prepare(struct drm_bridge *bridge, + struct drm_connector *connector, struct hdmi_codec_daifmt *fmt, struct hdmi_codec_params *hparms) { @@ -974,8 +975,8 @@ static int lt9611_hdmi_audio_prepare(struct drm_connector *connector, &hparms->cea); } -static void lt9611_hdmi_audio_shutdown(struct drm_connector *connector, - struct drm_bridge *bridge) +static void lt9611_hdmi_audio_shutdown(struct drm_bridge *bridge, + struct drm_connector *connector) { struct lt9611 *lt9611 = bridge_to_lt9611(bridge); diff --git a/drivers/gpu/drm/bridge/lontium-lt9611uxc.c b/drivers/gpu/drm/bridge/lontium-lt9611uxc.c index 766da2cb45a7..38fb8776c0f4 100644 --- a/drivers/gpu/drm/bridge/lontium-lt9611uxc.c +++ b/drivers/gpu/drm/bridge/lontium-lt9611uxc.c @@ -353,7 +353,8 @@ static void lt9611uxc_bridge_mode_set(struct drm_bridge *bridge, lt9611uxc_unlock(lt9611uxc); } -static enum drm_connector_status lt9611uxc_bridge_detect(struct drm_bridge *bridge) +static enum drm_connector_status +lt9611uxc_bridge_detect(struct drm_bridge *bridge, struct drm_connector *connector) { struct lt9611uxc *lt9611uxc = bridge_to_lt9611uxc(bridge); unsigned int reg_val = 0; diff --git a/drivers/gpu/drm/bridge/megachips-stdpxxxx-ge-b850v3-fw.c b/drivers/gpu/drm/bridge/megachips-stdpxxxx-ge-b850v3-fw.c index 81dde9ed7bcf..c9e6505cbd88 100644 --- a/drivers/gpu/drm/bridge/megachips-stdpxxxx-ge-b850v3-fw.c +++ b/drivers/gpu/drm/bridge/megachips-stdpxxxx-ge-b850v3-fw.c @@ -120,7 +120,8 @@ drm_connector_helper_funcs ge_b850v3_lvds_connector_helper_funcs = { 
.get_modes = ge_b850v3_lvds_get_modes, }; -static enum drm_connector_status ge_b850v3_lvds_bridge_detect(struct drm_bridge *bridge) +static enum drm_connector_status +ge_b850v3_lvds_bridge_detect(struct drm_bridge *bridge, struct drm_connector *connector) { struct i2c_client *stdp4028_i2c = ge_b850v3_lvds_ptr->stdp4028_i2c; @@ -141,7 +142,7 @@ static enum drm_connector_status ge_b850v3_lvds_bridge_detect(struct drm_bridge static enum drm_connector_status ge_b850v3_lvds_detect(struct drm_connector *connector, bool force) { - return ge_b850v3_lvds_bridge_detect(&ge_b850v3_lvds_ptr->bridge); + return ge_b850v3_lvds_bridge_detect(&ge_b850v3_lvds_ptr->bridge, connector); } static const struct drm_connector_funcs ge_b850v3_lvds_connector_funcs = { diff --git a/drivers/gpu/drm/bridge/panel.c b/drivers/gpu/drm/bridge/panel.c index 6361a943e213..184a8b7049a7 100644 --- a/drivers/gpu/drm/bridge/panel.c +++ b/drivers/gpu/drm/bridge/panel.c @@ -299,6 +299,7 @@ struct drm_bridge *drm_panel_bridge_add_typed(struct drm_panel *panel, panel_bridge->bridge.of_node = panel->dev->of_node; panel_bridge->bridge.ops = DRM_BRIDGE_OP_MODES; panel_bridge->bridge.type = connector_type; + panel_bridge->bridge.pre_enable_prev_first = panel->prepare_prev_first; drm_bridge_add(&panel_bridge->bridge); @@ -414,8 +415,6 @@ struct drm_bridge *devm_drm_panel_bridge_add_typed(struct device *dev, return bridge; } - bridge->pre_enable_prev_first = panel->prepare_prev_first; - *ptr = bridge; devres_add(dev, ptr); @@ -457,8 +456,6 @@ struct drm_bridge *drmm_panel_bridge_add(struct drm_device *drm, if (ret) return ERR_PTR(ret); - bridge->pre_enable_prev_first = panel->prepare_prev_first; - return bridge; } EXPORT_SYMBOL(drmm_panel_bridge_add); diff --git a/drivers/gpu/drm/bridge/samsung-dsim.c b/drivers/gpu/drm/bridge/samsung-dsim.c index f2f666b27d2d..eabc4c32f6ab 100644 --- a/drivers/gpu/drm/bridge/samsung-dsim.c +++ b/drivers/gpu/drm/bridge/samsung-dsim.c @@ -20,6 +20,7 @@ #include <linux/of.h> #include <linux/phy/phy.h> #include <linux/platform_device.h> +#include <linux/units.h> #include <video/mipi_display.h> @@ -30,11 +31,10 @@ /* returns true iff both arguments logically differs */ #define NEQV(a, b) (!(a) ^ !(b)) -/* DSIM_STATUS */ +/* DSIM_STATUS or DSIM_DPHY_STATUS */ #define DSIM_STOP_STATE_DAT(x) (((x) & 0xf) << 0) #define DSIM_STOP_STATE_CLK BIT(8) #define DSIM_TX_READY_HS_CLK BIT(10) -#define DSIM_PLL_STABLE BIT(31) /* DSIM_SWRST */ #define DSIM_FUNCRST BIT(16) @@ -45,17 +45,13 @@ #define DSIM_BTA_TIMEOUT(x) ((x) << 16) /* DSIM_CLKCTRL */ -#define DSIM_ESC_PRESCALER(x) (((x) & 0xffff) << 0) -#define DSIM_ESC_PRESCALER_MASK (0xffff << 0) -#define DSIM_LANE_ESC_CLK_EN_CLK BIT(19) -#define DSIM_LANE_ESC_CLK_EN_DATA(x) (((x) & 0xf) << 20) -#define DSIM_LANE_ESC_CLK_EN_DATA_MASK (0xf << 20) -#define DSIM_BYTE_CLKEN BIT(24) -#define DSIM_BYTE_CLK_SRC(x) (((x) & 0x3) << 25) -#define DSIM_BYTE_CLK_SRC_MASK (0x3 << 25) -#define DSIM_PLL_BYPASS BIT(27) -#define DSIM_ESC_CLKEN BIT(28) -#define DSIM_TX_REQUEST_HSCLK BIT(31) +#define DSIM_ESC_PRESCALER(x) (((x) & 0xffff) << 0) +#define DSIM_ESC_PRESCALER_MASK (0xffff << 0) +#define DSIM_LANE_ESC_CLK_EN_DATA(x, offset) (((x) & 0xf) << offset) +#define DSIM_LANE_ESC_CLK_EN_DATA_MASK(offset) (0xf << offset) +#define DSIM_BYTE_CLK_SRC(x) (((x) & 0x3) << 25) +#define DSIM_BYTE_CLK_SRC_MASK (0x3 << 25) +#define DSIM_PLL_BYPASS BIT(27) /* DSIM_CONFIG */ #define DSIM_LANE_EN_CLK BIT(0) @@ -90,7 +86,6 @@ */ #define DSIM_HSE_DISABLE_MODE BIT(23) #define DSIM_AUTO_MODE BIT(24) -#define 
DSIM_VIDEO_MODE BIT(25) #define DSIM_BURST_MODE BIT(26) #define DSIM_SYNC_INFORM BIT(27) #define DSIM_EOT_DISABLE BIT(28) @@ -128,9 +123,9 @@ #define DSIM_MAIN_HBP_MASK ((0xffff) << 0) /* DSIM_MSYNC */ -#define DSIM_MAIN_VSA(x) ((x) << 22) +#define DSIM_MAIN_VSA(x, offset) ((x) << offset) #define DSIM_MAIN_HSA(x) ((x) << 0) -#define DSIM_MAIN_VSA_MASK ((0x3ff) << 22) +#define DSIM_MAIN_VSA_MASK(offset) ((0x3ff) << offset) #define DSIM_MAIN_HSA_MASK ((0xffff) << 0) /* DSIM_SDRESOL */ @@ -156,6 +151,11 @@ #define DSIM_INT_RX_ECC_ERR BIT(15) #define DSIM_INT_RX_CRC_ERR BIT(14) +/* DSIM_SFRCTRL */ +#define DSIM_SFR_CTRL_STAND_BY BIT(4) +#define DSIM_SFR_CTRL_SHADOW_UPDATE BIT(1) +#define DSIM_SFR_CTRL_SHADOW_EN BIT(0) + /* DSIM_FIFOCTRL */ #define DSIM_RX_DATA_FULL BIT(25) #define DSIM_RX_DATA_EMPTY BIT(24) @@ -190,9 +190,7 @@ #define DSIM_PLL_DPDNSWAP_DAT (1 << 24) #define DSIM_FREQ_BAND(x) ((x) << 24) #define DSIM_PLL_EN BIT(23) -#define DSIM_PLL_P(x, offset) ((x) << (offset)) -#define DSIM_PLL_M(x) ((x) << 4) -#define DSIM_PLL_S(x) ((x) << 1) +#define DSIM_PLL(x, offset) ((x) << (offset)) /* DSIM_PHYCTRL */ #define DSIM_PHYCTRL_ULPS_EXIT(x) (((x) & 0x1ff) << 0) @@ -221,25 +219,42 @@ #define DSI_XFER_TIMEOUT_MS 100 #define DSI_RX_FIFO_EMPTY 0x30800002 -#define OLD_SCLK_MIPI_CLK_NAME "pll_clk" - #define PS_TO_CYCLE(ps, hz) DIV64_U64_ROUND_CLOSEST(((ps) * (hz)), 1000000000000ULL) -static const char *const clk_names[5] = { - "bus_clk", - "sclk_mipi", - "phyclk_mipidphy0_bitclkdiv8", - "phyclk_mipidphy0_rxclkesc0", - "sclk_rgb_vclk_to_dsim0" -}; - enum samsung_dsim_transfer_type { EXYNOS_DSI_TX, EXYNOS_DSI_RX, }; +static struct clk_bulk_data exynos3_clk_bulk_data[] = { + { .id = "bus_clk" }, + { .id = "pll_clk" }, +}; + +static struct clk_bulk_data exynos4_clk_bulk_data[] = { + { .id = "bus_clk" }, + { .id = "sclk_mipi" }, +}; + +static struct clk_bulk_data exynos5433_clk_bulk_data[] = { + { .id = "bus_clk" }, + { .id = "sclk_mipi" }, + { .id = "phyclk_mipidphy0_bitclkdiv8" }, + { .id = "phyclk_mipidphy0_rxclkesc0" }, + { .id = "sclk_rgb_vclk_to_dsim0" }, +}; + +static struct clk_bulk_data exynos7870_clk_bulk_data[] = { + { .id = "bus" }, + { .id = "pll" }, + { .id = "byte" }, + { .id = "esc" }, +}; + enum reg_idx { - DSIM_STATUS_REG, /* Status register */ + DSIM_STATUS_REG, /* Status register (legacy) */ + DSIM_LINK_STATUS_REG, /* Link status register */ + DSIM_DPHY_STATUS_REG, /* D-PHY status register */ DSIM_SWRST_REG, /* Software reset register */ DSIM_CLKCTRL_REG, /* Clock control register */ DSIM_TIMEOUT_REG, /* Time out register */ @@ -254,6 +269,7 @@ enum reg_idx { DSIM_PKTHDR_REG, /* Packet Header FIFO register */ DSIM_PAYLOAD_REG, /* Payload FIFO register */ DSIM_RXFIFO_REG, /* Read FIFO register */ + DSIM_SFRCTRL_REG, /* SFR standby and shadow control register */ DSIM_FIFOCTRL_REG, /* FIFO status and control register */ DSIM_PLLCTRL_REG, /* PLL control register */ DSIM_PHYCTRL_REG, @@ -311,6 +327,32 @@ static const unsigned int exynos5433_reg_ofs[] = { [DSIM_PHYTIMING2_REG] = 0xBC, }; +static const unsigned int exynos7870_reg_ofs[] = { + [DSIM_LINK_STATUS_REG] = 0x04, + [DSIM_DPHY_STATUS_REG] = 0x08, + [DSIM_SWRST_REG] = 0x0C, + [DSIM_CLKCTRL_REG] = 0x10, + [DSIM_TIMEOUT_REG] = 0x14, + [DSIM_ESCMODE_REG] = 0x1C, + [DSIM_MDRESOL_REG] = 0x20, + [DSIM_MVPORCH_REG] = 0x24, + [DSIM_MHPORCH_REG] = 0x28, + [DSIM_MSYNC_REG] = 0x2C, + [DSIM_CONFIG_REG] = 0x30, + [DSIM_INTSRC_REG] = 0x34, + [DSIM_INTMSK_REG] = 0x38, + [DSIM_PKTHDR_REG] = 0x3C, + [DSIM_PAYLOAD_REG] = 0x40, + [DSIM_RXFIFO_REG] = 
0x44, + [DSIM_SFRCTRL_REG] = 0x48, + [DSIM_FIFOCTRL_REG] = 0x4C, + [DSIM_PLLCTRL_REG] = 0x94, + [DSIM_PHYCTRL_REG] = 0xA4, + [DSIM_PHYTIMING_REG] = 0xB4, + [DSIM_PHYTIMING1_REG] = 0xB8, + [DSIM_PHYTIMING2_REG] = 0xBC, +}; + enum reg_value_idx { RESET_TYPE, PLL_TIMER, @@ -383,6 +425,24 @@ static const unsigned int exynos5433_reg_values[] = { [PHYTIMING_HS_TRAIL] = DSIM_PHYTIMING2_HS_TRAIL(0x0c), }; +static const unsigned int exynos7870_reg_values[] = { + [RESET_TYPE] = DSIM_SWRST, + [PLL_TIMER] = 80000, + [STOP_STATE_CNT] = 0xa, + [PHYCTRL_ULPS_EXIT] = DSIM_PHYCTRL_ULPS_EXIT(0x177), + [PHYCTRL_VREG_LP] = 0, + [PHYCTRL_SLEW_UP] = 0, + [PHYTIMING_LPX] = DSIM_PHYTIMING_LPX(0x07), + [PHYTIMING_HS_EXIT] = DSIM_PHYTIMING_HS_EXIT(0x0c), + [PHYTIMING_CLK_PREPARE] = DSIM_PHYTIMING1_CLK_PREPARE(0x08), + [PHYTIMING_CLK_ZERO] = DSIM_PHYTIMING1_CLK_ZERO(0x2b), + [PHYTIMING_CLK_POST] = DSIM_PHYTIMING1_CLK_POST(0x0d), + [PHYTIMING_CLK_TRAIL] = DSIM_PHYTIMING1_CLK_TRAIL(0x09), + [PHYTIMING_HS_PREPARE] = DSIM_PHYTIMING2_HS_PREPARE(0x09), + [PHYTIMING_HS_ZERO] = DSIM_PHYTIMING2_HS_ZERO(0x0f), + [PHYTIMING_HS_TRAIL] = DSIM_PHYTIMING2_HS_TRAIL(0x0c), +}; + static const unsigned int imx8mm_dsim_reg_values[] = { [RESET_TYPE] = DSIM_SWRST, [PLL_TIMER] = 500, @@ -404,13 +464,26 @@ static const unsigned int imx8mm_dsim_reg_values[] = { static const struct samsung_dsim_driver_data exynos3_dsi_driver_data = { .reg_ofs = exynos_reg_ofs, .plltmr_reg = 0x50, + .has_legacy_status_reg = 1, .has_freqband = 1, .has_clklane_stop = 1, - .num_clks = 2, + .clk_data = exynos3_clk_bulk_data, + .num_clks = ARRAY_SIZE(exynos3_clk_bulk_data), .max_freq = 1000, + .wait_for_hdr_fifo = 1, .wait_for_reset = 1, .num_bits_resol = 11, + .video_mode_bit = 25, + .pll_stable_bit = 31, + .esc_clken_bit = 28, + .byte_clken_bit = 24, + .tx_req_hsclk_bit = 31, + .lane_esc_clk_bit = 19, + .lane_esc_data_offset = 20, .pll_p_offset = 13, + .pll_m_offset = 4, + .pll_s_offset = 1, + .main_vsa_offset = 22, .reg_values = reg_values, .pll_fin_min = 6, .pll_fin_max = 12, @@ -423,13 +496,26 @@ static const struct samsung_dsim_driver_data exynos3_dsi_driver_data = { static const struct samsung_dsim_driver_data exynos4_dsi_driver_data = { .reg_ofs = exynos_reg_ofs, .plltmr_reg = 0x50, + .has_legacy_status_reg = 1, .has_freqband = 1, .has_clklane_stop = 1, - .num_clks = 2, + .clk_data = exynos4_clk_bulk_data, + .num_clks = ARRAY_SIZE(exynos4_clk_bulk_data), .max_freq = 1000, + .wait_for_hdr_fifo = 1, .wait_for_reset = 1, .num_bits_resol = 11, + .video_mode_bit = 25, + .pll_stable_bit = 31, + .esc_clken_bit = 28, + .byte_clken_bit = 24, + .tx_req_hsclk_bit = 31, + .lane_esc_clk_bit = 19, + .lane_esc_data_offset = 20, .pll_p_offset = 13, + .pll_m_offset = 4, + .pll_s_offset = 1, + .main_vsa_offset = 22, .reg_values = reg_values, .pll_fin_min = 6, .pll_fin_max = 12, @@ -442,11 +528,24 @@ static const struct samsung_dsim_driver_data exynos4_dsi_driver_data = { static const struct samsung_dsim_driver_data exynos5_dsi_driver_data = { .reg_ofs = exynos_reg_ofs, .plltmr_reg = 0x58, - .num_clks = 2, + .has_legacy_status_reg = 1, + .clk_data = exynos3_clk_bulk_data, + .num_clks = ARRAY_SIZE(exynos3_clk_bulk_data), .max_freq = 1000, + .wait_for_hdr_fifo = 1, .wait_for_reset = 1, .num_bits_resol = 11, + .video_mode_bit = 25, + .pll_stable_bit = 31, + .esc_clken_bit = 28, + .byte_clken_bit = 24, + .tx_req_hsclk_bit = 31, + .lane_esc_clk_bit = 19, + .lane_esc_data_offset = 20, .pll_p_offset = 13, + .pll_m_offset = 4, + .pll_s_offset = 1, + .main_vsa_offset = 22, 
.reg_values = reg_values, .pll_fin_min = 6, .pll_fin_max = 12, @@ -458,12 +557,25 @@ static const struct samsung_dsim_driver_data exynos5_dsi_driver_data = { static const struct samsung_dsim_driver_data exynos5433_dsi_driver_data = { .reg_ofs = exynos5433_reg_ofs, .plltmr_reg = 0xa0, + .has_legacy_status_reg = 1, .has_clklane_stop = 1, - .num_clks = 5, + .clk_data = exynos5433_clk_bulk_data, + .num_clks = ARRAY_SIZE(exynos5433_clk_bulk_data), .max_freq = 1500, + .wait_for_hdr_fifo = 1, .wait_for_reset = 0, .num_bits_resol = 12, + .video_mode_bit = 25, + .pll_stable_bit = 31, + .esc_clken_bit = 28, + .byte_clken_bit = 24, + .tx_req_hsclk_bit = 31, + .lane_esc_clk_bit = 19, + .lane_esc_data_offset = 20, .pll_p_offset = 13, + .pll_m_offset = 4, + .pll_s_offset = 1, + .main_vsa_offset = 22, .reg_values = exynos5433_reg_values, .pll_fin_min = 6, .pll_fin_max = 12, @@ -475,12 +587,25 @@ static const struct samsung_dsim_driver_data exynos5433_dsi_driver_data = { static const struct samsung_dsim_driver_data exynos5422_dsi_driver_data = { .reg_ofs = exynos5433_reg_ofs, .plltmr_reg = 0xa0, + .has_legacy_status_reg = 1, .has_clklane_stop = 1, - .num_clks = 2, + .clk_data = exynos3_clk_bulk_data, + .num_clks = ARRAY_SIZE(exynos3_clk_bulk_data), .max_freq = 1500, + .wait_for_hdr_fifo = 1, .wait_for_reset = 1, .num_bits_resol = 12, + .video_mode_bit = 25, + .pll_stable_bit = 31, + .esc_clken_bit = 28, + .byte_clken_bit = 24, + .tx_req_hsclk_bit = 31, + .lane_esc_clk_bit = 19, + .lane_esc_data_offset = 20, .pll_p_offset = 13, + .pll_m_offset = 4, + .pll_s_offset = 1, + .main_vsa_offset = 22, .reg_values = exynos5422_reg_values, .pll_fin_min = 6, .pll_fin_max = 12, @@ -489,19 +614,62 @@ static const struct samsung_dsim_driver_data exynos5422_dsi_driver_data = { .min_freq = 500, }; +static const struct samsung_dsim_driver_data exynos7870_dsi_driver_data = { + .reg_ofs = exynos7870_reg_ofs, + .plltmr_reg = 0xa0, + .has_clklane_stop = 1, + .has_sfrctrl = 1, + .clk_data = exynos7870_clk_bulk_data, + .num_clks = ARRAY_SIZE(exynos7870_clk_bulk_data), + .max_freq = 1500, + .wait_for_hdr_fifo = 0, + .wait_for_reset = 1, + .num_bits_resol = 12, + .video_mode_bit = 18, + .pll_stable_bit = 24, + .esc_clken_bit = 16, + .byte_clken_bit = 17, + .tx_req_hsclk_bit = 20, + .lane_esc_clk_bit = 8, + .lane_esc_data_offset = 9, + .pll_p_offset = 13, + .pll_m_offset = 3, + .pll_s_offset = 0, + .main_vsa_offset = 16, + .reg_values = exynos7870_reg_values, + .pll_fin_min = 6, + .pll_fin_max = 12, + .m_min = 41, + .m_max = 125, + .min_freq = 500, +}; + static const struct samsung_dsim_driver_data imx8mm_dsi_driver_data = { .reg_ofs = exynos5433_reg_ofs, .plltmr_reg = 0xa0, + .has_legacy_status_reg = 1, .has_clklane_stop = 1, - .num_clks = 2, + .clk_data = exynos4_clk_bulk_data, + .num_clks = ARRAY_SIZE(exynos4_clk_bulk_data), .max_freq = 2100, + .wait_for_hdr_fifo = 1, .wait_for_reset = 0, .num_bits_resol = 12, + .video_mode_bit = 25, + .pll_stable_bit = 31, + .esc_clken_bit = 28, + .byte_clken_bit = 24, + .tx_req_hsclk_bit = 31, + .lane_esc_clk_bit = 19, + .lane_esc_data_offset = 20, /* * Unlike Exynos, PLL_P(PMS_P) offset 14 is used in i.MX8M Mini/Nano/Plus * downstream driver - drivers/gpu/drm/bridge/sec-dsim.c */ .pll_p_offset = 14, + .pll_m_offset = 4, + .pll_s_offset = 1, + .main_vsa_offset = 22, .reg_values = imx8mm_dsim_reg_values, .pll_fin_min = 2, .pll_fin_max = 30, @@ -517,6 +685,7 @@ samsung_dsim_types[DSIM_TYPE_COUNT] = { [DSIM_TYPE_EXYNOS5410] = &exynos5_dsi_driver_data, [DSIM_TYPE_EXYNOS5422] = 
&exynos5422_dsi_driver_data, [DSIM_TYPE_EXYNOS5433] = &exynos5433_dsi_driver_data, + [DSIM_TYPE_EXYNOS7870] = &exynos7870_dsi_driver_data, [DSIM_TYPE_IMX8MM] = &imx8mm_dsi_driver_data, [DSIM_TYPE_IMX8MP] = &imx8mm_dsi_driver_data, }; @@ -558,10 +727,6 @@ static void samsung_dsim_reset(struct samsung_dsim *dsi) samsung_dsim_write(dsi, DSIM_SWRST_REG, reset_val); } -#ifndef MHZ -#define MHZ (1000 * 1000) -#endif - static unsigned long samsung_dsim_pll_find_pms(struct samsung_dsim *dsi, unsigned long fin, unsigned long fout, @@ -575,8 +740,8 @@ static unsigned long samsung_dsim_pll_find_pms(struct samsung_dsim *dsi, u16 _m, best_m; u8 _s, best_s; - p_min = DIV_ROUND_UP(fin, (driver_data->pll_fin_max * MHZ)); - p_max = fin / (driver_data->pll_fin_min * MHZ); + p_min = DIV_ROUND_UP(fin, (driver_data->pll_fin_max * HZ_PER_MHZ)); + p_max = fin / (driver_data->pll_fin_min * HZ_PER_MHZ); for (_p = p_min; _p <= p_max; ++_p) { for (_s = 0; _s <= 5; ++_s) { @@ -591,8 +756,8 @@ static unsigned long samsung_dsim_pll_find_pms(struct samsung_dsim *dsi, tmp = (u64)_m * fin; do_div(tmp, _p); - if (tmp < driver_data->min_freq * MHZ || - tmp > driver_data->max_freq * MHZ) + if (tmp < driver_data->min_freq * HZ_PER_MHZ || + tmp > driver_data->max_freq * HZ_PER_MHZ) continue; tmp = (u64)_m * fin; @@ -635,7 +800,7 @@ static unsigned long samsung_dsim_set_pll(struct samsung_dsim *dsi, * limit. */ fin = clk_get_rate(clk_get_parent(dsi->pll_clk)); - while (fin > driver_data->pll_fin_max * MHZ) + while (fin > driver_data->pll_fin_max * HZ_PER_MHZ) fin /= 2; clk_set_rate(dsi->pll_clk, fin); @@ -656,15 +821,17 @@ static unsigned long samsung_dsim_set_pll(struct samsung_dsim *dsi, writel(driver_data->reg_values[PLL_TIMER], dsi->reg_base + driver_data->plltmr_reg); - reg = DSIM_PLL_EN | DSIM_PLL_P(p, driver_data->pll_p_offset) | - DSIM_PLL_M(m) | DSIM_PLL_S(s); + reg = DSIM_PLL_EN | DSIM_PLL(p, driver_data->pll_p_offset) + | DSIM_PLL(m, driver_data->pll_m_offset) + | DSIM_PLL(s, driver_data->pll_s_offset); if (driver_data->has_freqband) { static const unsigned long freq_bands[] = { - 100 * MHZ, 120 * MHZ, 160 * MHZ, 200 * MHZ, - 270 * MHZ, 320 * MHZ, 390 * MHZ, 450 * MHZ, - 510 * MHZ, 560 * MHZ, 640 * MHZ, 690 * MHZ, - 770 * MHZ, 870 * MHZ, 950 * MHZ, + 100 * HZ_PER_MHZ, 120 * HZ_PER_MHZ, 160 * HZ_PER_MHZ, + 200 * HZ_PER_MHZ, 270 * HZ_PER_MHZ, 320 * HZ_PER_MHZ, + 390 * HZ_PER_MHZ, 450 * HZ_PER_MHZ, 510 * HZ_PER_MHZ, + 560 * HZ_PER_MHZ, 640 * HZ_PER_MHZ, 690 * HZ_PER_MHZ, + 770 * HZ_PER_MHZ, 870 * HZ_PER_MHZ, 950 * HZ_PER_MHZ, }; int band; @@ -684,14 +851,17 @@ static unsigned long samsung_dsim_set_pll(struct samsung_dsim *dsi, samsung_dsim_write(dsi, DSIM_PLLCTRL_REG, reg); - timeout = 1000; + timeout = 3000; do { if (timeout-- == 0) { dev_err(dsi->dev, "PLL failed to stabilize\n"); return 0; } - reg = samsung_dsim_read(dsi, DSIM_STATUS_REG); - } while ((reg & DSIM_PLL_STABLE) == 0); + if (driver_data->has_legacy_status_reg) + reg = samsung_dsim_read(dsi, DSIM_STATUS_REG); + else + reg = samsung_dsim_read(dsi, DSIM_LINK_STATUS_REG); + } while ((reg & BIT(driver_data->pll_stable_bit)) == 0); dsi->hs_clock = fout; @@ -700,6 +870,7 @@ static unsigned long samsung_dsim_set_pll(struct samsung_dsim *dsi, static int samsung_dsim_enable_clock(struct samsung_dsim *dsi) { + const struct samsung_dsim_driver_data *driver_data = dsi->driver_data; unsigned long hs_clk, byte_clk, esc_clk, pix_clk; unsigned long esc_div; u32 reg; @@ -724,7 +895,7 @@ static int samsung_dsim_enable_clock(struct samsung_dsim *dsi) esc_div = 
DIV_ROUND_UP(byte_clk, dsi->esc_clk_rate); esc_clk = byte_clk / esc_div; - if (esc_clk > 20 * MHZ) { + if (esc_clk > 20 * HZ_PER_MHZ) { ++esc_div; esc_clk = byte_clk / esc_div; } @@ -733,15 +904,17 @@ static int samsung_dsim_enable_clock(struct samsung_dsim *dsi) hs_clk, byte_clk, esc_clk); reg = samsung_dsim_read(dsi, DSIM_CLKCTRL_REG); - reg &= ~(DSIM_ESC_PRESCALER_MASK | DSIM_LANE_ESC_CLK_EN_CLK - | DSIM_LANE_ESC_CLK_EN_DATA_MASK | DSIM_PLL_BYPASS - | DSIM_BYTE_CLK_SRC_MASK); - reg |= DSIM_ESC_CLKEN | DSIM_BYTE_CLKEN - | DSIM_ESC_PRESCALER(esc_div) - | DSIM_LANE_ESC_CLK_EN_CLK - | DSIM_LANE_ESC_CLK_EN_DATA(BIT(dsi->lanes) - 1) - | DSIM_BYTE_CLK_SRC(0) - | DSIM_TX_REQUEST_HSCLK; + reg &= ~(DSIM_ESC_PRESCALER_MASK | BIT(driver_data->lane_esc_clk_bit) + | DSIM_LANE_ESC_CLK_EN_DATA_MASK(driver_data->lane_esc_data_offset) + | DSIM_PLL_BYPASS + | DSIM_BYTE_CLK_SRC_MASK); + reg |= BIT(driver_data->esc_clken_bit) | BIT(driver_data->byte_clken_bit) + | DSIM_ESC_PRESCALER(esc_div) + | BIT(driver_data->lane_esc_clk_bit) + | DSIM_LANE_ESC_CLK_EN_DATA(BIT(dsi->lanes) - 1, + driver_data->lane_esc_data_offset) + | DSIM_BYTE_CLK_SRC(0) + | BIT(driver_data->tx_req_hsclk_bit); samsung_dsim_write(dsi, DSIM_CLKCTRL_REG, reg); return 0; @@ -845,11 +1018,14 @@ static void samsung_dsim_set_phy_ctrl(struct samsung_dsim *dsi) static void samsung_dsim_disable_clock(struct samsung_dsim *dsi) { + const struct samsung_dsim_driver_data *driver_data = dsi->driver_data; u32 reg; reg = samsung_dsim_read(dsi, DSIM_CLKCTRL_REG); - reg &= ~(DSIM_LANE_ESC_CLK_EN_CLK | DSIM_LANE_ESC_CLK_EN_DATA_MASK - | DSIM_ESC_CLKEN | DSIM_BYTE_CLKEN); + reg &= ~(BIT(driver_data->lane_esc_clk_bit) + | DSIM_LANE_ESC_CLK_EN_DATA_MASK(driver_data->lane_esc_data_offset) + | BIT(driver_data->esc_clken_bit) + | BIT(driver_data->byte_clken_bit)); samsung_dsim_write(dsi, DSIM_CLKCTRL_REG, reg); reg = samsung_dsim_read(dsi, DSIM_PLLCTRL_REG); @@ -893,14 +1069,12 @@ static int samsung_dsim_init_link(struct samsung_dsim *dsi) * mode, otherwise it will support command mode. */ if (dsi->mode_flags & MIPI_DSI_MODE_VIDEO) { - reg |= DSIM_VIDEO_MODE; + reg |= BIT(driver_data->video_mode_bit); /* * The user manual describes that following bits are ignored in * command mode. 
*/ - if (!(dsi->mode_flags & MIPI_DSI_MODE_VSYNC_FLUSH)) - reg |= DSIM_MFLUSH_VS; if (dsi->mode_flags & MIPI_DSI_MODE_VIDEO_SYNC_PULSE) reg |= DSIM_SYNC_INFORM; if (dsi->mode_flags & MIPI_DSI_MODE_VIDEO_BURST) @@ -966,7 +1140,10 @@ static int samsung_dsim_init_link(struct samsung_dsim *dsi) return -EFAULT; } - reg = samsung_dsim_read(dsi, DSIM_STATUS_REG); + if (driver_data->has_legacy_status_reg) + reg = samsung_dsim_read(dsi, DSIM_STATUS_REG); + else + reg = samsung_dsim_read(dsi, DSIM_DPHY_STATUS_REG); if ((reg & DSIM_STOP_STATE_DAT(lanes_mask)) != DSIM_STOP_STATE_DAT(lanes_mask)) continue; @@ -987,6 +1164,7 @@ static void samsung_dsim_set_display_mode(struct samsung_dsim *dsi) { struct drm_display_mode *m = &dsi->mode; unsigned int num_bits_resol = dsi->driver_data->num_bits_resol; + unsigned int main_vsa_offset = dsi->driver_data->main_vsa_offset; u32 reg; if (dsi->mode_flags & MIPI_DSI_MODE_VIDEO) { @@ -1013,7 +1191,7 @@ static void samsung_dsim_set_display_mode(struct samsung_dsim *dsi) reg = DSIM_MAIN_HFP(hfp) | DSIM_MAIN_HBP(hbp); samsung_dsim_write(dsi, DSIM_MHPORCH_REG, reg); - reg = DSIM_MAIN_VSA(m->vsync_end - m->vsync_start) + reg = DSIM_MAIN_VSA(m->vsync_end - m->vsync_start, main_vsa_offset) | DSIM_MAIN_HSA(hsa); samsung_dsim_write(dsi, DSIM_MSYNC_REG, reg); } @@ -1027,6 +1205,7 @@ static void samsung_dsim_set_display_mode(struct samsung_dsim *dsi) static void samsung_dsim_set_display_enable(struct samsung_dsim *dsi, bool enable) { + const struct samsung_dsim_driver_data *driver_data = dsi->driver_data; u32 reg; reg = samsung_dsim_read(dsi, DSIM_MDRESOL_REG); @@ -1035,6 +1214,15 @@ static void samsung_dsim_set_display_enable(struct samsung_dsim *dsi, bool enabl else reg &= ~DSIM_MAIN_STAND_BY; samsung_dsim_write(dsi, DSIM_MDRESOL_REG, reg); + + if (driver_data->has_sfrctrl) { + reg = samsung_dsim_read(dsi, DSIM_SFRCTRL_REG); + if (enable) + reg |= DSIM_SFR_CTRL_STAND_BY; + else + reg &= ~DSIM_SFR_CTRL_STAND_BY; + samsung_dsim_write(dsi, DSIM_SFRCTRL_REG, reg); + } } static int samsung_dsim_wait_for_hdr_fifo(struct samsung_dsim *dsi) @@ -1091,12 +1279,13 @@ static void samsung_dsim_send_to_fifo(struct samsung_dsim *dsi, { struct device *dev = dsi->dev; struct mipi_dsi_packet *pkt = &xfer->packet; + const struct samsung_dsim_driver_data *driver_data = dsi->driver_data; const u8 *payload = pkt->payload + xfer->tx_done; u16 length = pkt->payload_length - xfer->tx_done; bool first = !xfer->tx_done; u32 reg; - dev_dbg(dev, "< xfer %pK: tx len %u, done %u, rx len %u, done %u\n", + dev_dbg(dev, "< xfer %p: tx len %u, done %u, rx len %u, done %u\n", xfer, length, xfer->tx_done, xfer->rx_len, xfer->rx_done); if (length > DSI_TX_FIFO_SIZE) @@ -1131,9 +1320,11 @@ static void samsung_dsim_send_to_fifo(struct samsung_dsim *dsi, return; reg = get_unaligned_le32(pkt->header); - if (samsung_dsim_wait_for_hdr_fifo(dsi)) { - dev_err(dev, "waiting for header FIFO timed out\n"); - return; + if (driver_data->wait_for_hdr_fifo) { + if (samsung_dsim_wait_for_hdr_fifo(dsi)) { + dev_err(dev, "waiting for header FIFO timed out\n"); + return; + } } if (NEQV(xfer->flags & MIPI_DSI_MSG_USE_LPM, @@ -1236,43 +1427,34 @@ static void samsung_dsim_transfer_start(struct samsung_dsim *dsi) { unsigned long flags; struct samsung_dsim_transfer *xfer; - bool start = false; -again: spin_lock_irqsave(&dsi->transfer_lock, flags); - if (list_empty(&dsi->transfer_list)) { - spin_unlock_irqrestore(&dsi->transfer_lock, flags); - return; - } - - xfer = list_first_entry(&dsi->transfer_list, - struct samsung_dsim_transfer, 
list); + while (!list_empty(&dsi->transfer_list)) { + xfer = list_first_entry(&dsi->transfer_list, + struct samsung_dsim_transfer, list); - spin_unlock_irqrestore(&dsi->transfer_lock, flags); + spin_unlock_irqrestore(&dsi->transfer_lock, flags); - if (xfer->packet.payload_length && - xfer->tx_done == xfer->packet.payload_length) - /* waiting for RX */ - return; + if (xfer->packet.payload_length && + xfer->tx_done == xfer->packet.payload_length) + /* waiting for RX */ + return; - samsung_dsim_send_to_fifo(dsi, xfer); + samsung_dsim_send_to_fifo(dsi, xfer); - if (xfer->packet.payload_length || xfer->rx_len) - return; + if (xfer->packet.payload_length || xfer->rx_len) + return; - xfer->result = 0; - complete(&xfer->completed); + xfer->result = 0; + complete(&xfer->completed); - spin_lock_irqsave(&dsi->transfer_lock, flags); + spin_lock_irqsave(&dsi->transfer_lock, flags); - list_del_init(&xfer->list); - start = !list_empty(&dsi->transfer_list); + list_del_init(&xfer->list); + } spin_unlock_irqrestore(&dsi->transfer_lock, flags); - - if (start) - goto again; } static bool samsung_dsim_transfer_finish(struct samsung_dsim *dsi) @@ -1294,7 +1476,7 @@ static bool samsung_dsim_transfer_finish(struct samsung_dsim *dsi) spin_unlock_irqrestore(&dsi->transfer_lock, flags); dev_dbg(dsi->dev, - "> xfer %pK, tx_len %zu, tx_done %u, rx_len %u, rx_done %u\n", + "> xfer %p, tx_len %zu, tx_done %u, rx_len %u, rx_done %u\n", xfer, xfer->packet.payload_length, xfer->tx_done, xfer->rx_len, xfer->rx_done); @@ -1935,7 +2117,7 @@ int samsung_dsim_probe(struct platform_device *pdev) { struct device *dev = &pdev->dev; struct samsung_dsim *dsi; - int ret, i; + int ret; dsi = devm_drm_bridge_alloc(dev, struct samsung_dsim, bridge, &samsung_dsim_bridge_funcs); if (IS_ERR(dsi)) @@ -1959,23 +2141,11 @@ int samsung_dsim_probe(struct platform_device *pdev) if (ret) return dev_err_probe(dev, ret, "failed to get regulators\n"); - dsi->clks = devm_kcalloc(dev, dsi->driver_data->num_clks, - sizeof(*dsi->clks), GFP_KERNEL); - if (!dsi->clks) - return -ENOMEM; - - for (i = 0; i < dsi->driver_data->num_clks; i++) { - dsi->clks[i] = devm_clk_get(dev, clk_names[i]); - if (IS_ERR(dsi->clks[i])) { - if (strcmp(clk_names[i], "sclk_mipi") == 0) { - dsi->clks[i] = devm_clk_get(dev, OLD_SCLK_MIPI_CLK_NAME); - if (!IS_ERR(dsi->clks[i])) - continue; - } - - dev_info(dev, "failed to get the clock: %s\n", clk_names[i]); - return PTR_ERR(dsi->clks[i]); - } + ret = devm_clk_bulk_get(dev, dsi->driver_data->num_clks, + dsi->driver_data->clk_data); + if (ret) { + dev_err(dev, "failed to get clocks in bulk (%d)\n", ret); + return ret; } dsi->reg_base = devm_platform_ioremap_resource(pdev, 0); @@ -2048,7 +2218,7 @@ static int samsung_dsim_suspend(struct device *dev) { struct samsung_dsim *dsi = dev_get_drvdata(dev); const struct samsung_dsim_driver_data *driver_data = dsi->driver_data; - int ret, i; + int ret; usleep_range(10000, 20000); @@ -2064,8 +2234,7 @@ static int samsung_dsim_suspend(struct device *dev) phy_power_off(dsi->phy); - for (i = driver_data->num_clks - 1; i > -1; i--) - clk_disable_unprepare(dsi->clks[i]); + clk_bulk_disable_unprepare(driver_data->num_clks, driver_data->clk_data); ret = regulator_bulk_disable(ARRAY_SIZE(dsi->supplies), dsi->supplies); if (ret < 0) @@ -2078,7 +2247,7 @@ static int samsung_dsim_resume(struct device *dev) { struct samsung_dsim *dsi = dev_get_drvdata(dev); const struct samsung_dsim_driver_data *driver_data = dsi->driver_data; - int ret, i; + int ret; ret = 
regulator_bulk_enable(ARRAY_SIZE(dsi->supplies), dsi->supplies); if (ret < 0) { @@ -2086,11 +2255,9 @@ static int samsung_dsim_resume(struct device *dev) return ret; } - for (i = 0; i < driver_data->num_clks; i++) { - ret = clk_prepare_enable(dsi->clks[i]); - if (ret < 0) - goto err_clk; - } + ret = clk_bulk_prepare_enable(driver_data->num_clks, driver_data->clk_data); + if (ret < 0) + goto err_clk; ret = phy_power_on(dsi->phy); if (ret < 0) { @@ -2101,8 +2268,7 @@ static int samsung_dsim_resume(struct device *dev) return 0; err_clk: - while (--i > -1) - clk_disable_unprepare(dsi->clks[i]); + clk_bulk_disable_unprepare(driver_data->num_clks, driver_data->clk_data); regulator_bulk_disable(ARRAY_SIZE(dsi->supplies), dsi->supplies); return ret; diff --git a/drivers/gpu/drm/bridge/sii902x.c b/drivers/gpu/drm/bridge/sii902x.c index 882973e90062..d537b1d036fb 100644 --- a/drivers/gpu/drm/bridge/sii902x.c +++ b/drivers/gpu/drm/bridge/sii902x.c @@ -458,7 +458,8 @@ static int sii902x_bridge_attach(struct drm_bridge *bridge, return 0; } -static enum drm_connector_status sii902x_bridge_detect(struct drm_bridge *bridge) +static enum drm_connector_status +sii902x_bridge_detect(struct drm_bridge *bridge, struct drm_connector *connector) { struct sii902x *sii902x = bridge_to_sii902x(bridge); diff --git a/drivers/gpu/drm/bridge/simple-bridge.c b/drivers/gpu/drm/bridge/simple-bridge.c index c66bd913e33a..e4d0bc2200f8 100644 --- a/drivers/gpu/drm/bridge/simple-bridge.c +++ b/drivers/gpu/drm/bridge/simple-bridge.c @@ -90,7 +90,7 @@ simple_bridge_connector_detect(struct drm_connector *connector, bool force) { struct simple_bridge *sbridge = drm_connector_to_simple_bridge(connector); - return drm_bridge_detect(sbridge->next_bridge); + return drm_bridge_detect(sbridge->next_bridge, connector); } static const struct drm_connector_funcs simple_bridge_con_funcs = { @@ -262,6 +262,16 @@ static const struct of_device_id simple_bridge_match[] = { .connector_type = DRM_MODE_CONNECTOR_VGA, }, }, { + .compatible = "radxa,ra620", + .data = &(const struct simple_bridge_info) { + .connector_type = DRM_MODE_CONNECTOR_HDMIA, + }, + }, { + .compatible = "realtek,rtd2171", + .data = &(const struct simple_bridge_info) { + .connector_type = DRM_MODE_CONNECTOR_HDMIA, + }, + }, { .compatible = "ti,opa362", .data = &(const struct simple_bridge_info) { .connector_type = DRM_MODE_CONNECTOR_Composite, diff --git a/drivers/gpu/drm/bridge/ssd2825.c b/drivers/gpu/drm/bridge/ssd2825.c new file mode 100644 index 000000000000..f2fdbf7c117d --- /dev/null +++ b/drivers/gpu/drm/bridge/ssd2825.c @@ -0,0 +1,775 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <linux/clk.h> +#include <linux/delay.h> +#include <linux/device.h> +#include <linux/err.h> +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/mod_devicetable.h> +#include <linux/mutex.h> +#include <linux/of.h> +#include <linux/regulator/consumer.h> +#include <linux/spi/spi.h> +#include <linux/units.h> + +#include <drm/drm_atomic_helper.h> +#include <drm/drm_bridge.h> +#include <drm/drm_drv.h> +#include <drm/drm_mipi_dsi.h> +#include <drm/drm_of.h> +#include <drm/drm_panel.h> +#include <video/mipi_display.h> + +#define SSD2825_DEVICE_ID_REG 0xb0 +#define SSD2825_RGB_INTERFACE_CTRL_REG_1 0xb1 +#define SSD2825_RGB_INTERFACE_CTRL_REG_2 0xb2 +#define SSD2825_RGB_INTERFACE_CTRL_REG_3 0xb3 +#define SSD2825_RGB_INTERFACE_CTRL_REG_4 0xb4 +#define SSD2825_RGB_INTERFACE_CTRL_REG_5 0xb5 +#define SSD2825_RGB_INTERFACE_CTRL_REG_6 0xb6 +#define SSD2825_NON_BURST_EV BIT(2) +#define 
SSD2825_BURST BIT(3) +#define SSD2825_PCKL_HIGH BIT(13) +#define SSD2825_HSYNC_HIGH BIT(14) +#define SSD2825_VSYNC_HIGH BIT(15) +#define SSD2825_CONFIGURATION_REG 0xb7 +#define SSD2825_CONF_REG_HS BIT(0) +#define SSD2825_CONF_REG_CKE BIT(1) +#define SSD2825_CONF_REG_SLP BIT(2) +#define SSD2825_CONF_REG_VEN BIT(3) +#define SSD2825_CONF_REG_HCLK BIT(4) +#define SSD2825_CONF_REG_CSS BIT(5) +#define SSD2825_CONF_REG_DCS BIT(6) +#define SSD2825_CONF_REG_REN BIT(7) +#define SSD2825_CONF_REG_ECD BIT(8) +#define SSD2825_CONF_REG_EOT BIT(9) +#define SSD2825_CONF_REG_LPE BIT(10) +#define SSD2825_VC_CTRL_REG 0xb8 +#define SSD2825_PLL_CTRL_REG 0xb9 +#define SSD2825_PLL_CONFIGURATION_REG 0xba +#define SSD2825_CLOCK_CTRL_REG 0xbb +#define SSD2825_PACKET_SIZE_CTRL_REG_1 0xbc +#define SSD2825_PACKET_SIZE_CTRL_REG_2 0xbd +#define SSD2825_PACKET_SIZE_CTRL_REG_3 0xbe +#define SSD2825_PACKET_DROP_REG 0xbf +#define SSD2825_OPERATION_CTRL_REG 0xc0 +#define SSD2825_MAX_RETURN_SIZE_REG 0xc1 +#define SSD2825_RETURN_DATA_COUNT_REG 0xc2 +#define SSD2825_ACK_RESPONSE_REG 0xc3 +#define SSD2825_LINE_CTRL_REG 0xc4 +#define SSD2825_INTERRUPT_CTRL_REG 0xc5 +#define SSD2825_INTERRUPT_STATUS_REG 0xc6 +#define SSD2825_ERROR_STATUS_REG 0xc7 +#define SSD2825_DATA_FORMAT_REG 0xc8 +#define SSD2825_DELAY_ADJ_REG_1 0xc9 +#define SSD2825_DELAY_ADJ_REG_2 0xca +#define SSD2825_DELAY_ADJ_REG_3 0xcb +#define SSD2825_DELAY_ADJ_REG_4 0xcc +#define SSD2825_DELAY_ADJ_REG_5 0xcd +#define SSD2825_DELAY_ADJ_REG_6 0xce +#define SSD2825_HS_TX_TIMER_REG_1 0xcf +#define SSD2825_HS_TX_TIMER_REG_2 0xd0 +#define SSD2825_LP_RX_TIMER_REG_1 0xd1 +#define SSD2825_LP_RX_TIMER_REG_2 0xd2 +#define SSD2825_TE_STATUS_REG 0xd3 +#define SSD2825_SPI_READ_REG 0xd4 +#define SSD2825_SPI_READ_REG_RESET 0xfa +#define SSD2825_PLL_LOCK_REG 0xd5 +#define SSD2825_TEST_REG 0xd6 +#define SSD2825_TE_COUNT_REG 0xd7 +#define SSD2825_ANALOG_CTRL_REG_1 0xd8 +#define SSD2825_ANALOG_CTRL_REG_2 0xd9 +#define SSD2825_ANALOG_CTRL_REG_3 0xda +#define SSD2825_ANALOG_CTRL_REG_4 0xdb +#define SSD2825_INTERRUPT_OUT_CTRL_REG 0xdc +#define SSD2825_RGB_INTERFACE_CTRL_REG_7 0xdd +#define SSD2825_LANE_CONFIGURATION_REG 0xde +#define SSD2825_DELAY_ADJ_REG_7 0xdf +#define SSD2825_INPUT_PIN_CTRL_REG_1 0xe0 +#define SSD2825_INPUT_PIN_CTRL_REG_2 0xe1 +#define SSD2825_BIDIR_PIN_CTRL_REG_1 0xe2 +#define SSD2825_BIDIR_PIN_CTRL_REG_2 0xe3 +#define SSD2825_BIDIR_PIN_CTRL_REG_3 0xe4 +#define SSD2825_BIDIR_PIN_CTRL_REG_4 0xe5 +#define SSD2825_BIDIR_PIN_CTRL_REG_5 0xe6 +#define SSD2825_BIDIR_PIN_CTRL_REG_6 0xe7 +#define SSD2825_BIDIR_PIN_CTRL_REG_7 0xe8 +#define SSD2825_CABC_BRIGHTNESS_CTRL_REG_1 0xe9 +#define SSD2825_CABC_BRIGHTNESS_CTRL_REG_2 0xea +#define SSD2825_CABC_BRIGHTNESS_STATUS_REG 0xeb +#define SSD2825_READ_REG 0xff + +#define SSD2825_COM_BYTE 0x00 +#define SSD2825_DAT_BYTE 0x01 + +#define SSD2828_LP_CLOCK_DIVIDER(n) (((n) - 1) & 0x3f) +#define SSD2825_LP_MIN_CLK 5000 /* KHz */ +#define SSD2825_REF_MIN_CLK 2000 /* KHz */ + +static const struct regulator_bulk_data ssd2825_supplies[] = { + { .supply = "dvdd" }, + { .supply = "avdd" }, + { .supply = "vddio" }, +}; + +struct ssd2825_dsi_output { + struct mipi_dsi_device *dev; + struct drm_panel *panel; + struct drm_bridge *bridge; +}; + +struct ssd2825_priv { + struct spi_device *spi; + struct device *dev; + + struct gpio_desc *reset_gpio; + struct regulator_bulk_data *supplies; + + struct clk *tx_clk; + + struct mipi_dsi_host dsi_host; + struct drm_bridge bridge; + struct ssd2825_dsi_output output; + + struct mutex mlock; /* for host transfer 
operations */ + + u32 pd_lines; /* number of Parallel Port Input Data Lines */ + u32 dsi_lanes; /* number of DSI Lanes */ + + /* Parameters for PLL programming */ + u32 pll_freq_kbps; /* PLL in kbps */ + u32 nibble_freq_khz; /* PLL div by 4 */ + + u32 hzd; /* HS Zero Delay in ns*/ + u32 hpd; /* HS Prepare Delay is ns */ +}; + +static inline struct ssd2825_priv *dsi_host_to_ssd2825(struct mipi_dsi_host *host) +{ + return container_of(host, struct ssd2825_priv, dsi_host); +} + +static inline struct ssd2825_priv *bridge_to_ssd2825(struct drm_bridge *bridge) +{ + return container_of(bridge, struct ssd2825_priv, bridge); +} + +static int ssd2825_write_raw(struct ssd2825_priv *priv, u8 high_byte, u8 low_byte) +{ + struct spi_device *spi = priv->spi; + u8 tx_buf[2]; + + /* + * Low byte is the value, high byte defines type of + * write cycle, 0 for command and 1 for data. + */ + tx_buf[0] = low_byte; + tx_buf[1] = high_byte; + + return spi_write(spi, tx_buf, 2); +} + +static int ssd2825_write_reg(struct ssd2825_priv *priv, u8 reg, u16 command) +{ + u8 datal = (command & 0x00FF); + u8 datah = (command & 0xFF00) >> 8; + int ret; + + /* Command write cycle */ + ret = ssd2825_write_raw(priv, SSD2825_COM_BYTE, reg); + if (ret) + return ret; + + /* Data write cycle bits 7-0 */ + ret = ssd2825_write_raw(priv, SSD2825_DAT_BYTE, datal); + if (ret) + return ret; + + /* Data write cycle bits 15-8 */ + ret = ssd2825_write_raw(priv, SSD2825_DAT_BYTE, datah); + if (ret) + return ret; + + return 0; +} + +static int ssd2825_write_dsi(struct ssd2825_priv *priv, const u8 *command, int len) +{ + int ret, i; + + ret = ssd2825_write_reg(priv, SSD2825_PACKET_SIZE_CTRL_REG_1, len); + if (ret) + return ret; + + ret = ssd2825_write_raw(priv, SSD2825_COM_BYTE, SSD2825_PACKET_DROP_REG); + if (ret) + return ret; + + for (i = 0; i < len; i++) { + ret = ssd2825_write_raw(priv, SSD2825_DAT_BYTE, command[i]); + if (ret) + return ret; + } + + return 0; +} + +static int ssd2825_read_raw(struct ssd2825_priv *priv, u8 cmd, u16 *data) +{ + struct spi_device *spi = priv->spi; + struct spi_message msg; + struct spi_transfer xfer[2]; + u8 tx_buf[2]; + u8 rx_buf[2]; + int ret; + + memset(&xfer, 0, sizeof(xfer)); + + tx_buf[1] = (cmd & 0xFF00) >> 8; + tx_buf[0] = (cmd & 0x00FF); + + xfer[0].tx_buf = tx_buf; + xfer[0].bits_per_word = 9; + xfer[0].len = 2; + + xfer[1].rx_buf = rx_buf; + xfer[1].bits_per_word = 16; + xfer[1].len = 2; + + spi_message_init(&msg); + spi_message_add_tail(&xfer[0], &msg); + spi_message_add_tail(&xfer[1], &msg); + + ret = spi_sync(spi, &msg); + if (ret) { + dev_err(&spi->dev, "ssd2825 read raw failed %d\n", ret); + return ret; + } + + *data = rx_buf[1] | (rx_buf[0] << 8); + + return 0; +} + +static int ssd2825_read_reg(struct ssd2825_priv *priv, u8 reg, u16 *data) +{ + int ret; + + /* Reset the read register */ + ret = ssd2825_write_reg(priv, SSD2825_SPI_READ_REG, SSD2825_SPI_READ_REG_RESET); + if (ret) + return ret; + + /* Push the address to read */ + ret = ssd2825_write_raw(priv, SSD2825_COM_BYTE, reg); + if (ret) + return ret; + + /* Perform a reading cycle */ + ret = ssd2825_read_raw(priv, SSD2825_SPI_READ_REG_RESET, data); + if (ret) + return ret; + + return 0; +} + +static int ssd2825_dsi_host_attach(struct mipi_dsi_host *host, struct mipi_dsi_device *dev) +{ + struct ssd2825_priv *priv = dsi_host_to_ssd2825(host); + struct drm_bridge *bridge; + struct drm_panel *panel; + struct device_node *ep; + int ret; + + if (dev->lanes > 4) { + dev_err(priv->dev, "unsupported number of data lanes(%u)\n", dev->lanes); 
+ return -EINVAL; + } + + /* + * ssd2825 supports both Video and Pulse mode, but the driver only + * implements Video (event) mode currently + */ + if (!(dev->mode_flags & MIPI_DSI_MODE_VIDEO)) { + dev_err(priv->dev, "Only MIPI_DSI_MODE_VIDEO is supported\n"); + return -EOPNOTSUPP; + } + + ret = drm_of_find_panel_or_bridge(host->dev->of_node, 1, 0, &panel, &bridge); + if (ret) + return ret; + + if (panel) { + bridge = drm_panel_bridge_add_typed(panel, DRM_MODE_CONNECTOR_DSI); + if (IS_ERR(bridge)) + return PTR_ERR(bridge); + } + + priv->output.dev = dev; + priv->output.bridge = bridge; + priv->output.panel = panel; + + priv->dsi_lanes = dev->lanes; + + /* get input ep (port0/endpoint0) */ + ret = -EINVAL; + ep = of_graph_get_endpoint_by_regs(host->dev->of_node, 0, 0); + if (ep) { + ret = of_property_read_u32(ep, "bus-width", &priv->pd_lines); + of_node_put(ep); + } + + if (ret) + priv->pd_lines = mipi_dsi_pixel_format_to_bpp(dev->format); + + drm_bridge_add(&priv->bridge); + + return 0; +} + +static int ssd2825_dsi_host_detach(struct mipi_dsi_host *host, struct mipi_dsi_device *dev) +{ + struct ssd2825_priv *priv = dsi_host_to_ssd2825(host); + + drm_bridge_remove(&priv->bridge); + if (priv->output.panel) + drm_panel_bridge_remove(priv->output.bridge); + + return 0; +} + +static ssize_t ssd2825_dsi_host_transfer(struct mipi_dsi_host *host, + const struct mipi_dsi_msg *msg) +{ + struct ssd2825_priv *priv = dsi_host_to_ssd2825(host); + u16 config; + int ret; + + if (msg->rx_len) { + dev_warn(priv->dev, "MIPI rx is not supported\n"); + return -EOPNOTSUPP; + } + + guard(mutex)(&priv->mlock); + + ret = ssd2825_read_reg(priv, SSD2825_CONFIGURATION_REG, &config); + if (ret) + return ret; + + switch (msg->type) { + case MIPI_DSI_DCS_SHORT_WRITE: + case MIPI_DSI_DCS_SHORT_WRITE_PARAM: + case MIPI_DSI_DCS_LONG_WRITE: + config |= SSD2825_CONF_REG_DCS; + break; + case MIPI_DSI_GENERIC_SHORT_WRITE_0_PARAM: + case MIPI_DSI_GENERIC_SHORT_WRITE_1_PARAM: + case MIPI_DSI_GENERIC_SHORT_WRITE_2_PARAM: + case MIPI_DSI_GENERIC_LONG_WRITE: + config &= ~SSD2825_CONF_REG_DCS; + break; + case MIPI_DSI_DCS_READ: + case MIPI_DSI_GENERIC_READ_REQUEST_0_PARAM: + case MIPI_DSI_GENERIC_READ_REQUEST_1_PARAM: + case MIPI_DSI_GENERIC_READ_REQUEST_2_PARAM: + default: + return 0; + } + + ret = ssd2825_write_reg(priv, SSD2825_CONFIGURATION_REG, config); + if (ret) + return ret; + + ret = ssd2825_write_reg(priv, SSD2825_VC_CTRL_REG, 0x0000); + if (ret) + return ret; + + ret = ssd2825_write_dsi(priv, msg->tx_buf, msg->tx_len); + if (ret) + return ret; + + return 0; +} + +static const struct mipi_dsi_host_ops ssd2825_dsi_host_ops = { + .attach = ssd2825_dsi_host_attach, + .detach = ssd2825_dsi_host_detach, + .transfer = ssd2825_dsi_host_transfer, +}; + +static void ssd2825_hw_reset(struct ssd2825_priv *priv) +{ + gpiod_set_value_cansleep(priv->reset_gpio, 1); + usleep_range(5000, 6000); + gpiod_set_value_cansleep(priv->reset_gpio, 0); + usleep_range(5000, 6000); +} + +/* + * PLL configuration register settings. + * + * See the "PLL Configuration Register Description" in the SSD2825 datasheet. 
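+ * + * The register is packed as (FR << 14) | (DIV << 8) | MUL: construct_pll_config() + * below raises DIV (capped at 31) while the divided reference clock stays at or + * above SSD2825_REF_MIN_CLK, rounds MUL up so reference * MUL / DIV reaches the + * requested bit rate, and picks FR from the resulting PLL frequency range.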
+ */ +static u16 construct_pll_config(struct ssd2825_priv *priv, + u32 desired_pll_freq_kbps, u32 reference_freq_khz) +{ + u32 div_factor = 1, mul_factor, fr = 0; + + while (reference_freq_khz / (div_factor + 1) >= SSD2825_REF_MIN_CLK) + div_factor++; + if (div_factor > 31) + div_factor = 31; + + mul_factor = DIV_ROUND_UP(desired_pll_freq_kbps * div_factor, + reference_freq_khz); + + priv->pll_freq_kbps = reference_freq_khz * mul_factor / div_factor; + priv->nibble_freq_khz = priv->pll_freq_kbps / 4; + + if (priv->pll_freq_kbps >= 501000) + fr = 3; + else if (priv->pll_freq_kbps >= 251000) + fr = 2; + else if (priv->pll_freq_kbps >= 126000) + fr = 1; + + return (fr << 14) | (div_factor << 8) | mul_factor; +} + +static int ssd2825_setup_pll(struct ssd2825_priv *priv, + const struct drm_display_mode *mode) +{ + u16 pll_config, lp_div; + u32 nibble_delay, pclk_mult, tx_freq_khz; + u8 hzd, hpd; + + tx_freq_khz = clk_get_rate(priv->tx_clk) / KILO; + if (!tx_freq_khz) + tx_freq_khz = SSD2825_REF_MIN_CLK; + + pclk_mult = priv->pd_lines / priv->dsi_lanes + 1; + pll_config = construct_pll_config(priv, pclk_mult * mode->clock, + tx_freq_khz); + + lp_div = priv->pll_freq_kbps / (SSD2825_LP_MIN_CLK * 8); + + /* nibble_delay in nanoseconds */ + nibble_delay = MICRO / priv->nibble_freq_khz; + + hzd = priv->hzd / nibble_delay; + hpd = (priv->hpd - 4 * nibble_delay) / nibble_delay; + + /* Disable PLL */ + ssd2825_write_reg(priv, SSD2825_PLL_CTRL_REG, 0x0000); + ssd2825_write_reg(priv, SSD2825_LINE_CTRL_REG, 0x0001); + + /* Set delays */ + ssd2825_write_reg(priv, SSD2825_DELAY_ADJ_REG_1, (hzd << 8) | hpd); + + /* Set PLL coefficients */ + ssd2825_write_reg(priv, SSD2825_PLL_CONFIGURATION_REG, pll_config); + + /* Clock Control Register */ + ssd2825_write_reg(priv, SSD2825_CLOCK_CTRL_REG, + SSD2828_LP_CLOCK_DIVIDER(lp_div)); + + /* Enable PLL */ + ssd2825_write_reg(priv, SSD2825_PLL_CTRL_REG, 0x0001); + ssd2825_write_reg(priv, SSD2825_VC_CTRL_REG, 0); + + return 0; +} + +static void ssd2825_bridge_atomic_pre_enable(struct drm_bridge *bridge, + struct drm_atomic_state *state) +{ + struct ssd2825_priv *priv = bridge_to_ssd2825(bridge); + struct mipi_dsi_device *dsi_dev = priv->output.dev; + const struct drm_crtc_state *crtc_state; + const struct drm_display_mode *mode; + struct drm_connector *connector; + struct drm_crtc *crtc; + u32 input_bus_flags = bridge->timings->input_bus_flags; + u16 flags = 0, config; + u8 pixel_format; + int ret; + + /* Power Sequence */ + ret = clk_prepare_enable(priv->tx_clk); + if (ret) + dev_err(priv->dev, "error enabling tx_clk (%d)\n", ret); + + ret = regulator_bulk_enable(ARRAY_SIZE(ssd2825_supplies), priv->supplies); + if (ret) + dev_err(priv->dev, "error enabling regulators (%d)\n", ret); + + usleep_range(1000, 2000); + + ssd2825_hw_reset(priv); + + /* Perform SW reset */ + ssd2825_write_reg(priv, SSD2825_OPERATION_CTRL_REG, 0x0100); + + /* Set pixel format */ + switch (dsi_dev->format) { + case MIPI_DSI_FMT_RGB565: + pixel_format = 0x00; + break; + case MIPI_DSI_FMT_RGB666_PACKED: + pixel_format = 0x01; + break; + case MIPI_DSI_FMT_RGB666: + pixel_format = 0x02; + break; + case MIPI_DSI_FMT_RGB888: + default: + pixel_format = 0x03; + break; + } + + connector = drm_atomic_get_new_connector_for_encoder(state, bridge->encoder); + crtc = drm_atomic_get_new_connector_state(state, connector)->crtc; + crtc_state = drm_atomic_get_new_crtc_state(state, crtc); + mode = &crtc_state->adjusted_mode; + + /* Set panel timings */ + ssd2825_write_reg(priv, SSD2825_RGB_INTERFACE_CTRL_REG_1, + 
((mode->vtotal - mode->vsync_end) << 8) | + (mode->htotal - mode->hsync_end)); + ssd2825_write_reg(priv, SSD2825_RGB_INTERFACE_CTRL_REG_2, + ((mode->vtotal - mode->vsync_start) << 8) | + (mode->htotal - mode->hsync_start)); + ssd2825_write_reg(priv, SSD2825_RGB_INTERFACE_CTRL_REG_3, + ((mode->vsync_start - mode->vdisplay) << 8) | + (mode->hsync_start - mode->hdisplay)); + ssd2825_write_reg(priv, SSD2825_RGB_INTERFACE_CTRL_REG_4, mode->hdisplay); + ssd2825_write_reg(priv, SSD2825_RGB_INTERFACE_CTRL_REG_5, mode->vdisplay); + + if (mode->flags & DRM_MODE_FLAG_PHSYNC) + flags |= SSD2825_HSYNC_HIGH; + + if (mode->flags & DRM_MODE_FLAG_PVSYNC) + flags |= SSD2825_VSYNC_HIGH; + + if (dsi_dev->mode_flags & MIPI_DSI_MODE_VIDEO) + flags |= SSD2825_NON_BURST_EV; + + if (input_bus_flags & DRM_BUS_FLAG_PIXDATA_SAMPLE_POSEDGE) + flags |= SSD2825_PCKL_HIGH; + + ssd2825_write_reg(priv, SSD2825_RGB_INTERFACE_CTRL_REG_6, flags | pixel_format); + ssd2825_write_reg(priv, SSD2825_LANE_CONFIGURATION_REG, dsi_dev->lanes - 1); + ssd2825_write_reg(priv, SSD2825_TEST_REG, 0x0004); + + /* Call PLL configuration */ + ssd2825_setup_pll(priv, mode); + + usleep_range(10000, 11000); + + config = SSD2825_CONF_REG_HS | SSD2825_CONF_REG_CKE | SSD2825_CONF_REG_DCS | + SSD2825_CONF_REG_ECD | SSD2825_CONF_REG_EOT; + + if (dsi_dev->mode_flags & MIPI_DSI_MODE_LPM) + config &= ~SSD2825_CONF_REG_HS; + + if (dsi_dev->mode_flags & MIPI_DSI_MODE_NO_EOT_PACKET) + config &= ~SSD2825_CONF_REG_EOT; + + /* Initial DSI configuration register set */ + ssd2825_write_reg(priv, SSD2825_CONFIGURATION_REG, config); + ssd2825_write_reg(priv, SSD2825_VC_CTRL_REG, 0); + + if (priv->output.panel) + drm_panel_enable(priv->output.panel); +} + +static void ssd2825_bridge_atomic_enable(struct drm_bridge *bridge, + struct drm_atomic_state *state) +{ + struct ssd2825_priv *priv = bridge_to_ssd2825(bridge); + struct mipi_dsi_device *dsi_dev = priv->output.dev; + u16 config; + + config = SSD2825_CONF_REG_HS | SSD2825_CONF_REG_DCS | + SSD2825_CONF_REG_ECD | SSD2825_CONF_REG_EOT; + + if (dsi_dev->mode_flags & MIPI_DSI_MODE_VIDEO) + config |= SSD2825_CONF_REG_VEN; + + if (dsi_dev->mode_flags & MIPI_DSI_MODE_NO_EOT_PACKET) + config &= ~SSD2825_CONF_REG_EOT; + + /* Complete configuration after DSI commands were sent */ + ssd2825_write_reg(priv, SSD2825_CONFIGURATION_REG, config); + ssd2825_write_reg(priv, SSD2825_PLL_CTRL_REG, 0x0001); + ssd2825_write_reg(priv, SSD2825_VC_CTRL_REG, 0x0000); +} + +static void ssd2825_bridge_atomic_disable(struct drm_bridge *bridge, + struct drm_atomic_state *state) +{ + struct ssd2825_priv *priv = bridge_to_ssd2825(bridge); + int ret; + + msleep(100); + + /* Exit DSI configuration register set */ + ssd2825_write_reg(priv, SSD2825_CONFIGURATION_REG, + SSD2825_CONF_REG_ECD | SSD2825_CONF_REG_EOT); + ssd2825_write_reg(priv, SSD2825_VC_CTRL_REG, 0); + + /* HW disable */ + gpiod_set_value_cansleep(priv->reset_gpio, 1); + usleep_range(5000, 6000); + + ret = regulator_bulk_disable(ARRAY_SIZE(ssd2825_supplies), + priv->supplies); + if (ret < 0) + dev_err(priv->dev, "error disabling regulators (%d)\n", ret); + + clk_disable_unprepare(priv->tx_clk); +} + +static int ssd2825_bridge_attach(struct drm_bridge *bridge, struct drm_encoder *encoder, + enum drm_bridge_attach_flags flags) +{ + struct ssd2825_priv *priv = bridge_to_ssd2825(bridge); + + return drm_bridge_attach(bridge->encoder, priv->output.bridge, bridge, + flags); +} + +static enum drm_mode_status +ssd2825_bridge_mode_valid(struct drm_bridge *bridge, + const struct drm_display_info 
*info, + const struct drm_display_mode *mode) +{ + if (mode->hdisplay > 1366) + return MODE_H_ILLEGAL; + + if (mode->vdisplay > 1366) + return MODE_V_ILLEGAL; + + return MODE_OK; +} + +static bool ssd2825_mode_fixup(struct drm_bridge *bridge, + const struct drm_display_mode *mode, + struct drm_display_mode *adjusted_mode) +{ + /* Default to positive sync */ + + if (!(adjusted_mode->flags & + (DRM_MODE_FLAG_PHSYNC | DRM_MODE_FLAG_NHSYNC))) + adjusted_mode->flags |= DRM_MODE_FLAG_PHSYNC; + + if (!(adjusted_mode->flags & + (DRM_MODE_FLAG_PVSYNC | DRM_MODE_FLAG_NVSYNC))) + adjusted_mode->flags |= DRM_MODE_FLAG_PVSYNC; + + return true; +} + +static const struct drm_bridge_funcs ssd2825_bridge_funcs = { + .attach = ssd2825_bridge_attach, + .mode_valid = ssd2825_bridge_mode_valid, + .mode_fixup = ssd2825_mode_fixup, + + .atomic_pre_enable = ssd2825_bridge_atomic_pre_enable, + .atomic_enable = ssd2825_bridge_atomic_enable, + .atomic_disable = ssd2825_bridge_atomic_disable, + + .atomic_reset = drm_atomic_helper_bridge_reset, + .atomic_duplicate_state = drm_atomic_helper_bridge_duplicate_state, + .atomic_destroy_state = drm_atomic_helper_bridge_destroy_state, +}; + +static const struct drm_bridge_timings default_ssd2825_timings = { + .input_bus_flags = DRM_BUS_FLAG_PIXDATA_SAMPLE_POSEDGE + | DRM_BUS_FLAG_SYNC_SAMPLE_NEGEDGE + | DRM_BUS_FLAG_DE_HIGH, +}; + +static int ssd2825_probe(struct spi_device *spi) +{ + struct ssd2825_priv *priv; + struct device *dev = &spi->dev; + struct device_node *np = dev->of_node; + int ret; + + /* Driver supports only 3 Wire mode, 9 bits per word (8 data bits + command/data bit) */ + spi->bits_per_word = 9; + + ret = spi_setup(spi); + if (ret) + return ret; + + priv = devm_drm_bridge_alloc(dev, struct ssd2825_priv, bridge, &ssd2825_bridge_funcs); + if (IS_ERR(priv)) + return PTR_ERR(priv); + + spi_set_drvdata(spi, priv); + + priv->spi = spi; + priv->dev = dev; + + mutex_init(&priv->mlock); + + priv->tx_clk = devm_clk_get_optional(dev, NULL); + if (IS_ERR(priv->tx_clk)) + return dev_err_probe(dev, PTR_ERR(priv->tx_clk), + "can't retrieve bridge tx_clk\n"); + + priv->reset_gpio = devm_gpiod_get_optional(dev, "reset", + GPIOD_OUT_HIGH); + if (IS_ERR(priv->reset_gpio)) + return dev_err_probe(dev, PTR_ERR(priv->reset_gpio), + "failed to get reset GPIO\n"); + + ret = devm_regulator_bulk_get_const(dev, ARRAY_SIZE(ssd2825_supplies), + ssd2825_supplies, &priv->supplies); + if (ret) + return dev_err_probe(dev, ret, "failed to get regulators\n"); + + priv->hzd = 133; /* ns */ + device_property_read_u32(dev, "solomon,hs-zero-delay-ns", &priv->hzd); + + priv->hpd = 40; /* ns */ + device_property_read_u32(dev, "solomon,hs-prep-delay-ns", &priv->hpd); + + priv->dsi_host.dev = dev; + priv->dsi_host.ops = &ssd2825_dsi_host_ops; + + priv->bridge.timings = &default_ssd2825_timings; + priv->bridge.of_node = np; + + return mipi_dsi_host_register(&priv->dsi_host); +} + +static void ssd2825_remove(struct spi_device *spi) +{ + struct ssd2825_priv *priv = spi_get_drvdata(spi); + + mipi_dsi_host_unregister(&priv->dsi_host); +} + +static const struct of_device_id ssd2825_of_match[] = { + { .compatible = "solomon,ssd2825" }, + { /* sentinel */ } +}; +MODULE_DEVICE_TABLE(of, ssd2825_of_match); + +static struct spi_driver ssd2825_driver = { + .driver = { + .name = "ssd2825", + .of_match_table = ssd2825_of_match, + }, + .probe = ssd2825_probe, + .remove = ssd2825_remove, +}; +module_spi_driver(ssd2825_driver); + +MODULE_AUTHOR("Svyatoslav Ryhel <clamor95@gmail.com>"); +MODULE_DESCRIPTION("Solomon SSD2825 RGB to MIPI-DSI bridge SPI driver");
+MODULE_LICENSE("GPL"); diff --git a/drivers/gpu/drm/bridge/synopsys/Kconfig b/drivers/gpu/drm/bridge/synopsys/Kconfig index f3ab2f985f8c..2c5e532410de 100644 --- a/drivers/gpu/drm/bridge/synopsys/Kconfig +++ b/drivers/gpu/drm/bridge/synopsys/Kconfig @@ -1,4 +1,11 @@ # SPDX-License-Identifier: GPL-2.0-only +config DRM_DW_DP + tristate + select DRM_DISPLAY_HELPER + select DRM_DISPLAY_DP_HELPER + select DRM_KMS_HELPER + select REGMAP_MMIO + config DRM_DW_HDMI tristate select DRM_DISPLAY_HDMI_HELPER diff --git a/drivers/gpu/drm/bridge/synopsys/Makefile b/drivers/gpu/drm/bridge/synopsys/Makefile index 9dc376d220ad..4dada44029ac 100644 --- a/drivers/gpu/drm/bridge/synopsys/Makefile +++ b/drivers/gpu/drm/bridge/synopsys/Makefile @@ -1,4 +1,5 @@ # SPDX-License-Identifier: GPL-2.0-only +obj-$(CONFIG_DRM_DW_DP) += dw-dp.o obj-$(CONFIG_DRM_DW_HDMI) += dw-hdmi.o obj-$(CONFIG_DRM_DW_HDMI_AHB_AUDIO) += dw-hdmi-ahb-audio.o obj-$(CONFIG_DRM_DW_HDMI_GP_AUDIO) += dw-hdmi-gp-audio.o diff --git a/drivers/gpu/drm/bridge/synopsys/dw-dp.c b/drivers/gpu/drm/bridge/synopsys/dw-dp.c new file mode 100644 index 000000000000..9bbfe8da3de0 --- /dev/null +++ b/drivers/gpu/drm/bridge/synopsys/dw-dp.c @@ -0,0 +1,2095 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Synopsys DesignWare Cores DisplayPort Transmitter Controller + * + * Copyright (c) 2025 Rockchip Electronics Co., Ltd. + * + * Author: Andy Yan <andy.yan@rock-chips.com> + */ +#include <linux/bitfield.h> +#include <linux/clk.h> +#include <linux/iopoll.h> +#include <linux/irq.h> +#include <linux/media-bus-format.h> +#include <linux/of_device.h> +#include <linux/platform_device.h> +#include <linux/regmap.h> +#include <linux/reset.h> +#include <linux/phy/phy.h> +#include <linux/unaligned.h> + +#include <drm/bridge/dw_dp.h> +#include <drm/drm_atomic_helper.h> +#include <drm/drm_bridge.h> +#include <drm/drm_bridge_connector.h> +#include <drm/display/drm_dp_helper.h> +#include <drm/drm_edid.h> +#include <drm/drm_of.h> +#include <drm/drm_print.h> +#include <drm/drm_probe_helper.h> +#include <drm/drm_simple_kms_helper.h> + +#define DW_DP_VERSION_NUMBER 0x0000 +#define DW_DP_VERSION_TYPE 0x0004 +#define DW_DP_ID 0x0008 + +#define DW_DP_CONFIG_REG1 0x0100 +#define DW_DP_CONFIG_REG2 0x0104 +#define DW_DP_CONFIG_REG3 0x0108 + +#define DW_DP_CCTL 0x0200 +#define FORCE_HPD BIT(4) +#define DEFAULT_FAST_LINK_TRAIN_EN BIT(2) +#define ENHANCE_FRAMING_EN BIT(1) +#define SCRAMBLE_DIS BIT(0) +#define DW_DP_SOFT_RESET_CTRL 0x0204 +#define VIDEO_RESET BIT(5) +#define AUX_RESET BIT(4) +#define AUDIO_SAMPLER_RESET BIT(3) +#define HDCP_MODULE_RESET BIT(2) +#define PHY_SOFT_RESET BIT(1) +#define CONTROLLER_RESET BIT(0) + +#define DW_DP_VSAMPLE_CTRL 0x0300 +#define PIXEL_MODE_SELECT GENMASK(22, 21) +#define VIDEO_MAPPING GENMASK(20, 16) +#define VIDEO_STREAM_ENABLE BIT(5) + +#define DW_DP_VSAMPLE_STUFF_CTRL1 0x0304 + +#define DW_DP_VSAMPLE_STUFF_CTRL2 0x0308 + +#define DW_DP_VINPUT_POLARITY_CTRL 0x030c +#define DE_IN_POLARITY BIT(2) +#define HSYNC_IN_POLARITY BIT(1) +#define VSYNC_IN_POLARITY BIT(0) + +#define DW_DP_VIDEO_CONFIG1 0x0310 +#define HACTIVE GENMASK(31, 16) +#define HBLANK GENMASK(15, 2) +#define I_P BIT(1) +#define R_V_BLANK_IN_OSC BIT(0) + +#define DW_DP_VIDEO_CONFIG2 0x0314 +#define VBLANK GENMASK(31, 16) +#define VACTIVE GENMASK(15, 0) + +#define DW_DP_VIDEO_CONFIG3 0x0318 +#define H_SYNC_WIDTH GENMASK(31, 16) +#define H_FRONT_PORCH GENMASK(15, 0) + +#define DW_DP_VIDEO_CONFIG4 0x031c +#define V_SYNC_WIDTH GENMASK(31, 16) +#define V_FRONT_PORCH GENMASK(15, 0) + +#define 
DW_DP_VIDEO_CONFIG5 0x0320 +#define INIT_THRESHOLD_HI GENMASK(22, 21) +#define AVERAGE_BYTES_PER_TU_FRAC GENMASK(19, 16) +#define INIT_THRESHOLD GENMASK(13, 7) +#define AVERAGE_BYTES_PER_TU GENMASK(6, 0) + +#define DW_DP_VIDEO_MSA1 0x0324 +#define VSTART GENMASK(31, 16) +#define HSTART GENMASK(15, 0) + +#define DW_DP_VIDEO_MSA2 0x0328 +#define MISC0 GENMASK(31, 24) + +#define DW_DP_VIDEO_MSA3 0x032c +#define MISC1 GENMASK(31, 24) + +#define DW_DP_VIDEO_HBLANK_INTERVAL 0x0330 +#define HBLANK_INTERVAL_EN BIT(16) +#define HBLANK_INTERVAL GENMASK(15, 0) + +#define DW_DP_AUD_CONFIG1 0x0400 +#define AUDIO_TIMESTAMP_VERSION_NUM GENMASK(29, 24) +#define AUDIO_PACKET_ID GENMASK(23, 16) +#define AUDIO_MUTE BIT(15) +#define NUM_CHANNELS GENMASK(14, 12) +#define HBR_MODE_ENABLE BIT(10) +#define AUDIO_DATA_WIDTH GENMASK(9, 5) +#define AUDIO_DATA_IN_EN GENMASK(4, 1) +#define AUDIO_INF_SELECT BIT(0) + +#define DW_DP_SDP_VERTICAL_CTRL 0x0500 +#define EN_VERTICAL_SDP BIT(2) +#define EN_AUDIO_STREAM_SDP BIT(1) +#define EN_AUDIO_TIMESTAMP_SDP BIT(0) +#define DW_DP_SDP_HORIZONTAL_CTRL 0x0504 +#define EN_HORIZONTAL_SDP BIT(2) +#define DW_DP_SDP_STATUS_REGISTER 0x0508 +#define DW_DP_SDP_MANUAL_CTRL 0x050c +#define DW_DP_SDP_STATUS_EN 0x0510 + +#define DW_DP_SDP_REGISTER_BANK 0x0600 +#define SDP_REGS GENMASK(31, 0) + +#define DW_DP_PHYIF_CTRL 0x0a00 +#define PHY_WIDTH BIT(25) +#define PHY_POWERDOWN GENMASK(20, 17) +#define PHY_BUSY GENMASK(15, 12) +#define SSC_DIS BIT(16) +#define XMIT_ENABLE GENMASK(11, 8) +#define PHY_LANES GENMASK(7, 6) +#define PHY_RATE GENMASK(5, 4) +#define TPS_SEL GENMASK(3, 0) + +#define DW_DP_PHY_TX_EQ 0x0a04 +#define DW_DP_CUSTOMPAT0 0x0a08 +#define DW_DP_CUSTOMPAT1 0x0a0c +#define DW_DP_CUSTOMPAT2 0x0a10 +#define DW_DP_HBR2_COMPLIANCE_SCRAMBLER_RESET 0x0a14 +#define DW_DP_PHYIF_PWRDOWN_CTRL 0x0a18 + +#define DW_DP_AUX_CMD 0x0b00 +#define AUX_CMD_TYPE GENMASK(31, 28) +#define AUX_ADDR GENMASK(27, 8) +#define I2C_ADDR_ONLY BIT(4) +#define AUX_LEN_REQ GENMASK(3, 0) + +#define DW_DP_AUX_STATUS 0x0b04 +#define AUX_TIMEOUT BIT(17) +#define AUX_BYTES_READ GENMASK(23, 19) +#define AUX_STATUS GENMASK(7, 4) + +#define DW_DP_AUX_DATA0 0x0b08 +#define DW_DP_AUX_DATA1 0x0b0c +#define DW_DP_AUX_DATA2 0x0b10 +#define DW_DP_AUX_DATA3 0x0b14 + +#define DW_DP_GENERAL_INTERRUPT 0x0d00 +#define VIDEO_FIFO_OVERFLOW_STREAM0 BIT(6) +#define AUDIO_FIFO_OVERFLOW_STREAM0 BIT(5) +#define SDP_EVENT_STREAM0 BIT(4) +#define AUX_CMD_INVALID BIT(3) +#define HDCP_EVENT BIT(2) +#define AUX_REPLY_EVENT BIT(1) +#define HPD_EVENT BIT(0) + +#define DW_DP_GENERAL_INTERRUPT_ENABLE 0x0d04 +#define HDCP_EVENT_EN BIT(2) +#define AUX_REPLY_EVENT_EN BIT(1) +#define HPD_EVENT_EN BIT(0) + +#define DW_DP_HPD_STATUS 0x0d08 +#define HPD_STATE GENMASK(11, 9) +#define HPD_STATUS BIT(8) +#define HPD_HOT_UNPLUG BIT(2) +#define HPD_HOT_PLUG BIT(1) +#define HPD_IRQ BIT(0) + +#define DW_DP_HPD_INTERRUPT_ENABLE 0x0d0c +#define HPD_UNPLUG_ERR_EN BIT(3) +#define HPD_UNPLUG_EN BIT(2) +#define HPD_PLUG_EN BIT(1) +#define HPD_IRQ_EN BIT(0) + +#define DW_DP_HDCP_CFG 0x0e00 +#define DPCD12PLUS BIT(7) +#define CP_IRQ BIT(6) +#define BYPENCRYPTION BIT(5) +#define HDCP_LOCK BIT(4) +#define ENCRYPTIONDISABLE BIT(3) +#define ENABLE_HDCP_13 BIT(2) +#define ENABLE_HDCP BIT(1) + +#define DW_DP_HDCP_OBS 0x0e04 +#define HDCP22_RE_AUTHENTICATION_REQ BIT(31) +#define HDCP22_AUTHENTICATION_FAILED BIT(30) +#define HDCP22_AUTHENTICATION_SUCCESS BIT(29) +#define HDCP22_CAPABLE_SINK BIT(28) +#define HDCP22_SINK_CAP_CHECK_COMPLETE BIT(27) +#define HDCP22_STATE 
GENMASK(26, 24) +#define HDCP22_BOOTED BIT(23) +#define HDCP13_BSTATUS GENMASK(22, 19) +#define REPEATER BIT(18) +#define HDCP_CAPABLE BIT(17) +#define STATEE GENMASK(16, 14) +#define STATEOEG GENMASK(13, 11) +#define STATER GENMASK(10, 8) +#define STATEA GENMASK(7, 4) +#define SUBSTATEA GENMASK(3, 1) +#define HDCPENGAGED BIT(0) + +#define DW_DP_HDCP_APIINTCLR 0x0e08 +#define DW_DP_HDCP_APIINTSTAT 0x0e0c +#define DW_DP_HDCP_APIINTMSK 0x0e10 +#define HDCP22_GPIOINT BIT(8) +#define HDCP_ENGAGED BIT(7) +#define HDCP_FAILED BIT(6) +#define KSVSHA1CALCDONEINT BIT(5) +#define AUXRESPNACK7TIMES BIT(4) +#define AUXRESPTIMEOUT BIT(3) +#define AUXRESPDEFER7TIMES BIT(2) +#define KSVACCESSINT BIT(0) + +#define DW_DP_HDCP_KSVMEMCTRL 0x0e18 +#define KSVSHA1STATUS BIT(4) +#define KSVMEMACCESS BIT(1) +#define KSVMEMREQUEST BIT(0) + +#define DW_DP_HDCP_REG_BKSV0 0x3600 +#define DW_DP_HDCP_REG_BKSV1 0x3604 +#define DW_DP_HDCP_REG_ANCONF 0x3608 +#define AN_BYPASS BIT(0) + +#define DW_DP_HDCP_REG_AN0 0x360c +#define DW_DP_HDCP_REG_AN1 0x3610 +#define DW_DP_HDCP_REG_RMLCTL 0x3614 +#define ODPK_DECRYPT_ENABLE BIT(0) + +#define DW_DP_HDCP_REG_RMLSTS 0x3618 +#define IDPK_WR_OK_STS BIT(6) +#define IDPK_DATA_INDEX GENMASK(5, 0) +#define DW_DP_HDCP_REG_SEED 0x361c +#define DW_DP_HDCP_REG_DPK0 0x3620 +#define DW_DP_HDCP_REG_DPK1 0x3624 +#define DW_DP_HDCP22_GPIOSTS 0x3628 +#define DW_DP_HDCP22_GPIOCHNGSTS 0x362c +#define DW_DP_HDCP_REG_DPK_CRC 0x3630 + +#define DW_DP_MAX_REGISTER DW_DP_HDCP_REG_DPK_CRC + +#define SDP_REG_BANK_SIZE 16 + +struct dw_dp_link_caps { + bool enhanced_framing; + bool tps3_supported; + bool tps4_supported; + bool fast_training; + bool channel_coding; + bool ssc; +}; + +struct dw_dp_link_train_set { + unsigned int voltage_swing[4]; + unsigned int pre_emphasis[4]; + bool voltage_max_reached[4]; + bool pre_max_reached[4]; +}; + +struct dw_dp_link_train { + struct dw_dp_link_train_set adjust; + bool clock_recovered; + bool channel_equalized; +}; + +struct dw_dp_link { + u8 dpcd[DP_RECEIVER_CAP_SIZE]; + unsigned char revision; + unsigned int rate; + unsigned int lanes; + u8 sink_count; + u8 vsc_sdp_supported; + struct dw_dp_link_caps caps; + struct dw_dp_link_train train; + struct drm_dp_desc desc; +}; + +struct dw_dp_bridge_state { + struct drm_bridge_state base; + struct drm_display_mode mode; + u8 video_mapping; + u8 color_format; + u8 bpc; + u8 bpp; +}; + +struct dw_dp_sdp { + struct dp_sdp base; + unsigned long flags; +}; + +struct dw_dp_hotplug { + bool long_hpd; +}; + +struct dw_dp { + struct drm_bridge bridge; + struct device *dev; + struct regmap *regmap; + struct phy *phy; + struct clk *apb_clk; + struct clk *aux_clk; + struct clk *i2s_clk; + struct clk *spdif_clk; + struct clk *hdcp_clk; + struct reset_control *rstc; + struct completion complete; + int irq; + struct work_struct hpd_work; + struct dw_dp_hotplug hotplug; + /* Serialize hpd status access */ + struct mutex irq_lock; + + struct drm_dp_aux aux; + + struct dw_dp_link link; + struct dw_dp_plat_data plat_data; + u8 pixel_mode; + + DECLARE_BITMAP(sdp_reg_bank, SDP_REG_BANK_SIZE); +}; + +enum { + DW_DP_RGB_6BIT, + DW_DP_RGB_8BIT, + DW_DP_RGB_10BIT, + DW_DP_RGB_12BIT, + DW_DP_RGB_16BIT, + DW_DP_YCBCR444_8BIT, + DW_DP_YCBCR444_10BIT, + DW_DP_YCBCR444_12BIT, + DW_DP_YCBCR444_16BIT, + DW_DP_YCBCR422_8BIT, + DW_DP_YCBCR422_10BIT, + DW_DP_YCBCR422_12BIT, + DW_DP_YCBCR422_16BIT, + DW_DP_YCBCR420_8BIT, + DW_DP_YCBCR420_10BIT, + DW_DP_YCBCR420_12BIT, + DW_DP_YCBCR420_16BIT, +}; + +enum { + DW_DP_MP_SINGLE_PIXEL, + DW_DP_MP_DUAL_PIXEL, + 
DW_DP_MP_QUAD_PIXEL, +}; + +enum { + DW_DP_SDP_VERTICAL_INTERVAL = BIT(0), + DW_DP_SDP_HORIZONTAL_INTERVAL = BIT(1), +}; + +enum { + DW_DP_HPD_STATE_IDLE, + DW_DP_HPD_STATE_UNPLUG, + DP_DP_HPD_STATE_TIMEOUT = 4, + DW_DP_HPD_STATE_PLUG = 7 +}; + +enum { + DW_DP_PHY_PATTERN_NONE, + DW_DP_PHY_PATTERN_TPS_1, + DW_DP_PHY_PATTERN_TPS_2, + DW_DP_PHY_PATTERN_TPS_3, + DW_DP_PHY_PATTERN_TPS_4, + DW_DP_PHY_PATTERN_SERM, + DW_DP_PHY_PATTERN_PBRS7, + DW_DP_PHY_PATTERN_CUSTOM_80BIT, + DW_DP_PHY_PATTERN_CP2520_1, + DW_DP_PHY_PATTERN_CP2520_2, +}; + +struct dw_dp_output_format { + u32 bus_format; + u32 color_format; + u8 video_mapping; + u8 bpc; + u8 bpp; +}; + +#define to_dw_dp_bridge_state(s) container_of(s, struct dw_dp_bridge_state, base) + +static const struct dw_dp_output_format dw_dp_output_formats[] = { + { MEDIA_BUS_FMT_RGB101010_1X30, DRM_COLOR_FORMAT_RGB444, DW_DP_RGB_10BIT, 10, 30 }, + { MEDIA_BUS_FMT_RGB888_1X24, DRM_COLOR_FORMAT_RGB444, DW_DP_RGB_8BIT, 8, 24 }, + { MEDIA_BUS_FMT_YUV10_1X30, DRM_COLOR_FORMAT_YCBCR444, DW_DP_YCBCR444_10BIT, 10, 30 }, + { MEDIA_BUS_FMT_YUV8_1X24, DRM_COLOR_FORMAT_YCBCR444, DW_DP_YCBCR444_8BIT, 8, 24}, + { MEDIA_BUS_FMT_YUYV10_1X20, DRM_COLOR_FORMAT_YCBCR422, DW_DP_YCBCR422_10BIT, 10, 20 }, + { MEDIA_BUS_FMT_YUYV8_1X16, DRM_COLOR_FORMAT_YCBCR422, DW_DP_YCBCR422_8BIT, 8, 16 }, + { MEDIA_BUS_FMT_UYYVYY10_0_5X30, DRM_COLOR_FORMAT_YCBCR420, DW_DP_YCBCR420_10BIT, 10, 15 }, + { MEDIA_BUS_FMT_UYYVYY8_0_5X24, DRM_COLOR_FORMAT_YCBCR420, DW_DP_YCBCR420_8BIT, 8, 12 }, + { MEDIA_BUS_FMT_RGB666_1X24_CPADHI, DRM_COLOR_FORMAT_RGB444, DW_DP_RGB_6BIT, 6, 18 }, +}; + +static const struct dw_dp_output_format *dw_dp_get_output_format(u32 bus_format) +{ + unsigned int i; + + for (i = 0; i < ARRAY_SIZE(dw_dp_output_formats); i++) + if (dw_dp_output_formats[i].bus_format == bus_format) + return &dw_dp_output_formats[i]; + + return NULL; +} + +static inline struct dw_dp *bridge_to_dp(struct drm_bridge *b) +{ + return container_of(b, struct dw_dp, bridge); +} + +static struct dw_dp_bridge_state *dw_dp_get_bridge_state(struct dw_dp *dp) +{ + struct dw_dp_bridge_state *dw_bridge_state; + struct drm_bridge_state *state; + + state = drm_priv_to_bridge_state(dp->bridge.base.state); + if (!state) + return NULL; + + dw_bridge_state = to_dw_dp_bridge_state(state); + if (!dw_bridge_state) + return NULL; + + return dw_bridge_state; +} + +static inline void dw_dp_phy_set_pattern(struct dw_dp *dp, u32 pattern) +{ + regmap_update_bits(dp->regmap, DW_DP_PHYIF_CTRL, TPS_SEL, + FIELD_PREP(TPS_SEL, pattern)); +} + +static void dw_dp_phy_xmit_enable(struct dw_dp *dp, u32 lanes) +{ + u32 xmit_enable; + + switch (lanes) { + case 4: + case 2: + case 1: + xmit_enable = GENMASK(lanes - 1, 0); + break; + case 0: + default: + xmit_enable = 0; + break; + } + + regmap_update_bits(dp->regmap, DW_DP_PHYIF_CTRL, XMIT_ENABLE, + FIELD_PREP(XMIT_ENABLE, xmit_enable)); +} + +static bool dw_dp_bandwidth_ok(struct dw_dp *dp, + const struct drm_display_mode *mode, u32 bpp, + unsigned int lanes, unsigned int rate) +{ + u32 max_bw, req_bw; + + req_bw = mode->clock * bpp / 8; + max_bw = lanes * rate; + if (req_bw > max_bw) + return false; + + return true; +} + +static bool dw_dp_hpd_detect(struct dw_dp *dp) +{ + u32 value; + + regmap_read(dp->regmap, DW_DP_HPD_STATUS, &value); + + return FIELD_GET(HPD_STATE, value) == DW_DP_HPD_STATE_PLUG; +} + +static void dw_dp_link_caps_reset(struct dw_dp_link_caps *caps) +{ + caps->enhanced_framing = false; + caps->tps3_supported = false; + caps->tps4_supported = false; + 
caps->fast_training = false; + caps->channel_coding = false; +} + +static void dw_dp_link_reset(struct dw_dp_link *link) +{ + link->vsc_sdp_supported = 0; + link->sink_count = 0; + link->revision = 0; + link->rate = 0; + link->lanes = 0; + + dw_dp_link_caps_reset(&link->caps); + memset(link->dpcd, 0, sizeof(link->dpcd)); +} + +static int dw_dp_link_parse(struct dw_dp *dp, struct drm_connector *connector) +{ + struct dw_dp_link *link = &dp->link; + int ret; + + dw_dp_link_reset(link); + + ret = drm_dp_read_dpcd_caps(&dp->aux, link->dpcd); + if (ret < 0) + return ret; + + drm_dp_read_desc(&dp->aux, &link->desc, drm_dp_is_branch(link->dpcd)); + + if (drm_dp_read_sink_count_cap(connector, link->dpcd, &link->desc)) { + ret = drm_dp_read_sink_count(&dp->aux); + if (ret < 0) + return ret; + + link->sink_count = ret; + + /* Dongle connected, but no display */ + if (!link->sink_count) + return -ENODEV; + } + + link->vsc_sdp_supported = drm_dp_vsc_sdp_supported(&dp->aux, link->dpcd); + + link->revision = link->dpcd[DP_DPCD_REV]; + link->rate = min_t(u32, min(dp->plat_data.max_link_rate, + dp->phy->attrs.max_link_rate * 100), + drm_dp_max_link_rate(link->dpcd)); + link->lanes = min_t(u8, phy_get_bus_width(dp->phy), + drm_dp_max_lane_count(link->dpcd)); + + link->caps.enhanced_framing = drm_dp_enhanced_frame_cap(link->dpcd); + link->caps.tps3_supported = drm_dp_tps3_supported(link->dpcd); + link->caps.tps4_supported = drm_dp_tps4_supported(link->dpcd); + link->caps.fast_training = drm_dp_fast_training_cap(link->dpcd); + link->caps.channel_coding = drm_dp_channel_coding_supported(link->dpcd); + link->caps.ssc = !!(link->dpcd[DP_MAX_DOWNSPREAD] & DP_MAX_DOWNSPREAD_0_5); + + return 0; +} + +static int dw_dp_link_train_update_vs_emph(struct dw_dp *dp) +{ + struct dw_dp_link *link = &dp->link; + struct dw_dp_link_train_set *train_set = &link->train.adjust; + unsigned int lanes = dp->link.lanes; + union phy_configure_opts phy_cfg; + unsigned int *vs, *pe; + int i, ret; + u8 buf[4]; + + vs = train_set->voltage_swing; + pe = train_set->pre_emphasis; + + for (i = 0; i < lanes; i++) { + phy_cfg.dp.voltage[i] = vs[i]; + phy_cfg.dp.pre[i] = pe[i]; + } + + phy_cfg.dp.set_lanes = false; + phy_cfg.dp.set_rate = false; + phy_cfg.dp.set_voltages = true; + + ret = phy_configure(dp->phy, &phy_cfg); + if (ret) + return ret; + + for (i = 0; i < lanes; i++) { + buf[i] = (vs[i] << DP_TRAIN_VOLTAGE_SWING_SHIFT) | + (pe[i] << DP_TRAIN_PRE_EMPHASIS_SHIFT); + if (train_set->voltage_max_reached[i]) + buf[i] |= DP_TRAIN_MAX_SWING_REACHED; + if (train_set->pre_max_reached[i]) + buf[i] |= DP_TRAIN_MAX_PRE_EMPHASIS_REACHED; + } + + ret = drm_dp_dpcd_write(&dp->aux, DP_TRAINING_LANE0_SET, buf, lanes); + if (ret < 0) + return ret; + + return 0; +} + +static int dw_dp_phy_configure(struct dw_dp *dp, unsigned int rate, + unsigned int lanes, bool ssc) +{ + union phy_configure_opts phy_cfg; + int ret; + + /* Move PHY to P3 */ + regmap_update_bits(dp->regmap, DW_DP_PHYIF_CTRL, PHY_POWERDOWN, + FIELD_PREP(PHY_POWERDOWN, 0x3)); + + phy_cfg.dp.lanes = lanes; + phy_cfg.dp.link_rate = rate / 100; + phy_cfg.dp.ssc = ssc; + phy_cfg.dp.set_lanes = true; + phy_cfg.dp.set_rate = true; + phy_cfg.dp.set_voltages = false; + ret = phy_configure(dp->phy, &phy_cfg); + if (ret) + return ret; + + regmap_update_bits(dp->regmap, DW_DP_PHYIF_CTRL, PHY_LANES, + FIELD_PREP(PHY_LANES, lanes / 2)); + + /* Move PHY to P0 */ + regmap_update_bits(dp->regmap, DW_DP_PHYIF_CTRL, PHY_POWERDOWN, + FIELD_PREP(PHY_POWERDOWN, 0x0)); + + dw_dp_phy_xmit_enable(dp, lanes); + + 
return 0; +} + +static int dw_dp_link_configure(struct dw_dp *dp) +{ + struct dw_dp_link *link = &dp->link; + u8 buf[2]; + int ret; + + ret = dw_dp_phy_configure(dp, link->rate, link->lanes, link->caps.ssc); + if (ret) + return ret; + + buf[0] = drm_dp_link_rate_to_bw_code(link->rate); + buf[1] = link->lanes; + + if (link->caps.enhanced_framing) { + buf[1] |= DP_LANE_COUNT_ENHANCED_FRAME_EN; + regmap_update_bits(dp->regmap, DW_DP_CCTL, ENHANCE_FRAMING_EN, + FIELD_PREP(ENHANCE_FRAMING_EN, 1)); + } else { + regmap_update_bits(dp->regmap, DW_DP_CCTL, ENHANCE_FRAMING_EN, + FIELD_PREP(ENHANCE_FRAMING_EN, 0)); + } + + ret = drm_dp_dpcd_write(&dp->aux, DP_LINK_BW_SET, buf, sizeof(buf)); + if (ret < 0) + return ret; + + buf[0] = link->caps.ssc ? DP_SPREAD_AMP_0_5 : 0; + buf[1] = link->caps.channel_coding ? DP_SET_ANSI_8B10B : 0; + + ret = drm_dp_dpcd_write(&dp->aux, DP_DOWNSPREAD_CTRL, buf, sizeof(buf)); + if (ret < 0) + return ret; + + return 0; +} + +static void dw_dp_link_train_init(struct dw_dp_link_train *train) +{ + struct dw_dp_link_train_set *adj = &train->adjust; + unsigned int i; + + for (i = 0; i < 4; i++) { + adj->voltage_swing[i] = 0; + adj->pre_emphasis[i] = 0; + adj->voltage_max_reached[i] = false; + adj->pre_max_reached[i] = false; + } + + train->clock_recovered = false; + train->channel_equalized = false; +} + +static bool dw_dp_link_train_valid(const struct dw_dp_link_train *train) +{ + return train->clock_recovered && train->channel_equalized; +} + +static int dw_dp_link_train_set_pattern(struct dw_dp *dp, u32 pattern) +{ + u8 buf = 0; + int ret; + + if (pattern && pattern != DP_TRAINING_PATTERN_4) { + buf |= DP_LINK_SCRAMBLING_DISABLE; + + regmap_update_bits(dp->regmap, DW_DP_CCTL, SCRAMBLE_DIS, + FIELD_PREP(SCRAMBLE_DIS, 1)); + } else { + regmap_update_bits(dp->regmap, DW_DP_CCTL, SCRAMBLE_DIS, + FIELD_PREP(SCRAMBLE_DIS, 0)); + } + + switch (pattern) { + case DP_TRAINING_PATTERN_DISABLE: + dw_dp_phy_set_pattern(dp, DW_DP_PHY_PATTERN_NONE); + break; + case DP_TRAINING_PATTERN_1: + dw_dp_phy_set_pattern(dp, DW_DP_PHY_PATTERN_TPS_1); + break; + case DP_TRAINING_PATTERN_2: + dw_dp_phy_set_pattern(dp, DW_DP_PHY_PATTERN_TPS_2); + break; + case DP_TRAINING_PATTERN_3: + dw_dp_phy_set_pattern(dp, DW_DP_PHY_PATTERN_TPS_3); + break; + case DP_TRAINING_PATTERN_4: + dw_dp_phy_set_pattern(dp, DW_DP_PHY_PATTERN_TPS_4); + break; + default: + return -EINVAL; + } + + ret = drm_dp_dpcd_writeb(&dp->aux, DP_TRAINING_PATTERN_SET, + buf | pattern); + if (ret < 0) + return ret; + + return 0; +} + +static u8 dw_dp_voltage_max(u8 preemph) +{ + switch (preemph & DP_TRAIN_PRE_EMPHASIS_MASK) { + case DP_TRAIN_PRE_EMPH_LEVEL_0: + return DP_TRAIN_VOLTAGE_SWING_LEVEL_3; + case DP_TRAIN_PRE_EMPH_LEVEL_1: + return DP_TRAIN_VOLTAGE_SWING_LEVEL_2; + case DP_TRAIN_PRE_EMPH_LEVEL_2: + return DP_TRAIN_VOLTAGE_SWING_LEVEL_1; + case DP_TRAIN_PRE_EMPH_LEVEL_3: + default: + return DP_TRAIN_VOLTAGE_SWING_LEVEL_0; + } +} + +static bool dw_dp_link_get_adjustments(struct dw_dp_link *link, + u8 status[DP_LINK_STATUS_SIZE]) +{ + struct dw_dp_link_train_set *adj = &link->train.adjust; + unsigned int i; + bool changed = false; + u8 v = 0; + u8 p = 0; + + for (i = 0; i < link->lanes; i++) { + v = drm_dp_get_adjust_request_voltage(status, i); + v >>= DP_TRAIN_VOLTAGE_SWING_SHIFT; + p = drm_dp_get_adjust_request_pre_emphasis(status, i); + p >>= DP_TRAIN_PRE_EMPHASIS_SHIFT; + + if (v != adj->voltage_swing[i] || p != adj->pre_emphasis[i]) + changed = true; + + if (p >= (DP_TRAIN_PRE_EMPH_LEVEL_3 >> DP_TRAIN_PRE_EMPHASIS_SHIFT)) { + 
adj->pre_emphasis[i] = DP_TRAIN_PRE_EMPH_LEVEL_3 >> + DP_TRAIN_PRE_EMPHASIS_SHIFT; + adj->pre_max_reached[i] = true; + } else { + adj->pre_emphasis[i] = p; + adj->pre_max_reached[i] = false; + } + + v = min(v, dw_dp_voltage_max(p)); + if (v >= (DP_TRAIN_VOLTAGE_SWING_LEVEL_3 >> DP_TRAIN_VOLTAGE_SWING_SHIFT)) { + adj->voltage_swing[i] = DP_TRAIN_VOLTAGE_SWING_LEVEL_3 >> + DP_TRAIN_VOLTAGE_SWING_SHIFT; + adj->voltage_max_reached[i] = true; + } else { + adj->voltage_swing[i] = v; + adj->voltage_max_reached[i] = false; + } + } + + return changed; +} + +static int dw_dp_link_clock_recovery(struct dw_dp *dp) +{ + struct dw_dp_link *link = &dp->link; + u8 status[DP_LINK_STATUS_SIZE]; + unsigned int tries = 0; + int ret; + bool adj_changed; + + ret = dw_dp_link_train_set_pattern(dp, DP_TRAINING_PATTERN_1); + if (ret) + return ret; + + for (;;) { + ret = dw_dp_link_train_update_vs_emph(dp); + if (ret) + return ret; + + drm_dp_link_train_clock_recovery_delay(&dp->aux, link->dpcd); + + ret = drm_dp_dpcd_read_link_status(&dp->aux, status); + if (ret < 0) { + dev_err(dp->dev, "failed to read link status: %d\n", ret); + return ret; + } + + if (drm_dp_clock_recovery_ok(status, link->lanes)) { + link->train.clock_recovered = true; + break; + } + + /* + * According to DP spec 1.4, if current ADJ is the same + * with previous REQ, we need to retry 5 times. + */ + adj_changed = dw_dp_link_get_adjustments(link, status); + if (!adj_changed) + tries++; + else + tries = 0; + + if (tries == 5) + break; + } + + return 0; +} + +static int dw_dp_link_channel_equalization(struct dw_dp *dp) +{ + struct dw_dp_link *link = &dp->link; + u8 status[DP_LINK_STATUS_SIZE], pattern; + unsigned int tries; + int ret; + + if (link->caps.tps4_supported) + pattern = DP_TRAINING_PATTERN_4; + else if (link->caps.tps3_supported) + pattern = DP_TRAINING_PATTERN_3; + else + pattern = DP_TRAINING_PATTERN_2; + ret = dw_dp_link_train_set_pattern(dp, pattern); + if (ret) + return ret; + + for (tries = 1; tries < 5; tries++) { + ret = dw_dp_link_train_update_vs_emph(dp); + if (ret) + return ret; + + drm_dp_link_train_channel_eq_delay(&dp->aux, link->dpcd); + + ret = drm_dp_dpcd_read_link_status(&dp->aux, status); + if (ret < 0) + return ret; + + if (!drm_dp_clock_recovery_ok(status, link->lanes)) { + dev_err(dp->dev, "clock recovery lost while equalizing channel\n"); + link->train.clock_recovered = false; + break; + } + + if (drm_dp_channel_eq_ok(status, link->lanes)) { + link->train.channel_equalized = true; + break; + } + + dw_dp_link_get_adjustments(link, status); + } + + return 0; +} + +static int dw_dp_link_downgrade(struct dw_dp *dp) +{ + struct dw_dp_link *link = &dp->link; + struct dw_dp_bridge_state *state; + + state = dw_dp_get_bridge_state(dp); + + switch (link->rate) { + case 162000: + return -EINVAL; + case 270000: + link->rate = 162000; + break; + case 540000: + link->rate = 270000; + break; + case 810000: + link->rate = 540000; + break; + } + + if (!dw_dp_bandwidth_ok(dp, &state->mode, state->bpp, link->lanes, + link->rate)) + return -E2BIG; + + return 0; +} + +static int dw_dp_link_train_full(struct dw_dp *dp) +{ + struct dw_dp_link *link = &dp->link; + int ret; + +retry: + dw_dp_link_train_init(&link->train); + + dev_dbg(dp->dev, "full-training link: %u lane%s at %u MHz\n", + link->lanes, (link->lanes > 1) ? 
"s" : "", link->rate / 100); + + ret = dw_dp_link_configure(dp); + if (ret < 0) { + dev_err(dp->dev, "failed to configure DP link: %d\n", ret); + return ret; + } + + ret = dw_dp_link_clock_recovery(dp); + if (ret < 0) { + dev_err(dp->dev, "clock recovery failed: %d\n", ret); + goto out; + } + + if (!link->train.clock_recovered) { + dev_err(dp->dev, "clock recovery failed, downgrading link\n"); + + ret = dw_dp_link_downgrade(dp); + if (ret < 0) + goto out; + else + goto retry; + } + + dev_dbg(dp->dev, "clock recovery succeeded\n"); + + ret = dw_dp_link_channel_equalization(dp); + if (ret < 0) { + dev_err(dp->dev, "channel equalization failed: %d\n", ret); + goto out; + } + + if (!link->train.channel_equalized) { + dev_err(dp->dev, "channel equalization failed, downgrading link\n"); + + ret = dw_dp_link_downgrade(dp); + if (ret < 0) + goto out; + else + goto retry; + } + + dev_dbg(dp->dev, "channel equalization succeeded\n"); + +out: + dw_dp_link_train_set_pattern(dp, DP_TRAINING_PATTERN_DISABLE); + return ret; +} + +static int dw_dp_link_train_fast(struct dw_dp *dp) +{ + struct dw_dp_link *link = &dp->link; + int ret; + u8 status[DP_LINK_STATUS_SIZE]; + u8 pattern; + + dw_dp_link_train_init(&link->train); + + dev_dbg(dp->dev, "fast-training link: %u lane%s at %u MHz\n", + link->lanes, (link->lanes > 1) ? "s" : "", link->rate / 100); + + ret = dw_dp_link_configure(dp); + if (ret < 0) { + dev_err(dp->dev, "failed to configure DP link: %d\n", ret); + return ret; + } + + ret = dw_dp_link_train_set_pattern(dp, DP_TRAINING_PATTERN_1); + if (ret) + goto out; + + usleep_range(500, 1000); + + if (link->caps.tps4_supported) + pattern = DP_TRAINING_PATTERN_4; + else if (link->caps.tps3_supported) + pattern = DP_TRAINING_PATTERN_3; + else + pattern = DP_TRAINING_PATTERN_2; + ret = dw_dp_link_train_set_pattern(dp, pattern); + if (ret) + goto out; + + usleep_range(500, 1000); + + ret = drm_dp_dpcd_read_link_status(&dp->aux, status); + if (ret < 0) { + dev_err(dp->dev, "failed to read link status: %d\n", ret); + goto out; + } + + if (!drm_dp_clock_recovery_ok(status, link->lanes)) { + dev_err(dp->dev, "clock recovery failed\n"); + ret = -EIO; + goto out; + } + + if (!drm_dp_channel_eq_ok(status, link->lanes)) { + dev_err(dp->dev, "channel equalization failed\n"); + ret = -EIO; + goto out; + } + +out: + dw_dp_link_train_set_pattern(dp, DP_TRAINING_PATTERN_DISABLE); + return ret; +} + +static int dw_dp_link_train(struct dw_dp *dp) +{ + struct dw_dp_link *link = &dp->link; + int ret; + + if (link->caps.fast_training) { + if (dw_dp_link_train_valid(&link->train)) { + ret = dw_dp_link_train_fast(dp); + if (ret < 0) + dev_err(dp->dev, "fast link training failed: %d\n", ret); + else + return 0; + } + } + + ret = dw_dp_link_train_full(dp); + if (ret < 0) { + dev_err(dp->dev, "full link training failed: %d\n", ret); + return ret; + } + + return 0; +} + +static int dw_dp_send_sdp(struct dw_dp *dp, struct dw_dp_sdp *sdp) +{ + const u8 *payload = sdp->base.db; + u32 reg; + int i, nr; + + nr = find_first_zero_bit(dp->sdp_reg_bank, SDP_REG_BANK_SIZE); + if (nr < SDP_REG_BANK_SIZE) + set_bit(nr, dp->sdp_reg_bank); + else + return -EBUSY; + + reg = DW_DP_SDP_REGISTER_BANK + nr * 9 * 4; + + /* SDP header */ + regmap_write(dp->regmap, reg, get_unaligned_le32(&sdp->base.sdp_header)); + + /* SDP data payload */ + for (i = 1; i < 9; i++, payload += 4) + regmap_write(dp->regmap, reg + i * 4, + FIELD_PREP(SDP_REGS, get_unaligned_le32(payload))); + + if (sdp->flags & DW_DP_SDP_VERTICAL_INTERVAL) + regmap_update_bits(dp->regmap, 
DW_DP_SDP_VERTICAL_CTRL, + EN_VERTICAL_SDP << nr, + EN_VERTICAL_SDP << nr); + + if (sdp->flags & DW_DP_SDP_HORIZONTAL_INTERVAL) + regmap_update_bits(dp->regmap, DW_DP_SDP_HORIZONTAL_CTRL, + EN_HORIZONTAL_SDP << nr, + EN_HORIZONTAL_SDP << nr); + + return 0; +} + +static int dw_dp_send_vsc_sdp(struct dw_dp *dp) +{ + struct dw_dp_bridge_state *state; + struct dw_dp_sdp sdp = {}; + struct drm_dp_vsc_sdp vsc = {}; + + state = dw_dp_get_bridge_state(dp); + if (!state) + return -EINVAL; + + vsc.bpc = state->bpc; + + vsc.sdp_type = DP_SDP_VSC; + vsc.revision = 0x5; + vsc.length = 0x13; + vsc.content_type = DP_CONTENT_TYPE_NOT_DEFINED; + + sdp.flags = DW_DP_SDP_VERTICAL_INTERVAL; + + switch (state->color_format) { + case DRM_COLOR_FORMAT_YCBCR444: + vsc.pixelformat = DP_PIXELFORMAT_YUV444; + break; + case DRM_COLOR_FORMAT_YCBCR420: + vsc.pixelformat = DP_PIXELFORMAT_YUV420; + break; + case DRM_COLOR_FORMAT_YCBCR422: + vsc.pixelformat = DP_PIXELFORMAT_YUV422; + break; + case DRM_COLOR_FORMAT_RGB444: + default: + vsc.pixelformat = DP_PIXELFORMAT_RGB; + break; + } + + if (state->color_format == DRM_COLOR_FORMAT_RGB444) { + vsc.colorimetry = DP_COLORIMETRY_DEFAULT; + vsc.dynamic_range = DP_DYNAMIC_RANGE_VESA; + } else { + vsc.colorimetry = DP_COLORIMETRY_BT709_YCC; + vsc.dynamic_range = DP_DYNAMIC_RANGE_CTA; + } + + drm_dp_vsc_sdp_pack(&vsc, &sdp.base); + + return dw_dp_send_sdp(dp, &sdp); +} + +static int dw_dp_video_set_pixel_mode(struct dw_dp *dp) +{ + switch (dp->pixel_mode) { + case DW_DP_MP_SINGLE_PIXEL: + case DW_DP_MP_DUAL_PIXEL: + case DW_DP_MP_QUAD_PIXEL: + break; + default: + return -EINVAL; + } + + regmap_update_bits(dp->regmap, DW_DP_VSAMPLE_CTRL, PIXEL_MODE_SELECT, + FIELD_PREP(PIXEL_MODE_SELECT, dp->pixel_mode)); + + return 0; +} + +static bool dw_dp_video_need_vsc_sdp(struct dw_dp *dp) +{ + struct dw_dp_link *link = &dp->link; + struct dw_dp_bridge_state *state; + + state = dw_dp_get_bridge_state(dp); + if (!state) + return -EINVAL; + + if (!link->vsc_sdp_supported) + return false; + + if (state->color_format == DRM_COLOR_FORMAT_YCBCR420) + return true; + + return false; +} + +static int dw_dp_video_set_msa(struct dw_dp *dp, u8 color_format, u8 bpc, + u16 vstart, u16 hstart) +{ + u16 misc = 0; + + if (dw_dp_video_need_vsc_sdp(dp)) + misc |= DP_MSA_MISC_COLOR_VSC_SDP; + + switch (color_format) { + case DRM_COLOR_FORMAT_RGB444: + misc |= DP_MSA_MISC_COLOR_RGB; + break; + case DRM_COLOR_FORMAT_YCBCR444: + misc |= DP_MSA_MISC_COLOR_YCBCR_444_BT709; + break; + case DRM_COLOR_FORMAT_YCBCR422: + misc |= DP_MSA_MISC_COLOR_YCBCR_422_BT709; + break; + case DRM_COLOR_FORMAT_YCBCR420: + break; + default: + return -EINVAL; + } + + switch (bpc) { + case 6: + misc |= DP_MSA_MISC_6_BPC; + break; + case 8: + misc |= DP_MSA_MISC_8_BPC; + break; + case 10: + misc |= DP_MSA_MISC_10_BPC; + break; + case 12: + misc |= DP_MSA_MISC_12_BPC; + break; + case 16: + misc |= DP_MSA_MISC_16_BPC; + break; + default: + return -EINVAL; + } + + regmap_write(dp->regmap, DW_DP_VIDEO_MSA1, + FIELD_PREP(VSTART, vstart) | FIELD_PREP(HSTART, hstart)); + regmap_write(dp->regmap, DW_DP_VIDEO_MSA2, FIELD_PREP(MISC0, misc)); + regmap_write(dp->regmap, DW_DP_VIDEO_MSA3, FIELD_PREP(MISC1, misc >> 8)); + + return 0; +} + +static void dw_dp_video_disable(struct dw_dp *dp) +{ + regmap_update_bits(dp->regmap, DW_DP_VSAMPLE_CTRL, VIDEO_STREAM_ENABLE, + FIELD_PREP(VIDEO_STREAM_ENABLE, 0)); +} + +static int dw_dp_video_enable(struct dw_dp *dp) +{ + struct dw_dp_link *link = &dp->link; + struct dw_dp_bridge_state *state; + struct 
drm_display_mode *mode; + u8 color_format, bpc, bpp; + u8 init_threshold, vic; + u32 hstart, hactive, hblank, h_sync_width, h_front_porch; + u32 vstart, vactive, vblank, v_sync_width, v_front_porch; + u32 peak_stream_bandwidth, link_bandwidth; + u32 average_bytes_per_tu, average_bytes_per_tu_frac; + u32 ts, hblank_interval; + u32 value; + int ret; + + state = dw_dp_get_bridge_state(dp); + if (!state) + return -EINVAL; + + bpc = state->bpc; + bpp = state->bpp; + color_format = state->color_format; + mode = &state->mode; + + vstart = mode->vtotal - mode->vsync_start; + hstart = mode->htotal - mode->hsync_start; + + ret = dw_dp_video_set_pixel_mode(dp); + if (ret) + return ret; + + ret = dw_dp_video_set_msa(dp, color_format, bpc, vstart, hstart); + if (ret) + return ret; + + regmap_update_bits(dp->regmap, DW_DP_VSAMPLE_CTRL, VIDEO_MAPPING, + FIELD_PREP(VIDEO_MAPPING, state->video_mapping)); + + /* Configure DW_DP_VINPUT_POLARITY_CTRL register */ + value = 0; + if (mode->flags & DRM_MODE_FLAG_PHSYNC) + value |= FIELD_PREP(HSYNC_IN_POLARITY, 1); + if (mode->flags & DRM_MODE_FLAG_PVSYNC) + value |= FIELD_PREP(VSYNC_IN_POLARITY, 1); + regmap_write(dp->regmap, DW_DP_VINPUT_POLARITY_CTRL, value); + + /* Configure DW_DP_VIDEO_CONFIG1 register */ + hactive = mode->hdisplay; + hblank = mode->htotal - mode->hdisplay; + value = FIELD_PREP(HACTIVE, hactive) | FIELD_PREP(HBLANK, hblank); + if (mode->flags & DRM_MODE_FLAG_INTERLACE) + value |= FIELD_PREP(I_P, 1); + vic = drm_match_cea_mode(mode); + if (vic == 5 || vic == 6 || vic == 7 || + vic == 10 || vic == 11 || vic == 20 || + vic == 21 || vic == 22 || vic == 39 || + vic == 25 || vic == 26 || vic == 40 || + vic == 44 || vic == 45 || vic == 46 || + vic == 50 || vic == 51 || vic == 54 || + vic == 55 || vic == 58 || vic == 59) + value |= R_V_BLANK_IN_OSC; + regmap_write(dp->regmap, DW_DP_VIDEO_CONFIG1, value); + + /* Configure DW_DP_VIDEO_CONFIG2 register */ + vblank = mode->vtotal - mode->vdisplay; + vactive = mode->vdisplay; + regmap_write(dp->regmap, DW_DP_VIDEO_CONFIG2, + FIELD_PREP(VBLANK, vblank) | FIELD_PREP(VACTIVE, vactive)); + + /* Configure DW_DP_VIDEO_CONFIG3 register */ + h_sync_width = mode->hsync_end - mode->hsync_start; + h_front_porch = mode->hsync_start - mode->hdisplay; + regmap_write(dp->regmap, DW_DP_VIDEO_CONFIG3, + FIELD_PREP(H_SYNC_WIDTH, h_sync_width) | + FIELD_PREP(H_FRONT_PORCH, h_front_porch)); + + /* Configure DW_DP_VIDEO_CONFIG4 register */ + v_sync_width = mode->vsync_end - mode->vsync_start; + v_front_porch = mode->vsync_start - mode->vdisplay; + regmap_write(dp->regmap, DW_DP_VIDEO_CONFIG4, + FIELD_PREP(V_SYNC_WIDTH, v_sync_width) | + FIELD_PREP(V_FRONT_PORCH, v_front_porch)); + + /* Configure DW_DP_VIDEO_CONFIG5 register */ + peak_stream_bandwidth = mode->clock * bpp / 8; + link_bandwidth = (link->rate / 1000) * link->lanes; + ts = peak_stream_bandwidth * 64 / link_bandwidth; + average_bytes_per_tu = ts / 1000; + average_bytes_per_tu_frac = ts / 100 - average_bytes_per_tu * 10; + if (dp->pixel_mode == DW_DP_MP_SINGLE_PIXEL) { + if (average_bytes_per_tu < 6) + init_threshold = 32; + else if (hblank <= 80 && color_format != DRM_COLOR_FORMAT_YCBCR420) + init_threshold = 12; + else if (hblank <= 40 && color_format == DRM_COLOR_FORMAT_YCBCR420) + init_threshold = 3; + else + init_threshold = 16; + } else { + u32 t1 = 0, t2 = 0, t3 = 0; + + switch (bpc) { + case 6: + t1 = (4 * 1000 / 9) * link->lanes; + break; + case 8: + if (color_format == DRM_COLOR_FORMAT_YCBCR422) { + t1 = (1000 / 2) * link->lanes; + } else { + if 
(dp->pixel_mode == DW_DP_MP_DUAL_PIXEL) + t1 = (1000 / 3) * link->lanes; + else + t1 = (3000 / 16) * link->lanes; + } + break; + case 10: + if (color_format == DRM_COLOR_FORMAT_YCBCR422) + t1 = (2000 / 5) * link->lanes; + else + t1 = (4000 / 15) * link->lanes; + break; + case 12: + if (color_format == DRM_COLOR_FORMAT_YCBCR422) { + if (dp->pixel_mode == DW_DP_MP_DUAL_PIXEL) + t1 = (1000 / 6) * link->lanes; + else + t1 = (1000 / 3) * link->lanes; + } else { + t1 = (2000 / 9) * link->lanes; + } + break; + case 16: + if (color_format != DRM_COLOR_FORMAT_YCBCR422 && + dp->pixel_mode == DW_DP_MP_DUAL_PIXEL) + t1 = (1000 / 6) * link->lanes; + else + t1 = (1000 / 4) * link->lanes; + break; + default: + return -EINVAL; + } + + if (color_format == DRM_COLOR_FORMAT_YCBCR420) + t2 = (link->rate / 4) * 1000 / (mode->clock / 2); + else + t2 = (link->rate / 4) * 1000 / mode->clock; + + if (average_bytes_per_tu_frac) + t3 = average_bytes_per_tu + 1; + else + t3 = average_bytes_per_tu; + init_threshold = t1 * t2 * t3 / (1000 * 1000); + if (init_threshold <= 16 || average_bytes_per_tu < 10) + init_threshold = 40; + } + + regmap_write(dp->regmap, DW_DP_VIDEO_CONFIG5, + FIELD_PREP(INIT_THRESHOLD_HI, init_threshold >> 6) | + FIELD_PREP(AVERAGE_BYTES_PER_TU_FRAC, average_bytes_per_tu_frac) | + FIELD_PREP(INIT_THRESHOLD, init_threshold) | + FIELD_PREP(AVERAGE_BYTES_PER_TU, average_bytes_per_tu)); + + /* Configure DW_DP_VIDEO_HBLANK_INTERVAL register */ + hblank_interval = hblank * (link->rate / 4) / mode->clock; + regmap_write(dp->regmap, DW_DP_VIDEO_HBLANK_INTERVAL, + FIELD_PREP(HBLANK_INTERVAL_EN, 1) | + FIELD_PREP(HBLANK_INTERVAL, hblank_interval)); + + /* Video stream enable */ + regmap_update_bits(dp->regmap, DW_DP_VSAMPLE_CTRL, VIDEO_STREAM_ENABLE, + FIELD_PREP(VIDEO_STREAM_ENABLE, 1)); + + if (dw_dp_video_need_vsc_sdp(dp)) + dw_dp_send_vsc_sdp(dp); + + return 0; +} + +static void dw_dp_hpd_init(struct dw_dp *dp) +{ + /* Enable all HPD interrupts */ + regmap_update_bits(dp->regmap, DW_DP_HPD_INTERRUPT_ENABLE, + HPD_UNPLUG_EN | HPD_PLUG_EN | HPD_IRQ_EN, + FIELD_PREP(HPD_UNPLUG_EN, 1) | + FIELD_PREP(HPD_PLUG_EN, 1) | + FIELD_PREP(HPD_IRQ_EN, 1)); + + /* Enable all top-level interrupts */ + regmap_update_bits(dp->regmap, DW_DP_GENERAL_INTERRUPT_ENABLE, + HPD_EVENT_EN, FIELD_PREP(HPD_EVENT_EN, 1)); +} + +static void dw_dp_aux_init(struct dw_dp *dp) +{ + regmap_update_bits(dp->regmap, DW_DP_GENERAL_INTERRUPT_ENABLE, + AUX_REPLY_EVENT_EN, FIELD_PREP(AUX_REPLY_EVENT_EN, 1)); +} + +static void dw_dp_init_hw(struct dw_dp *dp) +{ + regmap_update_bits(dp->regmap, DW_DP_CCTL, DEFAULT_FAST_LINK_TRAIN_EN, + FIELD_PREP(DEFAULT_FAST_LINK_TRAIN_EN, 0)); + + dw_dp_hpd_init(dp); + dw_dp_aux_init(dp); +} + +static int dw_dp_aux_write_data(struct dw_dp *dp, const u8 *buffer, size_t size) +{ + size_t i, j; + + for (i = 0; i < DIV_ROUND_UP(size, 4); i++) { + size_t num = min_t(size_t, size - i * 4, 4); + u32 value = 0; + + for (j = 0; j < num; j++) + value |= buffer[i * 4 + j] << (j * 8); + + regmap_write(dp->regmap, DW_DP_AUX_DATA0 + i * 4, value); + } + + return size; +} + +static int dw_dp_aux_read_data(struct dw_dp *dp, u8 *buffer, size_t size) +{ + size_t i, j; + + for (i = 0; i < DIV_ROUND_UP(size, 4); i++) { + size_t num = min_t(size_t, size - i * 4, 4); + u32 value; + + regmap_read(dp->regmap, DW_DP_AUX_DATA0 + i * 4, &value); + + for (j = 0; j < num; j++) + buffer[i * 4 + j] = value >> (j * 8); + } + + return size; +} + +static ssize_t dw_dp_aux_transfer(struct drm_dp_aux *aux, + struct drm_dp_aux_msg *msg) +{ + struct 
dw_dp *dp = container_of(aux, struct dw_dp, aux); + unsigned long timeout = msecs_to_jiffies(10); + u32 status, value; + ssize_t ret = 0; + + if (WARN_ON(msg->size > 16)) + return -E2BIG; + + switch (msg->request & ~DP_AUX_I2C_MOT) { + case DP_AUX_NATIVE_WRITE: + case DP_AUX_I2C_WRITE: + case DP_AUX_I2C_WRITE_STATUS_UPDATE: + ret = dw_dp_aux_write_data(dp, msg->buffer, msg->size); + if (ret < 0) + return ret; + break; + case DP_AUX_NATIVE_READ: + case DP_AUX_I2C_READ: + break; + default: + return -EINVAL; + } + + if (msg->size > 0) + value = FIELD_PREP(AUX_LEN_REQ, msg->size - 1); + else + value = FIELD_PREP(I2C_ADDR_ONLY, 1); + value |= FIELD_PREP(AUX_CMD_TYPE, msg->request); + value |= FIELD_PREP(AUX_ADDR, msg->address); + regmap_write(dp->regmap, DW_DP_AUX_CMD, value); + + status = wait_for_completion_timeout(&dp->complete, timeout); + if (!status) { + dev_err(dp->dev, "timeout waiting for AUX reply\n"); + return -ETIMEDOUT; + } + + regmap_read(dp->regmap, DW_DP_AUX_STATUS, &value); + if (value & AUX_TIMEOUT) + return -ETIMEDOUT; + + msg->reply = FIELD_GET(AUX_STATUS, value); + + if (msg->size > 0 && msg->reply == DP_AUX_NATIVE_REPLY_ACK) { + if (msg->request & DP_AUX_I2C_READ) { + size_t count = FIELD_GET(AUX_BYTES_READ, value) - 1; + + if (count != msg->size) + return -EBUSY; + + ret = dw_dp_aux_read_data(dp, msg->buffer, count); + if (ret < 0) + return ret; + } + } + + return ret; +} + +/* + * Limits for the DP video timing: + * 1. the hfp must be aligned to 2 pixels; + * 2. the minimum hsync width is 9 pixels; + * 3. the minimum hbp is 16 pixels; + */ +static int dw_dp_bridge_atomic_check(struct drm_bridge *bridge, + struct drm_bridge_state *bridge_state, + struct drm_crtc_state *crtc_state, + struct drm_connector_state *conn_state) +{ + struct drm_display_mode *adjusted_mode = &crtc_state->adjusted_mode; + struct dw_dp *dp = bridge_to_dp(bridge); + struct dw_dp_bridge_state *state; + const struct dw_dp_output_format *fmt; + struct drm_display_mode *mode; + int min_hbp = 16; + int min_hsync = 9; + + state = to_dw_dp_bridge_state(bridge_state); + mode = &state->mode; + + fmt = dw_dp_get_output_format(bridge_state->output_bus_cfg.format); + if (!fmt) + return -EINVAL; + + state->video_mapping = fmt->video_mapping; + state->color_format = fmt->color_format; + state->bpc = fmt->bpc; + state->bpp = fmt->bpp; + + if ((adjusted_mode->hsync_start - adjusted_mode->hdisplay) & 0x1) { + adjusted_mode->hsync_start += 1; + dev_warn(dp->dev, "hfp is not 2 pixel aligned, fixup to aligned hfp\n"); + } + + if (adjusted_mode->hsync_end - adjusted_mode->hsync_start < min_hsync) { + adjusted_mode->hsync_end = adjusted_mode->hsync_start + min_hsync; + dev_warn(dp->dev, "hsync is too narrow, fixup to min hsync:%d\n", min_hsync); + } + + if (adjusted_mode->htotal - adjusted_mode->hsync_end < min_hbp) { + adjusted_mode->htotal = adjusted_mode->hsync_end + min_hbp; + dev_warn(dp->dev, "hbp is too narrow, fixup to min hbp:%d\n", min_hbp); + } + + drm_mode_copy(mode, adjusted_mode); + + return 0; +} + +static enum drm_mode_status dw_dp_bridge_mode_valid(struct drm_bridge *bridge, + const struct drm_display_info *info, + const struct drm_display_mode *mode) +{ + struct dw_dp *dp = bridge_to_dp(bridge); + struct dw_dp_link *link = &dp->link; + u32 min_bpp; + + if (info->color_formats & DRM_COLOR_FORMAT_YCBCR420 && + link->vsc_sdp_supported && + (drm_mode_is_420_only(info, mode) || drm_mode_is_420_also(info, mode))) + min_bpp = 12; + else if (info->color_formats & DRM_COLOR_FORMAT_YCBCR422) + min_bpp = 
16; + else if (info->color_formats & DRM_COLOR_FORMAT_RGB444) + min_bpp = 18; + else + min_bpp = 24; + + if (!link->vsc_sdp_supported && + drm_mode_is_420_only(info, mode)) + return MODE_NO_420; + + if (!dw_dp_bandwidth_ok(dp, mode, min_bpp, link->lanes, link->rate)) + return MODE_CLOCK_HIGH; + + return MODE_OK; +} + +static bool dw_dp_needs_link_retrain(struct dw_dp *dp) +{ + struct dw_dp_link *link = &dp->link; + u8 link_status[DP_LINK_STATUS_SIZE]; + + if (!dw_dp_link_train_valid(&link->train)) + return false; + + if (drm_dp_dpcd_read_link_status(&dp->aux, link_status) < 0) + return false; + + /* Retrain if Channel EQ or CR not ok */ + return !drm_dp_channel_eq_ok(link_status, dp->link.lanes); +} + +static void dw_dp_link_disable(struct dw_dp *dp) +{ + struct dw_dp_link *link = &dp->link; + + if (dw_dp_hpd_detect(dp)) + drm_dp_link_power_down(&dp->aux, dp->link.revision); + + dw_dp_phy_xmit_enable(dp, 0); + + phy_power_off(dp->phy); + + link->train.clock_recovered = false; + link->train.channel_equalized = false; +} + +static int dw_dp_link_enable(struct dw_dp *dp) +{ + int ret; + + ret = phy_power_on(dp->phy); + if (ret) + return ret; + + ret = drm_dp_link_power_up(&dp->aux, dp->link.revision); + if (ret < 0) + return ret; + + ret = dw_dp_link_train(dp); + + return ret; +} + +static void dw_dp_bridge_atomic_enable(struct drm_bridge *bridge, + struct drm_atomic_state *state) +{ + struct dw_dp *dp = bridge_to_dp(bridge); + struct drm_connector *connector; + struct drm_connector_state *conn_state; + int ret; + + connector = drm_atomic_get_new_connector_for_encoder(state, bridge->encoder); + if (!connector) { + dev_err(dp->dev, "failed to get connector\n"); + return; + } + + conn_state = drm_atomic_get_new_connector_state(state, connector); + if (!conn_state) { + dev_err(dp->dev, "failed to get connector state\n"); + return; + } + + set_bit(0, dp->sdp_reg_bank); + + ret = dw_dp_link_enable(dp); + if (ret < 0) { + dev_err(dp->dev, "failed to enable link: %d\n", ret); + return; + } + + ret = dw_dp_video_enable(dp); + if (ret < 0) { + dev_err(dp->dev, "failed to enable video: %d\n", ret); + return; + } +} + +static void dw_dp_reset(struct dw_dp *dp) +{ + int val; + + disable_irq(dp->irq); + regmap_update_bits(dp->regmap, DW_DP_SOFT_RESET_CTRL, CONTROLLER_RESET, + FIELD_PREP(CONTROLLER_RESET, 1)); + usleep_range(10, 20); + regmap_update_bits(dp->regmap, DW_DP_SOFT_RESET_CTRL, CONTROLLER_RESET, + FIELD_PREP(CONTROLLER_RESET, 0)); + + dw_dp_init_hw(dp); + regmap_read_poll_timeout(dp->regmap, DW_DP_HPD_STATUS, val, + FIELD_GET(HPD_HOT_PLUG, val), 200, 200000); + regmap_write(dp->regmap, DW_DP_HPD_STATUS, HPD_HOT_PLUG); + enable_irq(dp->irq); +} + +static void dw_dp_bridge_atomic_disable(struct drm_bridge *bridge, + struct drm_atomic_state *state) +{ + struct dw_dp *dp = bridge_to_dp(bridge); + + dw_dp_video_disable(dp); + dw_dp_link_disable(dp); + bitmap_zero(dp->sdp_reg_bank, SDP_REG_BANK_SIZE); + dw_dp_reset(dp); +} + +static bool dw_dp_hpd_detect_link(struct dw_dp *dp, struct drm_connector *connector) +{ + int ret; + + ret = phy_power_on(dp->phy); + if (ret < 0) + return false; + ret = dw_dp_link_parse(dp, connector); + phy_power_off(dp->phy); + + return !ret; +} + +static enum drm_connector_status dw_dp_bridge_detect(struct drm_bridge *bridge, + struct drm_connector *connector) +{ + struct dw_dp *dp = bridge_to_dp(bridge); + + if (!dw_dp_hpd_detect(dp)) + return connector_status_disconnected; + + if (!dw_dp_hpd_detect_link(dp, connector)) + return connector_status_disconnected; + + return 
connector_status_connected; +} + +static const struct drm_edid *dw_dp_bridge_edid_read(struct drm_bridge *bridge, + struct drm_connector *connector) +{ + struct dw_dp *dp = bridge_to_dp(bridge); + const struct drm_edid *edid; + int ret; + + ret = phy_power_on(dp->phy); + if (ret) + return NULL; + + edid = drm_edid_read_ddc(connector, &dp->aux.ddc); + + phy_power_off(dp->phy); + + return edid; +} + +static u32 *dw_dp_bridge_atomic_get_output_bus_fmts(struct drm_bridge *bridge, + struct drm_bridge_state *bridge_state, + struct drm_crtc_state *crtc_state, + struct drm_connector_state *conn_state, + unsigned int *num_output_fmts) +{ + struct dw_dp *dp = bridge_to_dp(bridge); + struct dw_dp_link *link = &dp->link; + struct drm_display_info *di = &conn_state->connector->display_info; + struct drm_display_mode mode = crtc_state->mode; + const struct dw_dp_output_format *fmt; + u32 i, j = 0; + u32 *output_fmts; + + *num_output_fmts = 0; + + output_fmts = kcalloc(ARRAY_SIZE(dw_dp_output_formats), sizeof(*output_fmts), GFP_KERNEL); + if (!output_fmts) + return NULL; + + for (i = 0; i < ARRAY_SIZE(dw_dp_output_formats); i++) { + fmt = &dw_dp_output_formats[i]; + + if (fmt->bpc > conn_state->max_bpc) + continue; + + if (!(fmt->color_format & di->color_formats)) + continue; + + if (fmt->color_format == DRM_COLOR_FORMAT_YCBCR420 && + !link->vsc_sdp_supported) + continue; + + if (fmt->color_format != DRM_COLOR_FORMAT_YCBCR420 && + drm_mode_is_420_only(di, &mode)) + continue; + + if (!dw_dp_bandwidth_ok(dp, &mode, fmt->bpp, link->lanes, link->rate)) + continue; + + output_fmts[j++] = fmt->bus_format; + } + + *num_output_fmts = j; + + return output_fmts; +} + +static struct drm_bridge_state *dw_dp_bridge_atomic_duplicate_state(struct drm_bridge *bridge) +{ + struct dw_dp_bridge_state *state; + + state = kzalloc(sizeof(*state), GFP_KERNEL); + if (!state) + return NULL; + + __drm_atomic_helper_bridge_duplicate_state(bridge, &state->base); + + return &state->base; +} + +static const struct drm_bridge_funcs dw_dp_bridge_funcs = { + .atomic_duplicate_state = dw_dp_bridge_atomic_duplicate_state, + .atomic_destroy_state = drm_atomic_helper_bridge_destroy_state, + .atomic_reset = drm_atomic_helper_bridge_reset, + .atomic_get_input_bus_fmts = drm_atomic_helper_bridge_propagate_bus_fmt, + .atomic_get_output_bus_fmts = dw_dp_bridge_atomic_get_output_bus_fmts, + .atomic_check = dw_dp_bridge_atomic_check, + .mode_valid = dw_dp_bridge_mode_valid, + .atomic_enable = dw_dp_bridge_atomic_enable, + .atomic_disable = dw_dp_bridge_atomic_disable, + .detect = dw_dp_bridge_detect, + .edid_read = dw_dp_bridge_edid_read, +}; + +static int dw_dp_link_retrain(struct dw_dp *dp) +{ + struct drm_device *dev = dp->bridge.dev; + struct drm_modeset_acquire_ctx ctx; + int ret; + + if (!dw_dp_needs_link_retrain(dp)) + return 0; + + dev_dbg(dp->dev, "Retraining link\n"); + + drm_modeset_acquire_init(&ctx, 0); + for (;;) { + ret = drm_modeset_lock(&dev->mode_config.connection_mutex, &ctx); + if (ret != -EDEADLK) + break; + + drm_modeset_backoff(&ctx); + } + + if (!ret) + ret = dw_dp_link_train(dp); + + drm_modeset_drop_locks(&ctx); + drm_modeset_acquire_fini(&ctx); + + return ret; +} + +static void dw_dp_hpd_work(struct work_struct *work) +{ + struct dw_dp *dp = container_of(work, struct dw_dp, hpd_work); + bool long_hpd; + int ret; + + mutex_lock(&dp->irq_lock); + long_hpd = dp->hotplug.long_hpd; + mutex_unlock(&dp->irq_lock); + + dev_dbg(dp->dev, "[drm] Get hpd irq - %s\n", long_hpd ? 
"long" : "short"); + + if (!long_hpd) { + if (dw_dp_needs_link_retrain(dp)) { + ret = dw_dp_link_retrain(dp); + if (ret) + dev_warn(dp->dev, "Retrain link failed\n"); + } + } else { + drm_helper_hpd_irq_event(dp->bridge.dev); + } +} + +static void dw_dp_handle_hpd_event(struct dw_dp *dp) +{ + u32 value; + + mutex_lock(&dp->irq_lock); + regmap_read(dp->regmap, DW_DP_HPD_STATUS, &value); + + if (value & HPD_IRQ) { + dev_dbg(dp->dev, "IRQ from the HPD\n"); + dp->hotplug.long_hpd = false; + regmap_write(dp->regmap, DW_DP_HPD_STATUS, HPD_IRQ); + } + + if (value & HPD_HOT_PLUG) { + dev_dbg(dp->dev, "Hot plug detected\n"); + dp->hotplug.long_hpd = true; + regmap_write(dp->regmap, DW_DP_HPD_STATUS, HPD_HOT_PLUG); + } + + if (value & HPD_HOT_UNPLUG) { + dev_dbg(dp->dev, "Unplug detected\n"); + dp->hotplug.long_hpd = true; + regmap_write(dp->regmap, DW_DP_HPD_STATUS, HPD_HOT_UNPLUG); + } + mutex_unlock(&dp->irq_lock); + + schedule_work(&dp->hpd_work); +} + +static irqreturn_t dw_dp_irq(int irq, void *data) +{ + struct dw_dp *dp = data; + u32 value; + + regmap_read(dp->regmap, DW_DP_GENERAL_INTERRUPT, &value); + if (!value) + return IRQ_NONE; + + if (value & HPD_EVENT) + dw_dp_handle_hpd_event(dp); + + if (value & AUX_REPLY_EVENT) { + regmap_write(dp->regmap, DW_DP_GENERAL_INTERRUPT, AUX_REPLY_EVENT); + complete(&dp->complete); + } + + return IRQ_HANDLED; +} + +static const struct regmap_range dw_dp_readable_ranges[] = { + regmap_reg_range(DW_DP_VERSION_NUMBER, DW_DP_ID), + regmap_reg_range(DW_DP_CONFIG_REG1, DW_DP_CONFIG_REG3), + regmap_reg_range(DW_DP_CCTL, DW_DP_SOFT_RESET_CTRL), + regmap_reg_range(DW_DP_VSAMPLE_CTRL, DW_DP_VIDEO_HBLANK_INTERVAL), + regmap_reg_range(DW_DP_AUD_CONFIG1, DW_DP_AUD_CONFIG1), + regmap_reg_range(DW_DP_SDP_VERTICAL_CTRL, DW_DP_SDP_STATUS_EN), + regmap_reg_range(DW_DP_PHYIF_CTRL, DW_DP_PHYIF_PWRDOWN_CTRL), + regmap_reg_range(DW_DP_AUX_CMD, DW_DP_AUX_DATA3), + regmap_reg_range(DW_DP_GENERAL_INTERRUPT, DW_DP_HPD_INTERRUPT_ENABLE), +}; + +static const struct regmap_access_table dw_dp_readable_table = { + .yes_ranges = dw_dp_readable_ranges, + .n_yes_ranges = ARRAY_SIZE(dw_dp_readable_ranges), +}; + +static const struct regmap_config dw_dp_regmap_config = { + .reg_bits = 32, + .reg_stride = 4, + .val_bits = 32, + .fast_io = true, + .max_register = DW_DP_MAX_REGISTER, + .rd_table = &dw_dp_readable_table, +}; + +static void dw_dp_phy_exit(void *data) +{ + struct dw_dp *dp = data; + + phy_exit(dp->phy); +} + +struct dw_dp *dw_dp_bind(struct device *dev, struct drm_encoder *encoder, + const struct dw_dp_plat_data *plat_data) +{ + struct platform_device *pdev = to_platform_device(dev); + struct dw_dp *dp; + struct drm_bridge *bridge; + void __iomem *res; + int ret; + + dp = devm_kzalloc(dev, sizeof(*dp), GFP_KERNEL); + if (!dp) + return ERR_PTR(-ENOMEM); + + dp = devm_drm_bridge_alloc(dev, struct dw_dp, bridge, &dw_dp_bridge_funcs); + if (IS_ERR(dp)) + return ERR_CAST(dp); + + dp->dev = dev; + dp->pixel_mode = DW_DP_MP_QUAD_PIXEL; + + dp->plat_data.max_link_rate = plat_data->max_link_rate; + bridge = &dp->bridge; + mutex_init(&dp->irq_lock); + INIT_WORK(&dp->hpd_work, dw_dp_hpd_work); + init_completion(&dp->complete); + + res = devm_platform_ioremap_resource(pdev, 0); + if (IS_ERR(res)) + return ERR_CAST(res); + + dp->regmap = devm_regmap_init_mmio(dev, res, &dw_dp_regmap_config); + if (IS_ERR(dp->regmap)) { + dev_err_probe(dev, PTR_ERR(dp->regmap), "failed to create regmap\n"); + return ERR_CAST(dp->regmap); + } + + dp->phy = devm_of_phy_get(dev, dev->of_node, NULL); + if 
(IS_ERR(dp->phy)) { + dev_err_probe(dev, PTR_ERR(dp->phy), "failed to get phy\n"); + return ERR_CAST(dp->phy); + } + + dp->apb_clk = devm_clk_get_enabled(dev, "apb"); + if (IS_ERR(dp->apb_clk)) { + dev_err_probe(dev, PTR_ERR(dp->apb_clk), "failed to get apb clock\n"); + return ERR_CAST(dp->apb_clk); + } + + dp->aux_clk = devm_clk_get_enabled(dev, "aux"); + if (IS_ERR(dp->aux_clk)) { + dev_err_probe(dev, PTR_ERR(dp->aux_clk), "failed to get aux clock\n"); + return ERR_CAST(dp->aux_clk); + } + + dp->i2s_clk = devm_clk_get(dev, "i2s"); + if (IS_ERR(dp->i2s_clk)) { + dev_err_probe(dev, PTR_ERR(dp->i2s_clk), "failed to get i2s clock\n"); + return ERR_CAST(dp->i2s_clk); + } + + dp->spdif_clk = devm_clk_get(dev, "spdif"); + if (IS_ERR(dp->spdif_clk)) { + dev_err_probe(dev, PTR_ERR(dp->spdif_clk), "failed to get spdif clock\n"); + return ERR_CAST(dp->spdif_clk); + } + + dp->hdcp_clk = devm_clk_get(dev, "hdcp"); + if (IS_ERR(dp->hdcp_clk)) { + dev_err_probe(dev, PTR_ERR(dp->hdcp_clk), "failed to get hdcp clock\n"); + return ERR_CAST(dp->hdcp_clk); + } + + dp->rstc = devm_reset_control_get(dev, NULL); + if (IS_ERR(dp->rstc)) { + dev_err_probe(dev, PTR_ERR(dp->rstc), "failed to get reset control\n"); + return ERR_CAST(dp->rstc); + } + + bridge->of_node = dev->of_node; + bridge->ops = DRM_BRIDGE_OP_DETECT | DRM_BRIDGE_OP_EDID | DRM_BRIDGE_OP_HPD; + bridge->type = DRM_MODE_CONNECTOR_DisplayPort; + bridge->ycbcr_420_allowed = true; + + dp->aux.dev = dev; + dp->aux.drm_dev = encoder->dev; + dp->aux.name = dev_name(dev); + dp->aux.transfer = dw_dp_aux_transfer; + ret = drm_dp_aux_register(&dp->aux); + if (ret) { + dev_err_probe(dev, ret, "Aux register failed\n"); + return ERR_PTR(ret); + } + + ret = drm_bridge_attach(encoder, bridge, NULL, DRM_BRIDGE_ATTACH_NO_CONNECTOR); + if (ret) + dev_err_probe(dev, ret, "Failed to attach bridge\n"); + + dw_dp_init_hw(dp); + + ret = phy_init(dp->phy); + if (ret) { + dev_err_probe(dev, ret, "phy init failed\n"); + return ERR_PTR(ret); + } + + ret = devm_add_action_or_reset(dev, dw_dp_phy_exit, dp); + if (ret) + return ERR_PTR(ret); + + dp->irq = platform_get_irq(pdev, 0); + if (dp->irq < 0) + return ERR_PTR(dp->irq); + + ret = devm_request_threaded_irq(dev, dp->irq, NULL, dw_dp_irq, + IRQF_ONESHOT, dev_name(dev), dp); + if (ret) { + dev_err_probe(dev, ret, "failed to request irq\n"); + return ERR_PTR(ret); + } + + return dp; +} +EXPORT_SYMBOL_GPL(dw_dp_bind); + +MODULE_AUTHOR("Andy Yan <andyshrk@163.com>"); +MODULE_DESCRIPTION("DW DP Core Library"); +MODULE_LICENSE("GPL"); diff --git a/drivers/gpu/drm/bridge/synopsys/dw-hdmi-qp.c b/drivers/gpu/drm/bridge/synopsys/dw-hdmi-qp.c index 7ade80f02a94..39332c57f2c5 100644 --- a/drivers/gpu/drm/bridge/synopsys/dw-hdmi-qp.c +++ b/drivers/gpu/drm/bridge/synopsys/dw-hdmi-qp.c @@ -440,8 +440,8 @@ static void dw_hdmi_qp_set_sample_rate(struct dw_hdmi_qp *hdmi, unsigned long lo dw_hdmi_qp_set_cts_n(hdmi, cts, n); } -static int dw_hdmi_qp_audio_enable(struct drm_connector *connector, - struct drm_bridge *bridge) +static int dw_hdmi_qp_audio_enable(struct drm_bridge *bridge, + struct drm_connector *connector) { struct dw_hdmi_qp *hdmi = dw_hdmi_qp_from_bridge(bridge); @@ -451,8 +451,8 @@ static int dw_hdmi_qp_audio_enable(struct drm_connector *connector, return 0; } -static int dw_hdmi_qp_audio_prepare(struct drm_connector *connector, - struct drm_bridge *bridge, +static int dw_hdmi_qp_audio_prepare(struct drm_bridge *bridge, + struct drm_connector *connector, struct hdmi_codec_daifmt *fmt, struct hdmi_codec_params *hparms) { @@ -497,8
+497,8 @@ static void dw_hdmi_qp_audio_disable_regs(struct dw_hdmi_qp *hdmi) AVP_DATAPATH_PACKET_AUDIO_SWDISABLE, GLOBAL_SWDISABLE); } -static void dw_hdmi_qp_audio_disable(struct drm_connector *connector, - struct drm_bridge *bridge) +static void dw_hdmi_qp_audio_disable(struct drm_bridge *bridge, + struct drm_connector *connector) { struct dw_hdmi_qp *hdmi = dw_hdmi_qp_from_bridge(bridge); @@ -876,7 +876,7 @@ static void dw_hdmi_qp_bridge_atomic_disable(struct drm_bridge *bridge, } static enum drm_connector_status -dw_hdmi_qp_bridge_detect(struct drm_bridge *bridge) +dw_hdmi_qp_bridge_detect(struct drm_bridge *bridge, struct drm_connector *connector) { struct dw_hdmi_qp *hdmi = bridge->driver_private; diff --git a/drivers/gpu/drm/bridge/synopsys/dw-hdmi.c b/drivers/gpu/drm/bridge/synopsys/dw-hdmi.c index 76c6570e2a85..206b099a35e9 100644 --- a/drivers/gpu/drm/bridge/synopsys/dw-hdmi.c +++ b/drivers/gpu/drm/bridge/synopsys/dw-hdmi.c @@ -2978,7 +2978,8 @@ static void dw_hdmi_bridge_atomic_enable(struct drm_bridge *bridge, mutex_unlock(&hdmi->mutex); } -static enum drm_connector_status dw_hdmi_bridge_detect(struct drm_bridge *bridge) +static enum drm_connector_status +dw_hdmi_bridge_detect(struct drm_bridge *bridge, struct drm_connector *connector) { struct dw_hdmi *hdmi = bridge->driver_private; diff --git a/drivers/gpu/drm/bridge/tc358767.c b/drivers/gpu/drm/bridge/tc358767.c index 61559467e2d2..4097fef4b86b 100644 --- a/drivers/gpu/drm/bridge/tc358767.c +++ b/drivers/gpu/drm/bridge/tc358767.c @@ -1760,7 +1760,8 @@ static const struct drm_connector_helper_funcs tc_connector_helper_funcs = { .get_modes = tc_connector_get_modes, }; -static enum drm_connector_status tc_bridge_detect(struct drm_bridge *bridge) +static enum drm_connector_status +tc_bridge_detect(struct drm_bridge *bridge, struct drm_connector *connector) { struct tc_data *tc = bridge_to_tc(bridge); bool conn; @@ -1785,7 +1786,7 @@ tc_connector_detect(struct drm_connector *connector, bool force) struct tc_data *tc = connector_to_tc(connector); if (tc->hpd_pin >= 0) - return tc_bridge_detect(&tc->bridge); + return tc_bridge_detect(&tc->bridge, connector); if (tc->panel_bridge) return connector_status_connected; @@ -2422,6 +2423,7 @@ static int tc_probe_bridge_endpoint(struct tc_data *tc, enum tc_mode mode) struct device_node *node = NULL; for_each_endpoint_of_node(dev->of_node, node) { + of_graph_parse_endpoint(node, &endpoint); if (endpoint.port == 2) { of_property_read_u8_array(node, "toshiba,pre-emphasis", tc->pre_emphasis, diff --git a/drivers/gpu/drm/bridge/ti-sn65dsi86.c b/drivers/gpu/drm/bridge/ti-sn65dsi86.c index 78a50b947a08..ae0d08e5e960 100644 --- a/drivers/gpu/drm/bridge/ti-sn65dsi86.c +++ b/drivers/gpu/drm/bridge/ti-sn65dsi86.c @@ -348,12 +348,18 @@ static void ti_sn65dsi86_enable_comms(struct ti_sn65dsi86 *pdata, * 200 ms. We'll assume that the panel driver will have the hardcoded * delay in its prepare and always disable HPD. * - * If HPD somehow makes sense on some future panel we'll have to - * change this to be conditional on someone specifying that HPD should - * be used. + * For DisplayPort bridge type, we need HPD. So we use the bridge type + * to conditionally disable HPD. + * NOTE: The bridge type is set in ti_sn_bridge_probe() but enable_comms() + * can be called before. So for DisplayPort, HPD will be enabled once + * bridge type is set. We are using bridge type instead of "no-hpd" + * property because it is not used properly in devicetree description + * and hence is unreliable. 
*/ - regmap_update_bits(pdata->regmap, SN_HPD_DISABLE_REG, HPD_DISABLE, - HPD_DISABLE); + + if (pdata->bridge.type != DRM_MODE_CONNECTOR_DisplayPort) + regmap_update_bits(pdata->regmap, SN_HPD_DISABLE_REG, HPD_DISABLE, + HPD_DISABLE); pdata->comms_enabled = true; @@ -387,6 +393,17 @@ static int __maybe_unused ti_sn65dsi86_resume(struct device *dev) gpiod_set_value_cansleep(pdata->enable_gpio, 1); /* + * After EN is deasserted and an external clock is detected, the bridge + * will sample GPIO3:1 to determine its frequency. The driver will + * overwrite this setting in ti_sn_bridge_set_refclk_freq(). But this is + * racy. Thus we have to wait a couple of us. According to the datasheet + * the GPIO lines have to be stable for at least 5 us (td5) but it seems that + * is not enough and the refclk frequency value is still lost or + * overwritten by the bridge itself. Waiting for 20us seems to work. + */ + usleep_range(20, 30); + + /* * If we have a reference clock we can enable communication w/ the * panel (including the aux channel) w/out any need for an input clock * so we can do it in resume which lets us read the EDID before @@ -1155,14 +1172,20 @@ static void ti_sn_bridge_atomic_post_disable(struct drm_bridge *bridge, pm_runtime_put_sync(pdata->dev); } -static enum drm_connector_status ti_sn_bridge_detect(struct drm_bridge *bridge) +static enum drm_connector_status +ti_sn_bridge_detect(struct drm_bridge *bridge, struct drm_connector *connector) { struct ti_sn65dsi86 *pdata = bridge_to_ti_sn65dsi86(bridge); int val = 0; - pm_runtime_get_sync(pdata->dev); + /* + * Runtime reference is grabbed in ti_sn_bridge_hpd_enable() + * as the chip won't report HPD just after being powered on. + * HPD_DEBOUNCED_STATE reflects correct state only after the + * debounce time (~100-400 ms). + */ + regmap_read(pdata->regmap, SN_HPD_DISABLE_REG, &val); - pm_runtime_put_autosuspend(pdata->dev); return val & HPD_DEBOUNCED_STATE ? connector_status_connected : connector_status_disconnected; @@ -1185,6 +1208,26 @@ static void ti_sn65dsi86_debugfs_init(struct drm_bridge *bridge, struct dentry * debugfs_create_file("status", 0600, debugfs, pdata, &status_fops); } +static void ti_sn_bridge_hpd_enable(struct drm_bridge *bridge) +{ + struct ti_sn65dsi86 *pdata = bridge_to_ti_sn65dsi86(bridge); + + /* + * Device needs to be powered on before reading the HPD state + * for reliable hpd detection in ti_sn_bridge_detect() due to + * the high debounce time. + */ + + pm_runtime_get_sync(pdata->dev); +} + +static void ti_sn_bridge_hpd_disable(struct drm_bridge *bridge) +{ + struct ti_sn65dsi86 *pdata = bridge_to_ti_sn65dsi86(bridge); + + pm_runtime_put_autosuspend(pdata->dev); +} + static const struct drm_bridge_funcs ti_sn_bridge_funcs = { .attach = ti_sn_bridge_attach, .detach = ti_sn_bridge_detach, @@ -1199,6 +1242,8 @@ static const struct drm_bridge_funcs ti_sn_bridge_funcs = { .atomic_duplicate_state = drm_atomic_helper_bridge_duplicate_state, .atomic_destroy_state = drm_atomic_helper_bridge_destroy_state, .debugfs_init = ti_sn65dsi86_debugfs_init, + .hpd_enable = ti_sn_bridge_hpd_enable, + .hpd_disable = ti_sn_bridge_hpd_disable, }; static void ti_sn_bridge_parse_lanes(struct ti_sn65dsi86 *pdata, @@ -1286,8 +1331,26 @@ static int ti_sn_bridge_probe(struct auxiliary_device *adev, pdata->bridge.type = pdata->next_bridge->type == DRM_MODE_CONNECTOR_DisplayPort ?
DRM_MODE_CONNECTOR_DisplayPort : DRM_MODE_CONNECTOR_eDP; - if (pdata->bridge.type == DRM_MODE_CONNECTOR_DisplayPort) - pdata->bridge.ops = DRM_BRIDGE_OP_EDID | DRM_BRIDGE_OP_DETECT; + if (pdata->bridge.type == DRM_MODE_CONNECTOR_DisplayPort) { + pdata->bridge.ops = DRM_BRIDGE_OP_EDID | DRM_BRIDGE_OP_DETECT | + DRM_BRIDGE_OP_HPD; + /* + * If comms were already enabled they would have been enabled + * with the wrong value of HPD_DISABLE. Update it now. Comms + * could be enabled if anyone is holding a pm_runtime reference + * (like if a GPIO is in use). Note that in most cases nobody + * is doing AUX channel xfers before the bridge is added so + * HPD doesn't _really_ matter then. The only exception is in + * the eDP case where the panel wants to read the EDID before + * the bridge is added. We always consistently have HPD disabled + * for eDP. + */ + mutex_lock(&pdata->comms_mutex); + if (pdata->comms_enabled) + regmap_update_bits(pdata->regmap, SN_HPD_DISABLE_REG, + HPD_DISABLE, 0); + mutex_unlock(&pdata->comms_mutex); + } drm_bridge_add(&pdata->bridge); @@ -1677,11 +1740,6 @@ static int ti_sn_bridge_gpio_set(struct gpio_chip *chip, unsigned int offset, { struct ti_sn65dsi86 *pdata = gpiochip_get_data(chip); - if (!test_bit(offset, pdata->gchip_output)) { - dev_err(pdata->dev, "Ignoring GPIO set while input\n"); - return -EPERM; - } - val &= 1; return regmap_update_bits(pdata->regmap, SN_GPIO_IO_REG, BIT(SN_GPIO_OUTPUT_SHIFT + offset), @@ -1789,7 +1847,7 @@ static int ti_sn_gpio_probe(struct auxiliary_device *adev, pdata->gchip.direction_input = ti_sn_bridge_gpio_direction_input; pdata->gchip.direction_output = ti_sn_bridge_gpio_direction_output; pdata->gchip.get = ti_sn_bridge_gpio_get; - pdata->gchip.set_rv = ti_sn_bridge_gpio_set; + pdata->gchip.set = ti_sn_bridge_gpio_set; pdata->gchip.can_sleep = true; pdata->gchip.names = ti_sn_bridge_gpio_names; pdata->gchip.ngpio = SN_NUM_GPIOS; diff --git a/drivers/gpu/drm/bridge/ti-tfp410.c b/drivers/gpu/drm/bridge/ti-tfp410.c index 549e8e8edeb4..b80ee089f880 100644 --- a/drivers/gpu/drm/bridge/ti-tfp410.c +++ b/drivers/gpu/drm/bridge/ti-tfp410.c @@ -89,7 +89,7 @@ tfp410_connector_detect(struct drm_connector *connector, bool force) { struct tfp410 *dvi = drm_connector_to_tfp410(connector); - return drm_bridge_detect(dvi->next_bridge); + return drm_bridge_detect(dvi->next_bridge, connector); } static const struct drm_connector_funcs tfp410_con_funcs = { diff --git a/drivers/gpu/drm/bridge/ti-tpd12s015.c b/drivers/gpu/drm/bridge/ti-tpd12s015.c index 0919364e80d1..dcf686c4e73d 100644 --- a/drivers/gpu/drm/bridge/ti-tpd12s015.c +++ b/drivers/gpu/drm/bridge/ti-tpd12s015.c @@ -77,6 +77,12 @@ static enum drm_connector_status tpd12s015_detect(struct drm_bridge *bridge) return connector_status_disconnected; } +static enum drm_connector_status +tpd12s015_bridge_detect(struct drm_bridge *bridge, struct drm_connector *connector) +{ + return tpd12s015_detect(bridge); +} + static void tpd12s015_hpd_enable(struct drm_bridge *bridge) { struct tpd12s015_device *tpd = to_tpd12s015(bridge); @@ -94,7 +100,7 @@ static void tpd12s015_hpd_disable(struct drm_bridge *bridge) static const struct drm_bridge_funcs tpd12s015_bridge_funcs = { .attach = tpd12s015_attach, .detach = tpd12s015_detach, - .detect = tpd12s015_detect, + .detect = tpd12s015_bridge_detect, .hpd_enable = tpd12s015_hpd_enable, .hpd_disable = tpd12s015_hpd_disable, }; diff --git a/drivers/gpu/drm/bridge/waveshare-dsi.c b/drivers/gpu/drm/bridge/waveshare-dsi.c new file mode 100644 index 
000000000000..43f4e7412d72 --- /dev/null +++ b/drivers/gpu/drm/bridge/waveshare-dsi.c @@ -0,0 +1,203 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright 2025 NXP + * Based on panel-raspberrypi-touchscreen by Broadcom + */ + +#include <linux/backlight.h> +#include <linux/err.h> +#include <linux/i2c.h> +#include <linux/module.h> +#include <linux/of.h> +#include <linux/of_graph.h> +#include <linux/regmap.h> + +#include <drm/drm_bridge.h> +#include <drm/drm_mipi_dsi.h> +#include <drm/drm_of.h> +#include <drm/drm_panel.h> + +struct ws_bridge { + struct drm_bridge bridge; + struct drm_bridge *next_bridge; + struct backlight_device *backlight; + struct device *dev; + struct regmap *reg_map; +}; + +static const struct regmap_config ws_regmap_config = { + .reg_bits = 8, + .val_bits = 8, + .max_register = 0xff, +}; + +static struct ws_bridge *bridge_to_ws_bridge(struct drm_bridge *bridge) +{ + return container_of(bridge, struct ws_bridge, bridge); +} + +static int ws_bridge_attach_dsi(struct ws_bridge *ws) +{ + const struct mipi_dsi_device_info info = { + .type = "ws-bridge", + .channel = 0, + .node = NULL, + }; + struct device_node *dsi_host_node; + struct device *dev = ws->dev; + struct mipi_dsi_device *dsi; + struct mipi_dsi_host *host; + int ret; + + dsi_host_node = of_graph_get_remote_node(dev->of_node, 0, 0); + if (!dsi_host_node) { + dev_err(dev, "Failed to get remote port\n"); + return -ENODEV; + } + host = of_find_mipi_dsi_host_by_node(dsi_host_node); + of_node_put(dsi_host_node); + if (!host) + return dev_err_probe(dev, -EPROBE_DEFER, "Failed to find dsi_host\n"); + + dsi = devm_mipi_dsi_device_register_full(dev, host, &info); + if (IS_ERR(dsi)) + return dev_err_probe(dev, PTR_ERR(dsi), "Failed to create dsi device\n"); + + dsi->mode_flags = MIPI_DSI_MODE_VIDEO_HSE | MIPI_DSI_MODE_VIDEO | + MIPI_DSI_CLOCK_NON_CONTINUOUS; + dsi->format = MIPI_DSI_FMT_RGB888; + dsi->lanes = 2; + + ret = devm_mipi_dsi_attach(dev, dsi); + if (ret < 0) + return dev_err_probe(dev, ret, "Failed to attach dsi to host\n"); + + return 0; +} + +static int ws_bridge_bridge_attach(struct drm_bridge *bridge, + struct drm_encoder *encoder, + enum drm_bridge_attach_flags flags) +{ + struct ws_bridge *ws = bridge_to_ws_bridge(bridge); + int ret; + + ret = ws_bridge_attach_dsi(ws); + if (ret) + return ret; + + return drm_bridge_attach(encoder, ws->next_bridge, + &ws->bridge, flags); +} + +static void ws_bridge_bridge_enable(struct drm_bridge *bridge) +{ + struct ws_bridge *ws = bridge_to_ws_bridge(bridge); + + regmap_write(ws->reg_map, 0xad, 0x01); + backlight_enable(ws->backlight); +} + +static void ws_bridge_bridge_disable(struct drm_bridge *bridge) +{ + struct ws_bridge *ws = bridge_to_ws_bridge(bridge); + + backlight_disable(ws->backlight); + regmap_write(ws->reg_map, 0xad, 0x00); +} + +static const struct drm_bridge_funcs ws_bridge_bridge_funcs = { + .enable = ws_bridge_bridge_enable, + .disable = ws_bridge_bridge_disable, + .attach = ws_bridge_bridge_attach, +}; + +static int ws_bridge_bl_update_status(struct backlight_device *bl) +{ + struct ws_bridge *ws = bl_get_data(bl); + + regmap_write(ws->reg_map, 0xab, 0xff - backlight_get_brightness(bl)); + regmap_write(ws->reg_map, 0xaa, 0x01); + + return 0; +} + +static const struct backlight_ops ws_bridge_bl_ops = { + .update_status = ws_bridge_bl_update_status, +}; + +static struct backlight_device *ws_bridge_create_backlight(struct ws_bridge *ws) +{ + const struct backlight_properties props = { + .type = BACKLIGHT_RAW, + .brightness = 255, + .max_brightness = 255, 
+ }; + struct device *dev = ws->dev; + + return devm_backlight_device_register(dev, dev_name(dev), dev, ws, + &ws_bridge_bl_ops, &props); +} + +static int ws_bridge_probe(struct i2c_client *i2c) +{ + struct device *dev = &i2c->dev; + struct drm_panel *panel; + struct ws_bridge *ws; + int ret; + + ws = devm_drm_bridge_alloc(dev, struct ws_bridge, bridge, &ws_bridge_bridge_funcs); + if (IS_ERR(ws)) + return PTR_ERR(ws); + + ws->dev = dev; + + ws->reg_map = devm_regmap_init_i2c(i2c, &ws_regmap_config); + if (IS_ERR(ws->reg_map)) + return dev_err_probe(dev, PTR_ERR(ws->reg_map), "Failed to allocate regmap\n"); + + ret = drm_of_find_panel_or_bridge(dev->of_node, 1, -1, &panel, NULL); + if (ret) + return dev_err_probe(dev, ret, "Failed to find remote panel\n"); + + ws->next_bridge = devm_drm_panel_bridge_add(dev, panel); + if (IS_ERR(ws->next_bridge)) + return PTR_ERR(ws->next_bridge); + + ws->backlight = ws_bridge_create_backlight(ws); + if (IS_ERR(ws->backlight)) { + ret = PTR_ERR(ws->backlight); + dev_err(dev, "Failed to create backlight: %d\n", ret); + return ret; + } + + regmap_write(ws->reg_map, 0xc0, 0x01); + regmap_write(ws->reg_map, 0xc2, 0x01); + regmap_write(ws->reg_map, 0xac, 0x01); + + ws->bridge.type = DRM_MODE_CONNECTOR_DPI; + ws->bridge.of_node = dev->of_node; + devm_drm_bridge_add(dev, &ws->bridge); + + return 0; +} + +static const struct of_device_id ws_bridge_of_ids[] = { + {.compatible = "waveshare,dsi2dpi",}, + { } +}; + +MODULE_DEVICE_TABLE(of, ws_bridge_of_ids); + +static struct i2c_driver ws_bridge_driver = { + .driver = { + .name = "ws_dsi2dpi", + .of_match_table = ws_bridge_of_ids, + }, + .probe = ws_bridge_probe, +}; +module_i2c_driver(ws_bridge_driver); + +MODULE_AUTHOR("Joseph Guo <qijian.guo@nxp.com>"); +MODULE_DESCRIPTION("Waveshare DSI2DPI bridge driver"); +MODULE_LICENSE("GPL"); diff --git a/drivers/gpu/drm/clients/drm_client_setup.c b/drivers/gpu/drm/clients/drm_client_setup.c index aec2fab6d2bf..72480db1f00d 100644 --- a/drivers/gpu/drm/clients/drm_client_setup.c +++ b/drivers/gpu/drm/clients/drm_client_setup.c @@ -4,6 +4,7 @@ #include <drm/clients/drm_client_setup.h> #include <drm/drm_device.h> +#include <drm/drm_drv.h> #include <drm/drm_fourcc.h> #include <drm/drm_print.h> @@ -33,6 +34,10 @@ MODULE_PARM_DESC(active, */ void drm_client_setup(struct drm_device *dev, const struct drm_format_info *format) { + if (!drm_core_check_feature(dev, DRIVER_MODESET)) { + drm_dbg(dev, "driver does not support mode-setting, skipping DRM clients\n"); + return; + } #ifdef CONFIG_DRM_FBDEV_EMULATION if (!strcmp(drm_client_default, "fbdev")) { diff --git a/drivers/gpu/drm/display/drm_bridge_connector.c b/drivers/gpu/drm/display/drm_bridge_connector.c index 6cdb432dbc30..baacd21e7341 100644 --- a/drivers/gpu/drm/display/drm_bridge_connector.c +++ b/drivers/gpu/drm/display/drm_bridge_connector.c @@ -20,6 +20,7 @@ #include <drm/drm_modeset_helper_vtables.h> #include <drm/drm_print.h> #include <drm/drm_probe_helper.h> +#include <drm/display/drm_hdcp_helper.h> #include <drm/display/drm_hdmi_audio_helper.h> #include <drm/display/drm_hdmi_cec_helper.h> #include <drm/display/drm_hdmi_helper.h> @@ -210,7 +211,7 @@ drm_bridge_connector_detect(struct drm_connector *connector, bool force) enum drm_connector_status status; if (detect) { - status = detect->funcs->detect(detect); + status = detect->funcs->detect(detect, connector); if (hdmi) drm_atomic_helper_connector_hdmi_hotplug(connector, status); @@ -463,7 +464,7 @@ static int drm_bridge_connector_audio_startup(struct drm_connector 
*connector) if (!bridge->funcs->hdmi_audio_startup) return 0; - return bridge->funcs->hdmi_audio_startup(connector, bridge); + return bridge->funcs->hdmi_audio_startup(bridge, connector); } if (bridge_connector->bridge_dp_audio) { @@ -472,7 +473,7 @@ static int drm_bridge_connector_audio_startup(struct drm_connector *connector) if (!bridge->funcs->dp_audio_startup) return 0; - return bridge->funcs->dp_audio_startup(connector, bridge); + return bridge->funcs->dp_audio_startup(bridge, connector); } return -EINVAL; @@ -489,13 +490,13 @@ static int drm_bridge_connector_audio_prepare(struct drm_connector *connector, if (bridge_connector->bridge_hdmi_audio) { bridge = bridge_connector->bridge_hdmi_audio; - return bridge->funcs->hdmi_audio_prepare(connector, bridge, fmt, hparms); + return bridge->funcs->hdmi_audio_prepare(bridge, connector, fmt, hparms); } if (bridge_connector->bridge_dp_audio) { bridge = bridge_connector->bridge_dp_audio; - return bridge->funcs->dp_audio_prepare(connector, bridge, fmt, hparms); + return bridge->funcs->dp_audio_prepare(bridge, connector, fmt, hparms); } return -EINVAL; @@ -509,12 +510,12 @@ static void drm_bridge_connector_audio_shutdown(struct drm_connector *connector) if (bridge_connector->bridge_hdmi_audio) { bridge = bridge_connector->bridge_hdmi_audio; - bridge->funcs->hdmi_audio_shutdown(connector, bridge); + bridge->funcs->hdmi_audio_shutdown(bridge, connector); } if (bridge_connector->bridge_dp_audio) { bridge = bridge_connector->bridge_dp_audio; - bridge->funcs->dp_audio_shutdown(connector, bridge); + bridge->funcs->dp_audio_shutdown(bridge, connector); } } @@ -531,7 +532,7 @@ static int drm_bridge_connector_audio_mute_stream(struct drm_connector *connecto if (!bridge->funcs->hdmi_audio_mute_stream) return -ENOTSUPP; - return bridge->funcs->hdmi_audio_mute_stream(connector, bridge, + return bridge->funcs->hdmi_audio_mute_stream(bridge, connector, enable, direction); } @@ -541,7 +542,7 @@ static int drm_bridge_connector_audio_mute_stream(struct drm_connector *connecto if (!bridge->funcs->dp_audio_mute_stream) return -ENOTSUPP; - return bridge->funcs->dp_audio_mute_stream(connector, bridge, + return bridge->funcs->dp_audio_mute_stream(bridge, connector, enable, direction); } @@ -604,7 +605,7 @@ static int drm_bridge_connector_hdmi_cec_init(struct drm_connector *connector) if (!bridge->funcs->hdmi_cec_init) return 0; - return bridge->funcs->hdmi_cec_init(connector, bridge); + return bridge->funcs->hdmi_cec_init(bridge, connector); } static const struct drm_connector_hdmi_cec_funcs drm_bridge_connector_hdmi_cec_funcs = { @@ -641,6 +642,7 @@ struct drm_connector *drm_bridge_connector_init(struct drm_device *drm, struct drm_bridge *bridge, *panel_bridge = NULL; unsigned int supported_formats = BIT(HDMI_COLORSPACE_RGB); unsigned int max_bpc = 8; + bool support_hdcp = false; int connector_type; int ret; @@ -749,12 +751,11 @@ struct drm_connector *drm_bridge_connector_init(struct drm_device *drm, return ERR_PTR(-EINVAL); } - if (!drm_bridge_get_next_bridge(bridge)) + if (drm_bridge_is_last(bridge)) connector_type = bridge->type; #ifdef CONFIG_OF - if (!drm_bridge_get_next_bridge(bridge) && - bridge->of_node) + if (drm_bridge_is_last(bridge) && bridge->of_node) connector->fwnode = fwnode_handle_get(of_fwnode_handle(bridge->of_node)); #endif @@ -763,6 +764,9 @@ struct drm_connector *drm_bridge_connector_init(struct drm_device *drm, if (drm_bridge_is_panel(bridge)) panel_bridge = bridge; + + if (bridge->support_hdcp) + support_hdcp = true; } if (connector_type == 
DRM_MODE_CONNECTOR_Unknown) @@ -772,8 +776,6 @@ struct drm_connector *drm_bridge_connector_init(struct drm_device *drm, if (!connector->ycbcr_420_allowed) supported_formats &= ~BIT(HDMI_COLORSPACE_YUV420); - bridge = bridge_connector->bridge_hdmi; - ret = drmm_connector_hdmi_init(drm, connector, bridge_connector->bridge_hdmi->vendor, bridge_connector->bridge_hdmi->product, @@ -795,11 +797,14 @@ struct drm_connector *drm_bridge_connector_init(struct drm_device *drm, if (bridge_connector->bridge_hdmi_audio || bridge_connector->bridge_dp_audio) { struct device *dev; + struct drm_bridge *bridge; if (bridge_connector->bridge_hdmi_audio) - dev = bridge_connector->bridge_hdmi_audio->hdmi_audio_dev; + bridge = bridge_connector->bridge_hdmi_audio; else - dev = bridge_connector->bridge_dp_audio->hdmi_audio_dev; + bridge = bridge_connector->bridge_dp_audio; + + dev = bridge->hdmi_audio_dev; ret = drm_connector_hdmi_audio_init(connector, dev, &drm_bridge_connector_hdmi_audio_funcs, @@ -813,6 +818,8 @@ struct drm_connector *drm_bridge_connector_init(struct drm_device *drm, if (bridge_connector->bridge_hdmi_cec && bridge_connector->bridge_hdmi_cec->ops & DRM_BRIDGE_OP_HDMI_CEC_NOTIFIER) { + bridge = bridge_connector->bridge_hdmi_cec; + ret = drmm_connector_hdmi_cec_notifier_register(connector, NULL, bridge->hdmi_cec_dev); @@ -822,6 +829,8 @@ struct drm_connector *drm_bridge_connector_init(struct drm_device *drm, if (bridge_connector->bridge_hdmi_cec && bridge_connector->bridge_hdmi_cec->ops & DRM_BRIDGE_OP_HDMI_CEC_ADAPTER) { + bridge = bridge_connector->bridge_hdmi_cec; + ret = drmm_connector_hdmi_cec_register(connector, &drm_bridge_connector_hdmi_cec_funcs, bridge->hdmi_cec_adapter_name, @@ -842,6 +851,10 @@ struct drm_connector *drm_bridge_connector_init(struct drm_device *drm, if (panel_bridge) drm_panel_bridge_set_orientation(connector, panel_bridge); + if (support_hdcp && IS_REACHABLE(CONFIG_DRM_DISPLAY_HELPER) && + IS_ENABLED(CONFIG_DRM_DISPLAY_HDCP_HELPER)) + drm_connector_attach_content_protection_property(connector, true); + return connector; } EXPORT_SYMBOL_GPL(drm_bridge_connector_init); diff --git a/drivers/gpu/drm/display/drm_dp_aux_bus.c b/drivers/gpu/drm/display/drm_dp_aux_bus.c index 7b9afcf48836..2d279e82922f 100644 --- a/drivers/gpu/drm/display/drm_dp_aux_bus.c +++ b/drivers/gpu/drm/display/drm_dp_aux_bus.c @@ -58,7 +58,7 @@ static int dp_aux_ep_probe(struct device *dev) container_of(aux_ep, struct dp_aux_ep_device_with_data, aux_ep); int ret; - ret = dev_pm_domain_attach(dev, true); + ret = dev_pm_domain_attach(dev, PD_FLAG_ATTACH_POWER_ON); if (ret) return dev_err_probe(dev, ret, "Failed to attach to PM Domain\n"); diff --git a/drivers/gpu/drm/display/drm_dp_cec.c b/drivers/gpu/drm/display/drm_dp_cec.c index 3b50d817c839..436bfe9f9081 100644 --- a/drivers/gpu/drm/display/drm_dp_cec.c +++ b/drivers/gpu/drm/display/drm_dp_cec.c @@ -42,7 +42,7 @@ * * https://hverkuil.home.xs4all.nl/cec-status.txt * - * Please mail me (hverkuil@xs4all.nl) if you find an adapter that works + * Please mail me (hverkuil@kernel.org) if you find an adapter that works * and is not yet listed there. * * Note that the current implementation does not support CEC over an MST hub. 
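A change that recurs throughout the bridge diffs above (dw-hdmi, dw-hdmi-qp, tc358767, ti-sn65dsi86, tpd12s015, drm_bridge_connector.c) and in drm_bridge.c further below is that &drm_bridge_funcs.detect and drm_bridge_detect() now receive the connector being probed as a second argument. The following is a minimal sketch of what a driver-side implementation looks like after this series; the foo_* names and the hpd_asserted field are hypothetical and are not part of any patch shown here.

#include <linux/container_of.h>

#include <drm/drm_bridge.h>
#include <drm/drm_connector.h>

struct foo_bridge {
	struct drm_bridge bridge;
	bool hpd_asserted;	/* assumed to be updated from the driver's HPD handling */
};

/*
 * Only called when bridge->ops has DRM_BRIDGE_OP_DETECT set; the connector
 * being probed is now passed alongside the bridge.
 */
static enum drm_connector_status
foo_bridge_detect(struct drm_bridge *bridge, struct drm_connector *connector)
{
	struct foo_bridge *foo = container_of(bridge, struct foo_bridge, bridge);

	return foo->hpd_asserted ? connector_status_connected
				 : connector_status_disconnected;
}

static const struct drm_bridge_funcs foo_bridge_funcs = {
	.detect = foo_bridge_detect,
};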
diff --git a/drivers/gpu/drm/display/drm_dp_helper.c b/drivers/gpu/drm/display/drm_dp_helper.c index 385a1bfdb272..4aaeae4fa03c 100644 --- a/drivers/gpu/drm/display/drm_dp_helper.c +++ b/drivers/gpu/drm/display/drm_dp_helper.c @@ -742,7 +742,7 @@ ssize_t drm_dp_dpcd_read(struct drm_dp_aux *aux, unsigned int offset, int ret; if (dpcd_access_needs_probe(aux)) { - ret = drm_dp_dpcd_probe(aux, DP_LANE0_1_STATUS); + ret = drm_dp_dpcd_probe(aux, DP_TRAINING_PATTERN_SET); if (ret < 0) return ret; } @@ -3957,23 +3957,33 @@ EXPORT_SYMBOL(drm_dp_pcon_convert_rgb_to_ycbcr); * Returns: %0 on success, negative error code on failure */ int drm_edp_backlight_set_level(struct drm_dp_aux *aux, const struct drm_edp_backlight_info *bl, - u16 level) + u32 level) { int ret; - u8 buf[2] = { 0 }; + unsigned int offset = DP_EDP_BACKLIGHT_BRIGHTNESS_MSB; + u8 buf[3] = { 0 }; + size_t len = 2; /* The panel uses the PWM for controlling brightness levels */ - if (!bl->aux_set) + if (!(bl->aux_set || bl->luminance_set)) return 0; - if (bl->lsb_reg_used) { + if (bl->luminance_set) { + level = level * 1000; + level &= 0xffffff; + buf[0] = (level & 0x0000ff); + buf[1] = (level & 0x00ff00) >> 8; + buf[2] = (level & 0xff0000) >> 16; + offset = DP_EDP_PANEL_TARGET_LUMINANCE_VALUE; + len = 3; + } else if (bl->lsb_reg_used) { buf[0] = (level & 0xff00) >> 8; buf[1] = (level & 0x00ff); } else { buf[0] = level; } - ret = drm_dp_dpcd_write_data(aux, DP_EDP_BACKLIGHT_BRIGHTNESS_MSB, buf, sizeof(buf)); + ret = drm_dp_dpcd_write_data(aux, offset, buf, len); if (ret < 0) { drm_err(aux->drm_dev, "%s: Failed to write aux backlight level: %d\n", @@ -4036,7 +4046,7 @@ drm_edp_backlight_set_enable(struct drm_dp_aux *aux, const struct drm_edp_backli * Returns: %0 on success, negative error code on failure. */ int drm_edp_backlight_enable(struct drm_dp_aux *aux, const struct drm_edp_backlight_info *bl, - const u16 level) + const u32 level) { int ret; u8 dpcd_buf; @@ -4046,6 +4056,9 @@ int drm_edp_backlight_enable(struct drm_dp_aux *aux, const struct drm_edp_backli else dpcd_buf = DP_EDP_BACKLIGHT_CONTROL_MODE_PWM; + if (bl->luminance_set) + dpcd_buf |= DP_EDP_PANEL_LUMINANCE_CONTROL_ENABLE; + if (bl->pwmgen_bit_count) { ret = drm_dp_dpcd_write_byte(aux, DP_EDP_PWMGEN_BIT_COUNT, bl->pwmgen_bit_count); if (ret < 0) @@ -4209,7 +4222,7 @@ drm_edp_backlight_probe_state(struct drm_dp_aux *aux, struct drm_edp_backlight_i u8 *current_mode) { int ret; - u8 buf[2]; + u8 buf[3]; u8 mode_reg; ret = drm_dp_dpcd_read_byte(aux, DP_EDP_BACKLIGHT_MODE_SET_REGISTER, &mode_reg); @@ -4226,17 +4239,37 @@ drm_edp_backlight_probe_state(struct drm_dp_aux *aux, struct drm_edp_backlight_i if (*current_mode == DP_EDP_BACKLIGHT_CONTROL_MODE_DPCD) { int size = 1 + bl->lsb_reg_used; - ret = drm_dp_dpcd_read_data(aux, DP_EDP_BACKLIGHT_BRIGHTNESS_MSB, buf, size); - if (ret < 0) { - drm_dbg_kms(aux->drm_dev, "%s: Failed to read backlight level: %d\n", - aux->name, ret); - return ret; - } + if (bl->luminance_set) { + ret = drm_dp_dpcd_read_data(aux, DP_EDP_PANEL_TARGET_LUMINANCE_VALUE, + buf, sizeof(buf)); + if (ret < 0) { + drm_dbg_kms(aux->drm_dev, + "%s: Failed to read backlight level: %d\n", + aux->name, ret); + return ret; + } - if (bl->lsb_reg_used) - return (buf[0] << 8) | buf[1]; - else - return buf[0]; + /* + * In case luminance is set we want to send the value back in nits but + * since DP_EDP_PANEL_TARGET_LUMINANCE stores values in millinits we + * need to divide by 1000.
+ */ + return (buf[0] | buf[1] << 8 | buf[2] << 16) / 1000; + } else { + ret = drm_dp_dpcd_read_data(aux, DP_EDP_BACKLIGHT_BRIGHTNESS_MSB, + buf, size); + if (ret < 0) { + drm_dbg_kms(aux->drm_dev, + "%s: Failed to read backlight level: %d\n", + aux->name, ret); + return ret; + } + + if (bl->lsb_reg_used) + return (buf[0] << 8) | buf[1]; + else + return buf[0]; + } } /* @@ -4251,10 +4284,12 @@ drm_edp_backlight_probe_state(struct drm_dp_aux *aux, struct drm_edp_backlight_i * interface. * @aux: The DP aux device to use for probing * @bl: The &drm_edp_backlight_info struct to fill out with information on the backlight + * @max_luminance: Maximum luminance, used when @need_luminance is true * @driver_pwm_freq_hz: Optional PWM frequency from the driver in hz * @edp_dpcd: A cached copy of the eDP DPCD * @current_level: Where to store the probed brightness level, if any * @current_mode: Where to store the currently set backlight control mode + * @need_luminance: Tells us if we want to manipulate the backlight using luminance values * * Initializes a &drm_edp_backlight_info struct by probing @aux for its backlight capabilities, * along with also probing the current and maximum supported brightness levels. @@ -4266,8 +4301,9 @@ drm_edp_backlight_probe_state(struct drm_dp_aux *aux, struct drm_edp_backlight_i */ int drm_edp_backlight_init(struct drm_dp_aux *aux, struct drm_edp_backlight_info *bl, + u32 max_luminance, u16 driver_pwm_freq_hz, const u8 edp_dpcd[EDP_DISPLAY_CTL_CAP_SIZE], - u16 *current_level, u8 *current_mode) + u32 *current_level, u8 *current_mode, bool need_luminance) { int ret; @@ -4277,18 +4313,26 @@ drm_edp_backlight_init(struct drm_dp_aux *aux, struct drm_edp_backlight_info *bl bl->aux_set = true; if (edp_dpcd[2] & DP_EDP_BACKLIGHT_BRIGHTNESS_BYTE_COUNT) bl->lsb_reg_used = true; + if ((edp_dpcd[0] & DP_EDP_15) && edp_dpcd[3] & + (DP_EDP_PANEL_LUMINANCE_CONTROL_CAPABLE) && need_luminance) + bl->luminance_set = true; /* Sanity check caps */ - if (!bl->aux_set && !(edp_dpcd[2] & DP_EDP_BACKLIGHT_BRIGHTNESS_PWM_PIN_CAP)) { + if (!bl->aux_set && !(edp_dpcd[2] & DP_EDP_BACKLIGHT_BRIGHTNESS_PWM_PIN_CAP) && + !bl->luminance_set) { drm_dbg_kms(aux->drm_dev, - "%s: Panel supports neither AUX or PWM brightness control? Aborting\n", + "%s: Panel does not support AUX, PWM or luminance-based brightness control. 
Aborting\n", aux->name); return -EINVAL; } - ret = drm_edp_backlight_probe_max(aux, bl, driver_pwm_freq_hz, edp_dpcd); - if (ret < 0) - return ret; + if (bl->luminance_set) { + bl->max = max_luminance; + } else { + ret = drm_edp_backlight_probe_max(aux, bl, driver_pwm_freq_hz, edp_dpcd); + if (ret < 0) + return ret; + } ret = drm_edp_backlight_probe_state(aux, bl, current_mode); if (ret < 0) @@ -4367,7 +4411,7 @@ int drm_panel_dp_aux_backlight(struct drm_panel *panel, struct drm_dp_aux *aux) { struct dp_aux_backlight *bl; struct backlight_properties props = { 0 }; - u16 current_level; + u32 current_level; u8 current_mode; u8 edp_dpcd[EDP_DISPLAY_CTL_CAP_SIZE]; int ret; @@ -4391,8 +4435,8 @@ int drm_panel_dp_aux_backlight(struct drm_panel *panel, struct drm_dp_aux *aux) bl->aux = aux; - ret = drm_edp_backlight_init(aux, &bl->info, 0, edp_dpcd, - &current_level, &current_mode); + ret = drm_edp_backlight_init(aux, &bl->info, 0, 0, edp_dpcd, + &current_level, &current_mode, false); if (ret < 0) return ret; diff --git a/drivers/gpu/drm/display/drm_dp_tunnel.c b/drivers/gpu/drm/display/drm_dp_tunnel.c index 8a4ef5438f35..43f13a7c79b9 100644 --- a/drivers/gpu/drm/display/drm_dp_tunnel.c +++ b/drivers/gpu/drm/display/drm_dp_tunnel.c @@ -1921,7 +1921,7 @@ drm_dp_tunnel_mgr_create(struct drm_device *dev, int max_group_count) } #ifdef CONFIG_DRM_DISPLAY_DP_TUNNEL_STATE_DEBUG - ref_tracker_dir_init(&mgr->ref_tracker, 16, "dptun"); + ref_tracker_dir_init(&mgr->ref_tracker, 16, "drm_dptun"); #endif for (i = 0; i < max_group_count; i++) { diff --git a/drivers/gpu/drm/display/drm_hdmi_cec_helper.c b/drivers/gpu/drm/display/drm_hdmi_cec_helper.c index b4273c3522fa..3651ad0f76e0 100644 --- a/drivers/gpu/drm/display/drm_hdmi_cec_helper.c +++ b/drivers/gpu/drm/display/drm_hdmi_cec_helper.c @@ -69,7 +69,7 @@ static void drm_connector_hdmi_cec_adapter_unregister(struct drm_device *dev, vo struct drm_connector *connector = res; struct drm_connector_hdmi_cec_data *data = connector->cec.data; - cec_delete_adapter(data->adapter); + cec_unregister_adapter(data->adapter); if (data->funcs->uninit) data->funcs->uninit(connector); diff --git a/drivers/gpu/drm/drm_atomic_helper.c b/drivers/gpu/drm/drm_atomic_helper.c index ef56b474acf5..d5ebe6ea0acb 100644 --- a/drivers/gpu/drm/drm_atomic_helper.c +++ b/drivers/gpu/drm/drm_atomic_helper.c @@ -456,6 +456,7 @@ mode_fixup(struct drm_atomic_state *state) ret = drm_atomic_bridge_chain_check(bridge, new_crtc_state, new_conn_state); + drm_bridge_put(bridge); if (ret) { drm_dbg_atomic(encoder->dev, "Bridge atomic check failed\n"); return ret; @@ -527,6 +528,7 @@ static enum drm_mode_status mode_valid_path(struct drm_connector *connector, bridge = drm_bridge_chain_get_first_bridge(encoder); ret = drm_bridge_chain_mode_valid(bridge, &connector->display_info, mode); + drm_bridge_put(bridge); if (ret != MODE_OK) { drm_dbg_atomic(encoder->dev, "[BRIDGE] mode_valid() failed\n"); return ret; @@ -1212,6 +1214,7 @@ encoder_bridge_disable(struct drm_device *dev, struct drm_atomic_state *state) */ bridge = drm_bridge_chain_get_first_bridge(encoder); drm_atomic_bridge_chain_disable(bridge, state); + drm_bridge_put(bridge); /* Right function depends upon target state. 
*/ if (funcs) { @@ -1329,6 +1332,7 @@ encoder_bridge_post_disable(struct drm_device *dev, struct drm_atomic_state *sta */ bridge = drm_bridge_chain_get_first_bridge(encoder); drm_atomic_bridge_chain_post_disable(bridge, state); + drm_bridge_put(bridge); } } @@ -1501,6 +1505,7 @@ crtc_set_mode(struct drm_device *dev, struct drm_atomic_state *state) bridge = drm_bridge_chain_get_first_bridge(encoder); drm_bridge_chain_mode_set(bridge, mode, adjusted_mode); + drm_bridge_put(bridge); } } @@ -1580,6 +1585,7 @@ encoder_bridge_pre_enable(struct drm_device *dev, struct drm_atomic_state *state */ bridge = drm_bridge_chain_get_first_bridge(encoder); drm_atomic_bridge_chain_pre_enable(bridge, state); + drm_bridge_put(bridge); } } @@ -1655,6 +1661,7 @@ encoder_bridge_enable(struct drm_device *dev, struct drm_atomic_state *state) } drm_atomic_bridge_chain_enable(bridge, state); + drm_bridge_put(bridge); } } diff --git a/drivers/gpu/drm/drm_atomic_uapi.c b/drivers/gpu/drm/drm_atomic_uapi.c index ecc73d52bfae..85dbdaa4a2e2 100644 --- a/drivers/gpu/drm/drm_atomic_uapi.c +++ b/drivers/gpu/drm/drm_atomic_uapi.c @@ -1078,19 +1078,20 @@ int drm_atomic_set_property(struct drm_atomic_state *state, } if (async_flip) { - /* check if the prop does a nop change */ - if ((prop != config->prop_fb_id && - prop != config->prop_in_fence_fd && - prop != config->prop_fb_damage_clips)) { - ret = drm_atomic_plane_get_property(plane, plane_state, - prop, &old_val); - ret = drm_atomic_check_prop_changes(ret, old_val, prop_value, prop); - } + /* no-op changes are always allowed */ + ret = drm_atomic_plane_get_property(plane, plane_state, + prop, &old_val); + ret = drm_atomic_check_prop_changes(ret, old_val, prop_value, prop); - /* ask the driver if this non-primary plane is supported */ - if (plane->type != DRM_PLANE_TYPE_PRIMARY) { - ret = -EINVAL; + /* fail everything that isn't no-op or a pure flip */ + if (ret && prop != config->prop_fb_id && + prop != config->prop_in_fence_fd && + prop != config->prop_fb_damage_clips) { + break; + } + if (ret && plane->type != DRM_PLANE_TYPE_PRIMARY) { + /* ask the driver if this non-primary plane is supported */ if (plane_funcs && plane_funcs->atomic_async_check) ret = plane_funcs->atomic_async_check(plane, state, true); diff --git a/drivers/gpu/drm/drm_bridge.c b/drivers/gpu/drm/drm_bridge.c index d6ce7b4c019f..d031447eebc9 100644 --- a/drivers/gpu/drm/drm_bridge.c +++ b/drivers/gpu/drm/drm_bridge.c @@ -295,6 +295,11 @@ EXPORT_SYMBOL(__devm_drm_bridge_alloc); */ void drm_bridge_add(struct drm_bridge *bridge) { + if (!bridge->container) + DRM_WARN("DRM bridge corrupted or not allocated by devm_drm_bridge_alloc()\n"); + + drm_bridge_get(bridge); + mutex_init(&bridge->hpd_mutex); if (bridge->ops & DRM_BRIDGE_OP_HDMI) @@ -342,6 +347,8 @@ void drm_bridge_remove(struct drm_bridge *bridge) mutex_unlock(&bridge_lock); mutex_destroy(&bridge->hpd_mutex); + + drm_bridge_put(bridge); } EXPORT_SYMBOL(drm_bridge_remove); @@ -407,11 +414,17 @@ int drm_bridge_attach(struct drm_encoder *encoder, struct drm_bridge *bridge, if (!encoder || !bridge) return -EINVAL; - if (previous && (!previous->dev || previous->encoder != encoder)) - return -EINVAL; + drm_bridge_get(bridge); + + if (previous && (!previous->dev || previous->encoder != encoder)) { + ret = -EINVAL; + goto err_put_bridge; + } - if (bridge->dev) - return -EBUSY; + if (bridge->dev) { + ret = -EBUSY; + goto err_put_bridge; + } bridge->dev = encoder->dev; bridge->encoder = encoder; @@ -460,6 +473,8 @@ err_reset_bridge: "failed to attach bridge 
%pOF to encoder %s\n", bridge->of_node, encoder->name); +err_put_bridge: + drm_bridge_put(bridge); return ret; } EXPORT_SYMBOL(drm_bridge_attach); @@ -480,6 +495,7 @@ void drm_bridge_detach(struct drm_bridge *bridge) list_del(&bridge->chain_node); bridge->dev = NULL; + drm_bridge_put(bridge); } /** @@ -925,11 +941,11 @@ static int select_bus_fmt_recursive(struct drm_bridge *first_bridge, { unsigned int i, num_in_bus_fmts = 0; struct drm_bridge_state *cur_state; - struct drm_bridge *prev_bridge; + struct drm_bridge *prev_bridge __free(drm_bridge_put) = + drm_bridge_get_prev_bridge(cur_bridge); u32 *in_bus_fmts; int ret; - prev_bridge = drm_bridge_get_prev_bridge(cur_bridge); cur_state = drm_atomic_get_new_bridge_state(crtc_state->state, cur_bridge); @@ -1211,6 +1227,7 @@ EXPORT_SYMBOL(drm_atomic_bridge_chain_check); /** * drm_bridge_detect - check if anything is attached to the bridge output * @bridge: bridge control structure + * @connector: attached connector * * If the bridge supports output detection, as reported by the * DRM_BRIDGE_OP_DETECT bridge ops flag, call &drm_bridge_funcs.detect for the @@ -1221,12 +1238,13 @@ EXPORT_SYMBOL(drm_atomic_bridge_chain_check); * The detection status on success, or connector_status_unknown if the bridge * doesn't support output detection. */ -enum drm_connector_status drm_bridge_detect(struct drm_bridge *bridge) +enum drm_connector_status +drm_bridge_detect(struct drm_bridge *bridge, struct drm_connector *connector) { if (!(bridge->ops & DRM_BRIDGE_OP_DETECT)) return connector_status_unknown; - return bridge->funcs->detect(bridge); + return bridge->funcs->detect(bridge, connector); } EXPORT_SYMBOL_GPL(drm_bridge_detect); @@ -1417,6 +1435,9 @@ static void drm_bridge_debugfs_show_bridge(struct drm_printer *p, unsigned int idx) { drm_printf(p, "bridge[%u]: %ps\n", idx, bridge->funcs); + + drm_printf(p, "\trefcount: %u\n", kref_read(&bridge->refcount)); + drm_printf(p, "\ttype: [%d] %s\n", bridge->type, drm_get_connector_type_name(bridge->type)); diff --git a/drivers/gpu/drm/drm_buddy.c b/drivers/gpu/drm/drm_buddy.c index a1e652b7631d..a94061f373de 100644 --- a/drivers/gpu/drm/drm_buddy.c +++ b/drivers/gpu/drm/drm_buddy.c @@ -406,6 +406,49 @@ drm_get_buddy(struct drm_buddy_block *block) EXPORT_SYMBOL(drm_get_buddy); /** + * drm_buddy_reset_clear - reset blocks clear state + * + * @mm: DRM buddy manager + * @is_clear: blocks clear state + * + * Reset the clear state based on @is_clear value for each block + * in the freelist. 
+ */ +void drm_buddy_reset_clear(struct drm_buddy *mm, bool is_clear) +{ + u64 root_size, size, start; + unsigned int order; + int i; + + size = mm->size; + for (i = 0; i < mm->n_roots; ++i) { + order = ilog2(size) - ilog2(mm->chunk_size); + start = drm_buddy_block_offset(mm->roots[i]); + __force_merge(mm, start, start + size, order); + + root_size = mm->chunk_size << order; + size -= root_size; + } + + for (i = 0; i <= mm->max_order; ++i) { + struct drm_buddy_block *block; + + list_for_each_entry_reverse(block, &mm->free_list[i], link) { + if (is_clear != drm_buddy_block_is_clear(block)) { + if (is_clear) { + mark_cleared(block); + mm->clear_avail += drm_buddy_block_size(mm, block); + } else { + clear_reset(block); + mm->clear_avail -= drm_buddy_block_size(mm, block); + } + } + } + } +} +EXPORT_SYMBOL(drm_buddy_reset_clear); + +/** * drm_buddy_free_block - free a block * * @mm: DRM buddy manager diff --git a/drivers/gpu/drm/drm_cache.c b/drivers/gpu/drm/drm_cache.c index 7051c9c909c2..ea1d2d5d2c66 100644 --- a/drivers/gpu/drm/drm_cache.c +++ b/drivers/gpu/drm/drm_cache.c @@ -93,8 +93,7 @@ drm_clflush_pages(struct page *pages[], unsigned long num_pages) return; } - if (wbinvd_on_all_cpus()) - pr_err("Timed out waiting for cache flush\n"); + wbinvd_on_all_cpus(); #elif defined(__powerpc__) unsigned long i; @@ -139,8 +138,7 @@ drm_clflush_sg(struct sg_table *st) return; } - if (wbinvd_on_all_cpus()) - pr_err("Timed out waiting for cache flush\n"); + wbinvd_on_all_cpus(); #else WARN_ONCE(1, "Architecture has no drm_cache.c support\n"); #endif @@ -172,8 +170,7 @@ drm_clflush_virt_range(void *addr, unsigned long length) return; } - if (wbinvd_on_all_cpus()) - pr_err("Timed out waiting for cache flush\n"); + wbinvd_on_all_cpus(); #else WARN_ONCE(1, "Architecture has no drm_cache.c support\n"); #endif diff --git a/drivers/gpu/drm/drm_color_mgmt.c b/drivers/gpu/drm/drm_color_mgmt.c index 37a3270bc3c2..131c1c9ae92f 100644 --- a/drivers/gpu/drm/drm_color_mgmt.c +++ b/drivers/gpu/drm/drm_color_mgmt.c @@ -817,6 +817,40 @@ void drm_crtc_load_palette_8(struct drm_crtc *crtc, const struct drm_color_lut * } EXPORT_SYMBOL(drm_crtc_load_palette_8); +static void fill_palette_332(struct drm_crtc *crtc, u16 r, u16 g, u16 b, + drm_crtc_set_lut_func set_palette) +{ + unsigned int i = (r << 5) | (g << 2) | b; /* 8-bit palette index */ + + /* Expand R (3-bit) G (3-bit) and B (2-bit) values to 16-bit values */ + r = (r << 13) | (r << 10) | (r << 7) | (r << 4) | (r << 1) | (r >> 2); + g = (g << 13) | (g << 10) | (g << 7) | (g << 4) | (g << 1) | (g >> 2); + b = (b << 14) | (b << 12) | (b << 10) | (b << 8) | (b << 6) | (b << 4) | (b << 2) | b; + + set_palette(crtc, i, r, g, b); +} + +/** + * drm_crtc_fill_palette_332 - Programs a default palette for R332-like formats + * @crtc: The displaying CRTC + * @set_palette: Callback for programming the hardware gamma LUT + * + * Programs an RGB332 palette to hardware. + */ +void drm_crtc_fill_palette_332(struct drm_crtc *crtc, drm_crtc_set_lut_func set_palette) +{ + unsigned int r, g, b; + + /* Limits of 8-8-4 are the maximum number of values for each channel. 
*/ + for (r = 0; r < 8; ++r) { + for (g = 0; g < 8; ++g) { + for (b = 0; b < 4; ++b) + fill_palette_332(crtc, r, g, b, set_palette); + } + } +} +EXPORT_SYMBOL(drm_crtc_fill_palette_332); + static void fill_palette_8(struct drm_crtc *crtc, unsigned int i, drm_crtc_set_lut_func set_palette) { diff --git a/drivers/gpu/drm/drm_debugfs.c b/drivers/gpu/drm/drm_debugfs.c index abceb28b23fc..365cf337529f 100644 --- a/drivers/gpu/drm/drm_debugfs.c +++ b/drivers/gpu/drm/drm_debugfs.c @@ -44,6 +44,9 @@ #include "drm_crtc_internal.h" #include "drm_internal.h" +static struct dentry *accel_debugfs_root; +static struct dentry *drm_debugfs_root; + /*************************************************** * Initialization, etc. **************************************************/ @@ -287,16 +290,120 @@ int drm_debugfs_remove_files(const struct drm_info_list *files, int count, } EXPORT_SYMBOL(drm_debugfs_remove_files); +void drm_debugfs_bridge_params(void) +{ + drm_bridge_debugfs_params(drm_debugfs_root); +} + +void drm_debugfs_init_root(void) +{ + drm_debugfs_root = debugfs_create_dir("dri", NULL); +#if IS_ENABLED(CONFIG_DRM_ACCEL) + accel_debugfs_root = debugfs_create_dir("accel", NULL); +#endif +} + +void drm_debugfs_remove_root(void) +{ +#if IS_ENABLED(CONFIG_DRM_ACCEL) + debugfs_remove(accel_debugfs_root); +#endif + debugfs_remove(drm_debugfs_root); +} + +static int drm_debugfs_proc_info_show(struct seq_file *m, void *unused) +{ + struct pid *pid; + struct task_struct *task; + struct drm_file *file = m->private; + + if (!file) + return -EINVAL; + + rcu_read_lock(); + pid = rcu_dereference(file->pid); + task = pid_task(pid, PIDTYPE_TGID); + + seq_printf(m, "pid: %d\n", task ? task->pid : 0); + seq_printf(m, "comm: %s\n", task ? task->comm : "Unset"); + rcu_read_unlock(); + return 0; +} + +static int drm_debufs_proc_info_open(struct inode *inode, struct file *file) +{ + return single_open(file, drm_debugfs_proc_info_show, inode->i_private); +} + +static const struct file_operations drm_debugfs_proc_info_fops = { + .owner = THIS_MODULE, + .open = drm_debufs_proc_info_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +/** + * drm_debugfs_clients_add - Add a per client debugfs directory + * @file: drm_file for a client + * + * Create the debugfs directory for each client. This will be used to populate + * driver specific data for each client. + * + * Also add the process information debugfs file for each client to tag + * which client belongs to which process. + */ +void drm_debugfs_clients_add(struct drm_file *file) +{ + char *client; + + client = kasprintf(GFP_KERNEL, "client-%llu", file->client_id); + if (!client) + return; + + /* Create a debugfs directory for the client in root on drm debugfs */ + file->debugfs_client = debugfs_create_dir(client, drm_debugfs_root); + kfree(client); + + debugfs_create_file("proc_info", 0444, file->debugfs_client, file, + &drm_debugfs_proc_info_fops); + + client = kasprintf(GFP_KERNEL, "../%s", file->minor->dev->unique); + if (!client) + return; + + /* Create a link from client_id to the drm device this client id belongs to */ + debugfs_create_symlink("device", file->debugfs_client, client); + kfree(client); +} + +/** + * drm_debugfs_clients_remove - removes all debugfs directories and files + * @file: drm_file for a client + * + * Removes the debugfs directories recursively from the client directory. + * + * There is also a possibility that debugfs files are open while the drm_file + * is released. 
+ */ +void drm_debugfs_clients_remove(struct drm_file *file) +{ + debugfs_remove_recursive(file->debugfs_client); + file->debugfs_client = NULL; +} + /** * drm_debugfs_dev_init - create debugfs directory for the device * @dev: the device which we want to create the directory for - * @root: the parent directory depending on the device type * * Creates the debugfs directory for the device under the given root directory. */ -void drm_debugfs_dev_init(struct drm_device *dev, struct dentry *root) +void drm_debugfs_dev_init(struct drm_device *dev) { - dev->debugfs_root = debugfs_create_dir(dev->unique, root); + if (drm_core_check_feature(dev, DRIVER_COMPUTE_ACCEL)) + dev->debugfs_root = debugfs_create_dir(dev->unique, accel_debugfs_root); + else + dev->debugfs_root = debugfs_create_dir(dev->unique, drm_debugfs_root); } /** @@ -323,14 +430,13 @@ void drm_debugfs_dev_register(struct drm_device *dev) drm_atomic_debugfs_init(dev); } -int drm_debugfs_register(struct drm_minor *minor, int minor_id, - struct dentry *root) +int drm_debugfs_register(struct drm_minor *minor, int minor_id) { struct drm_device *dev = minor->dev; char name[64]; sprintf(name, "%d", minor_id); - minor->debugfs_symlink = debugfs_create_symlink(name, root, + minor->debugfs_symlink = debugfs_create_symlink(name, drm_debugfs_root, dev->unique); /* TODO: Only for compatibility with drivers */ diff --git a/drivers/gpu/drm/drm_draw.c b/drivers/gpu/drm/drm_draw.c index 9dc0408fbbea..5b956229c82f 100644 --- a/drivers/gpu/drm/drm_draw.c +++ b/drivers/gpu/drm/drm_draw.c @@ -127,7 +127,7 @@ EXPORT_SYMBOL(drm_draw_fill16); void drm_draw_fill24(struct iosys_map *dmap, unsigned int dpitch, unsigned int height, unsigned int width, - u16 color) + u32 color) { unsigned int y, x; diff --git a/drivers/gpu/drm/drm_draw_internal.h b/drivers/gpu/drm/drm_draw_internal.h index f121ee7339dc..20cb404e23ea 100644 --- a/drivers/gpu/drm/drm_draw_internal.h +++ b/drivers/gpu/drm/drm_draw_internal.h @@ -47,7 +47,7 @@ void drm_draw_fill16(struct iosys_map *dmap, unsigned int dpitch, void drm_draw_fill24(struct iosys_map *dmap, unsigned int dpitch, unsigned int height, unsigned int width, - u16 color); + u32 color); void drm_draw_fill32(struct iosys_map *dmap, unsigned int dpitch, unsigned int height, unsigned int width, diff --git a/drivers/gpu/drm/drm_drv.c b/drivers/gpu/drm/drm_drv.c index 02556363e918..8e3cb08241c8 100644 --- a/drivers/gpu/drm/drm_drv.c +++ b/drivers/gpu/drm/drm_drv.c @@ -72,8 +72,6 @@ DEFINE_XARRAY_ALLOC(drm_minors_xa); */ static bool drm_core_init_complete; -static struct dentry *drm_debugfs_root; - DEFINE_STATIC_SRCU(drm_unplug_srcu); /* @@ -186,8 +184,7 @@ static int drm_minor_register(struct drm_device *dev, enum drm_minor_type type) return 0; if (minor->type != DRM_MINOR_ACCEL) { - ret = drm_debugfs_register(minor, minor->index, - drm_debugfs_root); + ret = drm_debugfs_register(minor, minor->index); if (ret) { DRM_ERROR("DRM: Failed to initialize /sys/kernel/debug/dri.\n"); goto err_debugfs; @@ -535,6 +532,8 @@ static const char *drm_get_wedge_recovery(unsigned int opt) return "rebind"; case DRM_WEDGE_RECOVERY_BUS_RESET: return "bus-reset"; + case DRM_WEDGE_RECOVERY_VENDOR: + return "vendor-specific"; default: return NULL; } @@ -697,7 +696,6 @@ static void drm_dev_init_release(struct drm_device *dev, void *res) mutex_destroy(&dev->master_mutex); mutex_destroy(&dev->clientlist_mutex); mutex_destroy(&dev->filelist_mutex); - mutex_destroy(&dev->struct_mutex); } static int drm_dev_init(struct drm_device *dev, @@ -738,7 +736,6 @@ static 
int drm_dev_init(struct drm_device *dev, INIT_LIST_HEAD(&dev->vblank_event_list); spin_lock_init(&dev->event_lock); - mutex_init(&dev->struct_mutex); mutex_init(&dev->filelist_mutex); mutex_init(&dev->clientlist_mutex); mutex_init(&dev->master_mutex); @@ -787,10 +784,7 @@ static int drm_dev_init(struct drm_device *dev, goto err; } - if (drm_core_check_feature(dev, DRIVER_COMPUTE_ACCEL)) - accel_debugfs_init(dev); - else - drm_debugfs_dev_init(dev, drm_debugfs_root); + drm_debugfs_dev_init(dev); return 0; @@ -1230,7 +1224,7 @@ static void drm_core_exit(void) drm_panic_exit(); accel_core_exit(); unregister_chrdev(DRM_MAJOR, "drm"); - debugfs_remove(drm_debugfs_root); + drm_debugfs_remove_root(); drm_sysfs_destroy(); WARN_ON(!xa_empty(&drm_minors_xa)); drm_connector_ida_destroy(); @@ -1249,8 +1243,8 @@ static int __init drm_core_init(void) goto error; } - drm_debugfs_root = debugfs_create_dir("dri", NULL); - drm_bridge_debugfs_params(drm_debugfs_root); + drm_debugfs_init_root(); + drm_debugfs_bridge_params(); ret = register_chrdev(DRM_MAJOR, "drm", &drm_stub_fops); if (ret < 0) diff --git a/drivers/gpu/drm/drm_file.c b/drivers/gpu/drm/drm_file.c index 3952e27447ee..eebd1a05ee97 100644 --- a/drivers/gpu/drm/drm_file.c +++ b/drivers/gpu/drm/drm_file.c @@ -46,6 +46,7 @@ #include <drm/drm_file.h> #include <drm/drm_gem.h> #include <drm/drm_print.h> +#include <drm/drm_debugfs.h> #include "drm_crtc_internal.h" #include "drm_internal.h" @@ -168,6 +169,9 @@ struct drm_file *drm_file_alloc(struct drm_minor *minor) drm_prime_init_file_private(&file->prime); + if (!drm_core_check_feature(dev, DRIVER_COMPUTE_ACCEL)) + drm_debugfs_clients_add(file); + if (dev->driver->open) { ret = dev->driver->open(dev, file); if (ret < 0) @@ -182,6 +186,10 @@ out_prime_destroy: drm_syncobj_release(file); if (drm_core_check_feature(dev, DRIVER_GEM)) drm_gem_release(dev, file); + + if (!drm_core_check_feature(dev, DRIVER_COMPUTE_ACCEL)) + drm_debugfs_clients_remove(file); + put_pid(rcu_access_pointer(file->pid)); kfree(file); @@ -236,6 +244,9 @@ void drm_file_free(struct drm_file *file) (long)old_encode_dev(file->minor->kdev->devt), atomic_read(&dev->open_count)); + if (!drm_core_check_feature(dev, DRIVER_COMPUTE_ACCEL)) + drm_debugfs_clients_remove(file); + drm_events_release(file); if (drm_core_check_feature(dev, DRIVER_MODESET)) { diff --git a/drivers/gpu/drm/drm_format_helper.c b/drivers/gpu/drm/drm_format_helper.c index 99d9f7bbc261..006836554cc2 100644 --- a/drivers/gpu/drm/drm_format_helper.c +++ b/drivers/gpu/drm/drm_format_helper.c @@ -559,18 +559,6 @@ static void drm_fb_xrgb8888_to_rgb565_line(void *dbuf, const void *sbuf, unsigne drm_fb_xfrm_line_32to16(dbuf, sbuf, pixels, drm_pixel_xrgb8888_to_rgb565); } -static __always_inline u32 drm_xrgb8888_to_rgb565_swab(u32 pix) -{ - return swab16(drm_pixel_xrgb8888_to_rgb565(pix)); -} - -/* TODO: implement this helper as conversion to RGB565|BIG_ENDIAN */ -static void drm_fb_xrgb8888_to_rgb565_swab_line(void *dbuf, const void *sbuf, - unsigned int pixels) -{ - drm_fb_xfrm_line_32to16(dbuf, sbuf, pixels, drm_xrgb8888_to_rgb565_swab); -} - /** * drm_fb_xrgb8888_to_rgb565 - Convert XRGB8888 to RGB565 clip buffer * @dst: Array of RGB565 destination buffers @@ -580,7 +568,6 @@ static void drm_fb_xrgb8888_to_rgb565_swab_line(void *dbuf, const void *sbuf, * @fb: DRM framebuffer * @clip: Clip rectangle area to copy * @state: Transform and conversion state - * @swab: Swap bytes * * This function copies parts of a framebuffer to display memory and converts the * color format 
during the process. Destination and framebuffer formats must match. The @@ -595,23 +582,56 @@ static void drm_fb_xrgb8888_to_rgb565_swab_line(void *dbuf, const void *sbuf, */ void drm_fb_xrgb8888_to_rgb565(struct iosys_map *dst, const unsigned int *dst_pitch, const struct iosys_map *src, const struct drm_framebuffer *fb, - const struct drm_rect *clip, struct drm_format_conv_state *state, - bool swab) + const struct drm_rect *clip, struct drm_format_conv_state *state) { static const u8 dst_pixsize[DRM_FORMAT_MAX_PLANES] = { 2, }; - void (*xfrm_line)(void *dbuf, const void *sbuf, unsigned int npixels); + drm_fb_xfrm(dst, dst_pitch, dst_pixsize, src, fb, clip, false, state, + drm_fb_xrgb8888_to_rgb565_line); +} +EXPORT_SYMBOL(drm_fb_xrgb8888_to_rgb565); - if (swab) - xfrm_line = drm_fb_xrgb8888_to_rgb565_swab_line; - else - xfrm_line = drm_fb_xrgb8888_to_rgb565_line; +static void drm_fb_xrgb8888_to_rgb565be_line(void *dbuf, const void *sbuf, + unsigned int pixels) +{ + drm_fb_xfrm_line_32to16(dbuf, sbuf, pixels, drm_pixel_xrgb8888_to_rgb565be); +} - drm_fb_xfrm(dst, dst_pitch, dst_pixsize, src, fb, clip, false, state, xfrm_line); +/** + * drm_fb_xrgb8888_to_rgb565be - Convert XRGB8888 to RGB565|DRM_FORMAT_BIG_ENDIAN clip buffer + * @dst: Array of RGB565BE destination buffers + * @dst_pitch: Array of numbers of bytes between the start of two consecutive scanlines + * within @dst; can be NULL if scanlines are stored next to each other. + * @src: Array of XRGB8888 source buffer + * @fb: DRM framebuffer + * @clip: Clip rectangle area to copy + * @state: Transform and conversion state + * + * This function copies parts of a framebuffer to display memory and converts the + * color format during the process. Destination and framebuffer formats must match. The + * parameters @dst, @dst_pitch and @src refer to arrays. Each array must have at + * least as many entries as there are planes in @fb's format. Each entry stores the + * value for the format's respective color plane at the same index. + * + * This function does not apply clipping on @dst (i.e. the destination is at the + * top-left corner). + * + * Drivers can use this function for RGB565BE devices that don't support XRGB8888 natively. 
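/*
 * Illustrative sketch, not part of this patch: a panel driver whose hardware
 * expects big-endian RGB565 can keep advertising XRGB8888 to userspace and
 * convert only the damaged rectangle into a transfer buffer. "struct
 * my_panel", "tx_buf" and "fmtcnv_state" are hypothetical; tx_buf must hold
 * at least drm_rect_width(clip) * drm_rect_height(clip) * 2 bytes.
 */
#include <linux/iosys-map.h>
#include <drm/drm_format_helper.h>
#include <drm/drm_framebuffer.h>
#include <drm/drm_rect.h>

struct my_panel {
        void *tx_buf;
        struct drm_format_conv_state fmtcnv_state;
};

static void my_panel_convert_damage(struct my_panel *panel,
                                    const struct iosys_map *src,
                                    const struct drm_framebuffer *fb,
                                    const struct drm_rect *clip)
{
        struct iosys_map dst = IOSYS_MAP_INIT_VADDR(panel->tx_buf);

        /* NULL dst_pitch: converted scanlines are packed back to back. */
        drm_fb_xrgb8888_to_rgb565be(&dst, NULL, src, fb, clip,
                                    &panel->fmtcnv_state);
}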
+ */ +void drm_fb_xrgb8888_to_rgb565be(struct iosys_map *dst, const unsigned int *dst_pitch, + const struct iosys_map *src, const struct drm_framebuffer *fb, + const struct drm_rect *clip, struct drm_format_conv_state *state) +{ + static const u8 dst_pixsize[DRM_FORMAT_MAX_PLANES] = { + 2, + }; + + drm_fb_xfrm(dst, dst_pitch, dst_pixsize, src, fb, clip, false, state, + drm_fb_xrgb8888_to_rgb565be_line); } -EXPORT_SYMBOL(drm_fb_xrgb8888_to_rgb565); +EXPORT_SYMBOL(drm_fb_xrgb8888_to_rgb565be); static void drm_fb_xrgb8888_to_xrgb1555_line(void *dbuf, const void *sbuf, unsigned int pixels) { @@ -1188,7 +1208,7 @@ int drm_fb_blit(struct iosys_map *dst, const unsigned int *dst_pitch, uint32_t d return 0; } else if (fb_format == DRM_FORMAT_XRGB8888) { if (dst_format == DRM_FORMAT_RGB565) { - drm_fb_xrgb8888_to_rgb565(dst, dst_pitch, src, fb, clip, state, false); + drm_fb_xrgb8888_to_rgb565(dst, dst_pitch, src, fb, clip, state); return 0; } else if (dst_format == DRM_FORMAT_XRGB1555) { drm_fb_xrgb8888_to_xrgb1555(dst, dst_pitch, src, fb, clip, state); @@ -1223,6 +1243,9 @@ int drm_fb_blit(struct iosys_map *dst, const unsigned int *dst_pitch, uint32_t d } else if (dst_format == DRM_FORMAT_BGRX8888) { drm_fb_swab(dst, dst_pitch, src, fb, clip, false, state); return 0; + } else if (dst_format == DRM_FORMAT_RGB332) { + drm_fb_xrgb8888_to_rgb332(dst, dst_pitch, src, fb, clip, state); + return 0; } } @@ -1233,6 +1256,25 @@ int drm_fb_blit(struct iosys_map *dst, const unsigned int *dst_pitch, uint32_t d } EXPORT_SYMBOL(drm_fb_blit); +static void drm_fb_gray8_to_gray2_line(void *dbuf, const void *sbuf, unsigned int pixels) +{ + u8 *dbuf8 = dbuf; + const u8 *sbuf8 = sbuf; + u8 px; + + while (pixels) { + unsigned int i, bits = min(pixels, 4U); + u8 byte = 0; + + for (i = 0; i < bits; i++, pixels--) { + byte >>= 2; + px = (*sbuf8++ * 3 + 127) / 255; + byte |= (px &= 0x03) << 6; + } + *dbuf8++ = byte; + } +} + static void drm_fb_gray8_to_mono_line(void *dbuf, const void *sbuf, unsigned int pixels) { u8 *dbuf8 = dbuf; @@ -1339,3 +1381,92 @@ void drm_fb_xrgb8888_to_mono(struct iosys_map *dst, const unsigned int *dst_pitc } } EXPORT_SYMBOL(drm_fb_xrgb8888_to_mono); + +/** + * drm_fb_xrgb8888_to_gray2 - Convert XRGB8888 to gray2 + * @dst: Array of gray2 destination buffer + * @dst_pitch: Array of numbers of bytes between the start of two consecutive scanlines + * within @dst; can be NULL if scanlines are stored next to each other. + * @src: Array of XRGB8888 source buffers + * @fb: DRM framebuffer + * @clip: Clip rectangle area to copy + * @state: Transform and conversion state + * + * This function copies parts of a framebuffer to display memory and converts the + * color format during the process. Destination and framebuffer formats must match. The + * parameters @dst, @dst_pitch and @src refer to arrays. Each array must have at + * least as many entries as there are planes in @fb's format. Each entry stores the + * value for the format's respective color plane at the same index. + * + * This function does not apply clipping on @dst (i.e. the destination is at the + * top-left corner). The first pixel (upper left corner of the clip rectangle) will + * be converted and copied to the two first bits (LSB) in the first byte of the gray2 + * destination buffer. If the caller requires that the first pixel in a byte must + * be located at an x-coordinate that is a multiple of 8, then the caller must take + * care itself of supplying a suitable clip rectangle. + * + * DRM doesn't have native gray2 support. 
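/*
 * Worked example, not part of this patch, of the gray8 -> gray2 packing in
 * drm_fb_gray8_to_gray2_line() above: every 8-bit grayscale value is
 * quantized to two bits with (gray * 3 + 127) / 255 and four results are
 * packed LSB-first into one output byte.
 *
 *   gray8 input:     0x00  0x55  0xaa  0xff
 *   quantized gray2:    0     1     2     3
 *   packed byte:     0b11100100 = 0xe4  (first pixel in bits 1:0,
 *                                        fourth pixel in bits 7:6)
 */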
Drivers can use this function for + * gray2 devices that don't support XRGB8888 natively. Such drivers can + * announce the commonly supported XR24 format to userspace and use this function + * to convert to the native format. + * + */ +void drm_fb_xrgb8888_to_gray2(struct iosys_map *dst, const unsigned int *dst_pitch, + const struct iosys_map *src, const struct drm_framebuffer *fb, + const struct drm_rect *clip, struct drm_format_conv_state *state) +{ + static const unsigned int default_dst_pitch[DRM_FORMAT_MAX_PLANES] = { + 0, 0, 0, 0 + }; + unsigned int linepixels = drm_rect_width(clip); + unsigned int lines = drm_rect_height(clip); + unsigned int cpp = fb->format->cpp[0]; + unsigned int len_src32 = linepixels * cpp; + struct drm_device *dev = fb->dev; + void *vaddr = src[0].vaddr; + unsigned int dst_pitch_0; + unsigned int y; + u8 *gray2 = dst[0].vaddr, *gray8; + u32 *src32; + + if (drm_WARN_ON(dev, fb->format->format != DRM_FORMAT_XRGB8888)) + return; + + if (!dst_pitch) + dst_pitch = default_dst_pitch; + dst_pitch_0 = dst_pitch[0]; + + /* + * The gray2 destination buffer contains 2 bit per pixel + */ + if (!dst_pitch_0) + dst_pitch_0 = DIV_ROUND_UP(linepixels, 4); + + /* + * The dma memory is write-combined so reads are uncached. + * Speed up by fetching one line at a time. + * + * Also, format conversion from XR24 to gray2 are done + * line-by-line but are converted to 8-bit grayscale as an + * intermediate step. + * + * Allocate a buffer to be used for both copying from the cma + * memory and to store the intermediate grayscale line pixels. + */ + src32 = drm_format_conv_state_reserve(state, len_src32 + linepixels, GFP_KERNEL); + if (!src32) + return; + + gray8 = (u8 *)src32 + len_src32; + + vaddr += clip_offset(clip, fb->pitches[0], cpp); + for (y = 0; y < lines; y++) { + src32 = memcpy(src32, vaddr, len_src32); + drm_fb_xrgb8888_to_gray8_line(gray8, src32, linepixels); + drm_fb_gray8_to_gray2_line(gray2, gray8, linepixels); + vaddr += fb->pitches[0]; + gray2 += dst_pitch_0; + } +} +EXPORT_SYMBOL(drm_fb_xrgb8888_to_gray2); + diff --git a/drivers/gpu/drm/drm_format_internal.h b/drivers/gpu/drm/drm_format_internal.h index 9428d5cfebc5..ce29dd05bcc5 100644 --- a/drivers/gpu/drm/drm_format_internal.h +++ b/drivers/gpu/drm/drm_format_internal.h @@ -5,6 +5,7 @@ #include <linux/bits.h> #include <linux/types.h> +#include <linux/swab.h> /* * Each pixel-format conversion helper takes a raw pixel in a @@ -59,6 +60,11 @@ static inline u32 drm_pixel_xrgb8888_to_rgb565(u32 pix) ((pix & 0x000000f8) >> 3); } +static inline u32 drm_pixel_xrgb8888_to_rgb565be(u32 pix) +{ + return swab16(drm_pixel_xrgb8888_to_rgb565(pix)); +} + static inline u32 drm_pixel_xrgb8888_to_rgbx5551(u32 pix) { return ((pix & 0x00f80000) >> 8) | diff --git a/drivers/gpu/drm/drm_fourcc.c b/drivers/gpu/drm/drm_fourcc.c index 55ddd99fd7f6..e0d533611040 100644 --- a/drivers/gpu/drm/drm_fourcc.c +++ b/drivers/gpu/drm/drm_fourcc.c @@ -238,6 +238,14 @@ const struct drm_format_info *__drm_format_info(u32 format) { .format = DRM_FORMAT_ABGR2101010, .depth = 30, .num_planes = 1, .cpp = { 4, 0, 0 }, .hsub = 1, .vsub = 1, .has_alpha = true }, { .format = DRM_FORMAT_RGBA1010102, .depth = 30, .num_planes = 1, .cpp = { 4, 0, 0 }, .hsub = 1, .vsub = 1, .has_alpha = true }, { .format = DRM_FORMAT_BGRA1010102, .depth = 30, .num_planes = 1, .cpp = { 4, 0, 0 }, .hsub = 1, .vsub = 1, .has_alpha = true }, + { .format = DRM_FORMAT_RGB161616, .depth = 0, + .num_planes = 1, .char_per_block = { 6, 0, 0 }, + .block_w = { 1, 0, 0 }, .block_h = { 1, 0, 
0 }, + .hsub = 1, .vsub = 1, .has_alpha = false }, + { .format = DRM_FORMAT_BGR161616, .depth = 0, + .num_planes = 1, .char_per_block = { 6, 0, 0 }, + .block_w = { 1, 0, 0 }, .block_h = { 1, 0, 0 }, + .hsub = 1, .vsub = 1, .has_alpha = false }, { .format = DRM_FORMAT_ARGB8888, .depth = 32, .num_planes = 1, .cpp = { 4, 0, 0 }, .hsub = 1, .vsub = 1, .has_alpha = true }, { .format = DRM_FORMAT_ABGR8888, .depth = 32, .num_planes = 1, .cpp = { 4, 0, 0 }, .hsub = 1, .vsub = 1, .has_alpha = true }, { .format = DRM_FORMAT_RGBA8888, .depth = 32, .num_planes = 1, .cpp = { 4, 0, 0 }, .hsub = 1, .vsub = 1, .has_alpha = true }, @@ -409,7 +417,8 @@ EXPORT_SYMBOL(drm_format_info); /** * drm_get_format_info - query information for a given framebuffer configuration * @dev: DRM device - * @mode_cmd: metadata from the userspace fb creation request + * @pixel_format: pixel format (DRM_FORMAT_*) + * @modifier: modifier * * Returns: * The instance of struct drm_format_info that describes the pixel format, or @@ -417,15 +426,16 @@ EXPORT_SYMBOL(drm_format_info); */ const struct drm_format_info * drm_get_format_info(struct drm_device *dev, - const struct drm_mode_fb_cmd2 *mode_cmd) + u32 pixel_format, u64 modifier) { const struct drm_format_info *info = NULL; if (dev->mode_config.funcs->get_format_info) - info = dev->mode_config.funcs->get_format_info(mode_cmd); + info = dev->mode_config.funcs->get_format_info(pixel_format, + modifier); if (!info) - info = drm_format_info(mode_cmd->pixel_format); + info = drm_format_info(pixel_format); return info; } diff --git a/drivers/gpu/drm/drm_framebuffer.c b/drivers/gpu/drm/drm_framebuffer.c index b781601946db..adbb73f00d68 100644 --- a/drivers/gpu/drm/drm_framebuffer.c +++ b/drivers/gpu/drm/drm_framebuffer.c @@ -153,18 +153,11 @@ int drm_mode_addfb_ioctl(struct drm_device *dev, } static int framebuffer_check(struct drm_device *dev, + const struct drm_format_info *info, const struct drm_mode_fb_cmd2 *r) { - const struct drm_format_info *info; int i; - /* check if the format is supported at all */ - if (!__drm_format_info(r->pixel_format)) { - drm_dbg_kms(dev, "bad framebuffer format %p4cc\n", - &r->pixel_format); - return -EINVAL; - } - if (r->width == 0) { drm_dbg_kms(dev, "bad framebuffer width %u\n", r->width); return -EINVAL; @@ -175,9 +168,6 @@ static int framebuffer_check(struct drm_device *dev, return -EINVAL; } - /* now let the driver pick its own format info */ - info = drm_get_format_info(dev, r); - for (i = 0; i < info->num_planes; i++) { unsigned int width = drm_format_info_plane_width(info, r->width, i); unsigned int height = drm_format_info_plane_height(info, r->height, i); @@ -272,6 +262,7 @@ drm_internal_framebuffer_create(struct drm_device *dev, struct drm_file *file_priv) { struct drm_mode_config *config = &dev->mode_config; + const struct drm_format_info *info; struct drm_framebuffer *fb; int ret; @@ -297,11 +288,21 @@ drm_internal_framebuffer_create(struct drm_device *dev, return ERR_PTR(-EINVAL); } - ret = framebuffer_check(dev, r); + /* check if the format is supported at all */ + if (!__drm_format_info(r->pixel_format)) { + drm_dbg_kms(dev, "bad framebuffer format %p4cc\n", + &r->pixel_format); + return ERR_PTR(-EINVAL); + } + + /* now let the driver pick its own format info */ + info = drm_get_format_info(dev, r->pixel_format, r->modifier[0]); + + ret = framebuffer_check(dev, info, r); if (ret) return ERR_PTR(ret); - fb = dev->mode_config.funcs->fb_create(dev, file_priv, r); + fb = dev->mode_config.funcs->fb_create(dev, file_priv, info, r); if 
(IS_ERR(fb)) { drm_dbg_kms(dev, "could not create framebuffer\n"); return fb; @@ -862,11 +863,23 @@ EXPORT_SYMBOL_FOR_TESTS_ONLY(drm_framebuffer_free); int drm_framebuffer_init(struct drm_device *dev, struct drm_framebuffer *fb, const struct drm_framebuffer_funcs *funcs) { + unsigned int i; int ret; + bool exists; if (WARN_ON_ONCE(fb->dev != dev || !fb->format)) return -EINVAL; + for (i = 0; i < fb->format->num_planes; i++) { + if (drm_WARN_ON_ONCE(dev, fb->internal_flags & DRM_FRAMEBUFFER_HAS_HANDLE_REF(i))) + fb->internal_flags &= ~DRM_FRAMEBUFFER_HAS_HANDLE_REF(i); + if (fb->obj[i]) { + exists = drm_gem_object_handle_get_if_exists_unlocked(fb->obj[i]); + if (exists) + fb->internal_flags |= DRM_FRAMEBUFFER_HAS_HANDLE_REF(i); + } + } + INIT_LIST_HEAD(&fb->filp_head); fb->funcs = funcs; @@ -875,7 +888,7 @@ int drm_framebuffer_init(struct drm_device *dev, struct drm_framebuffer *fb, ret = __drm_mode_object_add(dev, &fb->base, DRM_MODE_OBJECT_FB, false, drm_framebuffer_free); if (ret) - goto out; + goto err; mutex_lock(&dev->mode_config.fb_lock); dev->mode_config.num_fb++; @@ -883,7 +896,16 @@ int drm_framebuffer_init(struct drm_device *dev, struct drm_framebuffer *fb, mutex_unlock(&dev->mode_config.fb_lock); drm_mode_object_register(dev, &fb->base); -out: + + return 0; + +err: + for (i = 0; i < fb->format->num_planes; i++) { + if (fb->internal_flags & DRM_FRAMEBUFFER_HAS_HANDLE_REF(i)) { + drm_gem_object_handle_put_unlocked(fb->obj[i]); + fb->internal_flags &= ~DRM_FRAMEBUFFER_HAS_HANDLE_REF(i); + } + } return ret; } EXPORT_SYMBOL(drm_framebuffer_init); @@ -960,6 +982,12 @@ EXPORT_SYMBOL(drm_framebuffer_unregister_private); void drm_framebuffer_cleanup(struct drm_framebuffer *fb) { struct drm_device *dev = fb->dev; + unsigned int i; + + for (i = 0; i < fb->format->num_planes; i++) { + if (fb->internal_flags & DRM_FRAMEBUFFER_HAS_HANDLE_REF(i)) + drm_gem_object_handle_put_unlocked(fb->obj[i]); + } mutex_lock(&dev->mode_config.fb_lock); list_del(&fb->head); diff --git a/drivers/gpu/drm/drm_gem.c b/drivers/gpu/drm/drm_gem.c index 0905ef6786e9..f884d155a832 100644 --- a/drivers/gpu/drm/drm_gem.c +++ b/drivers/gpu/drm/drm_gem.c @@ -187,6 +187,7 @@ void drm_gem_private_object_init(struct drm_device *dev, kref_init(&obj->refcount); obj->handle_count = 0; obj->size = size; + mutex_init(&obj->gpuva.lock); dma_resv_init(&obj->_resv); if (!obj->resv) obj->resv = &obj->_resv; @@ -210,9 +211,50 @@ void drm_gem_private_object_fini(struct drm_gem_object *obj) WARN_ON(obj->dma_buf); dma_resv_fini(&obj->_resv); + mutex_destroy(&obj->gpuva.lock); } EXPORT_SYMBOL(drm_gem_private_object_fini); +static void drm_gem_object_handle_get(struct drm_gem_object *obj) +{ + struct drm_device *dev = obj->dev; + + drm_WARN_ON(dev, !mutex_is_locked(&dev->object_name_lock)); + + if (obj->handle_count++ == 0) + drm_gem_object_get(obj); +} + +/** + * drm_gem_object_handle_get_if_exists_unlocked - acquire reference on user-space handle, if any + * @obj: GEM object + * + * Acquires a reference on the GEM buffer object's handle. Required to keep + * the GEM object alive. Call drm_gem_object_handle_put_if_exists_unlocked() + * to release the reference. Does nothing if the buffer object has no handle. + * + * Returns: + * True if a handle exists, or false otherwise + */ +bool drm_gem_object_handle_get_if_exists_unlocked(struct drm_gem_object *obj) +{ + struct drm_device *dev = obj->dev; + + guard(mutex)(&dev->object_name_lock); + + /* + * First ref taken during GEM object creation, if any. 
Some + * drivers set up internal framebuffers with GEM objects that + * do not have a GEM handle. Hence, this counter can be zero. + */ + if (!obj->handle_count) + return false; + + drm_gem_object_handle_get(obj); + + return true; +} + /** * drm_gem_object_handle_free - release resources bound to userspace handles * @obj: GEM object to clean up. @@ -243,20 +285,26 @@ static void drm_gem_object_exported_dma_buf_free(struct drm_gem_object *obj) } } -static void -drm_gem_object_handle_put_unlocked(struct drm_gem_object *obj) +/** + * drm_gem_object_handle_put_unlocked - releases reference on user-space handle + * @obj: GEM object + * + * Releases a reference on the GEM buffer object's handle. Possibly releases + * the GEM buffer object and associated dma-buf objects. + */ +void drm_gem_object_handle_put_unlocked(struct drm_gem_object *obj) { struct drm_device *dev = obj->dev; bool final = false; - if (WARN_ON(READ_ONCE(obj->handle_count) == 0)) + if (drm_WARN_ON(dev, READ_ONCE(obj->handle_count) == 0)) return; /* - * Must bump handle count first as this may be the last - * ref, in which case the object would disappear before we - * checked for a name - */ + * Must bump handle count first as this may be the last + * ref, in which case the object would disappear before + * we checked for a name. + */ mutex_lock(&dev->object_name_lock); if (--obj->handle_count == 0) { @@ -280,10 +328,18 @@ drm_gem_object_release_handle(int id, void *ptr, void *data) struct drm_file *file_priv = data; struct drm_gem_object *obj = ptr; + if (drm_WARN_ON(obj->dev, !data)) + return 0; + if (obj->funcs->close) obj->funcs->close(obj, file_priv); + mutex_lock(&file_priv->prime.lock); + drm_prime_remove_buf_handle(&file_priv->prime, id); + + mutex_unlock(&file_priv->prime.lock); + drm_vma_node_revoke(&obj->vma_node, file_priv); drm_gem_object_handle_put_unlocked(obj); @@ -390,8 +446,8 @@ drm_gem_handle_create_tail(struct drm_file *file_priv, int ret; WARN_ON(!mutex_is_locked(&dev->object_name_lock)); - if (obj->handle_count++ == 0) - drm_gem_object_get(obj); + + drm_gem_object_handle_get(obj); /* * Get the user-visible handle using idr. 
Preload and perform @@ -400,7 +456,7 @@ drm_gem_handle_create_tail(struct drm_file *file_priv, idr_preload(GFP_KERNEL); spin_lock(&file_priv->table_lock); - ret = idr_alloc(&file_priv->object_idr, obj, 1, 0, GFP_NOWAIT); + ret = idr_alloc(&file_priv->object_idr, NULL, 1, 0, GFP_NOWAIT); spin_unlock(&file_priv->table_lock); idr_preload_end(); @@ -421,6 +477,11 @@ drm_gem_handle_create_tail(struct drm_file *file_priv, goto err_revoke; } + /* mirrors drm_gem_handle_delete to avoid races */ + spin_lock(&file_priv->table_lock); + obj = idr_replace(&file_priv->object_idr, obj, handle); + WARN_ON(obj != NULL); + spin_unlock(&file_priv->table_lock); *handlep = handle; return 0; @@ -567,7 +628,7 @@ struct page **drm_gem_get_pages(struct drm_gem_object *obj) struct page **pages; struct folio *folio; struct folio_batch fbatch; - long i, j, npages; + unsigned long i, j, npages; if (WARN_ON(!obj->filp)) return ERR_PTR(-EINVAL); @@ -591,7 +652,7 @@ struct page **drm_gem_get_pages(struct drm_gem_object *obj) i = 0; while (i < npages) { - long nr; + unsigned long nr; folio = shmem_read_folio_gfp(mapping, i, mapping_gfp_mask(mapping)); if (IS_ERR(folio)) @@ -816,14 +877,6 @@ long drm_gem_dma_resv_wait(struct drm_file *filep, u32 handle, } EXPORT_SYMBOL(drm_gem_dma_resv_wait); -/** - * drm_gem_close_ioctl - implementation of the GEM_CLOSE ioctl - * @dev: drm_device - * @data: ioctl data - * @file_priv: drm file-private structure - * - * Releases the handle to an mm object. - */ int drm_gem_close_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv) @@ -839,17 +892,6 @@ drm_gem_close_ioctl(struct drm_device *dev, void *data, return ret; } -/** - * drm_gem_flink_ioctl - implementation of the GEM_FLINK ioctl - * @dev: drm_device - * @data: ioctl data - * @file_priv: drm file-private structure - * - * Create a global name for an object, returning the name. - * - * Note that the name does not hold a reference; when the object - * is freed, the name goes away. - */ int drm_gem_flink_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv) @@ -889,17 +931,6 @@ err: return ret; } -/** - * drm_gem_open_ioctl - implementation of the GEM_OPEN ioctl - * @dev: drm_device - * @data: ioctl data - * @file_priv: drm file-private structure - * - * Open an object using the global name, returning a handle and the size. - * - * This handle (of course) holds a reference to the object, so the object - * will not go away until the handle is deleted. 
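/*
 * Illustrative sketch, not part of this patch, of the reserve-then-publish
 * idr pattern used by drm_gem_handle_create_tail() above: the id is allocated
 * with a NULL pointer so that concurrent lookups cannot observe a
 * half-constructed object, and the object only becomes visible once setup has
 * succeeded and idr_replace() installs it.
 */
#include <linux/bug.h>
#include <linux/idr.h>
#include <linux/spinlock.h>

static int publish_object(struct idr *idr, spinlock_t *lock, void *obj)
{
        int id;

        idr_preload(GFP_KERNEL);
        spin_lock(lock);
        id = idr_alloc(idr, NULL, 1, 0, GFP_NOWAIT);    /* reserve the id only */
        spin_unlock(lock);
        idr_preload_end();
        if (id < 0)
                return id;

        /* ... finish any setup that a lookup of "id" must be able to rely on ... */

        spin_lock(lock);
        WARN_ON(idr_replace(idr, obj, id));     /* publish; old entry was NULL */
        spin_unlock(lock);

        return id;
}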
- */ int drm_gem_open_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv) @@ -934,6 +965,57 @@ err: return ret; } +int drm_gem_change_handle_ioctl(struct drm_device *dev, void *data, + struct drm_file *file_priv) +{ + struct drm_gem_change_handle *args = data; + struct drm_gem_object *obj; + int ret; + + if (!drm_core_check_feature(dev, DRIVER_GEM)) + return -EOPNOTSUPP; + + obj = drm_gem_object_lookup(file_priv, args->handle); + if (!obj) + return -ENOENT; + + if (args->handle == args->new_handle) + return 0; + + mutex_lock(&file_priv->prime.lock); + + spin_lock(&file_priv->table_lock); + ret = idr_alloc(&file_priv->object_idr, obj, + args->new_handle, args->new_handle + 1, GFP_NOWAIT); + spin_unlock(&file_priv->table_lock); + + if (ret < 0) + goto out_unlock; + + if (obj->dma_buf) { + ret = drm_prime_add_buf_handle(&file_priv->prime, obj->dma_buf, args->new_handle); + if (ret < 0) { + spin_lock(&file_priv->table_lock); + idr_remove(&file_priv->object_idr, args->new_handle); + spin_unlock(&file_priv->table_lock); + goto out_unlock; + } + + drm_prime_remove_buf_handle(&file_priv->prime, args->handle); + } + + ret = 0; + + spin_lock(&file_priv->table_lock); + idr_remove(&file_priv->object_idr, args->handle); + spin_unlock(&file_priv->table_lock); + +out_unlock: + mutex_unlock(&file_priv->prime.lock); + + return ret; +} + /** * drm_gem_open - initializes GEM file-private structures at devnode open time * @dev: drm_device which is being opened by userspace diff --git a/drivers/gpu/drm/drm_gem_dma_helper.c b/drivers/gpu/drm/drm_gem_dma_helper.c index b7f033d4352a..4f0320df858f 100644 --- a/drivers/gpu/drm/drm_gem_dma_helper.c +++ b/drivers/gpu/drm/drm_gem_dma_helper.c @@ -230,7 +230,7 @@ void drm_gem_dma_free(struct drm_gem_dma_object *dma_obj) if (drm_gem_is_imported(gem_obj)) { if (dma_obj->vaddr) - dma_buf_vunmap_unlocked(gem_obj->dma_buf, &map); + dma_buf_vunmap_unlocked(gem_obj->import_attach->dmabuf, &map); drm_prime_gem_destroy(gem_obj, dma_obj->sgt); } else if (dma_obj->vaddr) { if (dma_obj->map_noncoherent) diff --git a/drivers/gpu/drm/drm_gem_framebuffer_helper.c b/drivers/gpu/drm/drm_gem_framebuffer_helper.c index 618ce725cd75..4bc89d33df59 100644 --- a/drivers/gpu/drm/drm_gem_framebuffer_helper.c +++ b/drivers/gpu/drm/drm_gem_framebuffer_helper.c @@ -68,6 +68,7 @@ EXPORT_SYMBOL_GPL(drm_gem_fb_get_obj); static int drm_gem_fb_init(struct drm_device *dev, struct drm_framebuffer *fb, + const struct drm_format_info *info, const struct drm_mode_fb_cmd2 *mode_cmd, struct drm_gem_object **obj, unsigned int num_planes, const struct drm_framebuffer_funcs *funcs) @@ -75,7 +76,7 @@ drm_gem_fb_init(struct drm_device *dev, unsigned int i; int ret; - drm_helper_mode_fill_fb_struct(dev, fb, mode_cmd); + drm_helper_mode_fill_fb_struct(dev, fb, info, mode_cmd); for (i = 0; i < num_planes; i++) fb->obj[i] = obj[i]; @@ -136,6 +137,7 @@ EXPORT_SYMBOL(drm_gem_fb_create_handle); * @dev: DRM device * @fb: framebuffer object * @file: DRM file that holds the GEM handle(s) backing the framebuffer + * @info: pixel format information * @mode_cmd: Metadata from the userspace framebuffer creation request * @funcs: vtable to be used for the new framebuffer object * @@ -152,20 +154,14 @@ EXPORT_SYMBOL(drm_gem_fb_create_handle); int drm_gem_fb_init_with_funcs(struct drm_device *dev, struct drm_framebuffer *fb, struct drm_file *file, + const struct drm_format_info *info, const struct drm_mode_fb_cmd2 *mode_cmd, const struct drm_framebuffer_funcs *funcs) { - const struct drm_format_info *info; 
struct drm_gem_object *objs[DRM_FORMAT_MAX_PLANES]; unsigned int i; int ret; - info = drm_get_format_info(dev, mode_cmd); - if (!info) { - drm_dbg_kms(dev, "Failed to get FB format info\n"); - return -EINVAL; - } - if (drm_drv_uses_atomic_modeset(dev) && !drm_any_plane_has_format(dev, mode_cmd->pixel_format, mode_cmd->modifier[0])) { @@ -200,7 +196,7 @@ int drm_gem_fb_init_with_funcs(struct drm_device *dev, } } - ret = drm_gem_fb_init(dev, fb, mode_cmd, objs, i, funcs); + ret = drm_gem_fb_init(dev, fb, info, mode_cmd, objs, i, funcs); if (ret) goto err_gem_object_put; @@ -221,6 +217,7 @@ EXPORT_SYMBOL_GPL(drm_gem_fb_init_with_funcs); * callback * @dev: DRM device * @file: DRM file that holds the GEM handle(s) backing the framebuffer + * @info: pixel format information * @mode_cmd: Metadata from the userspace framebuffer creation request * @funcs: vtable to be used for the new framebuffer object * @@ -233,6 +230,7 @@ EXPORT_SYMBOL_GPL(drm_gem_fb_init_with_funcs); */ struct drm_framebuffer * drm_gem_fb_create_with_funcs(struct drm_device *dev, struct drm_file *file, + const struct drm_format_info *info, const struct drm_mode_fb_cmd2 *mode_cmd, const struct drm_framebuffer_funcs *funcs) { @@ -243,7 +241,7 @@ drm_gem_fb_create_with_funcs(struct drm_device *dev, struct drm_file *file, if (!fb) return ERR_PTR(-ENOMEM); - ret = drm_gem_fb_init_with_funcs(dev, fb, file, mode_cmd, funcs); + ret = drm_gem_fb_init_with_funcs(dev, fb, file, info, mode_cmd, funcs); if (ret) { kfree(fb); return ERR_PTR(ret); @@ -263,6 +261,7 @@ static const struct drm_framebuffer_funcs drm_gem_fb_funcs = { * &drm_mode_config_funcs.fb_create callback * @dev: DRM device * @file: DRM file that holds the GEM handle(s) backing the framebuffer + * @info: pixel format information * @mode_cmd: Metadata from the userspace framebuffer creation request * * This function creates a new framebuffer object described by @@ -282,9 +281,10 @@ static const struct drm_framebuffer_funcs drm_gem_fb_funcs = { */ struct drm_framebuffer * drm_gem_fb_create(struct drm_device *dev, struct drm_file *file, + const struct drm_format_info *info, const struct drm_mode_fb_cmd2 *mode_cmd) { - return drm_gem_fb_create_with_funcs(dev, file, mode_cmd, + return drm_gem_fb_create_with_funcs(dev, file, info, mode_cmd, &drm_gem_fb_funcs); } EXPORT_SYMBOL_GPL(drm_gem_fb_create); @@ -300,6 +300,7 @@ static const struct drm_framebuffer_funcs drm_gem_fb_funcs_dirtyfb = { * &drm_mode_config_funcs.fb_create callback * @dev: DRM device * @file: DRM file that holds the GEM handle(s) backing the framebuffer + * @info: pixel format information * @mode_cmd: Metadata from the userspace framebuffer creation request * * This function creates a new framebuffer object described by @@ -320,9 +321,10 @@ static const struct drm_framebuffer_funcs drm_gem_fb_funcs_dirtyfb = { */ struct drm_framebuffer * drm_gem_fb_create_with_dirty(struct drm_device *dev, struct drm_file *file, + const struct drm_format_info *info, const struct drm_mode_fb_cmd2 *mode_cmd) { - return drm_gem_fb_create_with_funcs(dev, file, mode_cmd, + return drm_gem_fb_create_with_funcs(dev, file, info, mode_cmd, &drm_gem_fb_funcs_dirtyfb); } EXPORT_SYMBOL_GPL(drm_gem_fb_create_with_dirty); @@ -420,6 +422,7 @@ EXPORT_SYMBOL(drm_gem_fb_vunmap); static void __drm_gem_fb_end_cpu_access(struct drm_framebuffer *fb, enum dma_data_direction dir, unsigned int num_planes) { + struct dma_buf_attachment *import_attach; struct drm_gem_object *obj; int ret; @@ -428,9 +431,10 @@ static void __drm_gem_fb_end_cpu_access(struct 
drm_framebuffer *fb, enum dma_dat obj = drm_gem_fb_get_obj(fb, num_planes); if (!obj) continue; + import_attach = obj->import_attach; if (!drm_gem_is_imported(obj)) continue; - ret = dma_buf_end_cpu_access(obj->dma_buf, dir); + ret = dma_buf_end_cpu_access(import_attach->dmabuf, dir); if (ret) drm_err(fb->dev, "dma_buf_end_cpu_access(%u, %d) failed: %d\n", ret, num_planes, dir); @@ -453,6 +457,7 @@ static void __drm_gem_fb_end_cpu_access(struct drm_framebuffer *fb, enum dma_dat */ int drm_gem_fb_begin_cpu_access(struct drm_framebuffer *fb, enum dma_data_direction dir) { + struct dma_buf_attachment *import_attach; struct drm_gem_object *obj; unsigned int i; int ret; @@ -463,9 +468,10 @@ int drm_gem_fb_begin_cpu_access(struct drm_framebuffer *fb, enum dma_data_direct ret = -EINVAL; goto err___drm_gem_fb_end_cpu_access; } + import_attach = obj->import_attach; if (!drm_gem_is_imported(obj)) continue; - ret = dma_buf_begin_cpu_access(obj->dma_buf, dir); + ret = dma_buf_begin_cpu_access(import_attach->dmabuf, dir); if (ret) goto err___drm_gem_fb_end_cpu_access; } @@ -498,12 +504,9 @@ EXPORT_SYMBOL(drm_gem_fb_end_cpu_access); // TODO Drop this function and replace by drm_format_info_bpp() once all // DRM_FORMAT_* provide proper block info in drivers/gpu/drm/drm_fourcc.c static __u32 drm_gem_afbc_get_bpp(struct drm_device *dev, + const struct drm_format_info *info, const struct drm_mode_fb_cmd2 *mode_cmd) { - const struct drm_format_info *info; - - info = drm_get_format_info(dev, mode_cmd); - switch (info->format) { case DRM_FORMAT_YUV420_8BIT: return 12; @@ -517,6 +520,7 @@ static __u32 drm_gem_afbc_get_bpp(struct drm_device *dev, } static int drm_gem_afbc_min_size(struct drm_device *dev, + const struct drm_format_info *info, const struct drm_mode_fb_cmd2 *mode_cmd, struct drm_afbc_framebuffer *afbc_fb) { @@ -557,7 +561,7 @@ static int drm_gem_afbc_min_size(struct drm_device *dev, afbc_fb->aligned_height = ALIGN(mode_cmd->height, h_alignment); afbc_fb->offset = mode_cmd->offsets[0]; - bpp = drm_gem_afbc_get_bpp(dev, mode_cmd); + bpp = drm_gem_afbc_get_bpp(dev, info, mode_cmd); if (!bpp) { drm_dbg_kms(dev, "Invalid AFBC bpp value: %d\n", bpp); return -EINVAL; @@ -579,6 +583,7 @@ static int drm_gem_afbc_min_size(struct drm_device *dev, * * @dev: DRM device * @afbc_fb: afbc-specific framebuffer + * @info: pixel format information * @mode_cmd: Metadata from the userspace framebuffer creation request * @afbc_fb: afbc framebuffer * @@ -592,19 +597,16 @@ static int drm_gem_afbc_min_size(struct drm_device *dev, * Zero on success or a negative error value on failure. 
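/*
 * Illustrative sketch, not part of this patch: with this series the resolved
 * format info is handed to &drm_mode_config_funcs.fb_create, so a driver
 * implementation no longer needs to look it up itself. The single-plane check
 * is a hypothetical driver policy.
 */
#include <linux/err.h>
#include <drm/drm_fourcc.h>
#include <drm/drm_framebuffer.h>
#include <drm/drm_gem_framebuffer_helper.h>

static struct drm_framebuffer *
my_fb_create(struct drm_device *dev, struct drm_file *file,
             const struct drm_format_info *info,
             const struct drm_mode_fb_cmd2 *mode_cmd)
{
        if (info->num_planes != 1)      /* hypothetical: single plane only */
                return ERR_PTR(-EINVAL);

        return drm_gem_fb_create(dev, file, info, mode_cmd);
}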
*/ int drm_gem_fb_afbc_init(struct drm_device *dev, + const struct drm_format_info *info, const struct drm_mode_fb_cmd2 *mode_cmd, struct drm_afbc_framebuffer *afbc_fb) { - const struct drm_format_info *info; struct drm_gem_object **objs; int ret; objs = afbc_fb->base.obj; - info = drm_get_format_info(dev, mode_cmd); - if (!info) - return -EINVAL; - ret = drm_gem_afbc_min_size(dev, mode_cmd, afbc_fb); + ret = drm_gem_afbc_min_size(dev, info, mode_cmd, afbc_fb); if (ret < 0) return ret; diff --git a/drivers/gpu/drm/drm_gem_shmem_helper.c b/drivers/gpu/drm/drm_gem_shmem_helper.c index 8ac0b1fa5287..5d1349c34afd 100644 --- a/drivers/gpu/drm/drm_gem_shmem_helper.c +++ b/drivers/gpu/drm/drm_gem_shmem_helper.c @@ -351,7 +351,7 @@ int drm_gem_shmem_vmap_locked(struct drm_gem_shmem_object *shmem, dma_resv_assert_held(obj->resv); if (drm_gem_is_imported(obj)) { - ret = dma_buf_vmap(obj->dma_buf, map); + ret = dma_buf_vmap(obj->import_attach->dmabuf, map); } else { pgprot_t prot = PAGE_KERNEL; @@ -413,7 +413,7 @@ void drm_gem_shmem_vunmap_locked(struct drm_gem_shmem_object *shmem, dma_resv_assert_held(obj->resv); if (drm_gem_is_imported(obj)) { - dma_buf_vunmap(obj->dma_buf, map); + dma_buf_vunmap(obj->import_attach->dmabuf, map); } else { dma_resv_assert_held(shmem->base.resv); diff --git a/drivers/gpu/drm/drm_gpusvm.c b/drivers/gpu/drm/drm_gpusvm.c index ab198645d90f..cb906765897e 100644 --- a/drivers/gpu/drm/drm_gpusvm.c +++ b/drivers/gpu/drm/drm_gpusvm.c @@ -9,10 +9,9 @@ #include <linux/dma-mapping.h> #include <linux/export.h> #include <linux/hmm.h> +#include <linux/hugetlb_inline.h> #include <linux/memremap.h> -#include <linux/migrate.h> #include <linux/mm_types.h> -#include <linux/pagemap.h> #include <linux/slab.h> #include <drm/drm_device.h> @@ -109,21 +108,6 @@ */ /** - * DOC: Migration - * - * The migration support is quite simple, allowing migration between RAM and - * device memory at the range granularity. For example, GPU SVM currently does - * not support mixing RAM and device memory pages within a range. This means - * that upon GPU fault, the entire range can be migrated to device memory, and - * upon CPU fault, the entire range is migrated to RAM. Mixed RAM and device - * memory storage within a range could be added in the future if required. - * - * The reasoning for only supporting range granularity is as follows: it - * simplifies the implementation, and range sizes are driver-defined and should - * be relatively small. - */ - -/** * DOC: Partial Unmapping of Ranges * * Partial unmapping of ranges (e.g., 1M out of 2M is unmapped by CPU resulting @@ -192,12 +176,9 @@ * } * * if (driver_migration_policy(range)) { - * mmap_read_lock(mm); - * devmem = driver_alloc_devmem(); - * err = drm_gpusvm_migrate_to_devmem(gpusvm, range, - * devmem_allocation, - * &ctx); - * mmap_read_unlock(mm); + * err = drm_pagemap_populate_mm(driver_choose_drm_pagemap(), + * gpuva_start, gpuva_end, gpusvm->mm, + * ctx->timeslice_ms); * if (err) // CPU mappings may have changed * goto retry; * } @@ -290,95 +271,27 @@ npages_in_range(unsigned long start, unsigned long end) } /** - * struct drm_gpusvm_zdd - GPU SVM zone device data - * - * @refcount: Reference count for the zdd - * @devmem_allocation: device memory allocation - * @device_private_page_owner: Device private pages owner - * - * This structure serves as a generic wrapper installed in - * page->zone_device_data. 
It provides infrastructure for looking up a device - * memory allocation upon CPU page fault and asynchronously releasing device - * memory once the CPU has no page references. Asynchronous release is useful - * because CPU page references can be dropped in IRQ contexts, while releasing - * device memory likely requires sleeping locks. - */ -struct drm_gpusvm_zdd { - struct kref refcount; - struct drm_gpusvm_devmem *devmem_allocation; - void *device_private_page_owner; -}; - -/** - * drm_gpusvm_zdd_alloc() - Allocate a zdd structure. - * @device_private_page_owner: Device private pages owner - * - * This function allocates and initializes a new zdd structure. It sets up the - * reference count and initializes the destroy work. + * drm_gpusvm_notifier_find() - Find GPU SVM notifier from GPU SVM + * @gpusvm: Pointer to the GPU SVM structure. + * @start: Start address of the notifier + * @end: End address of the notifier * - * Return: Pointer to the allocated zdd on success, ERR_PTR() on failure. + * Return: A pointer to the drm_gpusvm_notifier if found or NULL */ -static struct drm_gpusvm_zdd * -drm_gpusvm_zdd_alloc(void *device_private_page_owner) +struct drm_gpusvm_notifier * +drm_gpusvm_notifier_find(struct drm_gpusvm *gpusvm, unsigned long start, + unsigned long end) { - struct drm_gpusvm_zdd *zdd; - - zdd = kmalloc(sizeof(*zdd), GFP_KERNEL); - if (!zdd) - return NULL; - - kref_init(&zdd->refcount); - zdd->devmem_allocation = NULL; - zdd->device_private_page_owner = device_private_page_owner; - - return zdd; -} + struct interval_tree_node *itree; -/** - * drm_gpusvm_zdd_get() - Get a reference to a zdd structure. - * @zdd: Pointer to the zdd structure. - * - * This function increments the reference count of the provided zdd structure. - * - * Return: Pointer to the zdd structure. - */ -static struct drm_gpusvm_zdd *drm_gpusvm_zdd_get(struct drm_gpusvm_zdd *zdd) -{ - kref_get(&zdd->refcount); - return zdd; -} + itree = interval_tree_iter_first(&gpusvm->root, start, end - 1); -/** - * drm_gpusvm_zdd_destroy() - Destroy a zdd structure. - * @ref: Pointer to the reference count structure. - * - * This function queues the destroy_work of the zdd for asynchronous destruction. - */ -static void drm_gpusvm_zdd_destroy(struct kref *ref) -{ - struct drm_gpusvm_zdd *zdd = - container_of(ref, struct drm_gpusvm_zdd, refcount); - struct drm_gpusvm_devmem *devmem = zdd->devmem_allocation; - - if (devmem) { - complete_all(&devmem->detached); - if (devmem->ops->devmem_release) - devmem->ops->devmem_release(devmem); - } - kfree(zdd); -} - -/** - * drm_gpusvm_zdd_put() - Put a zdd reference. - * @zdd: Pointer to the zdd structure. - * - * This function decrements the reference count of the provided zdd structure - * and schedules its destruction if the count drops to zero. 
- */ -static void drm_gpusvm_zdd_put(struct drm_gpusvm_zdd *zdd) -{ - kref_put(&zdd->refcount, drm_gpusvm_zdd_destroy); + if (itree) + return container_of(itree, struct drm_gpusvm_notifier, itree); + else + return NULL; } +EXPORT_SYMBOL_GPL(drm_gpusvm_notifier_find); /** * drm_gpusvm_range_find() - Find GPU SVM range from GPU SVM notifier @@ -404,86 +317,6 @@ drm_gpusvm_range_find(struct drm_gpusvm_notifier *notifier, unsigned long start, EXPORT_SYMBOL_GPL(drm_gpusvm_range_find); /** - * drm_gpusvm_for_each_range_safe() - Safely iterate over GPU SVM ranges in a notifier - * @range__: Iterator variable for the ranges - * @next__: Iterator variable for the ranges temporay storage - * @notifier__: Pointer to the GPU SVM notifier - * @start__: Start address of the range - * @end__: End address of the range - * - * This macro is used to iterate over GPU SVM ranges in a notifier while - * removing ranges from it. - */ -#define drm_gpusvm_for_each_range_safe(range__, next__, notifier__, start__, end__) \ - for ((range__) = drm_gpusvm_range_find((notifier__), (start__), (end__)), \ - (next__) = __drm_gpusvm_range_next(range__); \ - (range__) && (drm_gpusvm_range_start(range__) < (end__)); \ - (range__) = (next__), (next__) = __drm_gpusvm_range_next(range__)) - -/** - * __drm_gpusvm_notifier_next() - get the next drm_gpusvm_notifier in the list - * @notifier: a pointer to the current drm_gpusvm_notifier - * - * Return: A pointer to the next drm_gpusvm_notifier if available, or NULL if - * the current notifier is the last one or if the input notifier is - * NULL. - */ -static struct drm_gpusvm_notifier * -__drm_gpusvm_notifier_next(struct drm_gpusvm_notifier *notifier) -{ - if (notifier && !list_is_last(¬ifier->entry, - ¬ifier->gpusvm->notifier_list)) - return list_next_entry(notifier, entry); - - return NULL; -} - -static struct drm_gpusvm_notifier * -notifier_iter_first(struct rb_root_cached *root, unsigned long start, - unsigned long last) -{ - struct interval_tree_node *itree; - - itree = interval_tree_iter_first(root, start, last); - - if (itree) - return container_of(itree, struct drm_gpusvm_notifier, itree); - else - return NULL; -} - -/** - * drm_gpusvm_for_each_notifier() - Iterate over GPU SVM notifiers in a gpusvm - * @notifier__: Iterator variable for the notifiers - * @notifier__: Pointer to the GPU SVM notifier - * @start__: Start address of the notifier - * @end__: End address of the notifier - * - * This macro is used to iterate over GPU SVM notifiers in a gpusvm. - */ -#define drm_gpusvm_for_each_notifier(notifier__, gpusvm__, start__, end__) \ - for ((notifier__) = notifier_iter_first(&(gpusvm__)->root, (start__), (end__) - 1); \ - (notifier__) && (drm_gpusvm_notifier_start(notifier__) < (end__)); \ - (notifier__) = __drm_gpusvm_notifier_next(notifier__)) - -/** - * drm_gpusvm_for_each_notifier_safe() - Safely iterate over GPU SVM notifiers in a gpusvm - * @notifier__: Iterator variable for the notifiers - * @next__: Iterator variable for the notifiers temporay storage - * @notifier__: Pointer to the GPU SVM notifier - * @start__: Start address of the notifier - * @end__: End address of the notifier - * - * This macro is used to iterate over GPU SVM notifiers in a gpusvm while - * removing notifiers from it. 
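/*
 * Illustrative sketch, not part of this patch: the now-exported
 * drm_gpusvm_notifier_find() takes an exclusive end address, so the notifier
 * covering a single faulting address is found with a one-byte query,
 * mirroring how drm_gpusvm_range_find_or_insert() uses it further below.
 */
#include <drm/drm_gpusvm.h>

static struct drm_gpusvm_notifier *
my_notifier_for_addr(struct drm_gpusvm *gpusvm, unsigned long fault_addr)
{
        return drm_gpusvm_notifier_find(gpusvm, fault_addr, fault_addr + 1);
}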
- */ -#define drm_gpusvm_for_each_notifier_safe(notifier__, next__, gpusvm__, start__, end__) \ - for ((notifier__) = notifier_iter_first(&(gpusvm__)->root, (start__), (end__) - 1), \ - (next__) = __drm_gpusvm_notifier_next(notifier__); \ - (notifier__) && (drm_gpusvm_notifier_start(notifier__) < (end__)); \ - (notifier__) = (next__), (next__) = __drm_gpusvm_notifier_next(notifier__)) - -/** * drm_gpusvm_notifier_invalidate() - Invalidate a GPU SVM notifier. * @mni: Pointer to the mmu_interval_notifier structure. * @mmu_range: Pointer to the mmu_notifier_range structure. @@ -528,7 +361,6 @@ static const struct mmu_interval_notifier_ops drm_gpusvm_notifier_ops = { * @name: Name of the GPU SVM. * @drm: Pointer to the DRM device structure. * @mm: Pointer to the mm_struct for the address space. - * @device_private_page_owner: Device private pages owner. * @mm_start: Start address of GPU SVM. * @mm_range: Range of the GPU SVM. * @notifier_size: Size of individual notifiers. @@ -540,23 +372,35 @@ static const struct mmu_interval_notifier_ops drm_gpusvm_notifier_ops = { * * This function initializes the GPU SVM. * + * Note: If only using the simple drm_gpusvm_pages API (get/unmap/free), + * then only @gpusvm, @name, and @drm are expected. However, the same base + * @gpusvm can also be used with both modes together in which case the full + * setup is needed, where the core drm_gpusvm_pages API will simply never use + * the other fields. + * * Return: 0 on success, a negative error code on failure. */ int drm_gpusvm_init(struct drm_gpusvm *gpusvm, const char *name, struct drm_device *drm, - struct mm_struct *mm, void *device_private_page_owner, + struct mm_struct *mm, unsigned long mm_start, unsigned long mm_range, unsigned long notifier_size, const struct drm_gpusvm_ops *ops, const unsigned long *chunk_sizes, int num_chunks) { - if (!ops->invalidate || !num_chunks) - return -EINVAL; + if (mm) { + if (!ops->invalidate || !num_chunks) + return -EINVAL; + mmgrab(mm); + } else { + /* No full SVM mode, only core drm_gpusvm_pages API. */ + if (ops || num_chunks || mm_range || notifier_size) + return -EINVAL; + } gpusvm->name = name; gpusvm->drm = drm; gpusvm->mm = mm; - gpusvm->device_private_page_owner = device_private_page_owner; gpusvm->mm_start = mm_start; gpusvm->mm_range = mm_range; gpusvm->notifier_size = notifier_size; @@ -564,7 +408,6 @@ int drm_gpusvm_init(struct drm_gpusvm *gpusvm, gpusvm->chunk_sizes = chunk_sizes; gpusvm->num_chunks = num_chunks; - mmgrab(mm); gpusvm->root = RB_ROOT_CACHED; INIT_LIST_HEAD(&gpusvm->notifier_list); @@ -583,22 +426,6 @@ int drm_gpusvm_init(struct drm_gpusvm *gpusvm, EXPORT_SYMBOL_GPL(drm_gpusvm_init); /** - * drm_gpusvm_notifier_find() - Find GPU SVM notifier - * @gpusvm: Pointer to the GPU SVM structure - * @fault_addr: Fault address - * - * This function finds the GPU SVM notifier associated with the fault address. - * - * Return: Pointer to the GPU SVM notifier on success, NULL otherwise. 
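/*
 * Illustrative sketch, not part of this patch: initializing GPU SVM for the
 * pages-only mode described in the note above. With a NULL mm and no
 * ops/chunks/notifier size, only the drm_gpusvm_pages helpers (get/unmap/free)
 * may be used on this instance. "struct my_gpu" is hypothetical.
 */
#include <drm/drm_device.h>
#include <drm/drm_gpusvm.h>

struct my_gpu {                 /* hypothetical driver device structure */
        struct drm_device drm;
        struct drm_gpusvm svm;
};

static int my_gpu_init_pages_only_svm(struct my_gpu *gpu)
{
        return drm_gpusvm_init(&gpu->svm, "my-pages-only-svm", &gpu->drm,
                               NULL, 0, 0, 0, NULL, NULL, 0);
}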
- */ -static struct drm_gpusvm_notifier * -drm_gpusvm_notifier_find(struct drm_gpusvm *gpusvm, - unsigned long fault_addr) -{ - return notifier_iter_first(&gpusvm->root, fault_addr, fault_addr + 1); -} - -/** * to_drm_gpusvm_notifier() - retrieve the container struct for a given rbtree node * @node: a pointer to the rbtree node embedded within a drm_gpusvm_notifier struct * @@ -672,7 +499,8 @@ void drm_gpusvm_fini(struct drm_gpusvm *gpusvm) drm_gpusvm_range_remove(gpusvm, range); } - mmdrop(gpusvm->mm); + if (gpusvm->mm) + mmdrop(gpusvm->mm); WARN_ON(!RB_EMPTY_ROOT(&gpusvm->root.rb_root)); } EXPORT_SYMBOL_GPL(drm_gpusvm_fini); @@ -812,18 +640,48 @@ drm_gpusvm_range_alloc(struct drm_gpusvm *gpusvm, range->itree.start = ALIGN_DOWN(fault_addr, chunk_size); range->itree.last = ALIGN(fault_addr + 1, chunk_size) - 1; INIT_LIST_HEAD(&range->entry); - range->notifier_seq = LONG_MAX; - range->flags.migrate_devmem = migrate_devmem ? 1 : 0; + range->pages.notifier_seq = LONG_MAX; + range->pages.flags.migrate_devmem = migrate_devmem ? 1 : 0; return range; } /** + * drm_gpusvm_hmm_pfn_to_order() - Get the largest CPU mapping order. + * @hmm_pfn: The current hmm_pfn. + * @hmm_pfn_index: Index of the @hmm_pfn within the pfn array. + * @npages: Number of pages within the pfn array i.e the hmm range size. + * + * To allow skipping PFNs with the same flags (like when they belong to + * the same huge PTE) when looping over the pfn array, take a given a hmm_pfn, + * and return the largest order that will fit inside the CPU PTE, but also + * crucially accounting for the original hmm range boundaries. + * + * Return: The largest order that will safely fit within the size of the hmm_pfn + * CPU PTE. + */ +static unsigned int drm_gpusvm_hmm_pfn_to_order(unsigned long hmm_pfn, + unsigned long hmm_pfn_index, + unsigned long npages) +{ + unsigned long size; + + size = 1UL << hmm_pfn_to_map_order(hmm_pfn); + size -= (hmm_pfn & ~HMM_PFN_FLAGS) & (size - 1); + hmm_pfn_index += size; + if (hmm_pfn_index > npages) + size -= (hmm_pfn_index - npages); + + return ilog2(size); +} + +/** * drm_gpusvm_check_pages() - Check pages * @gpusvm: Pointer to the GPU SVM structure * @notifier: Pointer to the GPU SVM notifier structure * @start: Start address * @end: End address + * @dev_private_owner: The device private page owner * * Check if pages between start and end have been faulted in on the CPU. Use to * prevent migration of pages without CPU backing store. 
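/*
 * Worked example, not part of this patch, for drm_gpusvm_hmm_pfn_to_order()
 * above: assume a PFN backed by a 2 MiB huge mapping (map order 9, i.e. 512
 * pages) that sits 8 pages into that mapping, at index 0 of an hmm range of
 * npages = 300 pages:
 *
 *   size = 1 << 9      = 512   (CPU mapping size from hmm_pfn_to_map_order())
 *   size = 512 - 8     = 504   (pages left until the end of the huge mapping)
 *   size = 504 - 204   = 300   (clamped to the end of the hmm range)
 *   return ilog2(300)  = 8     (largest order that still fits)
 */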
@@ -832,14 +690,15 @@ drm_gpusvm_range_alloc(struct drm_gpusvm *gpusvm, */ static bool drm_gpusvm_check_pages(struct drm_gpusvm *gpusvm, struct drm_gpusvm_notifier *notifier, - unsigned long start, unsigned long end) + unsigned long start, unsigned long end, + void *dev_private_owner) { struct hmm_range hmm_range = { .default_flags = 0, .notifier = ¬ifier->notifier, .start = start, .end = end, - .dev_private_owner = gpusvm->device_private_page_owner, + .dev_private_owner = dev_private_owner, }; unsigned long timeout = jiffies + msecs_to_jiffies(HMM_RANGE_DEFAULT_TIMEOUT); @@ -876,7 +735,7 @@ static bool drm_gpusvm_check_pages(struct drm_gpusvm *gpusvm, err = -EFAULT; goto err_free; } - i += 0x1 << hmm_pfn_to_map_order(pfns[i]); + i += 0x1 << drm_gpusvm_hmm_pfn_to_order(pfns[i], i, npages); } err_free: @@ -893,6 +752,7 @@ err_free: * @gpuva_start: Start address of GPUVA which mirrors CPU * @gpuva_end: End address of GPUVA which mirrors CPU * @check_pages_threshold: Check CPU pages for present threshold + * @dev_private_owner: The device private page owner * * This function determines the chunk size for the GPU SVM range based on the * fault address, GPU SVM chunk sizes, existing GPU SVM ranges, and the virtual @@ -907,7 +767,8 @@ drm_gpusvm_range_chunk_size(struct drm_gpusvm *gpusvm, unsigned long fault_addr, unsigned long gpuva_start, unsigned long gpuva_end, - unsigned long check_pages_threshold) + unsigned long check_pages_threshold, + void *dev_private_owner) { unsigned long start, end; int i = 0; @@ -946,7 +807,7 @@ retry: * process-many-malloc' fails. In the failure case, each process * mallocs 16k but the CPU VMA is ~128k which results in 64k SVM * ranges. When migrating the SVM ranges, some processes fail in - * drm_gpusvm_migrate_to_devmem with 'migrate.cpages != npages' + * drm_pagemap_migrate_to_devmem with 'migrate.cpages != npages' * and then upon drm_gpusvm_range_get_pages device pages from * other processes are collected + faulted in which creates all * sorts of problems. Unsure exactly how this happening, also @@ -954,7 +815,7 @@ retry: * process-many-malloc' mallocs at least 64k at a time. 
*/ if (end - start <= check_pages_threshold && - !drm_gpusvm_check_pages(gpusvm, notifier, start, end)) { + !drm_gpusvm_check_pages(gpusvm, notifier, start, end, dev_private_owner)) { ++i; goto retry; } @@ -1053,7 +914,7 @@ drm_gpusvm_range_find_or_insert(struct drm_gpusvm *gpusvm, if (!mmget_not_zero(mm)) return ERR_PTR(-EFAULT); - notifier = drm_gpusvm_notifier_find(gpusvm, fault_addr); + notifier = drm_gpusvm_notifier_find(gpusvm, fault_addr, fault_addr + 1); if (!notifier) { notifier = drm_gpusvm_notifier_alloc(gpusvm, fault_addr); if (IS_ERR(notifier)) { @@ -1097,7 +958,8 @@ drm_gpusvm_range_find_or_insert(struct drm_gpusvm *gpusvm, chunk_size = drm_gpusvm_range_chunk_size(gpusvm, notifier, vas, fault_addr, gpuva_start, gpuva_end, - ctx->check_pages_threshold); + ctx->check_pages_threshold, + ctx->device_private_page_owner); if (chunk_size == LONG_MAX) { err = -EINVAL; goto err_notifier_remove; @@ -1134,31 +996,31 @@ err_mmunlock: EXPORT_SYMBOL_GPL(drm_gpusvm_range_find_or_insert); /** - * __drm_gpusvm_range_unmap_pages() - Unmap pages associated with a GPU SVM range (internal) + * __drm_gpusvm_unmap_pages() - Unmap pages associated with GPU SVM pages (internal) * @gpusvm: Pointer to the GPU SVM structure - * @range: Pointer to the GPU SVM range structure + * @svm_pages: Pointer to the GPU SVM pages structure * @npages: Number of pages to unmap * - * This function unmap pages associated with a GPU SVM range. Assumes and + * This function unmap pages associated with a GPU SVM pages struct. Assumes and * asserts correct locking is in place when called. */ -static void __drm_gpusvm_range_unmap_pages(struct drm_gpusvm *gpusvm, - struct drm_gpusvm_range *range, - unsigned long npages) +static void __drm_gpusvm_unmap_pages(struct drm_gpusvm *gpusvm, + struct drm_gpusvm_pages *svm_pages, + unsigned long npages) { - unsigned long i, j; - struct drm_pagemap *dpagemap = range->dpagemap; + struct drm_pagemap *dpagemap = svm_pages->dpagemap; struct device *dev = gpusvm->drm->dev; + unsigned long i, j; lockdep_assert_held(&gpusvm->notifier_lock); - if (range->flags.has_dma_mapping) { - struct drm_gpusvm_range_flags flags = { - .__flags = range->flags.__flags, + if (svm_pages->flags.has_dma_mapping) { + struct drm_gpusvm_pages_flags flags = { + .__flags = svm_pages->flags.__flags, }; for (i = 0, j = 0; i < npages; j++) { - struct drm_pagemap_device_addr *addr = &range->dma_addr[j]; + struct drm_pagemap_addr *addr = &svm_pages->dma_addr[j]; if (addr->proto == DRM_INTERCONNECT_SYSTEM) dma_unmap_page(dev, @@ -1174,31 +1036,52 @@ static void __drm_gpusvm_range_unmap_pages(struct drm_gpusvm *gpusvm, /* WRITE_ONCE pairs with READ_ONCE for opportunistic checks */ flags.has_devmem_pages = false; flags.has_dma_mapping = false; - WRITE_ONCE(range->flags.__flags, flags.__flags); + WRITE_ONCE(svm_pages->flags.__flags, flags.__flags); - range->dpagemap = NULL; + svm_pages->dpagemap = NULL; } } /** - * drm_gpusvm_range_free_pages() - Free pages associated with a GPU SVM range + * __drm_gpusvm_free_pages() - Free dma array associated with GPU SVM pages * @gpusvm: Pointer to the GPU SVM structure - * @range: Pointer to the GPU SVM range structure + * @svm_pages: Pointer to the GPU SVM pages structure * * This function frees the dma address array associated with a GPU SVM range. 
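/*
 * Illustrative sketch, not part of this patch: the minimal lifecycle of the
 * drm_gpusvm_pages helpers added by this series (drm_gpusvm_free_pages() and
 * drm_gpusvm_get_pages() appear further below). Programming the GPU page
 * tables from svm_pages->dma_addr is hypothetical and driver specific, as is
 * the choice of mm and mmu_interval_notifier.
 */
#include <drm/drm_gpusvm.h>

static int my_map_cpu_range(struct drm_gpusvm *gpusvm,
                            struct drm_gpusvm_pages *svm_pages,
                            struct mm_struct *mm,
                            struct mmu_interval_notifier *notifier,
                            unsigned long start, unsigned long end)
{
        struct drm_gpusvm_ctx ctx = {};
        int err;

        err = drm_gpusvm_get_pages(gpusvm, svm_pages, mm, notifier,
                                   start, end, &ctx);
        if (err)
                return err;

        /* ... program the GPU page tables from svm_pages->dma_addr ... */

        return 0;
}

static void my_unmap_cpu_range(struct drm_gpusvm *gpusvm,
                               struct drm_gpusvm_pages *svm_pages,
                               unsigned long start, unsigned long end)
{
        unsigned long npages = (end - start) >> PAGE_SHIFT;

        /* DMA-unmaps the pages and frees the dma_addr array. */
        drm_gpusvm_free_pages(gpusvm, svm_pages, npages);
}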
*/ -static void drm_gpusvm_range_free_pages(struct drm_gpusvm *gpusvm, - struct drm_gpusvm_range *range) +static void __drm_gpusvm_free_pages(struct drm_gpusvm *gpusvm, + struct drm_gpusvm_pages *svm_pages) { lockdep_assert_held(&gpusvm->notifier_lock); - if (range->dma_addr) { - kvfree(range->dma_addr); - range->dma_addr = NULL; + if (svm_pages->dma_addr) { + kvfree(svm_pages->dma_addr); + svm_pages->dma_addr = NULL; } } /** + * drm_gpusvm_free_pages() - Free dma-mapping associated with GPU SVM pages + * struct + * @gpusvm: Pointer to the GPU SVM structure + * @svm_pages: Pointer to the GPU SVM pages structure + * @npages: Number of mapped pages + * + * This function unmaps and frees the dma address array associated with a GPU + * SVM pages struct. + */ +void drm_gpusvm_free_pages(struct drm_gpusvm *gpusvm, + struct drm_gpusvm_pages *svm_pages, + unsigned long npages) +{ + drm_gpusvm_notifier_lock(gpusvm); + __drm_gpusvm_unmap_pages(gpusvm, svm_pages, npages); + __drm_gpusvm_free_pages(gpusvm, svm_pages); + drm_gpusvm_notifier_unlock(gpusvm); +} +EXPORT_SYMBOL_GPL(drm_gpusvm_free_pages); + +/** * drm_gpusvm_range_remove() - Remove GPU SVM range * @gpusvm: Pointer to the GPU SVM structure * @range: Pointer to the GPU SVM range to be removed @@ -1217,13 +1100,14 @@ void drm_gpusvm_range_remove(struct drm_gpusvm *gpusvm, drm_gpusvm_driver_lock_held(gpusvm); notifier = drm_gpusvm_notifier_find(gpusvm, - drm_gpusvm_range_start(range)); + drm_gpusvm_range_start(range), + drm_gpusvm_range_start(range) + 1); if (WARN_ON_ONCE(!notifier)) return; drm_gpusvm_notifier_lock(gpusvm); - __drm_gpusvm_range_unmap_pages(gpusvm, range, npages); - drm_gpusvm_range_free_pages(gpusvm, range); + __drm_gpusvm_unmap_pages(gpusvm, &range->pages, npages); + __drm_gpusvm_free_pages(gpusvm, &range->pages); __drm_gpusvm_range_remove(notifier, range); drm_gpusvm_notifier_unlock(gpusvm); @@ -1289,6 +1173,28 @@ void drm_gpusvm_range_put(struct drm_gpusvm_range *range) EXPORT_SYMBOL_GPL(drm_gpusvm_range_put); /** + * drm_gpusvm_pages_valid() - GPU SVM range pages valid + * @gpusvm: Pointer to the GPU SVM structure + * @svm_pages: Pointer to the GPU SVM pages structure + * + * This function determines if a GPU SVM range pages are valid. Expected be + * called holding gpusvm->notifier_lock and as the last step before committing a + * GPU binding. This is akin to a notifier seqno check in the HMM documentation + * but due to wider notifiers (i.e., notifiers which span multiple ranges) this + * function is required for finer grained checking (i.e., per range) if pages + * are valid. 
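/*
 * Illustrative sketch, not part of this patch: committing a GPU binding only
 * while the pages are known to be valid, using the check documented above
 * (exposed to drivers as drm_gpusvm_range_pages_valid()). "my_commit_binding()"
 * is a hypothetical hook that writes the GPU page tables for the range.
 */
static void my_commit_binding(struct drm_gpusvm_range *range);     /* hypothetical */

static int my_try_commit_binding(struct drm_gpusvm *gpusvm,
                                 struct drm_gpusvm_range *range)
{
        int err = 0;

        drm_gpusvm_notifier_lock(gpusvm);
        if (drm_gpusvm_range_pages_valid(gpusvm, range))
                my_commit_binding(range);
        else
                err = -EAGAIN;          /* caller re-faults the range and retries */
        drm_gpusvm_notifier_unlock(gpusvm);

        return err;
}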
+ * + * Return: True if GPU SVM range has valid pages, False otherwise + */ +static bool drm_gpusvm_pages_valid(struct drm_gpusvm *gpusvm, + struct drm_gpusvm_pages *svm_pages) +{ + lockdep_assert_held(&gpusvm->notifier_lock); + + return svm_pages->flags.has_devmem_pages || svm_pages->flags.has_dma_mapping; +} + +/** * drm_gpusvm_range_pages_valid() - GPU SVM range pages valid * @gpusvm: Pointer to the GPU SVM structure * @range: Pointer to the GPU SVM range structure @@ -1305,9 +1211,7 @@ EXPORT_SYMBOL_GPL(drm_gpusvm_range_put); bool drm_gpusvm_range_pages_valid(struct drm_gpusvm *gpusvm, struct drm_gpusvm_range *range) { - lockdep_assert_held(&gpusvm->notifier_lock); - - return range->flags.has_devmem_pages || range->flags.has_dma_mapping; + return drm_gpusvm_pages_valid(gpusvm, &range->pages); } EXPORT_SYMBOL_GPL(drm_gpusvm_range_pages_valid); @@ -1321,66 +1225,71 @@ EXPORT_SYMBOL_GPL(drm_gpusvm_range_pages_valid); * * Return: True if GPU SVM range has valid pages, False otherwise */ -static bool -drm_gpusvm_range_pages_valid_unlocked(struct drm_gpusvm *gpusvm, - struct drm_gpusvm_range *range) +static bool drm_gpusvm_pages_valid_unlocked(struct drm_gpusvm *gpusvm, + struct drm_gpusvm_pages *svm_pages) { bool pages_valid; - if (!range->dma_addr) + if (!svm_pages->dma_addr) return false; drm_gpusvm_notifier_lock(gpusvm); - pages_valid = drm_gpusvm_range_pages_valid(gpusvm, range); + pages_valid = drm_gpusvm_pages_valid(gpusvm, svm_pages); if (!pages_valid) - drm_gpusvm_range_free_pages(gpusvm, range); + __drm_gpusvm_free_pages(gpusvm, svm_pages); drm_gpusvm_notifier_unlock(gpusvm); return pages_valid; } /** - * drm_gpusvm_range_get_pages() - Get pages for a GPU SVM range + * drm_gpusvm_get_pages() - Get pages and populate GPU SVM pages struct * @gpusvm: Pointer to the GPU SVM structure - * @range: Pointer to the GPU SVM range structure + * @svm_pages: The SVM pages to populate. This will contain the dma-addresses + * @mm: The mm corresponding to the CPU range + * @notifier: The corresponding notifier for the given CPU range + * @pages_start: Start CPU address for the pages + * @pages_end: End CPU address for the pages (exclusive) * @ctx: GPU SVM context * - * This function gets pages for a GPU SVM range and ensures they are mapped for - * DMA access. + * This function gets and maps pages for CPU range and ensures they are + * mapped for DMA access. * * Return: 0 on success, negative error code on failure. */ -int drm_gpusvm_range_get_pages(struct drm_gpusvm *gpusvm, - struct drm_gpusvm_range *range, - const struct drm_gpusvm_ctx *ctx) +int drm_gpusvm_get_pages(struct drm_gpusvm *gpusvm, + struct drm_gpusvm_pages *svm_pages, + struct mm_struct *mm, + struct mmu_interval_notifier *notifier, + unsigned long pages_start, unsigned long pages_end, + const struct drm_gpusvm_ctx *ctx) { - struct mmu_interval_notifier *notifier = &range->notifier->notifier; struct hmm_range hmm_range = { .default_flags = HMM_PFN_REQ_FAULT | (ctx->read_only ? 
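The validity check above is the last step of the documented bind protocol: (re)fault the pages, take the notifier lock, confirm the pages are still valid, then commit the GPU binding. A minimal sketch under that protocol; driver_commit_pagetables() is a hypothetical driver hook:

	static int driver_bind_range(struct drm_gpusvm *gpusvm,
				     struct drm_gpusvm_range *range,
				     const struct drm_gpusvm_ctx *ctx)
	{
		int err;

	again:
		err = drm_gpusvm_range_get_pages(gpusvm, range, ctx);
		if (err)
			return err;

		drm_gpusvm_notifier_lock(gpusvm);
		if (!drm_gpusvm_range_pages_valid(gpusvm, range)) {
			/* Raced with an invalidation, refault and retry. */
			drm_gpusvm_notifier_unlock(gpusvm);
			goto again;
		}
		err = driver_commit_pagetables(range);	/* hypothetical */
		drm_gpusvm_notifier_unlock(gpusvm);

		return err;
	}
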
0 : HMM_PFN_REQ_WRITE), .notifier = notifier, - .start = drm_gpusvm_range_start(range), - .end = drm_gpusvm_range_end(range), - .dev_private_owner = gpusvm->device_private_page_owner, + .start = pages_start, + .end = pages_end, + .dev_private_owner = ctx->device_private_page_owner, }; - struct mm_struct *mm = gpusvm->mm; - struct drm_gpusvm_zdd *zdd; + void *zdd; unsigned long timeout = jiffies + msecs_to_jiffies(HMM_RANGE_DEFAULT_TIMEOUT); unsigned long i, j; - unsigned long npages = npages_in_range(drm_gpusvm_range_start(range), - drm_gpusvm_range_end(range)); + unsigned long npages = npages_in_range(pages_start, pages_end); unsigned long num_dma_mapped; unsigned int order = 0; unsigned long *pfns; int err = 0; struct dev_pagemap *pagemap; struct drm_pagemap *dpagemap; - struct drm_gpusvm_range_flags flags; + struct drm_gpusvm_pages_flags flags; + enum dma_data_direction dma_dir = ctx->read_only ? DMA_TO_DEVICE : + DMA_BIDIRECTIONAL; retry: hmm_range.notifier_seq = mmu_interval_read_begin(notifier); - if (drm_gpusvm_range_pages_valid_unlocked(gpusvm, range)) + if (drm_gpusvm_pages_valid_unlocked(gpusvm, svm_pages)) goto set_seqno; pfns = kvmalloc_array(npages, sizeof(*pfns), GFP_KERNEL); @@ -1420,7 +1329,7 @@ map_pages: */ drm_gpusvm_notifier_lock(gpusvm); - flags.__flags = range->flags.__flags; + flags.__flags = svm_pages->flags.__flags; if (flags.unmapped) { drm_gpusvm_notifier_unlock(gpusvm); err = -EFAULT; @@ -1433,13 +1342,12 @@ map_pages: goto retry; } - if (!range->dma_addr) { + if (!svm_pages->dma_addr) { /* Unlock and restart mapping to allocate memory. */ drm_gpusvm_notifier_unlock(gpusvm); - range->dma_addr = kvmalloc_array(npages, - sizeof(*range->dma_addr), - GFP_KERNEL); - if (!range->dma_addr) { + svm_pages->dma_addr = + kvmalloc_array(npages, sizeof(*svm_pages->dma_addr), GFP_KERNEL); + if (!svm_pages->dma_addr) { err = -ENOMEM; goto err_free; } @@ -1447,11 +1355,12 @@ map_pages: } zdd = NULL; + pagemap = NULL; num_dma_mapped = 0; for (i = 0, j = 0; i < npages; ++j) { struct page *page = hmm_pfn_to_page(pfns[i]); - order = hmm_pfn_to_map_order(pfns[i]); + order = drm_gpusvm_hmm_pfn_to_order(pfns[i], i, npages); if (is_device_private_page(page) || is_device_coherent_page(page)) { if (zdd != page->zone_device_data && i > 0) { @@ -1466,7 +1375,7 @@ map_pages: } pagemap = page_pgmap(page); - dpagemap = zdd->devmem_allocation->dpagemap; + dpagemap = drm_pagemap_page_to_dpagemap(page); if (drm_WARN_ON(gpusvm->drm, !dpagemap)) { /* * Raced. 
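Both the fault owner and the DMA direction are now taken from the per-call context rather than from the gpusvm object. A minimal sketch of the context a caller might pass; the values and the driver_pgmap_owner cookie are illustrative, not part of this patch:

	struct drm_gpusvm_ctx ctx = {
		.read_only = true,			/* pages get mapped DMA_TO_DEVICE */
		.device_private_page_owner = driver_pgmap_owner,	/* hypothetical owner cookie */
		.check_pages_threshold = SZ_64K,
	};
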
This is not supposed to happen @@ -1477,20 +1386,20 @@ map_pages: goto err_unmap; } } - range->dma_addr[j] = + svm_pages->dma_addr[j] = dpagemap->ops->device_map(dpagemap, gpusvm->drm->dev, page, order, - DMA_BIDIRECTIONAL); + dma_dir); if (dma_mapping_error(gpusvm->drm->dev, - range->dma_addr[j].addr)) { + svm_pages->dma_addr[j].addr)) { err = -EFAULT; goto err_unmap; } } else { dma_addr_t addr; - if (is_zone_device_page(page) || zdd) { + if (is_zone_device_page(page) || pagemap) { err = -EOPNOTSUPP; goto err_unmap; } @@ -1503,38 +1412,38 @@ map_pages: addr = dma_map_page(gpusvm->drm->dev, page, 0, PAGE_SIZE << order, - DMA_BIDIRECTIONAL); + dma_dir); if (dma_mapping_error(gpusvm->drm->dev, addr)) { err = -EFAULT; goto err_unmap; } - range->dma_addr[j] = drm_pagemap_device_addr_encode + svm_pages->dma_addr[j] = drm_pagemap_addr_encode (addr, DRM_INTERCONNECT_SYSTEM, order, - DMA_BIDIRECTIONAL); + dma_dir); } i += 1 << order; num_dma_mapped = i; flags.has_dma_mapping = true; } - if (zdd) { + if (pagemap) { flags.has_devmem_pages = true; - range->dpagemap = dpagemap; + svm_pages->dpagemap = dpagemap; } /* WRITE_ONCE pairs with READ_ONCE for opportunistic checks */ - WRITE_ONCE(range->flags.__flags, flags.__flags); + WRITE_ONCE(svm_pages->flags.__flags, flags.__flags); drm_gpusvm_notifier_unlock(gpusvm); kvfree(pfns); set_seqno: - range->notifier_seq = hmm_range.notifier_seq; + svm_pages->notifier_seq = hmm_range.notifier_seq; return 0; err_unmap: - __drm_gpusvm_range_unmap_pages(gpusvm, range, num_dma_mapped); + __drm_gpusvm_unmap_pages(gpusvm, svm_pages, num_dma_mapped); drm_gpusvm_notifier_unlock(gpusvm); err_free: kvfree(pfns); @@ -1542,596 +1451,89 @@ err_free: goto retry; return err; } -EXPORT_SYMBOL_GPL(drm_gpusvm_range_get_pages); +EXPORT_SYMBOL_GPL(drm_gpusvm_get_pages); /** - * drm_gpusvm_range_unmap_pages() - Unmap pages associated with a GPU SVM range + * drm_gpusvm_range_get_pages() - Get pages for a GPU SVM range * @gpusvm: Pointer to the GPU SVM structure * @range: Pointer to the GPU SVM range structure * @ctx: GPU SVM context * - * This function unmaps pages associated with a GPU SVM range. If @in_notifier - * is set, it is assumed that gpusvm->notifier_lock is held in write mode; if it - * is clear, it acquires gpusvm->notifier_lock in read mode. Must be called on - * each GPU SVM range attached to notifier in gpusvm->ops->invalidate for IOMMU - * security model. - */ -void drm_gpusvm_range_unmap_pages(struct drm_gpusvm *gpusvm, - struct drm_gpusvm_range *range, - const struct drm_gpusvm_ctx *ctx) -{ - unsigned long npages = npages_in_range(drm_gpusvm_range_start(range), - drm_gpusvm_range_end(range)); - - if (ctx->in_notifier) - lockdep_assert_held_write(&gpusvm->notifier_lock); - else - drm_gpusvm_notifier_lock(gpusvm); - - __drm_gpusvm_range_unmap_pages(gpusvm, range, npages); - - if (!ctx->in_notifier) - drm_gpusvm_notifier_unlock(gpusvm); -} -EXPORT_SYMBOL_GPL(drm_gpusvm_range_unmap_pages); - -/** - * drm_gpusvm_migration_unlock_put_page() - Put a migration page - * @page: Pointer to the page to put - * - * This function unlocks and puts a page. - */ -static void drm_gpusvm_migration_unlock_put_page(struct page *page) -{ - unlock_page(page); - put_page(page); -} - -/** - * drm_gpusvm_migration_unlock_put_pages() - Put migration pages - * @npages: Number of pages - * @migrate_pfn: Array of migrate page frame numbers - * - * This function unlocks and puts an array of pages. 
- */ -static void drm_gpusvm_migration_unlock_put_pages(unsigned long npages, - unsigned long *migrate_pfn) -{ - unsigned long i; - - for (i = 0; i < npages; ++i) { - struct page *page; - - if (!migrate_pfn[i]) - continue; - - page = migrate_pfn_to_page(migrate_pfn[i]); - drm_gpusvm_migration_unlock_put_page(page); - migrate_pfn[i] = 0; - } -} - -/** - * drm_gpusvm_get_devmem_page() - Get a reference to a device memory page - * @page: Pointer to the page - * @zdd: Pointer to the GPU SVM zone device data + * This function gets pages for a GPU SVM range and ensures they are mapped for + * DMA access. * - * This function associates the given page with the specified GPU SVM zone - * device data and initializes it for zone device usage. + * Return: 0 on success, negative error code on failure. */ -static void drm_gpusvm_get_devmem_page(struct page *page, - struct drm_gpusvm_zdd *zdd) +int drm_gpusvm_range_get_pages(struct drm_gpusvm *gpusvm, + struct drm_gpusvm_range *range, + const struct drm_gpusvm_ctx *ctx) { - page->zone_device_data = drm_gpusvm_zdd_get(zdd); - zone_device_page_init(page); -} - -/** - * drm_gpusvm_migrate_map_pages() - Map migration pages for GPU SVM migration - * @dev: The device for which the pages are being mapped - * @dma_addr: Array to store DMA addresses corresponding to mapped pages - * @migrate_pfn: Array of migrate page frame numbers to map - * @npages: Number of pages to map - * @dir: Direction of data transfer (e.g., DMA_BIDIRECTIONAL) - * - * This function maps pages of memory for migration usage in GPU SVM. It - * iterates over each page frame number provided in @migrate_pfn, maps the - * corresponding page, and stores the DMA address in the provided @dma_addr - * array. - * - * Return: 0 on success, -EFAULT if an error occurs during mapping. - */ -static int drm_gpusvm_migrate_map_pages(struct device *dev, - dma_addr_t *dma_addr, - unsigned long *migrate_pfn, - unsigned long npages, - enum dma_data_direction dir) -{ - unsigned long i; - - for (i = 0; i < npages; ++i) { - struct page *page = migrate_pfn_to_page(migrate_pfn[i]); - - if (!page) - continue; - - if (WARN_ON_ONCE(is_zone_device_page(page))) - return -EFAULT; - - dma_addr[i] = dma_map_page(dev, page, 0, PAGE_SIZE, dir); - if (dma_mapping_error(dev, dma_addr[i])) - return -EFAULT; - } - - return 0; + return drm_gpusvm_get_pages(gpusvm, &range->pages, gpusvm->mm, + &range->notifier->notifier, + drm_gpusvm_range_start(range), + drm_gpusvm_range_end(range), ctx); } +EXPORT_SYMBOL_GPL(drm_gpusvm_range_get_pages); /** - * drm_gpusvm_migrate_unmap_pages() - Unmap pages previously mapped for GPU SVM migration - * @dev: The device for which the pages were mapped - * @dma_addr: Array of DMA addresses corresponding to mapped pages - * @npages: Number of pages to unmap - * @dir: Direction of data transfer (e.g., DMA_BIDIRECTIONAL) + * drm_gpusvm_unmap_pages() - Unmap GPU svm pages + * @gpusvm: Pointer to the GPU SVM structure + * @svm_pages: Pointer to the GPU SVM pages structure + * @npages: Number of pages in @svm_pages. + * @ctx: GPU SVM context * - * This function unmaps previously mapped pages of memory for GPU Shared Virtual - * Memory (SVM). It iterates over each DMA address provided in @dma_addr, checks - * if it's valid and not already unmapped, and unmaps the corresponding page. + * This function unmaps pages associated with a GPU SVM pages struct. 
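drm_gpusvm_range_get_pages() is now a thin wrapper, so a driver that manages its own mmu_interval_notifier can call the lower-level helper directly. A minimal sketch; struct driver_obj and its members are hypothetical:

	static int driver_obj_get_pages(struct drm_gpusvm *gpusvm,
					struct driver_obj *bo,
					const struct drm_gpusvm_ctx *ctx)
	{
		return drm_gpusvm_get_pages(gpusvm, &bo->svm_pages, bo->mm,
					    &bo->notifier, bo->start, bo->end,
					    ctx);
	}
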
If + * @in_notifier is set, it is assumed that gpusvm->notifier_lock is held in + * write mode; if it is clear, it acquires gpusvm->notifier_lock in read mode. + * Must be called in the invalidate() callback of the corresponding notifier for + * IOMMU security model. */ -static void drm_gpusvm_migrate_unmap_pages(struct device *dev, - dma_addr_t *dma_addr, - unsigned long npages, - enum dma_data_direction dir) +void drm_gpusvm_unmap_pages(struct drm_gpusvm *gpusvm, + struct drm_gpusvm_pages *svm_pages, + unsigned long npages, + const struct drm_gpusvm_ctx *ctx) { - unsigned long i; + if (ctx->in_notifier) + lockdep_assert_held_write(&gpusvm->notifier_lock); + else + drm_gpusvm_notifier_lock(gpusvm); - for (i = 0; i < npages; ++i) { - if (!dma_addr[i] || dma_mapping_error(dev, dma_addr[i])) - continue; + __drm_gpusvm_unmap_pages(gpusvm, svm_pages, npages); - dma_unmap_page(dev, dma_addr[i], PAGE_SIZE, dir); - } + if (!ctx->in_notifier) + drm_gpusvm_notifier_unlock(gpusvm); } +EXPORT_SYMBOL_GPL(drm_gpusvm_unmap_pages); /** - * drm_gpusvm_migrate_to_devmem() - Migrate GPU SVM range to device memory + * drm_gpusvm_range_unmap_pages() - Unmap pages associated with a GPU SVM range * @gpusvm: Pointer to the GPU SVM structure * @range: Pointer to the GPU SVM range structure - * @devmem_allocation: Pointer to the device memory allocation. The caller - * should hold a reference to the device memory allocation, - * which should be dropped via ops->devmem_release or upon - * the failure of this function. * @ctx: GPU SVM context * - * This function migrates the specified GPU SVM range to device memory. It - * performs the necessary setup and invokes the driver-specific operations for - * migration to device memory. Upon successful return, @devmem_allocation can - * safely reference @range until ops->devmem_release is called which only upon - * successful return. Expected to be called while holding the mmap lock in read - * mode. - * - * Return: 0 on success, negative error code on failure. 
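With in_notifier set the helper only asserts that the notifier lock is already held, which matches the documented requirement to call it from the notifier's invalidate() path. A minimal sketch of such a callback, assuming the existing drm_gpusvm_ops.invalidate() signature and the drm_gpusvm_for_each_range() iterator:

	static void driver_invalidate(struct drm_gpusvm *gpusvm,
				      struct drm_gpusvm_notifier *notifier,
				      const struct mmu_notifier_range *mmu_range)
	{
		struct drm_gpusvm_ctx ctx = { .in_notifier = true };
		struct drm_gpusvm_range *range = NULL;

		/* notifier_lock is held in write mode by the core here. */
		drm_gpusvm_for_each_range(range, notifier, mmu_range->start,
					  mmu_range->end) {
			drm_gpusvm_range_set_unmapped(range, mmu_range);
			drm_gpusvm_range_unmap_pages(gpusvm, range, &ctx);
		}
	}
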
- */ -int drm_gpusvm_migrate_to_devmem(struct drm_gpusvm *gpusvm, - struct drm_gpusvm_range *range, - struct drm_gpusvm_devmem *devmem_allocation, - const struct drm_gpusvm_ctx *ctx) -{ - const struct drm_gpusvm_devmem_ops *ops = devmem_allocation->ops; - unsigned long start = drm_gpusvm_range_start(range), - end = drm_gpusvm_range_end(range); - struct migrate_vma migrate = { - .start = start, - .end = end, - .pgmap_owner = gpusvm->device_private_page_owner, - .flags = MIGRATE_VMA_SELECT_SYSTEM, - }; - struct mm_struct *mm = gpusvm->mm; - unsigned long i, npages = npages_in_range(start, end); - struct vm_area_struct *vas; - struct drm_gpusvm_zdd *zdd = NULL; - struct page **pages; - dma_addr_t *dma_addr; - void *buf; - int err; - - mmap_assert_locked(gpusvm->mm); - - if (!range->flags.migrate_devmem) - return -EINVAL; - - if (!ops->populate_devmem_pfn || !ops->copy_to_devmem || - !ops->copy_to_ram) - return -EOPNOTSUPP; - - vas = vma_lookup(mm, start); - if (!vas) { - err = -ENOENT; - goto err_out; - } - - if (end > vas->vm_end || start < vas->vm_start) { - err = -EINVAL; - goto err_out; - } - - if (!vma_is_anonymous(vas)) { - err = -EBUSY; - goto err_out; - } - - buf = kvcalloc(npages, 2 * sizeof(*migrate.src) + sizeof(*dma_addr) + - sizeof(*pages), GFP_KERNEL); - if (!buf) { - err = -ENOMEM; - goto err_out; - } - dma_addr = buf + (2 * sizeof(*migrate.src) * npages); - pages = buf + (2 * sizeof(*migrate.src) + sizeof(*dma_addr)) * npages; - - zdd = drm_gpusvm_zdd_alloc(gpusvm->device_private_page_owner); - if (!zdd) { - err = -ENOMEM; - goto err_free; - } - - migrate.vma = vas; - migrate.src = buf; - migrate.dst = migrate.src + npages; - - err = migrate_vma_setup(&migrate); - if (err) - goto err_free; - - if (!migrate.cpages) { - err = -EFAULT; - goto err_free; - } - - if (migrate.cpages != npages) { - err = -EBUSY; - goto err_finalize; - } - - err = ops->populate_devmem_pfn(devmem_allocation, npages, migrate.dst); - if (err) - goto err_finalize; - - err = drm_gpusvm_migrate_map_pages(devmem_allocation->dev, dma_addr, - migrate.src, npages, DMA_TO_DEVICE); - if (err) - goto err_finalize; - - for (i = 0; i < npages; ++i) { - struct page *page = pfn_to_page(migrate.dst[i]); - - pages[i] = page; - migrate.dst[i] = migrate_pfn(migrate.dst[i]); - drm_gpusvm_get_devmem_page(page, zdd); - } - - err = ops->copy_to_devmem(pages, dma_addr, npages); - if (err) - goto err_finalize; - - /* Upon success bind devmem allocation to range and zdd */ - devmem_allocation->timeslice_expiration = get_jiffies_64() + - msecs_to_jiffies(ctx->timeslice_ms); - zdd->devmem_allocation = devmem_allocation; /* Owns ref */ - -err_finalize: - if (err) - drm_gpusvm_migration_unlock_put_pages(npages, migrate.dst); - migrate_vma_pages(&migrate); - migrate_vma_finalize(&migrate); - drm_gpusvm_migrate_unmap_pages(devmem_allocation->dev, dma_addr, npages, - DMA_TO_DEVICE); -err_free: - if (zdd) - drm_gpusvm_zdd_put(zdd); - kvfree(buf); -err_out: - return err; -} -EXPORT_SYMBOL_GPL(drm_gpusvm_migrate_to_devmem); - -/** - * drm_gpusvm_migrate_populate_ram_pfn() - Populate RAM PFNs for a VM area - * @vas: Pointer to the VM area structure, can be NULL - * @fault_page: Fault page - * @npages: Number of pages to populate - * @mpages: Number of pages to migrate - * @src_mpfn: Source array of migrate PFNs - * @mpfn: Array of migrate PFNs to populate - * @addr: Start address for PFN allocation - * - * This function populates the RAM migrate page frame numbers (PFNs) for the - * specified VM area structure. 
It allocates and locks pages in the VM area for - * RAM usage. If vas is non-NULL use alloc_page_vma for allocation, if NULL use - * alloc_page for allocation. - * - * Return: 0 on success, negative error code on failure. - */ -static int drm_gpusvm_migrate_populate_ram_pfn(struct vm_area_struct *vas, - struct page *fault_page, - unsigned long npages, - unsigned long *mpages, - unsigned long *src_mpfn, - unsigned long *mpfn, - unsigned long addr) -{ - unsigned long i; - - for (i = 0; i < npages; ++i, addr += PAGE_SIZE) { - struct page *page, *src_page; - - if (!(src_mpfn[i] & MIGRATE_PFN_MIGRATE)) - continue; - - src_page = migrate_pfn_to_page(src_mpfn[i]); - if (!src_page) - continue; - - if (fault_page) { - if (src_page->zone_device_data != - fault_page->zone_device_data) - continue; - } - - if (vas) - page = alloc_page_vma(GFP_HIGHUSER, vas, addr); - else - page = alloc_page(GFP_HIGHUSER); - - if (!page) - goto free_pages; - - mpfn[i] = migrate_pfn(page_to_pfn(page)); - } - - for (i = 0; i < npages; ++i) { - struct page *page = migrate_pfn_to_page(mpfn[i]); - - if (!page) - continue; - - WARN_ON_ONCE(!trylock_page(page)); - ++*mpages; - } - - return 0; - -free_pages: - for (i = 0; i < npages; ++i) { - struct page *page = migrate_pfn_to_page(mpfn[i]); - - if (!page) - continue; - - put_page(page); - mpfn[i] = 0; - } - return -ENOMEM; -} - -/** - * drm_gpusvm_evict_to_ram() - Evict GPU SVM range to RAM - * @devmem_allocation: Pointer to the device memory allocation - * - * Similar to __drm_gpusvm_migrate_to_ram but does not require mmap lock and - * migration done via migrate_device_* functions. - * - * Return: 0 on success, negative error code on failure. - */ -int drm_gpusvm_evict_to_ram(struct drm_gpusvm_devmem *devmem_allocation) -{ - const struct drm_gpusvm_devmem_ops *ops = devmem_allocation->ops; - unsigned long npages, mpages = 0; - struct page **pages; - unsigned long *src, *dst; - dma_addr_t *dma_addr; - void *buf; - int i, err = 0; - unsigned int retry_count = 2; - - npages = devmem_allocation->size >> PAGE_SHIFT; - -retry: - if (!mmget_not_zero(devmem_allocation->mm)) - return -EFAULT; - - buf = kvcalloc(npages, 2 * sizeof(*src) + sizeof(*dma_addr) + - sizeof(*pages), GFP_KERNEL); - if (!buf) { - err = -ENOMEM; - goto err_out; - } - src = buf; - dst = buf + (sizeof(*src) * npages); - dma_addr = buf + (2 * sizeof(*src) * npages); - pages = buf + (2 * sizeof(*src) + sizeof(*dma_addr)) * npages; - - err = ops->populate_devmem_pfn(devmem_allocation, npages, src); - if (err) - goto err_free; - - err = migrate_device_pfns(src, npages); - if (err) - goto err_free; - - err = drm_gpusvm_migrate_populate_ram_pfn(NULL, NULL, npages, &mpages, - src, dst, 0); - if (err || !mpages) - goto err_finalize; - - err = drm_gpusvm_migrate_map_pages(devmem_allocation->dev, dma_addr, - dst, npages, DMA_FROM_DEVICE); - if (err) - goto err_finalize; - - for (i = 0; i < npages; ++i) - pages[i] = migrate_pfn_to_page(src[i]); - - err = ops->copy_to_ram(pages, dma_addr, npages); - if (err) - goto err_finalize; - -err_finalize: - if (err) - drm_gpusvm_migration_unlock_put_pages(npages, dst); - migrate_device_pages(src, dst, npages); - migrate_device_finalize(src, dst, npages); - drm_gpusvm_migrate_unmap_pages(devmem_allocation->dev, dma_addr, npages, - DMA_FROM_DEVICE); -err_free: - kvfree(buf); -err_out: - mmput_async(devmem_allocation->mm); - - if (completion_done(&devmem_allocation->detached)) - return 0; - - if (retry_count--) { - cond_resched(); - goto retry; - } - - return err ?: -EBUSY; -} 
-EXPORT_SYMBOL_GPL(drm_gpusvm_evict_to_ram); - -/** - * __drm_gpusvm_migrate_to_ram() - Migrate GPU SVM range to RAM (internal) - * @vas: Pointer to the VM area structure - * @device_private_page_owner: Device private pages owner - * @page: Pointer to the page for fault handling (can be NULL) - * @fault_addr: Fault address - * @size: Size of migration - * - * This internal function performs the migration of the specified GPU SVM range - * to RAM. It sets up the migration, populates + dma maps RAM PFNs, and - * invokes the driver-specific operations for migration to RAM. - * - * Return: 0 on success, negative error code on failure. + * This function unmaps pages associated with a GPU SVM range. If @in_notifier + * is set, it is assumed that gpusvm->notifier_lock is held in write mode; if it + * is clear, it acquires gpusvm->notifier_lock in read mode. Must be called on + * each GPU SVM range attached to notifier in gpusvm->ops->invalidate for IOMMU + * security model. */ -static int __drm_gpusvm_migrate_to_ram(struct vm_area_struct *vas, - void *device_private_page_owner, - struct page *page, - unsigned long fault_addr, - unsigned long size) +void drm_gpusvm_range_unmap_pages(struct drm_gpusvm *gpusvm, + struct drm_gpusvm_range *range, + const struct drm_gpusvm_ctx *ctx) { - struct migrate_vma migrate = { - .vma = vas, - .pgmap_owner = device_private_page_owner, - .flags = MIGRATE_VMA_SELECT_DEVICE_PRIVATE | - MIGRATE_VMA_SELECT_DEVICE_COHERENT, - .fault_page = page, - }; - struct drm_gpusvm_zdd *zdd; - const struct drm_gpusvm_devmem_ops *ops; - struct device *dev = NULL; - unsigned long npages, mpages = 0; - struct page **pages; - dma_addr_t *dma_addr; - unsigned long start, end; - void *buf; - int i, err = 0; - - if (page) { - zdd = page->zone_device_data; - if (time_before64(get_jiffies_64(), - zdd->devmem_allocation->timeslice_expiration)) - return 0; - } - - start = ALIGN_DOWN(fault_addr, size); - end = ALIGN(fault_addr + 1, size); - - /* Corner where VMA area struct has been partially unmapped */ - if (start < vas->vm_start) - start = vas->vm_start; - if (end > vas->vm_end) - end = vas->vm_end; - - migrate.start = start; - migrate.end = end; - npages = npages_in_range(start, end); - - buf = kvcalloc(npages, 2 * sizeof(*migrate.src) + sizeof(*dma_addr) + - sizeof(*pages), GFP_KERNEL); - if (!buf) { - err = -ENOMEM; - goto err_out; - } - dma_addr = buf + (2 * sizeof(*migrate.src) * npages); - pages = buf + (2 * sizeof(*migrate.src) + sizeof(*dma_addr)) * npages; - - migrate.vma = vas; - migrate.src = buf; - migrate.dst = migrate.src + npages; - - err = migrate_vma_setup(&migrate); - if (err) - goto err_free; - - /* Raced with another CPU fault, nothing to do */ - if (!migrate.cpages) - goto err_free; - - if (!page) { - for (i = 0; i < npages; ++i) { - if (!(migrate.src[i] & MIGRATE_PFN_MIGRATE)) - continue; - - page = migrate_pfn_to_page(migrate.src[i]); - break; - } - - if (!page) - goto err_finalize; - } - zdd = page->zone_device_data; - ops = zdd->devmem_allocation->ops; - dev = zdd->devmem_allocation->dev; - - err = drm_gpusvm_migrate_populate_ram_pfn(vas, page, npages, &mpages, - migrate.src, migrate.dst, - start); - if (err) - goto err_finalize; - - err = drm_gpusvm_migrate_map_pages(dev, dma_addr, migrate.dst, npages, - DMA_FROM_DEVICE); - if (err) - goto err_finalize; - - for (i = 0; i < npages; ++i) - pages[i] = migrate_pfn_to_page(migrate.src[i]); - - err = ops->copy_to_ram(pages, dma_addr, npages); - if (err) - goto err_finalize; - -err_finalize: - if (err) - 
drm_gpusvm_migration_unlock_put_pages(npages, migrate.dst); - migrate_vma_pages(&migrate); - migrate_vma_finalize(&migrate); - if (dev) - drm_gpusvm_migrate_unmap_pages(dev, dma_addr, npages, - DMA_FROM_DEVICE); -err_free: - kvfree(buf); -err_out: + unsigned long npages = npages_in_range(drm_gpusvm_range_start(range), + drm_gpusvm_range_end(range)); - return err; + return drm_gpusvm_unmap_pages(gpusvm, &range->pages, npages, ctx); } +EXPORT_SYMBOL_GPL(drm_gpusvm_range_unmap_pages); /** - * drm_gpusvm_range_evict - Evict GPU SVM range + * drm_gpusvm_range_evict() - Evict GPU SVM range + * @gpusvm: Pointer to the GPU SVM structure * @range: Pointer to the GPU SVM range to be removed * - * This function evicts the specified GPU SVM range. This function will not - * evict coherent pages. + * This function evicts the specified GPU SVM range. * * Return: 0 on success, a negative error code on failure. */ @@ -2184,60 +1586,6 @@ int drm_gpusvm_range_evict(struct drm_gpusvm *gpusvm, EXPORT_SYMBOL_GPL(drm_gpusvm_range_evict); /** - * drm_gpusvm_page_free() - Put GPU SVM zone device data associated with a page - * @page: Pointer to the page - * - * This function is a callback used to put the GPU SVM zone device data - * associated with a page when it is being released. - */ -static void drm_gpusvm_page_free(struct page *page) -{ - drm_gpusvm_zdd_put(page->zone_device_data); -} - -/** - * drm_gpusvm_migrate_to_ram() - Migrate GPU SVM range to RAM (page fault handler) - * @vmf: Pointer to the fault information structure - * - * This function is a page fault handler used to migrate a GPU SVM range to RAM. - * It retrieves the GPU SVM range information from the faulting page and invokes - * the internal migration function to migrate the range back to RAM. - * - * Return: VM_FAULT_SIGBUS on failure, 0 on success. - */ -static vm_fault_t drm_gpusvm_migrate_to_ram(struct vm_fault *vmf) -{ - struct drm_gpusvm_zdd *zdd = vmf->page->zone_device_data; - int err; - - err = __drm_gpusvm_migrate_to_ram(vmf->vma, - zdd->device_private_page_owner, - vmf->page, vmf->address, - zdd->devmem_allocation->size); - - return err ? VM_FAULT_SIGBUS : 0; -} - -/* - * drm_gpusvm_pagemap_ops - Device page map operations for GPU SVM - */ -static const struct dev_pagemap_ops drm_gpusvm_pagemap_ops = { - .page_free = drm_gpusvm_page_free, - .migrate_to_ram = drm_gpusvm_migrate_to_ram, -}; - -/** - * drm_gpusvm_pagemap_ops_get() - Retrieve GPU SVM device page map operations - * - * Return: Pointer to the GPU SVM device page map operations structure. - */ -const struct dev_pagemap_ops *drm_gpusvm_pagemap_ops_get(void) -{ - return &drm_gpusvm_pagemap_ops; -} -EXPORT_SYMBOL_GPL(drm_gpusvm_pagemap_ops_get); - -/** * drm_gpusvm_has_mapping() - Check if GPU SVM has mapping for the given address range * @gpusvm: Pointer to the GPU SVM structure. 
* @start: Start address @@ -2274,35 +1622,12 @@ void drm_gpusvm_range_set_unmapped(struct drm_gpusvm_range *range, { lockdep_assert_held_write(&range->gpusvm->notifier_lock); - range->flags.unmapped = true; + range->pages.flags.unmapped = true; if (drm_gpusvm_range_start(range) < mmu_range->start || drm_gpusvm_range_end(range) > mmu_range->end) - range->flags.partial_unmap = true; + range->pages.flags.partial_unmap = true; } EXPORT_SYMBOL_GPL(drm_gpusvm_range_set_unmapped); -/** - * drm_gpusvm_devmem_init() - Initialize a GPU SVM device memory allocation - * - * @dev: Pointer to the device structure which device memory allocation belongs to - * @mm: Pointer to the mm_struct for the address space - * @ops: Pointer to the operations structure for GPU SVM device memory - * @dpagemap: The struct drm_pagemap we're allocating from. - * @size: Size of device memory allocation - */ -void drm_gpusvm_devmem_init(struct drm_gpusvm_devmem *devmem_allocation, - struct device *dev, struct mm_struct *mm, - const struct drm_gpusvm_devmem_ops *ops, - struct drm_pagemap *dpagemap, size_t size) -{ - init_completion(&devmem_allocation->detached); - devmem_allocation->dev = dev; - devmem_allocation->mm = mm; - devmem_allocation->ops = ops; - devmem_allocation->dpagemap = dpagemap; - devmem_allocation->size = size; -} -EXPORT_SYMBOL_GPL(drm_gpusvm_devmem_init); - MODULE_DESCRIPTION("DRM GPUSVM"); MODULE_LICENSE("GPL"); diff --git a/drivers/gpu/drm/drm_gpuvm.c b/drivers/gpu/drm/drm_gpuvm.c index f62005ff9b2e..af63f4d00315 100644 --- a/drivers/gpu/drm/drm_gpuvm.c +++ b/drivers/gpu/drm/drm_gpuvm.c @@ -40,7 +40,7 @@ * mapping's backing &drm_gem_object buffers. * * &drm_gem_object buffers maintain a list of &drm_gpuva objects representing - * all existent GPU VA mappings using this &drm_gem_object as backing buffer. + * all existing GPU VA mappings using this &drm_gem_object as backing buffer. * * GPU VAs can be flagged as sparse, such that drivers may use GPU VAs to also * keep track of sparse PTEs in order to support Vulkan 'Sparse Resources'. @@ -72,7 +72,7 @@ * but it can also be a 'dummy' object, which can be allocated with * drm_gpuvm_resv_object_alloc(). * - * In order to connect a struct drm_gpuva its backing &drm_gem_object each + * In order to connect a struct drm_gpuva to its backing &drm_gem_object each * &drm_gem_object maintains a list of &drm_gpuvm_bo structures, and each * &drm_gpuvm_bo contains a list of &drm_gpuva structures. * @@ -81,7 +81,7 @@ * This is ensured by the API through drm_gpuvm_bo_obtain() and * drm_gpuvm_bo_obtain_prealloc() which first look into the corresponding * &drm_gem_object list of &drm_gpuvm_bos for an existing instance of this - * particular combination. If not existent a new instance is created and linked + * particular combination. If not present, a new instance is created and linked * to the &drm_gem_object. * * &drm_gpuvm_bo structures, since unique for a given &drm_gpuvm, are also used @@ -108,7 +108,7 @@ * sequence of operations to satisfy a given map or unmap request. * * Therefore the DRM GPU VA manager provides an algorithm implementing splitting - * and merging of existent GPU VA mappings with the ones that are requested to + * and merging of existing GPU VA mappings with the ones that are requested to * be mapped or unmapped. This feature is required by the Vulkan API to * implement Vulkan 'Sparse Memory Bindings' - drivers UAPIs often refer to this * as VM BIND. 
@@ -119,7 +119,7 @@ * execute in order to integrate the new mapping cleanly into the current state * of the GPU VA space. * - * Depending on how the new GPU VA mapping intersects with the existent mappings + * Depending on how the new GPU VA mapping intersects with the existing mappings * of the GPU VA space the &drm_gpuvm_ops callbacks contain an arbitrary amount * of unmap operations, a maximum of two remap operations and a single map * operation. The caller might receive no callback at all if no operation is @@ -139,16 +139,16 @@ * one unmap operation and one or two map operations, such that drivers can * derive the page table update delta accordingly. * - * Note that there can't be more than two existent mappings to split up, one at + * Note that there can't be more than two existing mappings to split up, one at * the beginning and one at the end of the new mapping, hence there is a * maximum of two remap operations. * * Analogous to drm_gpuvm_sm_map() drm_gpuvm_sm_unmap() uses &drm_gpuvm_ops to * call back into the driver in order to unmap a range of GPU VA space. The - * logic behind this function is way simpler though: For all existent mappings + * logic behind this function is way simpler though: For all existing mappings * enclosed by the given range unmap operations are created. For mappings which - * are only partically located within the given range, remap operations are - * created such that those mappings are split up and re-mapped partically. + * are only partially located within the given range, remap operations are + * created such that those mappings are split up and re-mapped partially. * * As an alternative to drm_gpuvm_sm_map() and drm_gpuvm_sm_unmap(), * drm_gpuvm_sm_map_ops_create() and drm_gpuvm_sm_unmap_ops_create() can be used @@ -168,7 +168,7 @@ * provided helper functions drm_gpuva_map(), drm_gpuva_remap() and * drm_gpuva_unmap() instead. * - * The following diagram depicts the basic relationships of existent GPU VA + * The following diagram depicts the basic relationships of existing GPU VA * mappings, a newly requested mapping and the resulting mappings as implemented * by drm_gpuvm_sm_map() - it doesn't cover any arbitrary combinations of these. * @@ -218,7 +218,7 @@ * * * 4) Existent mapping is a left aligned subset of the requested one, hence - * replace the existent one. + * replace the existing one. * * :: * @@ -236,9 +236,9 @@ * and/or non-contiguous BO offset. * * - * 5) Requested mapping's range is a left aligned subset of the existent one, + * 5) Requested mapping's range is a left aligned subset of the existing one, * but backed by a different BO. Hence, map the requested mapping and split - * the existent one adjusting its BO offset. + * the existing one adjusting its BO offset. * * :: * @@ -271,9 +271,9 @@ * new: |-----|-----| (a.bo_offset=n, a'.bo_offset=n+1) * * - * 7) Requested mapping's range is a right aligned subset of the existent one, + * 7) Requested mapping's range is a right aligned subset of the existing one, * but backed by a different BO. Hence, map the requested mapping and split - * the existent one, without adjusting the BO offset. + * the existing one, without adjusting the BO offset. * * :: * @@ -304,7 +304,7 @@ * * 9) Existent mapping is overlapped at the end by the requested mapping backed * by a different BO. Hence, map the requested mapping and split up the - * existent one, without adjusting the BO offset. + * existing one, without adjusting the BO offset. 
* * :: * @@ -334,9 +334,9 @@ * new: |-----|-----------| (a'.bo_offset=n, a.bo_offset=n+1) * * - * 11) Requested mapping's range is a centered subset of the existent one + * 11) Requested mapping's range is a centered subset of the existing one * having a different backing BO. Hence, map the requested mapping and split - * up the existent one in two mappings, adjusting the BO offset of the right + * up the existing one in two mappings, adjusting the BO offset of the right * one accordingly. * * :: @@ -351,7 +351,7 @@ * new: |-----|-----|-----| (a.bo_offset=n,b.bo_offset=m,a'.bo_offset=n+2) * * - * 12) Requested mapping is a contiguous subset of the existent one. Split it + * 12) Requested mapping is a contiguous subset of the existing one. Split it * up, but indicate that the backing PTEs could be kept. * * :: @@ -367,7 +367,7 @@ * * * 13) Existent mapping is a right aligned subset of the requested one, hence - * replace the existent one. + * replace the existing one. * * :: * @@ -386,7 +386,7 @@ * * * 14) Existent mapping is a centered subset of the requested one, hence - * replace the existent one. + * replace the existing one. * * :: * @@ -406,7 +406,7 @@ * * 15) Existent mappings is overlapped at the beginning by the requested mapping * backed by a different BO. Hence, map the requested mapping and split up - * the existent one, adjusting its BO offset accordingly. + * the existing one, adjusting its BO offset accordingly. * * :: * @@ -421,6 +421,71 @@ */ /** + * DOC: Madvise Logic - Splitting and Traversal + * + * This logic handles GPU VA range updates by generating remap and map operations + * without performing unmaps or merging existing mappings. + * + * 1) The requested range lies entirely within a single drm_gpuva. The logic splits + * the existing mapping at the start and end boundaries and inserts a new map. + * + * :: + * a start end b + * pre: |-----------------------| + * drm_gpuva1 + * + * a start end b + * new: |-----|=========|-------| + * remap map remap + * + * one REMAP and one MAP : Same behaviour as SPLIT and MERGE + * + * 2) The requested range spans multiple drm_gpuva regions. The logic traverses + * across boundaries, remapping the start and end segments, and inserting two + * map operations to cover the full range. + * + * :: a start b c end d + * pre: |------------------|--------------|------------------| + * drm_gpuva1 drm_gpuva2 drm_gpuva3 + * + * a start b c end d + * new: |-------|==========|--------------|========|---------| + * remap1 map1 drm_gpuva2 map2 remap2 + * + * two REMAPS and two MAPS + * + * 3) Either start or end lies within a drm_gpuva. A single remap and map operation + * are generated to update the affected portion. + * + * + * :: a/start b c end d + * pre: |------------------|--------------|------------------| + * drm_gpuva1 drm_gpuva2 drm_gpuva3 + * + * a/start b c end d + * new: |------------------|--------------|========|---------| + * drm_gpuva1 drm_gpuva2 map1 remap1 + * + * :: a start b c/end d + * pre: |------------------|--------------|------------------| + * drm_gpuva1 drm_gpuva2 drm_gpuva3 + * + * a start b c/end d + * new: |-------|==========|--------------|------------------| + * remap1 map1 drm_gpuva2 drm_gpuva3 + * + * one REMAP and one MAP + * + * 4) Both start and end align with existing drm_gpuva boundaries. No operations + * are needed as the range is already covered. + * + * 5) No existing drm_gpuvas. No operations. 
+ * + * Unlike drm_gpuvm_sm_map_ops_create, this logic avoids unmaps and merging, + * focusing solely on remap and map operations for efficient traversal and update. + */ + +/** * DOC: Locking * * In terms of managing &drm_gpuva entries DRM GPUVM does not take care of @@ -432,8 +497,7 @@ * DRM GPUVM also does not take care of the locking of the backing * &drm_gem_object buffers GPU VA lists and &drm_gpuvm_bo abstractions by * itself; drivers are responsible to enforce mutual exclusion using either the - * GEMs dma_resv lock or alternatively a driver specific external lock. For the - * latter see also drm_gem_gpuva_set_lock(). + * GEMs dma_resv lock or the GEMs gpuva.lock mutex. * * However, DRM GPUVM contains lockdep checks to ensure callers of its API hold * the corresponding lock whenever the &drm_gem_objects GPU VA list is accessed @@ -469,8 +533,8 @@ * make use of them. * * The below code is strictly limited to illustrate the generic usage pattern. - * To maintain simplicitly, it doesn't make use of any abstractions for common - * code, different (asyncronous) stages with fence signalling critical paths, + * To maintain simplicity, it doesn't make use of any abstractions for common + * code, different (asynchronous) stages with fence signalling critical paths, * any other helpers or error handling in terms of freeing memory and dropping * previously taken locks. * @@ -479,20 +543,25 @@ * // Allocates a new &drm_gpuva. * struct drm_gpuva * driver_gpuva_alloc(void); * - * // Typically drivers would embedd the &drm_gpuvm and &drm_gpuva + * // Typically drivers would embed the &drm_gpuvm and &drm_gpuva * // structure in individual driver structures and lock the dma-resv with * // drm_exec or similar helpers. * int driver_mapping_create(struct drm_gpuvm *gpuvm, * u64 addr, u64 range, * struct drm_gem_object *obj, u64 offset) * { + * struct drm_gpuvm_map_req map_req = { + * .map.va.addr = addr, + * .map.va.range = range, + * .map.gem.obj = obj, + * .map.gem.offset = offset, + * }; * struct drm_gpuva_ops *ops; * struct drm_gpuva_op *op * struct drm_gpuvm_bo *vm_bo; * * driver_lock_va_space(); - * ops = drm_gpuvm_sm_map_ops_create(gpuvm, addr, range, - * obj, offset); + * ops = drm_gpuvm_sm_map_ops_create(gpuvm, &map_req); * if (IS_ERR(ops)) * return PTR_ERR(ops); * @@ -582,7 +651,7 @@ * .sm_step_unmap = driver_gpuva_unmap, * }; * - * // Typically drivers would embedd the &drm_gpuvm and &drm_gpuva + * // Typically drivers would embed the &drm_gpuvm and &drm_gpuva * // structure in individual driver structures and lock the dma-resv with * // drm_exec or similar helpers. * int driver_mapping_create(struct drm_gpuvm *gpuvm, @@ -680,7 +749,7 @@ * * This helper is here to provide lockless list iteration. Lockless as in, the * iterator releases the lock immediately after picking the first element from - * the list, so list insertion deletion can happen concurrently. + * the list, so list insertion and deletion can happen concurrently. * * Elements popped from the original list are kept in a local list, so removal * and is_empty checks can still happen while we're iterating the list. 
@@ -1160,7 +1229,7 @@ drm_gpuvm_prepare_objects_locked(struct drm_gpuvm *gpuvm, } /** - * drm_gpuvm_prepare_objects() - prepare all assoiciated BOs + * drm_gpuvm_prepare_objects() - prepare all associated BOs * @gpuvm: the &drm_gpuvm * @exec: the &drm_exec locking context * @num_fences: the amount of &dma_fences to reserve @@ -1230,13 +1299,13 @@ drm_gpuvm_prepare_range(struct drm_gpuvm *gpuvm, struct drm_exec *exec, EXPORT_SYMBOL_GPL(drm_gpuvm_prepare_range); /** - * drm_gpuvm_exec_lock() - lock all dma-resv of all assoiciated BOs + * drm_gpuvm_exec_lock() - lock all dma-resv of all associated BOs * @vm_exec: the &drm_gpuvm_exec wrapper * * Acquires all dma-resv locks of all &drm_gem_objects the given * &drm_gpuvm contains mappings of. * - * Addionally, when calling this function with struct drm_gpuvm_exec::extra + * Additionally, when calling this function with struct drm_gpuvm_exec::extra * being set the driver receives the given @fn callback to lock additional * dma-resv in the context of the &drm_gpuvm_exec instance. Typically, drivers * would call drm_exec_prepare_obj() from within this callback. @@ -1293,7 +1362,7 @@ fn_lock_array(struct drm_gpuvm_exec *vm_exec) } /** - * drm_gpuvm_exec_lock_array() - lock all dma-resv of all assoiciated BOs + * drm_gpuvm_exec_lock_array() - lock all dma-resv of all associated BOs * @vm_exec: the &drm_gpuvm_exec wrapper * @objs: additional &drm_gem_objects to lock * @num_objs: the number of additional &drm_gem_objects to lock @@ -1512,7 +1581,7 @@ drm_gpuvm_bo_destroy(struct kref *kref) drm_gpuvm_bo_list_del(vm_bo, extobj, lock); drm_gpuvm_bo_list_del(vm_bo, evict, lock); - drm_gem_gpuva_assert_lock_held(obj); + drm_gem_gpuva_assert_lock_held(gpuvm, obj); list_del(&vm_bo->list.entry.gem); if (ops && ops->vm_bo_free) @@ -1533,7 +1602,8 @@ drm_gpuvm_bo_destroy(struct kref *kref) * If the reference count drops to zero, the &gpuvm_bo is destroyed, which * includes removing it from the GEMs gpuva list. Hence, if a call to this * function can potentially let the reference count drop to zero the caller must - * hold the dma-resv or driver specific GEM gpuva lock. + * hold the lock that the GEM uses for its gpuva list (either the GEM's + * dma-resv or gpuva.lock mutex). * * This function may only be called from non-atomic context. * @@ -1557,7 +1627,7 @@ __drm_gpuvm_bo_find(struct drm_gpuvm *gpuvm, { struct drm_gpuvm_bo *vm_bo; - drm_gem_gpuva_assert_lock_held(obj); + drm_gem_gpuva_assert_lock_held(gpuvm, obj); drm_gem_for_each_gpuvm_bo(vm_bo, obj) if (vm_bo->vm == gpuvm) return vm_bo; @@ -1588,7 +1658,7 @@ drm_gpuvm_bo_find(struct drm_gpuvm *gpuvm, EXPORT_SYMBOL_GPL(drm_gpuvm_bo_find); /** - * drm_gpuvm_bo_obtain() - obtains and instance of the &drm_gpuvm_bo for the + * drm_gpuvm_bo_obtain() - obtains an instance of the &drm_gpuvm_bo for the * given &drm_gpuvm and &drm_gem_object * @gpuvm: The &drm_gpuvm the @obj is mapped in. * @obj: The &drm_gem_object being mapped in the @gpuvm. 
@@ -1616,7 +1686,7 @@ drm_gpuvm_bo_obtain(struct drm_gpuvm *gpuvm, if (!vm_bo) return ERR_PTR(-ENOMEM); - drm_gem_gpuva_assert_lock_held(obj); + drm_gem_gpuva_assert_lock_held(gpuvm, obj); list_add_tail(&vm_bo->list.entry.gem, &obj->gpuva.list); return vm_bo; @@ -1624,7 +1694,7 @@ drm_gpuvm_bo_obtain(struct drm_gpuvm *gpuvm, EXPORT_SYMBOL_GPL(drm_gpuvm_bo_obtain); /** - * drm_gpuvm_bo_obtain_prealloc() - obtains and instance of the &drm_gpuvm_bo + * drm_gpuvm_bo_obtain_prealloc() - obtains an instance of the &drm_gpuvm_bo * for the given &drm_gpuvm and &drm_gem_object * @__vm_bo: A pre-allocated struct drm_gpuvm_bo. * @@ -1652,7 +1722,7 @@ drm_gpuvm_bo_obtain_prealloc(struct drm_gpuvm_bo *__vm_bo) return vm_bo; } - drm_gem_gpuva_assert_lock_held(obj); + drm_gem_gpuva_assert_lock_held(gpuvm, obj); list_add_tail(&__vm_bo->list.entry.gem, &obj->gpuva.list); return __vm_bo; @@ -1688,7 +1758,7 @@ EXPORT_SYMBOL_GPL(drm_gpuvm_bo_extobj_add); * @vm_bo: the &drm_gpuvm_bo to add or remove * @evict: indicates whether the object is evicted * - * Adds a &drm_gpuvm_bo to or removes it from the &drm_gpuvms evicted list. + * Adds a &drm_gpuvm_bo to or removes it from the &drm_gpuvm's evicted list. */ void drm_gpuvm_bo_evict(struct drm_gpuvm_bo *vm_bo, bool evict) @@ -1790,7 +1860,7 @@ __drm_gpuva_remove(struct drm_gpuva *va) * drm_gpuva_remove() - remove a &drm_gpuva * @va: the &drm_gpuva to remove * - * This removes the given &va from the underlaying tree. + * This removes the given &va from the underlying tree. * * It is safe to use this function using the safe versions of iterating the GPU * VA space, such as drm_gpuvm_for_each_va_safe() and @@ -1824,8 +1894,7 @@ EXPORT_SYMBOL_GPL(drm_gpuva_remove); * reference of the latter is taken. * * This function expects the caller to protect the GEM's GPUVA list against - * concurrent access using either the GEMs dma_resv lock or a driver specific - * lock set through drm_gem_gpuva_set_lock(). + * concurrent access using either the GEM's dma-resv or gpuva.lock mutex. */ void drm_gpuva_link(struct drm_gpuva *va, struct drm_gpuvm_bo *vm_bo) @@ -1840,7 +1909,7 @@ drm_gpuva_link(struct drm_gpuva *va, struct drm_gpuvm_bo *vm_bo) va->vm_bo = drm_gpuvm_bo_get(vm_bo); - drm_gem_gpuva_assert_lock_held(obj); + drm_gem_gpuva_assert_lock_held(gpuvm, obj); list_add_tail(&va->gem.entry, &vm_bo->list.gpuva); } EXPORT_SYMBOL_GPL(drm_gpuva_link); @@ -1860,8 +1929,7 @@ EXPORT_SYMBOL_GPL(drm_gpuva_link); * the latter is dropped. * * This function expects the caller to protect the GEM's GPUVA list against - * concurrent access using either the GEMs dma_resv lock or a driver specific - * lock set through drm_gem_gpuva_set_lock(). + * concurrent access using either the GEM's dma-resv or gpuva.lock mutex. 
*/ void drm_gpuva_unlink(struct drm_gpuva *va) @@ -1872,7 +1940,7 @@ drm_gpuva_unlink(struct drm_gpuva *va) if (unlikely(!obj)) return; - drm_gem_gpuva_assert_lock_held(obj); + drm_gem_gpuva_assert_lock_held(va->vm, obj); list_del_init(&va->gem.entry); va->vm_bo = NULL; @@ -2054,16 +2122,18 @@ EXPORT_SYMBOL_GPL(drm_gpuva_unmap); static int op_map_cb(const struct drm_gpuvm_ops *fn, void *priv, - u64 addr, u64 range, - struct drm_gem_object *obj, u64 offset) + const struct drm_gpuvm_map_req *req) { struct drm_gpuva_op op = {}; + if (!req) + return 0; + op.op = DRM_GPUVA_OP_MAP; - op.map.va.addr = addr; - op.map.va.range = range; - op.map.gem.obj = obj; - op.map.gem.offset = offset; + op.map.va.addr = req->map.va.addr; + op.map.va.range = req->map.va.range; + op.map.gem.obj = req->map.gem.obj; + op.map.gem.offset = req->map.gem.offset; return fn->sm_step_map(&op, priv); } @@ -2088,10 +2158,13 @@ op_remap_cb(const struct drm_gpuvm_ops *fn, void *priv, static int op_unmap_cb(const struct drm_gpuvm_ops *fn, void *priv, - struct drm_gpuva *va, bool merge) + struct drm_gpuva *va, bool merge, bool madvise) { struct drm_gpuva_op op = {}; + if (madvise) + return 0; + op.op = DRM_GPUVA_OP_UNMAP; op.unmap.va = va; op.unmap.keep = merge; @@ -2102,10 +2175,15 @@ op_unmap_cb(const struct drm_gpuvm_ops *fn, void *priv, static int __drm_gpuvm_sm_map(struct drm_gpuvm *gpuvm, const struct drm_gpuvm_ops *ops, void *priv, - u64 req_addr, u64 req_range, - struct drm_gem_object *req_obj, u64 req_offset) + const struct drm_gpuvm_map_req *req, + bool madvise) { + struct drm_gem_object *req_obj = req->map.gem.obj; + const struct drm_gpuvm_map_req *op_map = madvise ? NULL : req; struct drm_gpuva *va, *next; + u64 req_offset = req->map.gem.offset; + u64 req_range = req->map.va.range; + u64 req_addr = req->map.va.addr; u64 req_end = req_addr + req_range; int ret; @@ -2120,19 +2198,22 @@ __drm_gpuvm_sm_map(struct drm_gpuvm *gpuvm, u64 end = addr + range; bool merge = !!va->gem.obj; + if (madvise && obj) + continue; + if (addr == req_addr) { merge &= obj == req_obj && offset == req_offset; if (end == req_end) { - ret = op_unmap_cb(ops, priv, va, merge); + ret = op_unmap_cb(ops, priv, va, merge, madvise); if (ret) return ret; break; } if (end < req_end) { - ret = op_unmap_cb(ops, priv, va, merge); + ret = op_unmap_cb(ops, priv, va, merge, madvise); if (ret) return ret; continue; @@ -2153,6 +2234,9 @@ __drm_gpuvm_sm_map(struct drm_gpuvm *gpuvm, ret = op_remap_cb(ops, priv, NULL, &n, &u); if (ret) return ret; + + if (madvise) + op_map = req; break; } } else if (addr < req_addr) { @@ -2173,6 +2257,9 @@ __drm_gpuvm_sm_map(struct drm_gpuvm *gpuvm, ret = op_remap_cb(ops, priv, &p, NULL, &u); if (ret) return ret; + + if (madvise) + op_map = req; break; } @@ -2180,6 +2267,18 @@ __drm_gpuvm_sm_map(struct drm_gpuvm *gpuvm, ret = op_remap_cb(ops, priv, &p, NULL, &u); if (ret) return ret; + + if (madvise) { + struct drm_gpuvm_map_req map_req = { + .map.va.addr = req_addr, + .map.va.range = end - req_addr, + }; + + ret = op_map_cb(ops, priv, &map_req); + if (ret) + return ret; + } + continue; } @@ -2195,6 +2294,9 @@ __drm_gpuvm_sm_map(struct drm_gpuvm *gpuvm, ret = op_remap_cb(ops, priv, &p, &n, &u); if (ret) return ret; + + if (madvise) + op_map = req; break; } } else if (addr > req_addr) { @@ -2203,16 +2305,18 @@ __drm_gpuvm_sm_map(struct drm_gpuvm *gpuvm, (addr - req_addr); if (end == req_end) { - ret = op_unmap_cb(ops, priv, va, merge); + ret = op_unmap_cb(ops, priv, va, merge, madvise); if (ret) return ret; + break; } if (end < 
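The lock the assert now checks for is selected per GPUVM: either the GEM's dma-resv or the GEM's gpuva.lock mutex mentioned in the updated documentation. A minimal sketch of linking a VA under the mutex variant; treating gpuva.lock as a plain mutex on the GEM object is an assumption here:

	struct drm_gem_object *obj = vm_bo->obj;

	mutex_lock(&obj->gpuva.lock);	/* or hold obj's dma-resv instead */
	drm_gpuva_link(va, vm_bo);
	mutex_unlock(&obj->gpuva.lock);
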
req_end) { - ret = op_unmap_cb(ops, priv, va, merge); + ret = op_unmap_cb(ops, priv, va, merge, madvise); if (ret) return ret; + continue; } @@ -2231,14 +2335,20 @@ __drm_gpuvm_sm_map(struct drm_gpuvm *gpuvm, ret = op_remap_cb(ops, priv, NULL, &n, &u); if (ret) return ret; + + if (madvise) { + struct drm_gpuvm_map_req map_req = { + .map.va.addr = addr, + .map.va.range = req_end - addr, + }; + + return op_map_cb(ops, priv, &map_req); + } break; } } } - - return op_map_cb(ops, priv, - req_addr, req_range, - req_obj, req_offset); + return op_map_cb(ops, priv, op_map); } static int @@ -2290,7 +2400,7 @@ __drm_gpuvm_sm_unmap(struct drm_gpuvm *gpuvm, if (ret) return ret; } else { - ret = op_unmap_cb(ops, priv, va, false); + ret = op_unmap_cb(ops, priv, va, false, false); if (ret) return ret; } @@ -2303,10 +2413,7 @@ __drm_gpuvm_sm_unmap(struct drm_gpuvm *gpuvm, * drm_gpuvm_sm_map() - calls the &drm_gpuva_op split/merge steps * @gpuvm: the &drm_gpuvm representing the GPU VA space * @priv: pointer to a driver private data structure - * @req_addr: the start address of the new mapping - * @req_range: the range of the new mapping - * @req_obj: the &drm_gem_object to map - * @req_offset: the offset within the &drm_gem_object + * @req: ptr to struct drm_gpuvm_map_req * * This function iterates the given range of the GPU VA space. It utilizes the * &drm_gpuvm_ops to call back into the driver providing the split and merge @@ -2333,8 +2440,7 @@ __drm_gpuvm_sm_unmap(struct drm_gpuvm *gpuvm, */ int drm_gpuvm_sm_map(struct drm_gpuvm *gpuvm, void *priv, - u64 req_addr, u64 req_range, - struct drm_gem_object *req_obj, u64 req_offset) + const struct drm_gpuvm_map_req *req) { const struct drm_gpuvm_ops *ops = gpuvm->ops; @@ -2343,9 +2449,7 @@ drm_gpuvm_sm_map(struct drm_gpuvm *gpuvm, void *priv, ops->sm_step_unmap))) return -EINVAL; - return __drm_gpuvm_sm_map(gpuvm, ops, priv, - req_addr, req_range, - req_obj, req_offset); + return __drm_gpuvm_sm_map(gpuvm, ops, priv, req, false); } EXPORT_SYMBOL_GPL(drm_gpuvm_sm_map); @@ -2358,7 +2462,7 @@ EXPORT_SYMBOL_GPL(drm_gpuvm_sm_map); * * This function iterates the given range of the GPU VA space. It utilizes the * &drm_gpuvm_ops to call back into the driver providing the operations to - * unmap and, if required, split existent mappings. + * unmap and, if required, split existing mappings. * * Drivers may use these callbacks to update the GPU VA space right away within * the callback. 
In case the driver decides to copy and store the operations for @@ -2421,10 +2525,7 @@ static const struct drm_gpuvm_ops lock_ops = { * @gpuvm: the &drm_gpuvm representing the GPU VA space * @exec: the &drm_exec locking context * @num_fences: for newly mapped objects, the # of fences to reserve - * @req_addr: the start address of the range to unmap - * @req_range: the range of the mappings to unmap - * @req_obj: the &drm_gem_object to map - * @req_offset: the offset within the &drm_gem_object + * @req: ptr to drm_gpuvm_map_req struct * * This function locks (drm_exec_lock_obj()) objects that will be unmapped/ * remapped, and locks+prepares (drm_exec_prepare_object()) objects that @@ -2445,9 +2546,7 @@ static const struct drm_gpuvm_ops lock_ops = { * ret = drm_gpuvm_sm_unmap_exec_lock(gpuvm, &exec, op->addr, op->range); * break; * case DRIVER_OP_MAP: - * ret = drm_gpuvm_sm_map_exec_lock(gpuvm, &exec, num_fences, - * op->addr, op->range, - * obj, op->obj_offset); + * ret = drm_gpuvm_sm_map_exec_lock(gpuvm, &exec, num_fences, &req); * break; * } * @@ -2473,23 +2572,22 @@ static const struct drm_gpuvm_ops lock_ops = { * required without the earlier DRIVER_OP_MAP. This is safe because we've * already locked the GEM object in the earlier DRIVER_OP_MAP step. * - * Returns: 0 on success or a negative error codec + * Returns: 0 on success or a negative error code */ int drm_gpuvm_sm_map_exec_lock(struct drm_gpuvm *gpuvm, struct drm_exec *exec, unsigned int num_fences, - u64 req_addr, u64 req_range, - struct drm_gem_object *req_obj, u64 req_offset) + struct drm_gpuvm_map_req *req) { + struct drm_gem_object *req_obj = req->map.gem.obj; + if (req_obj) { int ret = drm_exec_prepare_obj(exec, req_obj, num_fences); if (ret) return ret; } - return __drm_gpuvm_sm_map(gpuvm, &lock_ops, exec, - req_addr, req_range, - req_obj, req_offset); + return __drm_gpuvm_sm_map(gpuvm, &lock_ops, exec, req, false); } EXPORT_SYMBOL_GPL(drm_gpuvm_sm_map_exec_lock); @@ -2608,21 +2706,50 @@ static const struct drm_gpuvm_ops gpuvm_list_ops = { .sm_step_unmap = drm_gpuva_sm_step, }; +static struct drm_gpuva_ops * +__drm_gpuvm_sm_map_ops_create(struct drm_gpuvm *gpuvm, + const struct drm_gpuvm_map_req *req, + bool madvise) +{ + struct drm_gpuva_ops *ops; + struct { + struct drm_gpuvm *vm; + struct drm_gpuva_ops *ops; + } args; + int ret; + + ops = kzalloc(sizeof(*ops), GFP_KERNEL); + if (unlikely(!ops)) + return ERR_PTR(-ENOMEM); + + INIT_LIST_HEAD(&ops->list); + + args.vm = gpuvm; + args.ops = ops; + + ret = __drm_gpuvm_sm_map(gpuvm, &gpuvm_list_ops, &args, req, madvise); + if (ret) + goto err_free_ops; + + return ops; + +err_free_ops: + drm_gpuva_ops_free(gpuvm, ops); + return ERR_PTR(ret); +} + /** * drm_gpuvm_sm_map_ops_create() - creates the &drm_gpuva_ops to split and merge * @gpuvm: the &drm_gpuvm representing the GPU VA space - * @req_addr: the start address of the new mapping - * @req_range: the range of the new mapping - * @req_obj: the &drm_gem_object to map - * @req_offset: the offset within the &drm_gem_object + * @req: map request arguments * * This function creates a list of operations to perform splitting and merging - * of existent mapping(s) with the newly requested one. + * of existing mapping(s) with the newly requested one. * * The list can be iterated with &drm_gpuva_for_each_op and must be processed * in the given order. It can contain map, unmap and remap operations, but it * also can be empty if no operation is required, e.g. if the requested mapping - * already exists is the exact same way. 
+ * already exists in the exact same way. * * There can be an arbitrary amount of unmap operations, a maximum of two remap * operations and a single map operation. The latter one represents the original @@ -2642,40 +2769,50 @@ static const struct drm_gpuvm_ops gpuvm_list_ops = { */ struct drm_gpuva_ops * drm_gpuvm_sm_map_ops_create(struct drm_gpuvm *gpuvm, - u64 req_addr, u64 req_range, - struct drm_gem_object *req_obj, u64 req_offset) + const struct drm_gpuvm_map_req *req) { - struct drm_gpuva_ops *ops; - struct { - struct drm_gpuvm *vm; - struct drm_gpuva_ops *ops; - } args; - int ret; - - ops = kzalloc(sizeof(*ops), GFP_KERNEL); - if (unlikely(!ops)) - return ERR_PTR(-ENOMEM); - - INIT_LIST_HEAD(&ops->list); - - args.vm = gpuvm; - args.ops = ops; - - ret = __drm_gpuvm_sm_map(gpuvm, &gpuvm_list_ops, &args, - req_addr, req_range, - req_obj, req_offset); - if (ret) - goto err_free_ops; - - return ops; - -err_free_ops: - drm_gpuva_ops_free(gpuvm, ops); - return ERR_PTR(ret); + return __drm_gpuvm_sm_map_ops_create(gpuvm, req, false); } EXPORT_SYMBOL_GPL(drm_gpuvm_sm_map_ops_create); /** + * drm_gpuvm_madvise_ops_create() - creates the &drm_gpuva_ops to split + * @gpuvm: the &drm_gpuvm representing the GPU VA space + * @req: map request arguments + * + * This function creates a list of operations to perform splitting + * of existent mapping(s) at start or end, based on the request map. + * + * The list can be iterated with &drm_gpuva_for_each_op and must be processed + * in the given order. It can contain map and remap operations, but it + * also can be empty if no operation is required, e.g. if the requested mapping + * already exists is the exact same way. + * + * There will be no unmap operations, a maximum of two remap operations and two + * map operations. The two map operations correspond to: one from start to the + * end of drm_gpuvaX, and another from the start of drm_gpuvaY to end. + * + * Note that before calling this function again with another mapping request it + * is necessary to update the &drm_gpuvm's view of the GPU VA space. The + * previously obtained operations must be either processed or abandoned. To + * update the &drm_gpuvm's view of the GPU VA space drm_gpuva_insert(), + * drm_gpuva_destroy_locked() and/or drm_gpuva_destroy_unlocked() should be + * used. + * + * After the caller finished processing the returned &drm_gpuva_ops, they must + * be freed with &drm_gpuva_ops_free. + * + * Returns: a pointer to the &drm_gpuva_ops on success, an ERR_PTR on failure + */ +struct drm_gpuva_ops * +drm_gpuvm_madvise_ops_create(struct drm_gpuvm *gpuvm, + const struct drm_gpuvm_map_req *req) +{ + return __drm_gpuvm_sm_map_ops_create(gpuvm, req, true); +} +EXPORT_SYMBOL_GPL(drm_gpuvm_madvise_ops_create); + +/** * drm_gpuvm_sm_unmap_ops_create() - creates the &drm_gpuva_ops to split on * unmap * @gpuvm: the &drm_gpuvm representing the GPU VA space @@ -2804,8 +2941,8 @@ EXPORT_SYMBOL_GPL(drm_gpuvm_prefetch_ops_create); * After the caller finished processing the returned &drm_gpuva_ops, they must * be freed with &drm_gpuva_ops_free. * - * It is the callers responsibility to protect the GEMs GPUVA list against - * concurrent access using the GEMs dma_resv lock. + * This function expects the caller to protect the GEM's GPUVA list against + * concurrent access using either the GEM's dma-resv or gpuva.lock mutex. 
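A minimal sketch of consuming the new madvise ops: only the VA fields are set in the request (the madvise walk skips existing GEM-backed mappings), and the resulting list contains at most remap and map operations. driver_apply_attrs() is a hypothetical per-op handler:

	static int driver_madvise(struct drm_gpuvm *gpuvm, u64 addr, u64 range)
	{
		struct drm_gpuvm_map_req req = {
			.map.va.addr = addr,
			.map.va.range = range,
		};
		struct drm_gpuva_ops *ops;
		struct drm_gpuva_op *op;
		int ret = 0;

		ops = drm_gpuvm_madvise_ops_create(gpuvm, &req);
		if (IS_ERR(ops))
			return PTR_ERR(ops);

		drm_gpuva_for_each_op(op, ops) {
			ret = driver_apply_attrs(gpuvm, op);	/* hypothetical */
			if (ret)
				break;
		}

		drm_gpuva_ops_free(gpuvm, ops);
		return ret;
	}
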
* * Returns: a pointer to the &drm_gpuva_ops on success, an ERR_PTR on failure */ @@ -2817,7 +2954,7 @@ drm_gpuvm_bo_unmap_ops_create(struct drm_gpuvm_bo *vm_bo) struct drm_gpuva *va; int ret; - drm_gem_gpuva_assert_lock_held(vm_bo->obj); + drm_gem_gpuva_assert_lock_held(vm_bo->vm, vm_bo->obj); ops = kzalloc(sizeof(*ops), GFP_KERNEL); if (!ops) diff --git a/drivers/gpu/drm/drm_internal.h b/drivers/gpu/drm/drm_internal.h index 442eb31351dd..5a3bed48ab1f 100644 --- a/drivers/gpu/drm/drm_internal.h +++ b/drivers/gpu/drm/drm_internal.h @@ -85,6 +85,8 @@ int drm_prime_fd_to_handle_ioctl(struct drm_device *dev, void *data, void drm_prime_init_file_private(struct drm_prime_file_private *prime_fpriv); void drm_prime_destroy_file_private(struct drm_prime_file_private *prime_fpriv); +int drm_prime_add_buf_handle(struct drm_prime_file_private *prime_fpriv, + struct dma_buf *dma_buf, uint32_t handle); void drm_prime_remove_buf_handle(struct drm_prime_file_private *prime_fpriv, uint32_t handle); @@ -161,6 +163,8 @@ void drm_sysfs_lease_event(struct drm_device *dev); /* drm_gem.c */ int drm_gem_init(struct drm_device *dev); +bool drm_gem_object_handle_get_if_exists_unlocked(struct drm_gem_object *obj); +void drm_gem_object_handle_put_unlocked(struct drm_gem_object *obj); int drm_gem_handle_create_tail(struct drm_file *file_priv, struct drm_gem_object *obj, u32 *handlep); @@ -168,6 +172,8 @@ int drm_gem_close_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv); int drm_gem_flink_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv); +int drm_gem_change_handle_ioctl(struct drm_device *dev, void *data, + struct drm_file *file_priv); int drm_gem_open_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv); void drm_gem_open(struct drm_device *dev, struct drm_file *file_private); @@ -182,8 +188,7 @@ void drm_gem_vunmap_locked(struct drm_gem_object *obj, struct iosys_map *map); #if defined(CONFIG_DEBUG_FS) void drm_debugfs_dev_fini(struct drm_device *dev); void drm_debugfs_dev_register(struct drm_device *dev); -int drm_debugfs_register(struct drm_minor *minor, int minor_id, - struct dentry *root); +int drm_debugfs_register(struct drm_minor *minor, int minor_id); void drm_debugfs_unregister(struct drm_minor *minor); void drm_debugfs_connector_add(struct drm_connector *connector); void drm_debugfs_connector_remove(struct drm_connector *connector); @@ -201,8 +206,7 @@ static inline void drm_debugfs_dev_register(struct drm_device *dev) { } -static inline int drm_debugfs_register(struct drm_minor *minor, int minor_id, - struct dentry *root) +static inline int drm_debugfs_register(struct drm_minor *minor, int minor_id) { return 0; } diff --git a/drivers/gpu/drm/drm_ioctl.c b/drivers/gpu/drm/drm_ioctl.c index f593dc569d31..d8a24875a7ba 100644 --- a/drivers/gpu/drm/drm_ioctl.c +++ b/drivers/gpu/drm/drm_ioctl.c @@ -653,6 +653,7 @@ static const struct drm_ioctl_desc drm_ioctls[] = { DRM_IOCTL_DEF(DRM_IOCTL_GEM_CLOSE, drm_gem_close_ioctl, DRM_RENDER_ALLOW), DRM_IOCTL_DEF(DRM_IOCTL_GEM_FLINK, drm_gem_flink_ioctl, DRM_AUTH), DRM_IOCTL_DEF(DRM_IOCTL_GEM_OPEN, drm_gem_open_ioctl, DRM_AUTH), + DRM_IOCTL_DEF(DRM_IOCTL_GEM_CHANGE_HANDLE, drm_gem_change_handle_ioctl, DRM_RENDER_ALLOW), DRM_IOCTL_DEF(DRM_IOCTL_MODE_GETRESOURCES, drm_mode_getresources, 0), diff --git a/drivers/gpu/drm/drm_mipi_dbi.c b/drivers/gpu/drm/drm_mipi_dbi.c index ba4be6be5d28..e33c78fc8fbd 100644 --- a/drivers/gpu/drm/drm_mipi_dbi.c +++ b/drivers/gpu/drm/drm_mipi_dbi.c @@ -230,7 +230,13 @@ int 
mipi_dbi_buf_copy(void *dst, struct iosys_map *src, struct drm_framebuffer * case DRM_FORMAT_XRGB8888: switch (dbidev->pixel_format) { case DRM_FORMAT_RGB565: - drm_fb_xrgb8888_to_rgb565(&dst_map, NULL, src, fb, clip, fmtcnv_state, swap); + if (swap) { + drm_fb_xrgb8888_to_rgb565be(&dst_map, NULL, src, fb, clip, + fmtcnv_state); + } else { + drm_fb_xrgb8888_to_rgb565(&dst_map, NULL, src, fb, clip, + fmtcnv_state); + } break; case DRM_FORMAT_RGB888: drm_fb_xrgb8888_to_rgb888(&dst_map, NULL, src, fb, clip, fmtcnv_state); diff --git a/drivers/gpu/drm/drm_mipi_dsi.c b/drivers/gpu/drm/drm_mipi_dsi.c index a00d76443128..a712e177b350 100644 --- a/drivers/gpu/drm/drm_mipi_dsi.c +++ b/drivers/gpu/drm/drm_mipi_dsi.c @@ -92,12 +92,13 @@ static const struct dev_pm_ops mipi_dsi_device_pm_ops = { .restore = pm_generic_restore, }; -static const struct bus_type mipi_dsi_bus_type = { +const struct bus_type mipi_dsi_bus_type = { .name = "mipi-dsi", .match = mipi_dsi_device_match, .uevent = mipi_dsi_uevent, .pm = &mipi_dsi_device_pm_ops, }; +EXPORT_SYMBOL_GPL(mipi_dsi_bus_type); /** * of_find_mipi_dsi_device_by_node() - find the MIPI DSI device matching a @@ -772,41 +773,13 @@ ssize_t mipi_dsi_generic_write(struct mipi_dsi_device *dsi, const void *payload, EXPORT_SYMBOL(mipi_dsi_generic_write); /** - * mipi_dsi_generic_write_chatty() - mipi_dsi_generic_write() w/ an error log - * @dsi: DSI peripheral device - * @payload: buffer containing the payload - * @size: size of payload buffer - * - * Like mipi_dsi_generic_write() but includes a dev_err() - * call for you and returns 0 upon success, not the number of bytes sent. - * - * Return: 0 on success or a negative error code on failure. - */ -int mipi_dsi_generic_write_chatty(struct mipi_dsi_device *dsi, - const void *payload, size_t size) -{ - struct device *dev = &dsi->dev; - ssize_t ret; - - ret = mipi_dsi_generic_write(dsi, payload, size); - if (ret < 0) { - dev_err(dev, "sending generic data %*ph failed: %zd\n", - (int)size, payload, ret); - return ret; - } - - return 0; -} -EXPORT_SYMBOL(mipi_dsi_generic_write_chatty); - -/** - * mipi_dsi_generic_write_multi() - mipi_dsi_generic_write_chatty() w/ accum_err + * mipi_dsi_generic_write_multi() - mipi_dsi_generic_write() w/ accum_err * @ctx: Context for multiple DSI transactions * @payload: buffer containing the payload * @size: size of payload buffer * - * Like mipi_dsi_generic_write_chatty() but deals with errors in a way that - * makes it convenient to make several calls in a row. + * A wrapper around mipi_dsi_generic_write() that deals with errors in a way + * that makes it convenient to make several calls in a row. */ void mipi_dsi_generic_write_multi(struct mipi_dsi_multi_context *ctx, const void *payload, size_t size) @@ -828,6 +801,30 @@ void mipi_dsi_generic_write_multi(struct mipi_dsi_multi_context *ctx, EXPORT_SYMBOL(mipi_dsi_generic_write_multi); /** + * mipi_dsi_dual_generic_write_multi() - mipi_dsi_generic_write_multi() for + * two dsi channels, one after the other + * @ctx: Context for multiple DSI transactions + * @dsi1: First dsi channel to write buffer to + * @dsi2: Second dsi channel to write buffer to + * @payload: Buffer containing the payload + * @size: Size of payload buffer + * + * A wrapper around mipi_dsi_generic_write_multi() that allows the user to + * conveniently write to two dsi channels, one after the other. 
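As a usage sketch (illustrative, not from this patch), a dual-link panel driver can issue the same generic command to both links and check the accumulated error once; the my_panel struct and command bytes are hypothetical:

	struct my_panel { struct mipi_dsi_device *dsi[2]; };

	static int my_panel_dual_init(struct my_panel *p)
	{
		struct mipi_dsi_multi_context ctx = { .dsi = p->dsi[0] };
		static const u8 unlock[] = { 0xb0, 0x04 };

		mipi_dsi_dual_generic_write_multi(&ctx, p->dsi[0], p->dsi[1],
						  unlock, sizeof(unlock));
		/* further *_multi() calls; errors accumulate in ctx.accum_err */
		return ctx.accum_err;
	}

Note that the helper leaves ctx->dsi pointing at the second device, so callers that fall back to single-link writes afterwards should reset it.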
+ */ +void mipi_dsi_dual_generic_write_multi(struct mipi_dsi_multi_context *ctx, + struct mipi_dsi_device *dsi1, + struct mipi_dsi_device *dsi2, + const void *payload, size_t size) +{ + ctx->dsi = dsi1; + mipi_dsi_generic_write_multi(ctx, payload, size); + ctx->dsi = dsi2; + mipi_dsi_generic_write_multi(ctx, payload, size); +} +EXPORT_SYMBOL(mipi_dsi_dual_generic_write_multi); + +/** * mipi_dsi_generic_read() - receive data using a generic read packet * @dsi: DSI peripheral device * @params: buffer containing the request parameters @@ -1007,6 +1004,30 @@ void mipi_dsi_dcs_write_buffer_multi(struct mipi_dsi_multi_context *ctx, EXPORT_SYMBOL(mipi_dsi_dcs_write_buffer_multi); /** + * mipi_dsi_dual_dcs_write_buffer_multi - mipi_dsi_dcs_write_buffer_multi() for + * two dsi channels, one after the other + * @ctx: Context for multiple DSI transactions + * @dsi1: First dsi channel to write buffer to + * @dsi2: Second dsi channel to write buffer to + * @data: Buffer containing data to be transmitted + * @len: Size of transmission buffer + * + * A wrapper around mipi_dsi_dcs_write_buffer_multi() that allows the user to + * conveniently write to two dsi channels, one after the other. + */ +void mipi_dsi_dual_dcs_write_buffer_multi(struct mipi_dsi_multi_context *ctx, + struct mipi_dsi_device *dsi1, + struct mipi_dsi_device *dsi2, + const void *data, size_t len) +{ + ctx->dsi = dsi1; + mipi_dsi_dcs_write_buffer_multi(ctx, data, len); + ctx->dsi = dsi2; + mipi_dsi_dcs_write_buffer_multi(ctx, data, len); +} +EXPORT_SYMBOL(mipi_dsi_dual_dcs_write_buffer_multi); + +/** * mipi_dsi_dcs_write() - send DCS write command * @dsi: DSI peripheral device * @cmd: DCS command @@ -1076,6 +1097,43 @@ ssize_t mipi_dsi_dcs_read(struct mipi_dsi_device *dsi, u8 cmd, void *data, EXPORT_SYMBOL(mipi_dsi_dcs_read); /** + * mipi_dsi_dcs_read_multi() - mipi_dsi_dcs_read() w/ accum_err + * @ctx: Context for multiple DSI transactions + * @cmd: DCS command + * @data: buffer in which to receive data + * @len: size of receive buffer + * + * Like mipi_dsi_dcs_read() but deals with errors in a way that makes it + * convenient to make several calls in a row. 
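A short sketch of the intended call pattern for the read helper (illustrative, not from this patch); 0x04 stands in for MIPI_DCS_GET_DISPLAY_ID and any DCS read command works the same way:

	static void my_panel_read_id(struct mipi_dsi_multi_context *ctx)
	{
		u8 id[3] = {};

		mipi_dsi_dcs_read_multi(ctx, 0x04, id, sizeof(id));
		if (ctx->accum_err)
			return;

		/* id[] now holds the panel response */
	}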
+ */ +void mipi_dsi_dcs_read_multi(struct mipi_dsi_multi_context *ctx, u8 cmd, + void *data, size_t len) +{ + struct mipi_dsi_device *dsi = ctx->dsi; + struct device *dev = &dsi->dev; + struct mipi_dsi_msg msg = { + .channel = dsi->channel, + .type = MIPI_DSI_DCS_READ, + .tx_buf = &cmd, + .tx_len = 1, + .rx_buf = data, + .rx_len = len + }; + ssize_t ret; + + if (ctx->accum_err) + return; + + ret = mipi_dsi_device_transfer(dsi, &msg); + if (ret < 0) { + ctx->accum_err = ret; + dev_err(dev, "dcs read with command %#x failed: %d\n", cmd, + ctx->accum_err); + } +} +EXPORT_SYMBOL(mipi_dsi_dcs_read_multi); + +/** * mipi_dsi_dcs_nop() - send DCS nop packet * @dsi: DSI peripheral device * diff --git a/drivers/gpu/drm/drm_modeset_helper.c b/drivers/gpu/drm/drm_modeset_helper.c index ef32f6af10d4..988735560570 100644 --- a/drivers/gpu/drm/drm_modeset_helper.c +++ b/drivers/gpu/drm/drm_modeset_helper.c @@ -74,6 +74,7 @@ EXPORT_SYMBOL(drm_helper_move_panel_connectors_to_head); * drm_helper_mode_fill_fb_struct - fill out framebuffer metadata * @dev: DRM device * @fb: drm_framebuffer object to fill out + * @info: pixel format information * @mode_cmd: metadata from the userspace fb creation request * * This helper can be used in a drivers fb_create callback to pre-fill the fb's @@ -81,12 +82,13 @@ EXPORT_SYMBOL(drm_helper_move_panel_connectors_to_head); */ void drm_helper_mode_fill_fb_struct(struct drm_device *dev, struct drm_framebuffer *fb, + const struct drm_format_info *info, const struct drm_mode_fb_cmd2 *mode_cmd) { int i; fb->dev = dev; - fb->format = drm_get_format_info(dev, mode_cmd); + fb->format = info; fb->width = mode_cmd->width; fb->height = mode_cmd->height; for (i = 0; i < 4; i++) { diff --git a/drivers/gpu/drm/drm_of.c b/drivers/gpu/drm/drm_of.c index d0183dea7703..4f65ce729a47 100644 --- a/drivers/gpu/drm/drm_of.c +++ b/drivers/gpu/drm/drm_of.c @@ -55,7 +55,8 @@ EXPORT_SYMBOL(drm_of_crtc_port_mask); * and generate the DRM mask of CRTCs which may be attached to this * encoder. * - * See Documentation/devicetree/bindings/graph.txt for the bindings. + * See https://github.com/devicetree-org/dt-schema/blob/main/dtschema/schemas/graph.yaml + * for the bindings. */ uint32_t drm_of_find_possible_crtcs(struct drm_device *dev, struct device_node *port) @@ -106,7 +107,9 @@ EXPORT_SYMBOL_GPL(drm_of_component_match_add); * Parse the platform device OF node and bind all the components associated * with the master. Interface ports are added before the encoders in order to * satisfy their .bind requirements - * See Documentation/devicetree/bindings/graph.txt for the bindings. + * + * See https://github.com/devicetree-org/dt-schema/blob/main/dtschema/schemas/graph.yaml + * for the bindings. * * Returns zero if successful, or one of the standard error codes if it fails. 
*/ diff --git a/drivers/gpu/drm/drm_pagemap.c b/drivers/gpu/drm/drm_pagemap.c new file mode 100644 index 000000000000..22c44807e3fe --- /dev/null +++ b/drivers/gpu/drm/drm_pagemap.c @@ -0,0 +1,882 @@ +// SPDX-License-Identifier: GPL-2.0-only OR MIT +/* + * Copyright © 2024-2025 Intel Corporation + */ + +#include <linux/dma-mapping.h> +#include <linux/migrate.h> +#include <linux/pagemap.h> +#include <drm/drm_drv.h> +#include <drm/drm_pagemap.h> + +/** + * DOC: Overview + * + * The DRM pagemap layer is intended to augment the dev_pagemap functionality by + * providing a way to populate a struct mm_struct virtual range with device + * private pages and to provide helpers to abstract device memory allocations, + * to migrate memory back and forth between device memory and system RAM and + * to handle access (and in the future migration) between devices implementing + * a fast interconnect that is not necessarily visible to the rest of the + * system. + * + * Typically the DRM pagemap receives requests from one or more DRM GPU SVM + * instances to populate struct mm_struct virtual ranges with memory, and the + * migration is best effort only and may thus fail. The implementation should + * also handle device unbinding by blocking (return an -ENODEV) error for new + * population requests and after that migrate all device pages to system ram. + */ + +/** + * DOC: Migration + * + * Migration granularity typically follows the GPU SVM range requests, but + * if there are clashes, due to races or due to the fact that multiple GPU + * SVM instances have different views of the ranges used, and because of that + * parts of a requested range is already present in the requested device memory, + * the implementation has a variety of options. It can fail and it can choose + * to populate only the part of the range that isn't already in device memory, + * and it can evict the range to system before trying to migrate. Ideally an + * implementation would just try to migrate the missing part of the range and + * allocate just enough memory to do so. + * + * When migrating to system memory as a response to a cpu fault or a device + * memory eviction request, currently a full device memory allocation is + * migrated back to system. Moving forward this might need improvement for + * situations where a single page needs bouncing between system memory and + * device memory due to, for example, atomic operations. + * + * Key DRM pagemap components: + * + * - Device Memory Allocations: + * Embedded structure containing enough information for the drm_pagemap to + * migrate to / from device memory. + * + * - Device Memory Operations: + * Define the interface for driver-specific device memory operations + * release memory, populate pfns, and copy to / from device memory. + */ + +/** + * struct drm_pagemap_zdd - GPU SVM zone device data + * + * @refcount: Reference count for the zdd + * @devmem_allocation: device memory allocation + * @device_private_page_owner: Device private pages owner + * + * This structure serves as a generic wrapper installed in + * page->zone_device_data. It provides infrastructure for looking up a device + * memory allocation upon CPU page fault and asynchronously releasing device + * memory once the CPU has no page references. Asynchronous release is useful + * because CPU page references can be dropped in IRQ contexts, while releasing + * device memory likely requires sleeping locks. 
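A sketch of the driver-side hooks a drm_pagemap implementation provides; only the hook names and their call sites below are taken from this file, the exact prototypes should be checked against drm_pagemap.h, and the bodies here are placeholders:

	static int my_populate_devmem_pfn(struct drm_pagemap_devmem *devmem,
					  unsigned long npages, unsigned long *pfn)
	{
		/* fill pfn[] with device-private PFNs backing this allocation */
		return 0;
	}

	static int my_copy_to_devmem(struct page **pages,
				     struct drm_pagemap_addr *addr,
				     unsigned long npages)
	{
		/* copy from the DMA-mapped system pages into device memory */
		return 0;
	}

	static int my_copy_to_ram(struct page **pages,
				  struct drm_pagemap_addr *addr,
				  unsigned long npages)
	{
		/* copy from device memory back into the DMA-mapped system pages */
		return 0;
	}

	static const struct drm_pagemap_devmem_ops my_devmem_ops = {
		.populate_devmem_pfn = my_populate_devmem_pfn,
		.copy_to_devmem = my_copy_to_devmem,
		.copy_to_ram = my_copy_to_ram,
		/* .devmem_release is optional; called once the CPU drops all page refs */
	};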
+ */ +struct drm_pagemap_zdd { + struct kref refcount; + struct drm_pagemap_devmem *devmem_allocation; + void *device_private_page_owner; +}; + +/** + * drm_pagemap_zdd_alloc() - Allocate a zdd structure. + * @device_private_page_owner: Device private pages owner + * + * This function allocates and initializes a new zdd structure. It sets up the + * reference count and initializes the destroy work. + * + * Return: Pointer to the allocated zdd on success, ERR_PTR() on failure. + */ +static struct drm_pagemap_zdd * +drm_pagemap_zdd_alloc(void *device_private_page_owner) +{ + struct drm_pagemap_zdd *zdd; + + zdd = kmalloc(sizeof(*zdd), GFP_KERNEL); + if (!zdd) + return NULL; + + kref_init(&zdd->refcount); + zdd->devmem_allocation = NULL; + zdd->device_private_page_owner = device_private_page_owner; + + return zdd; +} + +/** + * drm_pagemap_zdd_get() - Get a reference to a zdd structure. + * @zdd: Pointer to the zdd structure. + * + * This function increments the reference count of the provided zdd structure. + * + * Return: Pointer to the zdd structure. + */ +static struct drm_pagemap_zdd *drm_pagemap_zdd_get(struct drm_pagemap_zdd *zdd) +{ + kref_get(&zdd->refcount); + return zdd; +} + +/** + * drm_pagemap_zdd_destroy() - Destroy a zdd structure. + * @ref: Pointer to the reference count structure. + * + * This function queues the destroy_work of the zdd for asynchronous destruction. + */ +static void drm_pagemap_zdd_destroy(struct kref *ref) +{ + struct drm_pagemap_zdd *zdd = + container_of(ref, struct drm_pagemap_zdd, refcount); + struct drm_pagemap_devmem *devmem = zdd->devmem_allocation; + + if (devmem) { + complete_all(&devmem->detached); + if (devmem->ops->devmem_release) + devmem->ops->devmem_release(devmem); + } + kfree(zdd); +} + +/** + * drm_pagemap_zdd_put() - Put a zdd reference. + * @zdd: Pointer to the zdd structure. + * + * This function decrements the reference count of the provided zdd structure + * and schedules its destruction if the count drops to zero. + */ +static void drm_pagemap_zdd_put(struct drm_pagemap_zdd *zdd) +{ + kref_put(&zdd->refcount, drm_pagemap_zdd_destroy); +} + +/** + * drm_pagemap_migration_unlock_put_page() - Put a migration page + * @page: Pointer to the page to put + * + * This function unlocks and puts a page. + */ +static void drm_pagemap_migration_unlock_put_page(struct page *page) +{ + unlock_page(page); + put_page(page); +} + +/** + * drm_pagemap_migration_unlock_put_pages() - Put migration pages + * @npages: Number of pages + * @migrate_pfn: Array of migrate page frame numbers + * + * This function unlocks and puts an array of pages. + */ +static void drm_pagemap_migration_unlock_put_pages(unsigned long npages, + unsigned long *migrate_pfn) +{ + unsigned long i; + + for (i = 0; i < npages; ++i) { + struct page *page; + + if (!migrate_pfn[i]) + continue; + + page = migrate_pfn_to_page(migrate_pfn[i]); + drm_pagemap_migration_unlock_put_page(page); + migrate_pfn[i] = 0; + } +} + +/** + * drm_pagemap_get_devmem_page() - Get a reference to a device memory page + * @page: Pointer to the page + * @zdd: Pointer to the GPU SVM zone device data + * + * This function associates the given page with the specified GPU SVM zone + * device data and initializes it for zone device usage. 
+ */ +static void drm_pagemap_get_devmem_page(struct page *page, + struct drm_pagemap_zdd *zdd) +{ + page->zone_device_data = drm_pagemap_zdd_get(zdd); + zone_device_page_init(page); +} + +/** + * drm_pagemap_migrate_map_pages() - Map migration pages for GPU SVM migration + * @dev: The device for which the pages are being mapped + * @pagemap_addr: Array to store DMA information corresponding to mapped pages + * @migrate_pfn: Array of migrate page frame numbers to map + * @npages: Number of pages to map + * @dir: Direction of data transfer (e.g., DMA_BIDIRECTIONAL) + * + * This function maps pages of memory for migration usage in GPU SVM. It + * iterates over each page frame number provided in @migrate_pfn, maps the + * corresponding page, and stores the DMA address in the provided @dma_addr + * array. + * + * Returns: 0 on success, -EFAULT if an error occurs during mapping. + */ +static int drm_pagemap_migrate_map_pages(struct device *dev, + struct drm_pagemap_addr *pagemap_addr, + unsigned long *migrate_pfn, + unsigned long npages, + enum dma_data_direction dir) +{ + unsigned long i; + + for (i = 0; i < npages;) { + struct page *page = migrate_pfn_to_page(migrate_pfn[i]); + dma_addr_t dma_addr; + struct folio *folio; + unsigned int order = 0; + + if (!page) + goto next; + + if (WARN_ON_ONCE(is_zone_device_page(page))) + return -EFAULT; + + folio = page_folio(page); + order = folio_order(folio); + + dma_addr = dma_map_page(dev, page, 0, page_size(page), dir); + if (dma_mapping_error(dev, dma_addr)) + return -EFAULT; + + pagemap_addr[i] = + drm_pagemap_addr_encode(dma_addr, + DRM_INTERCONNECT_SYSTEM, + order, dir); + +next: + i += NR_PAGES(order); + } + + return 0; +} + +/** + * drm_pagemap_migrate_unmap_pages() - Unmap pages previously mapped for GPU SVM migration + * @dev: The device for which the pages were mapped + * @pagemap_addr: Array of DMA information corresponding to mapped pages + * @npages: Number of pages to unmap + * @dir: Direction of data transfer (e.g., DMA_BIDIRECTIONAL) + * + * This function unmaps previously mapped pages of memory for GPU Shared Virtual + * Memory (SVM). It iterates over each DMA address provided in @dma_addr, checks + * if it's valid and not already unmapped, and unmaps the corresponding page. + */ +static void drm_pagemap_migrate_unmap_pages(struct device *dev, + struct drm_pagemap_addr *pagemap_addr, + unsigned long npages, + enum dma_data_direction dir) +{ + unsigned long i; + + for (i = 0; i < npages;) { + if (!pagemap_addr[i].addr || dma_mapping_error(dev, pagemap_addr[i].addr)) + goto next; + + dma_unmap_page(dev, pagemap_addr[i].addr, PAGE_SIZE << pagemap_addr[i].order, dir); + +next: + i += NR_PAGES(pagemap_addr[i].order); + } +} + +static unsigned long +npages_in_range(unsigned long start, unsigned long end) +{ + return (end - start) >> PAGE_SHIFT; +} + +/** + * drm_pagemap_migrate_to_devmem() - Migrate a struct mm_struct range to device memory + * @devmem_allocation: The device memory allocation to migrate to. + * The caller should hold a reference to the device memory allocation, + * and the reference is consumed by this function unless it returns with + * an error. + * @mm: Pointer to the struct mm_struct. + * @start: Start of the virtual address range to migrate. + * @end: End of the virtual address range to migrate. + * @timeslice_ms: The time requested for the migrated pagemap pages to + * be present in @mm before being allowed to be migrated back. + * @pgmap_owner: Not used currently, since only system memory is considered. 
+ * + * This function migrates the specified virtual address range to device memory. + * It performs the necessary setup and invokes the driver-specific operations for + * migration to device memory. Expected to be called while holding the mmap lock in + * at least read mode. + * + * Note: The @timeslice_ms parameter can typically be used to force data to + * remain in pagemap pages long enough for a GPU to perform a task and to prevent + * a migration livelock. One alternative would be for the GPU driver to block + * in a mmu_notifier for the specified amount of time, but adding the + * functionality to the pagemap is likely nicer to the system as a whole. + * + * Return: %0 on success, negative error code on failure. + */ +int drm_pagemap_migrate_to_devmem(struct drm_pagemap_devmem *devmem_allocation, + struct mm_struct *mm, + unsigned long start, unsigned long end, + unsigned long timeslice_ms, + void *pgmap_owner) +{ + const struct drm_pagemap_devmem_ops *ops = devmem_allocation->ops; + struct migrate_vma migrate = { + .start = start, + .end = end, + .pgmap_owner = pgmap_owner, + .flags = MIGRATE_VMA_SELECT_SYSTEM, + }; + unsigned long i, npages = npages_in_range(start, end); + struct vm_area_struct *vas; + struct drm_pagemap_zdd *zdd = NULL; + struct page **pages; + struct drm_pagemap_addr *pagemap_addr; + void *buf; + int err; + + mmap_assert_locked(mm); + + if (!ops->populate_devmem_pfn || !ops->copy_to_devmem || + !ops->copy_to_ram) + return -EOPNOTSUPP; + + vas = vma_lookup(mm, start); + if (!vas) { + err = -ENOENT; + goto err_out; + } + + if (end > vas->vm_end || start < vas->vm_start) { + err = -EINVAL; + goto err_out; + } + + if (!vma_is_anonymous(vas)) { + err = -EBUSY; + goto err_out; + } + + buf = kvcalloc(npages, 2 * sizeof(*migrate.src) + sizeof(*pagemap_addr) + + sizeof(*pages), GFP_KERNEL); + if (!buf) { + err = -ENOMEM; + goto err_out; + } + pagemap_addr = buf + (2 * sizeof(*migrate.src) * npages); + pages = buf + (2 * sizeof(*migrate.src) + sizeof(*pagemap_addr)) * npages; + + zdd = drm_pagemap_zdd_alloc(pgmap_owner); + if (!zdd) { + err = -ENOMEM; + goto err_free; + } + + migrate.vma = vas; + migrate.src = buf; + migrate.dst = migrate.src + npages; + + err = migrate_vma_setup(&migrate); + if (err) + goto err_free; + + if (!migrate.cpages) { + err = -EFAULT; + goto err_free; + } + + if (migrate.cpages != npages) { + err = -EBUSY; + goto err_finalize; + } + + err = ops->populate_devmem_pfn(devmem_allocation, npages, migrate.dst); + if (err) + goto err_finalize; + + err = drm_pagemap_migrate_map_pages(devmem_allocation->dev, pagemap_addr, + migrate.src, npages, DMA_TO_DEVICE); + + if (err) + goto err_finalize; + + for (i = 0; i < npages; ++i) { + struct page *page = pfn_to_page(migrate.dst[i]); + + pages[i] = page; + migrate.dst[i] = migrate_pfn(migrate.dst[i]); + drm_pagemap_get_devmem_page(page, zdd); + } + + err = ops->copy_to_devmem(pages, pagemap_addr, npages); + if (err) + goto err_finalize; + + /* Upon success bind devmem allocation to range and zdd */ + devmem_allocation->timeslice_expiration = get_jiffies_64() + + msecs_to_jiffies(timeslice_ms); + zdd->devmem_allocation = devmem_allocation; /* Owns ref */ + +err_finalize: + if (err) + drm_pagemap_migration_unlock_put_pages(npages, migrate.dst); + migrate_vma_pages(&migrate); + migrate_vma_finalize(&migrate); + drm_pagemap_migrate_unmap_pages(devmem_allocation->dev, pagemap_addr, npages, + DMA_TO_DEVICE); +err_free: + if (zdd) + drm_pagemap_zdd_put(zdd); + kvfree(buf); +err_out: + return err; +} 
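For illustration (not part of this patch), a driver fault or prefetch path could drive the migration as below; my_alloc_devmem()/my_free_devmem() and the 5 ms timeslice are placeholders:

	static int my_migrate_to_vram(struct mm_struct *mm,
				      unsigned long start, unsigned long end,
				      void *pgmap_owner)
	{
		struct drm_pagemap_devmem *devmem;
		int err;

		devmem = my_alloc_devmem(end - start);	/* caller holds a reference */
		if (IS_ERR(devmem))
			return PTR_ERR(devmem);

		mmap_read_lock(mm);
		err = drm_pagemap_migrate_to_devmem(devmem, mm, start, end,
						    5, pgmap_owner);
		mmap_read_unlock(mm);

		/* the reference is only consumed on success */
		if (err)
			my_free_devmem(devmem);

		return err;
	}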
+EXPORT_SYMBOL_GPL(drm_pagemap_migrate_to_devmem); + +/** + * drm_pagemap_migrate_populate_ram_pfn() - Populate RAM PFNs for a VM area + * @vas: Pointer to the VM area structure, can be NULL + * @fault_page: Fault page + * @npages: Number of pages to populate + * @mpages: Number of pages to migrate + * @src_mpfn: Source array of migrate PFNs + * @mpfn: Array of migrate PFNs to populate + * @addr: Start address for PFN allocation + * + * This function populates the RAM migrate page frame numbers (PFNs) for the + * specified VM area structure. It allocates and locks pages in the VM area for + * RAM usage. If vas is non-NULL use alloc_page_vma for allocation, if NULL use + * alloc_page for allocation. + * + * Return: 0 on success, negative error code on failure. + */ +static int drm_pagemap_migrate_populate_ram_pfn(struct vm_area_struct *vas, + struct page *fault_page, + unsigned long npages, + unsigned long *mpages, + unsigned long *src_mpfn, + unsigned long *mpfn, + unsigned long addr) +{ + unsigned long i; + + for (i = 0; i < npages;) { + struct page *page = NULL, *src_page; + struct folio *folio; + unsigned int order = 0; + + if (!(src_mpfn[i] & MIGRATE_PFN_MIGRATE)) + goto next; + + src_page = migrate_pfn_to_page(src_mpfn[i]); + if (!src_page) + goto next; + + if (fault_page) { + if (src_page->zone_device_data != + fault_page->zone_device_data) + goto next; + } + + order = folio_order(page_folio(src_page)); + + /* TODO: Support fallback to single pages if THP allocation fails */ + if (vas) + folio = vma_alloc_folio(GFP_HIGHUSER, order, vas, addr); + else + folio = folio_alloc(GFP_HIGHUSER, order); + + if (!folio) + goto free_pages; + + page = folio_page(folio, 0); + mpfn[i] = migrate_pfn(page_to_pfn(page)); + +next: + if (page) + addr += page_size(page); + else + addr += PAGE_SIZE; + + i += NR_PAGES(order); + } + + for (i = 0; i < npages;) { + struct page *page = migrate_pfn_to_page(mpfn[i]); + unsigned int order = 0; + + if (!page) + goto next_lock; + + WARN_ON_ONCE(!folio_trylock(page_folio(page))); + + order = folio_order(page_folio(page)); + *mpages += NR_PAGES(order); + +next_lock: + i += NR_PAGES(order); + } + + return 0; + +free_pages: + for (i = 0; i < npages;) { + struct page *page = migrate_pfn_to_page(mpfn[i]); + unsigned int order = 0; + + if (!page) + goto next_put; + + put_page(page); + mpfn[i] = 0; + + order = folio_order(page_folio(page)); + +next_put: + i += NR_PAGES(order); + } + return -ENOMEM; +} + +/** + * drm_pagemap_evict_to_ram() - Evict GPU SVM range to RAM + * @devmem_allocation: Pointer to the device memory allocation + * + * Similar to __drm_pagemap_migrate_to_ram but does not require mmap lock and + * migration done via migrate_device_* functions. + * + * Return: 0 on success, negative error code on failure. 
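A corresponding sketch of the eviction side (illustrative only); unlike the fault path this can be called without the mmap lock, e.g. from a VRAM shrinker or a device unbind path:

	static int my_evict_allocation(struct drm_pagemap_devmem *devmem)
	{
		int err = drm_pagemap_evict_to_ram(devmem);

		if (err)	/* e.g. -EBUSY once the internal retries are exhausted */
			return err;

		/* all pages are back in system RAM; the VRAM backing can be reused */
		return 0;
	}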
+ */ +int drm_pagemap_evict_to_ram(struct drm_pagemap_devmem *devmem_allocation) +{ + const struct drm_pagemap_devmem_ops *ops = devmem_allocation->ops; + unsigned long npages, mpages = 0; + struct page **pages; + unsigned long *src, *dst; + struct drm_pagemap_addr *pagemap_addr; + void *buf; + int i, err = 0; + unsigned int retry_count = 2; + + npages = devmem_allocation->size >> PAGE_SHIFT; + +retry: + if (!mmget_not_zero(devmem_allocation->mm)) + return -EFAULT; + + buf = kvcalloc(npages, 2 * sizeof(*src) + sizeof(*pagemap_addr) + + sizeof(*pages), GFP_KERNEL); + if (!buf) { + err = -ENOMEM; + goto err_out; + } + src = buf; + dst = buf + (sizeof(*src) * npages); + pagemap_addr = buf + (2 * sizeof(*src) * npages); + pages = buf + (2 * sizeof(*src) + sizeof(*pagemap_addr)) * npages; + + err = ops->populate_devmem_pfn(devmem_allocation, npages, src); + if (err) + goto err_free; + + err = migrate_device_pfns(src, npages); + if (err) + goto err_free; + + err = drm_pagemap_migrate_populate_ram_pfn(NULL, NULL, npages, &mpages, + src, dst, 0); + if (err || !mpages) + goto err_finalize; + + err = drm_pagemap_migrate_map_pages(devmem_allocation->dev, pagemap_addr, + dst, npages, DMA_FROM_DEVICE); + if (err) + goto err_finalize; + + for (i = 0; i < npages; ++i) + pages[i] = migrate_pfn_to_page(src[i]); + + err = ops->copy_to_ram(pages, pagemap_addr, npages); + if (err) + goto err_finalize; + +err_finalize: + if (err) + drm_pagemap_migration_unlock_put_pages(npages, dst); + migrate_device_pages(src, dst, npages); + migrate_device_finalize(src, dst, npages); + drm_pagemap_migrate_unmap_pages(devmem_allocation->dev, pagemap_addr, npages, + DMA_FROM_DEVICE); +err_free: + kvfree(buf); +err_out: + mmput_async(devmem_allocation->mm); + + if (completion_done(&devmem_allocation->detached)) + return 0; + + if (retry_count--) { + cond_resched(); + goto retry; + } + + return err ?: -EBUSY; +} +EXPORT_SYMBOL_GPL(drm_pagemap_evict_to_ram); + +/** + * __drm_pagemap_migrate_to_ram() - Migrate GPU SVM range to RAM (internal) + * @vas: Pointer to the VM area structure + * @device_private_page_owner: Device private pages owner + * @page: Pointer to the page for fault handling (can be NULL) + * @fault_addr: Fault address + * @size: Size of migration + * + * This internal function performs the migration of the specified GPU SVM range + * to RAM. It sets up the migration, populates + dma maps RAM PFNs, and + * invokes the driver-specific operations for migration to RAM. + * + * Return: 0 on success, negative error code on failure. 
+ */ +static int __drm_pagemap_migrate_to_ram(struct vm_area_struct *vas, + void *device_private_page_owner, + struct page *page, + unsigned long fault_addr, + unsigned long size) +{ + struct migrate_vma migrate = { + .vma = vas, + .pgmap_owner = device_private_page_owner, + .flags = MIGRATE_VMA_SELECT_DEVICE_PRIVATE | + MIGRATE_VMA_SELECT_DEVICE_COHERENT, + .fault_page = page, + }; + struct drm_pagemap_zdd *zdd; + const struct drm_pagemap_devmem_ops *ops; + struct device *dev = NULL; + unsigned long npages, mpages = 0; + struct page **pages; + struct drm_pagemap_addr *pagemap_addr; + unsigned long start, end; + void *buf; + int i, err = 0; + + if (page) { + zdd = page->zone_device_data; + if (time_before64(get_jiffies_64(), + zdd->devmem_allocation->timeslice_expiration)) + return 0; + } + + start = ALIGN_DOWN(fault_addr, size); + end = ALIGN(fault_addr + 1, size); + + /* Corner where VMA area struct has been partially unmapped */ + if (start < vas->vm_start) + start = vas->vm_start; + if (end > vas->vm_end) + end = vas->vm_end; + + migrate.start = start; + migrate.end = end; + npages = npages_in_range(start, end); + + buf = kvcalloc(npages, 2 * sizeof(*migrate.src) + sizeof(*pagemap_addr) + + sizeof(*pages), GFP_KERNEL); + if (!buf) { + err = -ENOMEM; + goto err_out; + } + pagemap_addr = buf + (2 * sizeof(*migrate.src) * npages); + pages = buf + (2 * sizeof(*migrate.src) + sizeof(*pagemap_addr)) * npages; + + migrate.vma = vas; + migrate.src = buf; + migrate.dst = migrate.src + npages; + + err = migrate_vma_setup(&migrate); + if (err) + goto err_free; + + /* Raced with another CPU fault, nothing to do */ + if (!migrate.cpages) + goto err_free; + + if (!page) { + for (i = 0; i < npages; ++i) { + if (!(migrate.src[i] & MIGRATE_PFN_MIGRATE)) + continue; + + page = migrate_pfn_to_page(migrate.src[i]); + break; + } + + if (!page) + goto err_finalize; + } + zdd = page->zone_device_data; + ops = zdd->devmem_allocation->ops; + dev = zdd->devmem_allocation->dev; + + err = drm_pagemap_migrate_populate_ram_pfn(vas, page, npages, &mpages, + migrate.src, migrate.dst, + start); + if (err) + goto err_finalize; + + err = drm_pagemap_migrate_map_pages(dev, pagemap_addr, migrate.dst, npages, + DMA_FROM_DEVICE); + if (err) + goto err_finalize; + + for (i = 0; i < npages; ++i) + pages[i] = migrate_pfn_to_page(migrate.src[i]); + + err = ops->copy_to_ram(pages, pagemap_addr, npages); + if (err) + goto err_finalize; + +err_finalize: + if (err) + drm_pagemap_migration_unlock_put_pages(npages, migrate.dst); + migrate_vma_pages(&migrate); + migrate_vma_finalize(&migrate); + if (dev) + drm_pagemap_migrate_unmap_pages(dev, pagemap_addr, npages, + DMA_FROM_DEVICE); +err_free: + kvfree(buf); +err_out: + + return err; +} + +/** + * drm_pagemap_page_free() - Put GPU SVM zone device data associated with a page + * @page: Pointer to the page + * + * This function is a callback used to put the GPU SVM zone device data + * associated with a page when it is being released. + */ +static void drm_pagemap_page_free(struct page *page) +{ + drm_pagemap_zdd_put(page->zone_device_data); +} + +/** + * drm_pagemap_migrate_to_ram() - Migrate a virtual range to RAM (page fault handler) + * @vmf: Pointer to the fault information structure + * + * This function is a page fault handler used to migrate a virtual range + * to ram. The device memory allocation in which the device page is found is + * migrated in its entirety. + * + * Returns: + * VM_FAULT_SIGBUS on failure, 0 on success. 
+ */ +static vm_fault_t drm_pagemap_migrate_to_ram(struct vm_fault *vmf) +{ + struct drm_pagemap_zdd *zdd = vmf->page->zone_device_data; + int err; + + err = __drm_pagemap_migrate_to_ram(vmf->vma, + zdd->device_private_page_owner, + vmf->page, vmf->address, + zdd->devmem_allocation->size); + + return err ? VM_FAULT_SIGBUS : 0; +} + +static const struct dev_pagemap_ops drm_pagemap_pagemap_ops = { + .page_free = drm_pagemap_page_free, + .migrate_to_ram = drm_pagemap_migrate_to_ram, +}; + +/** + * drm_pagemap_pagemap_ops_get() - Retrieve GPU SVM device page map operations + * + * Returns: + * Pointer to the GPU SVM device page map operations structure. + */ +const struct dev_pagemap_ops *drm_pagemap_pagemap_ops_get(void) +{ + return &drm_pagemap_pagemap_ops; +} +EXPORT_SYMBOL_GPL(drm_pagemap_pagemap_ops_get); + +/** + * drm_pagemap_devmem_init() - Initialize a drm_pagemap device memory allocation + * + * @devmem_allocation: The struct drm_pagemap_devmem to initialize. + * @dev: Pointer to the device structure which device memory allocation belongs to + * @mm: Pointer to the mm_struct for the address space + * @ops: Pointer to the operations structure for GPU SVM device memory + * @dpagemap: The struct drm_pagemap we're allocating from. + * @size: Size of device memory allocation + */ +void drm_pagemap_devmem_init(struct drm_pagemap_devmem *devmem_allocation, + struct device *dev, struct mm_struct *mm, + const struct drm_pagemap_devmem_ops *ops, + struct drm_pagemap *dpagemap, size_t size) +{ + init_completion(&devmem_allocation->detached); + devmem_allocation->dev = dev; + devmem_allocation->mm = mm; + devmem_allocation->ops = ops; + devmem_allocation->dpagemap = dpagemap; + devmem_allocation->size = size; +} +EXPORT_SYMBOL_GPL(drm_pagemap_devmem_init); + +/** + * drm_pagemap_page_to_dpagemap() - Return a pointer the drm_pagemap of a page + * @page: The struct page. + * + * Return: A pointer to the struct drm_pagemap of a device private page that + * was populated from the struct drm_pagemap. If the page was *not* populated + * from a struct drm_pagemap, the result is undefined and the function call + * may result in dereferencing and invalid address. + */ +struct drm_pagemap *drm_pagemap_page_to_dpagemap(struct page *page) +{ + struct drm_pagemap_zdd *zdd = page->zone_device_data; + + return zdd->devmem_allocation->dpagemap; +} +EXPORT_SYMBOL_GPL(drm_pagemap_page_to_dpagemap); + +/** + * drm_pagemap_populate_mm() - Populate a virtual range with device memory pages + * @dpagemap: Pointer to the drm_pagemap managing the device memory + * @start: Start of the virtual range to populate. + * @end: End of the virtual range to populate. + * @mm: Pointer to the virtual address space. + * @timeslice_ms: The time requested for the migrated pagemap pages to + * be present in @mm before being allowed to be migrated back. + * + * Attempt to populate a virtual range with device memory pages, + * clearing them or migrating data from the existing pages if necessary. + * The function is best effort only, and implementations may vary + * in how hard they try to satisfy the request. + * + * Return: %0 on success, negative error code on error. If the hardware + * device was removed / unbound the function will return %-ENODEV. 
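Putting the pieces together, a hedged sketch (not from this patch): the device's dev_pagemap is pointed at the page_free/migrate_to_ram callbacks via drm_pagemap_pagemap_ops_get(), each allocation is initialised with drm_pagemap_devmem_init(), and an SVM bind or prefetch path asks the pagemap to back a range; my_vram, my_devmem_ops, my_dpagemap, my_pgmap_owner and the 5 ms timeslice are hypothetical:

	/* at device probe time */
	my_vram->pagemap.type = MEMORY_DEVICE_PRIVATE;
	my_vram->pagemap.owner = my_pgmap_owner;
	my_vram->pagemap.ops = drm_pagemap_pagemap_ops_get();

	/* per device-memory allocation */
	drm_pagemap_devmem_init(&alloc->devmem, dev, mm,
				&my_devmem_ops, my_dpagemap, size);

	/* best-effort population of a virtual range with device memory */
	err = drm_pagemap_populate_mm(my_dpagemap, start, end, mm, 5);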
+ */ +int drm_pagemap_populate_mm(struct drm_pagemap *dpagemap, + unsigned long start, unsigned long end, + struct mm_struct *mm, + unsigned long timeslice_ms) +{ + int err; + + if (!mmget_not_zero(mm)) + return -EFAULT; + mmap_read_lock(mm); + err = dpagemap->ops->populate_mm(dpagemap, start, end, mm, + timeslice_ms); + mmap_read_unlock(mm); + mmput(mm); + + return err; +} +EXPORT_SYMBOL(drm_pagemap_populate_mm); diff --git a/drivers/gpu/drm/drm_panel.c b/drivers/gpu/drm/drm_panel.c index c8bb28dccdc1..d1e6598ea3bc 100644 --- a/drivers/gpu/drm/drm_panel.c +++ b/drivers/gpu/drm/drm_panel.c @@ -134,6 +134,9 @@ void drm_panel_prepare(struct drm_panel *panel) panel->prepared = true; list_for_each_entry(follower, &panel->followers, list) { + if (!follower->funcs->panel_prepared) + continue; + ret = follower->funcs->panel_prepared(follower); if (ret < 0) dev_info(panel->dev, "%ps failed: %d\n", @@ -179,6 +182,9 @@ void drm_panel_unprepare(struct drm_panel *panel) mutex_lock(&panel->follower_lock); list_for_each_entry(follower, &panel->followers, list) { + if (!follower->funcs->panel_unpreparing) + continue; + ret = follower->funcs->panel_unpreparing(follower); if (ret < 0) dev_info(panel->dev, "%ps failed: %d\n", @@ -209,6 +215,7 @@ EXPORT_SYMBOL(drm_panel_unprepare); */ void drm_panel_enable(struct drm_panel *panel) { + struct drm_panel_follower *follower; int ret; if (!panel) @@ -219,10 +226,12 @@ void drm_panel_enable(struct drm_panel *panel) return; } + mutex_lock(&panel->follower_lock); + if (panel->funcs && panel->funcs->enable) { ret = panel->funcs->enable(panel); if (ret < 0) - return; + goto exit; } panel->enabled = true; @@ -230,6 +239,19 @@ void drm_panel_enable(struct drm_panel *panel) if (ret < 0) DRM_DEV_INFO(panel->dev, "failed to enable backlight: %d\n", ret); + + list_for_each_entry(follower, &panel->followers, list) { + if (!follower->funcs->panel_enabled) + continue; + + ret = follower->funcs->panel_enabled(follower); + if (ret < 0) + dev_info(panel->dev, "%ps failed: %d\n", + follower->funcs->panel_enabled, ret); + } + +exit: + mutex_unlock(&panel->follower_lock); } EXPORT_SYMBOL(drm_panel_enable); @@ -243,6 +265,7 @@ EXPORT_SYMBOL(drm_panel_enable); */ void drm_panel_disable(struct drm_panel *panel) { + struct drm_panel_follower *follower; int ret; if (!panel) @@ -262,6 +285,18 @@ void drm_panel_disable(struct drm_panel *panel) return; } + mutex_lock(&panel->follower_lock); + + list_for_each_entry(follower, &panel->followers, list) { + if (!follower->funcs->panel_disabling) + continue; + + ret = follower->funcs->panel_disabling(follower); + if (ret < 0) + dev_info(panel->dev, "%ps failed: %d\n", + follower->funcs->panel_disabling, ret); + } + ret = backlight_disable(panel->backlight); if (ret < 0) DRM_DEV_INFO(panel->dev, "failed to disable backlight: %d\n", @@ -270,9 +305,12 @@ void drm_panel_disable(struct drm_panel *panel) if (panel->funcs && panel->funcs->disable) { ret = panel->funcs->disable(panel); if (ret < 0) - return; + goto exit; } panel->enabled = false; + +exit: + mutex_unlock(&panel->follower_lock); } EXPORT_SYMBOL(drm_panel_disable); @@ -539,13 +577,13 @@ EXPORT_SYMBOL(drm_is_panel_follower); * @follower_dev: The 'struct device' for the follower. * @follower: The panel follower descriptor for the follower. * - * A panel follower is called right after preparing the panel and right before - * unpreparing the panel. It's primary intention is to power on an associated - * touchscreen, though it could be used for any similar devices. 
Multiple - * devices are allowed the follow the same panel. + * A panel follower is called right after preparing/enabling the panel and right + * before unpreparing/disabling the panel. It's primary intention is to power on + * an associated touchscreen, though it could be used for any similar devices. + * Multiple devices are allowed the follow the same panel. * - * If a follower is added to a panel that's already been turned on, the - * follower's prepare callback is called right away. + * If a follower is added to a panel that's already been prepared/enabled, the + * follower's prepared/enabled callback is called right away. * * The "panel" property of the follower points to the panel to be followed. * @@ -569,12 +607,18 @@ int drm_panel_add_follower(struct device *follower_dev, mutex_lock(&panel->follower_lock); list_add_tail(&follower->list, &panel->followers); - if (panel->prepared) { + if (panel->prepared && follower->funcs->panel_prepared) { ret = follower->funcs->panel_prepared(follower); if (ret < 0) dev_info(panel->dev, "%ps failed: %d\n", follower->funcs->panel_prepared, ret); } + if (panel->enabled && follower->funcs->panel_enabled) { + ret = follower->funcs->panel_enabled(follower); + if (ret < 0) + dev_info(panel->dev, "%ps failed: %d\n", + follower->funcs->panel_enabled, ret); + } mutex_unlock(&panel->follower_lock); @@ -587,7 +631,8 @@ EXPORT_SYMBOL(drm_panel_add_follower); * @follower: The panel follower descriptor for the follower. * * Undo drm_panel_add_follower(). This includes calling the follower's - * unprepare function if we're removed from a panel that's currently prepared. + * unpreparing/disabling function if we're removed from a panel that's currently + * prepared/enabled. * * Return: 0 or an error code. */ @@ -598,7 +643,13 @@ void drm_panel_remove_follower(struct drm_panel_follower *follower) mutex_lock(&panel->follower_lock); - if (panel->prepared) { + if (panel->enabled && follower->funcs->panel_disabling) { + ret = follower->funcs->panel_disabling(follower); + if (ret < 0) + dev_info(panel->dev, "%ps failed: %d\n", + follower->funcs->panel_disabling, ret); + } + if (panel->prepared && follower->funcs->panel_unpreparing) { ret = follower->funcs->panel_unpreparing(follower); if (ret < 0) dev_info(panel->dev, "%ps failed: %d\n", diff --git a/drivers/gpu/drm/drm_panel_backlight_quirks.c b/drivers/gpu/drm/drm_panel_backlight_quirks.c index 598f812b7cb3..537dc6dd0534 100644 --- a/drivers/gpu/drm/drm_panel_backlight_quirks.c +++ b/drivers/gpu/drm/drm_panel_backlight_quirks.c @@ -8,23 +8,26 @@ #include <drm/drm_edid.h> #include <drm/drm_utils.h> -struct drm_panel_min_backlight_quirk { - struct { - enum dmi_field field; - const char * const value; - } dmi_match; +struct drm_panel_match { + enum dmi_field field; + const char * const value; +}; + +struct drm_get_panel_backlight_quirk { + struct drm_panel_match dmi_match; + struct drm_panel_match dmi_match_other; struct drm_edid_ident ident; - u8 min_brightness; + struct drm_panel_backlight_quirk quirk; }; -static const struct drm_panel_min_backlight_quirk drm_panel_min_backlight_quirks[] = { +static const struct drm_get_panel_backlight_quirk drm_panel_min_backlight_quirks[] = { /* 13 inch matte panel */ { .dmi_match.field = DMI_BOARD_VENDOR, .dmi_match.value = "Framework", .ident.panel_id = drm_edid_encode_panel_id('B', 'O', 'E', 0x0bca), .ident.name = "NE135FBM-N41", - .min_brightness = 0, + .quirk = { .min_brightness = 1, }, }, /* 13 inch glossy panel */ { @@ -32,7 +35,7 @@ static const struct 
drm_panel_min_backlight_quirk drm_panel_min_backlight_quirks .dmi_match.value = "Framework", .ident.panel_id = drm_edid_encode_panel_id('B', 'O', 'E', 0x095f), .ident.name = "NE135FBM-N41", - .min_brightness = 0, + .quirk = { .min_brightness = 1, }, }, /* 13 inch 2.8k panel */ { @@ -40,56 +43,114 @@ static const struct drm_panel_min_backlight_quirk drm_panel_min_backlight_quirks .dmi_match.value = "Framework", .ident.panel_id = drm_edid_encode_panel_id('B', 'O', 'E', 0x0cb4), .ident.name = "NE135A1M-NY1", - .min_brightness = 0, + .quirk = { .min_brightness = 1, }, + }, + /* Steam Deck models */ + { + .dmi_match.field = DMI_SYS_VENDOR, + .dmi_match.value = "Valve", + .dmi_match_other.field = DMI_PRODUCT_NAME, + .dmi_match_other.value = "Jupiter", + .quirk = { .min_brightness = 1, }, + }, + { + .dmi_match.field = DMI_SYS_VENDOR, + .dmi_match.value = "Valve", + .dmi_match_other.field = DMI_PRODUCT_NAME, + .dmi_match_other.value = "Galileo", + .quirk = { .min_brightness = 1, }, + }, + /* Have OLED Panels with brightness issue when last byte is 0/1 */ + { + .dmi_match.field = DMI_SYS_VENDOR, + .dmi_match.value = "AYANEO", + .dmi_match_other.field = DMI_PRODUCT_NAME, + .dmi_match_other.value = "AYANEO 3", + .quirk = { .brightness_mask = 3, }, + }, + { + .dmi_match.field = DMI_SYS_VENDOR, + .dmi_match.value = "ZOTAC", + .dmi_match_other.field = DMI_BOARD_NAME, + .dmi_match_other.value = "G0A1W", + .quirk = { .brightness_mask = 3, }, + }, + { + .dmi_match.field = DMI_SYS_VENDOR, + .dmi_match.value = "ZOTAC", + .dmi_match_other.field = DMI_BOARD_NAME, + .dmi_match_other.value = "G1A1W", + .quirk = { .brightness_mask = 3, }, + }, + { + .dmi_match.field = DMI_SYS_VENDOR, + .dmi_match.value = "ONE-NETBOOK", + .dmi_match_other.field = DMI_PRODUCT_NAME, + .dmi_match_other.value = "ONEXPLAYER F1Pro", + .quirk = { .brightness_mask = 3, }, + }, + { + .dmi_match.field = DMI_SYS_VENDOR, + .dmi_match.value = "ONE-NETBOOK", + .dmi_match_other.field = DMI_PRODUCT_NAME, + .dmi_match_other.value = "ONEXPLAYER F1 EVA-02", + .quirk = { .brightness_mask = 3, }, }, }; -static bool drm_panel_min_backlight_quirk_matches(const struct drm_panel_min_backlight_quirk *quirk, - const struct drm_edid *edid) +static bool drm_panel_min_backlight_quirk_matches( + const struct drm_get_panel_backlight_quirk *quirk, + const struct drm_edid *edid) { - if (!dmi_match(quirk->dmi_match.field, quirk->dmi_match.value)) + if (quirk->dmi_match.field && + !dmi_match(quirk->dmi_match.field, quirk->dmi_match.value)) + return false; + + if (quirk->dmi_match_other.field && + !dmi_match(quirk->dmi_match_other.field, + quirk->dmi_match_other.value)) return false; - if (!drm_edid_match(edid, &quirk->ident)) + if (quirk->ident.panel_id && !drm_edid_match(edid, &quirk->ident)) return false; return true; } /** - * drm_get_panel_min_brightness_quirk - Get minimum supported brightness level for a panel. + * drm_get_panel_backlight_quirk - Get backlight quirks for a panel * @edid: EDID of the panel to check * * This function checks for platform specific (e.g. DMI based) quirks * providing info on the minimum backlight brightness for systems where this - * cannot be probed correctly from the hard-/firm-ware. + * cannot be probed correctly from the hard-/firm-ware and other sources. * * Returns: - * A negative error value or - * an override value in the range [0, 255] representing 0-100% to be scaled to - * the drivers target range. + * a drm_panel_backlight_quirk struct if a quirk was found, otherwise an + * error pointer. 
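A consumer-side sketch (illustrative, not from this patch); min_level and level_mask are hypothetical driver variables, and how each quirk field is applied remains driver policy:

	const struct drm_panel_backlight_quirk *quirk;

	quirk = drm_get_panel_backlight_quirk(edid);
	if (!IS_ERR(quirk)) {
		if (quirk->min_brightness)
			min_level = quirk->min_brightness;
		if (quirk->brightness_mask)
			level_mask = quirk->brightness_mask; /* brightness bits known to cause issues */
	}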
*/ -int drm_get_panel_min_brightness_quirk(const struct drm_edid *edid) +const struct drm_panel_backlight_quirk * +drm_get_panel_backlight_quirk(const struct drm_edid *edid) { - const struct drm_panel_min_backlight_quirk *quirk; + const struct drm_get_panel_backlight_quirk *quirk; size_t i; if (!IS_ENABLED(CONFIG_DMI)) - return -ENODATA; + return ERR_PTR(-ENODATA); if (!edid) - return -EINVAL; + return ERR_PTR(-EINVAL); for (i = 0; i < ARRAY_SIZE(drm_panel_min_backlight_quirks); i++) { quirk = &drm_panel_min_backlight_quirks[i]; if (drm_panel_min_backlight_quirk_matches(quirk, edid)) - return quirk->min_brightness; + return &quirk->quirk; } - return -ENODATA; + return ERR_PTR(-ENODATA); } -EXPORT_SYMBOL(drm_get_panel_min_brightness_quirk); +EXPORT_SYMBOL(drm_get_panel_backlight_quirk); MODULE_DESCRIPTION("Quirks for panel backlight overrides"); MODULE_LICENSE("GPL"); diff --git a/drivers/gpu/drm/drm_panic.c b/drivers/gpu/drm/drm_panic.c index 1d6312fa1429..d4b6ea42db0f 100644 --- a/drivers/gpu/drm/drm_panic.c +++ b/drivers/gpu/drm/drm_panic.c @@ -174,6 +174,33 @@ static void drm_panic_write_pixel24(void *vaddr, unsigned int offset, u32 color) *p = color & 0xff; } +/* + * Special case if the pixel crosses page boundaries + */ +static void drm_panic_write_pixel24_xpage(void *vaddr, struct page *next_page, + unsigned int offset, u32 color) +{ + u8 *vaddr2; + u8 *p = vaddr + offset; + + vaddr2 = kmap_local_page_try_from_panic(next_page); + + *p++ = color & 0xff; + color >>= 8; + + if (offset == PAGE_SIZE - 1) + p = vaddr2; + + *p++ = color & 0xff; + color >>= 8; + + if (offset == PAGE_SIZE - 2) + p = vaddr2; + + *p = color & 0xff; + kunmap_local(vaddr2); +} + static void drm_panic_write_pixel32(void *vaddr, unsigned int offset, u32 color) { u32 *p = vaddr + offset; @@ -231,7 +258,14 @@ static void drm_panic_blit_page(struct page **pages, unsigned int dpitch, page = new_page; vaddr = kmap_local_page_try_from_panic(pages[page]); } - if (vaddr) + if (!vaddr) + continue; + + // Special case for 24bit, as a pixel might cross page boundaries + if (cpp == 3 && offset + 3 > PAGE_SIZE) + drm_panic_write_pixel24_xpage(vaddr, pages[page + 1], + offset, fg32); + else drm_panic_write_pixel(vaddr, offset, fg32, cpp); } } @@ -321,7 +355,15 @@ static void drm_panic_fill_page(struct page **pages, unsigned int dpitch, page = new_page; vaddr = kmap_local_page_try_from_panic(pages[page]); } - drm_panic_write_pixel(vaddr, offset, color, cpp); + if (!vaddr) + continue; + + // Special case for 24bit, as a pixel might cross page boundaries + if (cpp == 3 && offset + 3 > PAGE_SIZE) + drm_panic_write_pixel24_xpage(vaddr, pages[page + 1], + offset, color); + else + drm_panic_write_pixel(vaddr, offset, color, cpp); } } if (vaddr) @@ -429,6 +471,9 @@ static void drm_panic_logo_rect(struct drm_rect *rect, const struct font_desc *f static void drm_panic_logo_draw(struct drm_scanout_buffer *sb, struct drm_rect *rect, const struct font_desc *font, u32 fg_color) { + if (rect->x2 > sb->width || rect->y2 > sb->height) + return; + if (logo_mono) drm_panic_blit(sb, rect, logo_mono->data, DIV_ROUND_UP(drm_rect_width(rect), 8), 1, fg_color); @@ -477,7 +522,7 @@ static int draw_line_with_wrap(struct drm_scanout_buffer *sb, const struct font_ struct drm_panic_line *line, int yoffset, u32 fg_color) { int chars_per_row = sb->width / font->width; - struct drm_rect r_txt = DRM_RECT_INIT(0, yoffset, sb->width, sb->height); + struct drm_rect r_txt = DRM_RECT_INIT(0, yoffset, sb->width, font->height); struct drm_panic_line line_wrap; if 
(line->len > chars_per_row) { @@ -520,7 +565,7 @@ static void draw_panic_static_kmsg(struct drm_scanout_buffer *sb) struct drm_panic_line line; int yoffset; - if (!font) + if (!font || font->width > sb->width) return; yoffset = sb->height - font->height - (sb->height % font->height) / 2; @@ -733,7 +778,10 @@ static int _draw_panic_static_qr_code(struct drm_scanout_buffer *sb) pr_debug("QR width %d and scale %d\n", qr_width, scale); r_qr_canvas = DRM_RECT_INIT(0, 0, qr_canvas_width * scale, qr_canvas_width * scale); - v_margin = (sb->height - drm_rect_height(&r_qr_canvas) - drm_rect_height(&r_msg)) / 5; + v_margin = sb->height - drm_rect_height(&r_qr_canvas) - drm_rect_height(&r_msg); + if (v_margin < 0) + return -ENOSPC; + v_margin /= 5; drm_rect_translate(&r_qr_canvas, (sb->width - r_qr_canvas.x2) / 2, 2 * v_margin); r_qr = DRM_RECT_INIT(r_qr_canvas.x1 + QR_MARGIN * scale, r_qr_canvas.y1 + QR_MARGIN * scale, @@ -746,7 +794,7 @@ static int _draw_panic_static_qr_code(struct drm_scanout_buffer *sb) /* Fill with the background color, and draw text on top */ drm_panic_fill(sb, &r_screen, bg_color); - if (!drm_rect_overlap(&r_logo, &r_msg) && !drm_rect_overlap(&r_logo, &r_qr)) + if (!drm_rect_overlap(&r_logo, &r_msg) && !drm_rect_overlap(&r_logo, &r_qr_canvas)) drm_panic_logo_draw(sb, &r_logo, font, fg_color); draw_txt_rectangle(sb, font, panic_msg, panic_msg_lines, true, &r_msg, fg_color); diff --git a/drivers/gpu/drm/drm_panic_qr.rs b/drivers/gpu/drm/drm_panic_qr.rs index dd55b1cb764d..ac27e86c601c 100644 --- a/drivers/gpu/drm/drm_panic_qr.rs +++ b/drivers/gpu/drm/drm_panic_qr.rs @@ -27,7 +27,7 @@ //! * <https://github.com/erwanvivien/fast_qr> //! * <https://github.com/bjguillot/qr> -use kernel::{prelude::*, str::CStr}; +use kernel::prelude::*; #[derive(Debug, Clone, Copy, PartialEq, Eq, Ord, PartialOrd)] struct Version(usize); @@ -381,6 +381,26 @@ struct DecFifo { len: usize, } +// On arm32 architecture, dividing an `u64` by a constant will generate a call +// to `__aeabi_uldivmod` which is not present in the kernel. +// So use the multiply by inverse method for this architecture. +fn div10(val: u64) -> u64 { + if cfg!(target_arch = "arm") { + let val_h = val >> 32; + let val_l = val & 0xFFFFFFFF; + let b_h: u64 = 0x66666666; + let b_l: u64 = 0x66666667; + + let tmp1 = val_h * b_l + ((val_l * b_l) >> 32); + let tmp2 = val_l * b_h + (tmp1 & 0xffffffff); + let tmp3 = val_h * b_h + (tmp1 >> 32) + (tmp2 >> 32); + + tmp3 >> 2 + } else { + val / 10 + } +} + impl DecFifo { fn push(&mut self, data: u64, len: usize) { let mut chunk = data; @@ -389,7 +409,7 @@ impl DecFifo { } for i in 0..len { self.decimals[i] = (chunk % 10) as u8; - chunk /= 10; + chunk = div10(chunk); } self.len += len; } @@ -404,7 +424,7 @@ impl DecFifo { let mut out = 0; let mut exp = 1; for i in 0..poplen { - out += self.decimals[self.len + i] as u16 * exp; + out += u16::from(self.decimals[self.len + i]) * exp; exp *= 10; } Some((out, NUM_CHARS_BITS[poplen])) @@ -425,7 +445,7 @@ impl Iterator for SegmentIterator<'_> { match self.segment { Segment::Binary(data) => { if self.offset < data.len() { - let byte = data[self.offset] as u16; + let byte = u16::from(data[self.offset]); self.offset += 1; Some((byte, 8)) } else { @@ -948,7 +968,7 @@ pub unsafe extern "C" fn drm_panic_qr_generate( // nul-terminated string. 
let url_cstr: &CStr = unsafe { CStr::from_char_ptr(url) }; let segments = &[ - &Segment::Binary(url_cstr.as_bytes()), + &Segment::Binary(url_cstr.to_bytes()), &Segment::Numeric(&data_slice[0..data_len]), ]; match EncodedMsg::new(segments, tmp_slice) { diff --git a/drivers/gpu/drm/drm_prime.c b/drivers/gpu/drm/drm_prime.c index b703f83874e1..43a10b4af43a 100644 --- a/drivers/gpu/drm/drm_prime.c +++ b/drivers/gpu/drm/drm_prime.c @@ -93,7 +93,7 @@ struct drm_prime_member { struct rb_node handle_rb; }; -static int drm_prime_add_buf_handle(struct drm_prime_file_private *prime_fpriv, +int drm_prime_add_buf_handle(struct drm_prime_file_private *prime_fpriv, struct dma_buf *dma_buf, uint32_t handle) { struct drm_prime_member *member; @@ -190,8 +190,6 @@ void drm_prime_remove_buf_handle(struct drm_prime_file_private *prime_fpriv, { struct rb_node *rb; - mutex_lock(&prime_fpriv->lock); - rb = prime_fpriv->handles.rb_node; while (rb) { struct drm_prime_member *member; @@ -210,8 +208,6 @@ void drm_prime_remove_buf_handle(struct drm_prime_file_private *prime_fpriv, rb = rb->rb_left; } } - - mutex_unlock(&prime_fpriv->lock); } void drm_prime_init_file_private(struct drm_prime_file_private *prime_fpriv) @@ -453,7 +449,13 @@ struct dma_buf *drm_gem_prime_handle_to_dmabuf(struct drm_device *dev, } mutex_lock(&dev->object_name_lock); - /* re-export the original imported/exported object */ + /* re-export the original imported object */ + if (obj->import_attach) { + dmabuf = obj->import_attach->dmabuf; + get_dma_buf(dmabuf); + goto out_have_obj; + } + if (obj->dma_buf) { get_dma_buf(obj->dma_buf); dmabuf = obj->dma_buf; diff --git a/drivers/gpu/drm/drm_probe_helper.c b/drivers/gpu/drm/drm_probe_helper.c index 6b3541159c0f..09b12c30df69 100644 --- a/drivers/gpu/drm/drm_probe_helper.c +++ b/drivers/gpu/drm/drm_probe_helper.c @@ -119,6 +119,7 @@ drm_mode_validate_pipeline(struct drm_display_mode *mode, *status = drm_bridge_chain_mode_valid(bridge, &connector->display_info, mode); + drm_bridge_put(bridge); if (*status != MODE_OK) { /* There is also no point in continuing for crtc check * here. 
*/ diff --git a/drivers/gpu/drm/drm_sysfs.c b/drivers/gpu/drm/drm_sysfs.c index 60c1f26edb6f..b01ffa4d6509 100644 --- a/drivers/gpu/drm/drm_sysfs.c +++ b/drivers/gpu/drm/drm_sysfs.c @@ -18,6 +18,7 @@ #include <linux/gfp.h> #include <linux/i2c.h> #include <linux/kdev_t.h> +#include <linux/pci.h> #include <linux/property.h> #include <linux/slab.h> @@ -30,6 +31,8 @@ #include <drm/drm_property.h> #include <drm/drm_sysfs.h> +#include <asm/video.h> + #include "drm_internal.h" #include "drm_crtc_internal.h" @@ -319,7 +322,7 @@ static const struct bin_attribute edid_attr = { .attr.name = "edid", .attr.mode = 0444, .size = 0, - .read_new = edid_show, + .read = edid_show, }; static const struct bin_attribute *const connector_bin_attrs[] = { @@ -329,7 +332,7 @@ static const struct bin_attribute *const connector_bin_attrs[] = { static const struct attribute_group connector_dev_group = { .attrs = connector_dev_attrs, - .bin_attrs_new = connector_bin_attrs, + .bin_attrs = connector_bin_attrs, }; static const struct attribute_group *connector_dev_groups[] = { @@ -508,6 +511,43 @@ void drm_sysfs_connector_property_event(struct drm_connector *connector, } EXPORT_SYMBOL(drm_sysfs_connector_property_event); +static ssize_t boot_display_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + return sysfs_emit(buf, "1\n"); +} +static DEVICE_ATTR_RO(boot_display); + +static struct attribute *display_attrs[] = { + &dev_attr_boot_display.attr, + NULL +}; + +static umode_t boot_display_visible(struct kobject *kobj, + struct attribute *a, int n) +{ + struct device *dev = kobj_to_dev(kobj)->parent; + + if (dev_is_pci(dev)) { + struct pci_dev *pdev = to_pci_dev(dev); + + if (video_is_primary_device(&pdev->dev)) + return a->mode; + } + + return 0; +} + +static const struct attribute_group display_attr_group = { + .attrs = display_attrs, + .is_visible = boot_display_visible, +}; + +static const struct attribute_group *card_dev_groups[] = { + &display_attr_group, + NULL +}; + struct device *drm_sysfs_minor_alloc(struct drm_minor *minor) { const char *minor_str; @@ -531,6 +571,7 @@ struct device *drm_sysfs_minor_alloc(struct drm_minor *minor) kdev->devt = MKDEV(DRM_MAJOR, minor->index); kdev->class = drm_class; + kdev->groups = card_dev_groups; kdev->type = &drm_sysfs_device_minor; } diff --git a/drivers/gpu/drm/drm_writeback.c b/drivers/gpu/drm/drm_writeback.c index ace8c98c3e04..95b8a2e4bda6 100644 --- a/drivers/gpu/drm/drm_writeback.c +++ b/drivers/gpu/drm/drm_writeback.c @@ -344,17 +344,18 @@ EXPORT_SYMBOL(drm_writeback_connector_init_with_encoder); /** * drm_writeback_connector_cleanup - Cleanup the writeback connector * @dev: DRM device - * @wb_connector: Pointer to the writeback connector to clean up + * @data: Pointer to the writeback connector to clean up * * This will decrement the reference counter of blobs and destroy properties. It * will also clean the remaining jobs in this writeback connector. Caution: This helper will not * clean up the attached encoder and the drm_connector. 
*/ static void drm_writeback_connector_cleanup(struct drm_device *dev, - struct drm_writeback_connector *wb_connector) + void *data) { unsigned long flags; struct drm_writeback_job *pos, *n; + struct drm_writeback_connector *wb_connector = data; delete_writeback_properties(dev); drm_property_blob_put(wb_connector->pixel_formats_blob_ptr); @@ -406,7 +407,7 @@ int drmm_writeback_connector_init(struct drm_device *dev, if (ret) return ret; - ret = drmm_add_action_or_reset(dev, (void *)drm_writeback_connector_cleanup, + ret = drmm_add_action_or_reset(dev, drm_writeback_connector_cleanup, wb_connector); if (ret) return ret; diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gem_prime.c b/drivers/gpu/drm/etnaviv/etnaviv_gem_prime.c index 917ad527c961..40a50c60dfff 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_gem_prime.c +++ b/drivers/gpu/drm/etnaviv/etnaviv_gem_prime.c @@ -65,7 +65,7 @@ static void etnaviv_gem_prime_release(struct etnaviv_gem_object *etnaviv_obj) struct iosys_map map = IOSYS_MAP_INIT_VADDR(etnaviv_obj->vaddr); if (etnaviv_obj->vaddr) - dma_buf_vunmap_unlocked(etnaviv_obj->base.dma_buf, &map); + dma_buf_vunmap_unlocked(etnaviv_obj->base.import_attach->dmabuf, &map); /* Don't drop the pages for imported dmabuf, as they are not * ours, just free the array we allocated: @@ -82,7 +82,7 @@ static void *etnaviv_gem_prime_vmap_impl(struct etnaviv_gem_object *etnaviv_obj) lockdep_assert_held(&etnaviv_obj->lock); - ret = dma_buf_vmap(etnaviv_obj->base.dma_buf, &map); + ret = dma_buf_vmap(etnaviv_obj->base.import_attach->dmabuf, &map); if (ret) return NULL; return map.vaddr; diff --git a/drivers/gpu/drm/etnaviv/etnaviv_sched.c b/drivers/gpu/drm/etnaviv/etnaviv_sched.c index 76a3a3e517d8..df4232d7e135 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_sched.c +++ b/drivers/gpu/drm/etnaviv/etnaviv_sched.c @@ -40,11 +40,11 @@ static enum drm_gpu_sched_stat etnaviv_sched_timedout_job(struct drm_sched_job int change; /* - * If the GPU managed to complete this jobs fence, the timout is - * spurious. Bail out. + * If the GPU managed to complete this jobs fence, the timeout has + * fired before free-job worker. The timeout is spurious, so bail out. 
*/ if (dma_fence_is_signaled(submit->out_fence)) - goto out_no_timeout; + return DRM_GPU_SCHED_STAT_NO_HANG; /* * If the GPU is still making forward progress on the front-end (which @@ -70,7 +70,7 @@ static enum drm_gpu_sched_stat etnaviv_sched_timedout_job(struct drm_sched_job gpu->hangcheck_dma_addr = dma_addr; gpu->hangcheck_primid = primid; gpu->hangcheck_fence = gpu->completed_fence; - goto out_no_timeout; + return DRM_GPU_SCHED_STAT_NO_HANG; } /* block scheduler */ @@ -86,11 +86,7 @@ static enum drm_gpu_sched_stat etnaviv_sched_timedout_job(struct drm_sched_job drm_sched_resubmit_jobs(&gpu->sched); drm_sched_start(&gpu->sched, 0); - return DRM_GPU_SCHED_STAT_NOMINAL; - -out_no_timeout: - list_add(&sched_job->list, &sched_job->sched->pending_list); - return DRM_GPU_SCHED_STAT_NOMINAL; + return DRM_GPU_SCHED_STAT_RESET; } static void etnaviv_sched_free_job(struct drm_sched_job *sched_job) diff --git a/drivers/gpu/drm/exynos/exynos7_drm_decon.c b/drivers/gpu/drm/exynos/exynos7_drm_decon.c index f91daefa9d2b..b8d9b7251319 100644 --- a/drivers/gpu/drm/exynos/exynos7_drm_decon.c +++ b/drivers/gpu/drm/exynos/exynos7_drm_decon.c @@ -69,7 +69,6 @@ struct decon_context { void __iomem *regs; unsigned long irq_flags; bool i80_if; - bool suspended; wait_queue_head_t wait_vsync_queue; atomic_t wait_vsync_event; @@ -132,9 +131,6 @@ static void decon_shadow_protect_win(struct decon_context *ctx, static void decon_wait_for_vblank(struct decon_context *ctx) { - if (ctx->suspended) - return; - atomic_set(&ctx->wait_vsync_event, 1); /* @@ -210,9 +206,6 @@ static void decon_commit(struct exynos_drm_crtc *crtc) struct drm_display_mode *mode = &crtc->base.state->adjusted_mode; u32 val, clkdiv; - if (ctx->suspended) - return; - /* nothing to do if we haven't set the mode yet */ if (mode->htotal == 0 || mode->vtotal == 0) return; @@ -274,9 +267,6 @@ static int decon_enable_vblank(struct exynos_drm_crtc *crtc) struct decon_context *ctx = crtc->ctx; u32 val; - if (ctx->suspended) - return -EPERM; - if (!test_and_set_bit(0, &ctx->irq_flags)) { val = readl(ctx->regs + VIDINTCON0); @@ -299,9 +289,6 @@ static void decon_disable_vblank(struct exynos_drm_crtc *crtc) struct decon_context *ctx = crtc->ctx; u32 val; - if (ctx->suspended) - return; - if (test_and_clear_bit(0, &ctx->irq_flags)) { val = readl(ctx->regs + VIDINTCON0); @@ -404,9 +391,6 @@ static void decon_atomic_begin(struct exynos_drm_crtc *crtc) struct decon_context *ctx = crtc->ctx; int i; - if (ctx->suspended) - return; - for (i = 0; i < WINDOWS_NR; i++) decon_shadow_protect_win(ctx, i, true); } @@ -427,9 +411,6 @@ static void decon_update_plane(struct exynos_drm_crtc *crtc, unsigned int pitch = fb->pitches[0]; unsigned int vidw_addr0_base = ctx->data->vidw_buf_start_base; - if (ctx->suspended) - return; - /* * SHADOWCON/PRTCON register is used for enabling timing. 
* @@ -517,9 +498,6 @@ static void decon_disable_plane(struct exynos_drm_crtc *crtc, unsigned int win = plane->index; u32 val; - if (ctx->suspended) - return; - /* protect windows */ decon_shadow_protect_win(ctx, win, true); @@ -538,9 +516,6 @@ static void decon_atomic_flush(struct exynos_drm_crtc *crtc) struct decon_context *ctx = crtc->ctx; int i; - if (ctx->suspended) - return; - for (i = 0; i < WINDOWS_NR; i++) decon_shadow_protect_win(ctx, i, false); exynos_crtc_handle_event(crtc); @@ -568,9 +543,6 @@ static void decon_atomic_enable(struct exynos_drm_crtc *crtc) struct decon_context *ctx = crtc->ctx; int ret; - if (!ctx->suspended) - return; - ret = pm_runtime_resume_and_get(ctx->dev); if (ret < 0) { DRM_DEV_ERROR(ctx->dev, "failed to enable DECON device.\n"); @@ -584,8 +556,6 @@ static void decon_atomic_enable(struct exynos_drm_crtc *crtc) decon_enable_vblank(ctx->crtc); decon_commit(ctx->crtc); - - ctx->suspended = false; } static void decon_atomic_disable(struct exynos_drm_crtc *crtc) @@ -593,9 +563,6 @@ static void decon_atomic_disable(struct exynos_drm_crtc *crtc) struct decon_context *ctx = crtc->ctx; int i; - if (ctx->suspended) - return; - /* * We need to make sure that all windows are disabled before we * suspend that connector. Otherwise we might try to scan from @@ -605,8 +572,6 @@ static void decon_atomic_disable(struct exynos_drm_crtc *crtc) decon_disable_plane(crtc, &ctx->planes[i]); pm_runtime_put_sync(ctx->dev); - - ctx->suspended = true; } static const struct exynos_drm_crtc_ops decon_crtc_ops = { @@ -636,6 +601,10 @@ static irqreturn_t decon_irq_handler(int irq, void *dev_id) if (!ctx->drm_dev) goto out; + /* check if crtc and vblank have been initialized properly */ + if (!drm_dev_has_vblank(ctx->drm_dev)) + goto out; + if (!ctx->i80_if) { drm_crtc_handle_vblank(&ctx->crtc->base); @@ -723,7 +692,6 @@ static int decon_probe(struct platform_device *pdev) return -ENOMEM; ctx->dev = dev; - ctx->suspended = true; ctx->data = of_device_get_match_data(dev); i80_if_timings = of_get_child_by_name(dev->of_node, "i80-if-timings"); diff --git a/drivers/gpu/drm/exynos/exynos_drm_dsi.c b/drivers/gpu/drm/exynos/exynos_drm_dsi.c index 896a03639e2d..c4d098ab7863 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_dsi.c +++ b/drivers/gpu/drm/exynos/exynos_drm_dsi.c @@ -154,6 +154,11 @@ static const struct samsung_dsim_plat_data exynos5433_dsi_pdata = { .host_ops = &exynos_dsi_exynos_host_ops, }; +static const struct samsung_dsim_plat_data exynos7870_dsi_pdata = { + .hw_type = DSIM_TYPE_EXYNOS7870, + .host_ops = &exynos_dsi_exynos_host_ops, +}; + static const struct of_device_id exynos_dsi_of_match[] = { { .compatible = "samsung,exynos3250-mipi-dsi", @@ -175,6 +180,10 @@ static const struct of_device_id exynos_dsi_of_match[] = { .compatible = "samsung,exynos5433-mipi-dsi", .data = &exynos5433_dsi_pdata, }, + { + .compatible = "samsung,exynos7870-mipi-dsi", + .data = &exynos7870_dsi_pdata, + }, { /* sentinel. 
*/ } }; MODULE_DEVICE_TABLE(of, exynos_dsi_of_match); diff --git a/drivers/gpu/drm/exynos/exynos_drm_fb.c b/drivers/gpu/drm/exynos/exynos_drm_fb.c index fc1c5608db96..ddd73e7f26a3 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_fb.c +++ b/drivers/gpu/drm/exynos/exynos_drm_fb.c @@ -56,6 +56,7 @@ static const struct drm_framebuffer_funcs exynos_drm_fb_funcs = { struct drm_framebuffer * exynos_drm_framebuffer_init(struct drm_device *dev, + const struct drm_format_info *info, const struct drm_mode_fb_cmd2 *mode_cmd, struct exynos_drm_gem **exynos_gem, int count) @@ -76,7 +77,7 @@ exynos_drm_framebuffer_init(struct drm_device *dev, fb->obj[i] = &exynos_gem[i]->base; } - drm_helper_mode_fill_fb_struct(dev, fb, mode_cmd); + drm_helper_mode_fill_fb_struct(dev, fb, info, mode_cmd); ret = drm_framebuffer_init(dev, fb, &exynos_drm_fb_funcs); if (ret < 0) { @@ -94,9 +95,9 @@ err: static struct drm_framebuffer * exynos_user_fb_create(struct drm_device *dev, struct drm_file *file_priv, + const struct drm_format_info *info, const struct drm_mode_fb_cmd2 *mode_cmd) { - const struct drm_format_info *info = drm_get_format_info(dev, mode_cmd); struct exynos_drm_gem *exynos_gem[MAX_FB_BUFFER]; struct drm_framebuffer *fb; int i; @@ -124,7 +125,7 @@ exynos_user_fb_create(struct drm_device *dev, struct drm_file *file_priv, } } - fb = exynos_drm_framebuffer_init(dev, mode_cmd, exynos_gem, i); + fb = exynos_drm_framebuffer_init(dev, info, mode_cmd, exynos_gem, i); if (IS_ERR(fb)) { ret = PTR_ERR(fb); goto err; diff --git a/drivers/gpu/drm/exynos/exynos_drm_fb.h b/drivers/gpu/drm/exynos/exynos_drm_fb.h index 2f841bbdddc5..fdc6cb40cc9c 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_fb.h +++ b/drivers/gpu/drm/exynos/exynos_drm_fb.h @@ -14,6 +14,7 @@ struct drm_framebuffer * exynos_drm_framebuffer_init(struct drm_device *dev, + const struct drm_format_info *info, const struct drm_mode_fb_cmd2 *mode_cmd, struct exynos_drm_gem **exynos_gem, int count); diff --git a/drivers/gpu/drm/exynos/exynos_drm_fbdev.c b/drivers/gpu/drm/exynos/exynos_drm_fbdev.c index 9526a25e90ac..93de25b77e68 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_fbdev.c +++ b/drivers/gpu/drm/exynos/exynos_drm_fbdev.c @@ -116,7 +116,10 @@ int exynos_drm_fbdev_driver_fbdev_probe(struct drm_fb_helper *helper, return PTR_ERR(exynos_gem); helper->fb = - exynos_drm_framebuffer_init(dev, &mode_cmd, &exynos_gem, 1); + exynos_drm_framebuffer_init(dev, + drm_get_format_info(dev, mode_cmd.pixel_format, + mode_cmd.modifier[0]), + &mode_cmd, &exynos_gem, 1); if (IS_ERR(helper->fb)) { DRM_DEV_ERROR(dev->dev, "failed to create drm framebuffer.\n"); ret = PTR_ERR(helper->fb); diff --git a/drivers/gpu/drm/exynos/exynos_drm_fimd.c b/drivers/gpu/drm/exynos/exynos_drm_fimd.c index c394cc702d7d..205c238cc73a 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_fimd.c +++ b/drivers/gpu/drm/exynos/exynos_drm_fimd.c @@ -187,6 +187,7 @@ struct fimd_context { u32 i80ifcon; bool i80_if; bool suspended; + bool dp_clk_enabled; wait_queue_head_t wait_vsync_queue; atomic_t wait_vsync_event; atomic_t win_updated; @@ -1047,7 +1048,18 @@ static void fimd_dp_clock_enable(struct exynos_drm_clk *clk, bool enable) struct fimd_context *ctx = container_of(clk, struct fimd_context, dp_clk); u32 val = enable ? 
DP_MIE_CLK_DP_ENABLE : DP_MIE_CLK_DISABLE; + + if (enable == ctx->dp_clk_enabled) + return; + + if (enable) + pm_runtime_resume_and_get(ctx->dev); + + ctx->dp_clk_enabled = enable; writel(val, ctx->regs + DP_MIE_CLKCON); + + if (!enable) + pm_runtime_put(ctx->dev); } static const struct exynos_drm_crtc_ops fimd_crtc_ops = { diff --git a/drivers/gpu/drm/exynos/exynos_drm_gem.c b/drivers/gpu/drm/exynos/exynos_drm_gem.c index 4787fee4696f..e3fbb45f37a2 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_gem.c +++ b/drivers/gpu/drm/exynos/exynos_drm_gem.c @@ -7,7 +7,6 @@ #include <linux/dma-buf.h> -#include <linux/pfn_t.h> #include <linux/shmem_fs.h> #include <linux/module.h> @@ -174,7 +173,7 @@ static struct exynos_drm_gem *exynos_drm_gem_init(struct drm_device *dev, return ERR_PTR(ret); } - DRM_DEV_DEBUG_KMS(dev->dev, "created file object = %pK\n", obj->filp); + DRM_DEV_DEBUG_KMS(dev->dev, "created file object = %p\n", obj->filp); return exynos_gem; } diff --git a/drivers/gpu/drm/exynos/exynos_drm_ipp.c b/drivers/gpu/drm/exynos/exynos_drm_ipp.c index ea9f66037600..03c8490af4f4 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_ipp.c +++ b/drivers/gpu/drm/exynos/exynos_drm_ipp.c @@ -271,7 +271,7 @@ static inline struct exynos_drm_ipp_task * task->src.rect.h = task->dst.rect.h = UINT_MAX; task->transform.rotation = DRM_MODE_ROTATE_0; - DRM_DEV_DEBUG_DRIVER(task->dev, "Allocated task %pK\n", task); + DRM_DEV_DEBUG_DRIVER(task->dev, "Allocated task %p\n", task); return task; } @@ -339,7 +339,7 @@ static int exynos_drm_ipp_task_set(struct exynos_drm_ipp_task *task, } DRM_DEV_DEBUG_DRIVER(task->dev, - "Got task %pK configuration from userspace\n", + "Got task %p configuration from userspace\n", task); return 0; } @@ -394,7 +394,7 @@ static void exynos_drm_ipp_task_release_buf(struct exynos_drm_ipp_buffer *buf) static void exynos_drm_ipp_task_free(struct exynos_drm_ipp *ipp, struct exynos_drm_ipp_task *task) { - DRM_DEV_DEBUG_DRIVER(task->dev, "Freeing task %pK\n", task); + DRM_DEV_DEBUG_DRIVER(task->dev, "Freeing task %p\n", task); exynos_drm_ipp_task_release_buf(&task->src); exynos_drm_ipp_task_release_buf(&task->dst); @@ -559,7 +559,7 @@ static int exynos_drm_ipp_check_format(struct exynos_drm_ipp_task *task, DRM_EXYNOS_IPP_FORMAT_DESTINATION); if (!fmt) { DRM_DEV_DEBUG_DRIVER(task->dev, - "Task %pK: %s format not supported\n", + "Task %p: %s format not supported\n", task, buf == src ? 
"src" : "dst"); return -EINVAL; } @@ -609,7 +609,7 @@ static int exynos_drm_ipp_task_check(struct exynos_drm_ipp_task *task) bool rotate = (rotation != DRM_MODE_ROTATE_0); bool scale = false; - DRM_DEV_DEBUG_DRIVER(task->dev, "Checking task %pK\n", task); + DRM_DEV_DEBUG_DRIVER(task->dev, "Checking task %p\n", task); if (src->rect.w == UINT_MAX) src->rect.w = src->buf.width; @@ -625,7 +625,7 @@ static int exynos_drm_ipp_task_check(struct exynos_drm_ipp_task *task) dst->rect.x + dst->rect.w > (dst->buf.width) || dst->rect.y + dst->rect.h > (dst->buf.height)) { DRM_DEV_DEBUG_DRIVER(task->dev, - "Task %pK: defined area is outside provided buffers\n", + "Task %p: defined area is outside provided buffers\n", task); return -EINVAL; } @@ -642,7 +642,7 @@ static int exynos_drm_ipp_task_check(struct exynos_drm_ipp_task *task) (!(ipp->capabilities & DRM_EXYNOS_IPP_CAP_SCALE) && scale) || (!(ipp->capabilities & DRM_EXYNOS_IPP_CAP_CONVERT) && src->buf.fourcc != dst->buf.fourcc)) { - DRM_DEV_DEBUG_DRIVER(task->dev, "Task %pK: hw capabilities exceeded\n", + DRM_DEV_DEBUG_DRIVER(task->dev, "Task %p: hw capabilities exceeded\n", task); return -EINVAL; } @@ -655,7 +655,7 @@ static int exynos_drm_ipp_task_check(struct exynos_drm_ipp_task *task) if (ret) return ret; - DRM_DEV_DEBUG_DRIVER(ipp->dev, "Task %pK: all checks done.\n", + DRM_DEV_DEBUG_DRIVER(ipp->dev, "Task %p: all checks done.\n", task); return ret; @@ -667,25 +667,25 @@ static int exynos_drm_ipp_task_setup_buffers(struct exynos_drm_ipp_task *task, struct exynos_drm_ipp_buffer *src = &task->src, *dst = &task->dst; int ret = 0; - DRM_DEV_DEBUG_DRIVER(task->dev, "Setting buffer for task %pK\n", + DRM_DEV_DEBUG_DRIVER(task->dev, "Setting buffer for task %p\n", task); ret = exynos_drm_ipp_task_setup_buffer(src, filp); if (ret) { DRM_DEV_DEBUG_DRIVER(task->dev, - "Task %pK: src buffer setup failed\n", + "Task %p: src buffer setup failed\n", task); return ret; } ret = exynos_drm_ipp_task_setup_buffer(dst, filp); if (ret) { DRM_DEV_DEBUG_DRIVER(task->dev, - "Task %pK: dst buffer setup failed\n", + "Task %p: dst buffer setup failed\n", task); return ret; } - DRM_DEV_DEBUG_DRIVER(task->dev, "Task %pK: buffers prepared.\n", + DRM_DEV_DEBUG_DRIVER(task->dev, "Task %p: buffers prepared.\n", task); return ret; @@ -764,7 +764,7 @@ void exynos_drm_ipp_task_done(struct exynos_drm_ipp_task *task, int ret) struct exynos_drm_ipp *ipp = task->ipp; unsigned long flags; - DRM_DEV_DEBUG_DRIVER(task->dev, "ipp: %d, task %pK done: %d\n", + DRM_DEV_DEBUG_DRIVER(task->dev, "ipp: %d, task %p done: %d\n", ipp->id, task, ret); spin_lock_irqsave(&ipp->lock, flags); @@ -807,7 +807,7 @@ static void exynos_drm_ipp_next_task(struct exynos_drm_ipp *ipp) spin_unlock_irqrestore(&ipp->lock, flags); DRM_DEV_DEBUG_DRIVER(ipp->dev, - "ipp: %d, selected task %pK to run\n", ipp->id, + "ipp: %d, selected task %p to run\n", ipp->id, task); ret = ipp->funcs->commit(ipp, task); @@ -917,14 +917,14 @@ int exynos_drm_ipp_commit_ioctl(struct drm_device *dev, void *data, */ if (arg->flags & DRM_EXYNOS_IPP_FLAG_NONBLOCK) { DRM_DEV_DEBUG_DRIVER(ipp->dev, - "ipp: %d, nonblocking processing task %pK\n", + "ipp: %d, nonblocking processing task %p\n", ipp->id, task); task->flags |= DRM_EXYNOS_IPP_TASK_ASYNC; exynos_drm_ipp_schedule_task(task->ipp, task); ret = 0; } else { - DRM_DEV_DEBUG_DRIVER(ipp->dev, "ipp: %d, processing task %pK\n", + DRM_DEV_DEBUG_DRIVER(ipp->dev, "ipp: %d, processing task %p\n", ipp->id, task); exynos_drm_ipp_schedule_task(ipp, task); ret = wait_event_interruptible(ipp->done_wq, 
diff --git a/drivers/gpu/drm/gma500/fbdev.c b/drivers/gpu/drm/gma500/fbdev.c index 8edefea2ef59..32d31e5f5f1a 100644 --- a/drivers/gpu/drm/gma500/fbdev.c +++ b/drivers/gpu/drm/gma500/fbdev.c @@ -6,7 +6,6 @@ **************************************************************************/ #include <linux/fb.h> -#include <linux/pfn_t.h> #include <drm/drm_crtc_helper.h> #include <drm/drm_drv.h> @@ -33,7 +32,7 @@ static vm_fault_t psb_fbdev_vm_fault(struct vm_fault *vmf) vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); for (i = 0; i < page_num; ++i) { - err = vmf_insert_mixed(vma, address, __pfn_to_pfn_t(pfn, PFN_DEV)); + err = vmf_insert_mixed(vma, address, pfn); if (unlikely(err & VM_FAULT_ERROR)) break; address += PAGE_SIZE; @@ -121,7 +120,6 @@ static void psb_fbdev_fb_destroy(struct fb_info *info) drm_fb_helper_fini(fb_helper); drm_framebuffer_unregister_private(fb); - fb->obj[0] = NULL; drm_framebuffer_cleanup(fb); kfree(fb); @@ -203,7 +201,10 @@ int psb_fbdev_driver_fbdev_probe(struct drm_fb_helper *fb_helper, return PTR_ERR(backing); obj = &backing->base; - fb = psb_framebuffer_create(dev, &mode_cmd, obj); + fb = psb_framebuffer_create(dev, + drm_get_format_info(dev, mode_cmd.pixel_format, + mode_cmd.modifier[0]), + &mode_cmd, obj); if (IS_ERR(fb)) { ret = PTR_ERR(fb); goto err_drm_gem_object_put; @@ -243,7 +244,6 @@ int psb_fbdev_driver_fbdev_probe(struct drm_fb_helper *fb_helper, err_drm_framebuffer_unregister_private: drm_framebuffer_unregister_private(fb); - fb->obj[0] = NULL; drm_framebuffer_cleanup(fb); kfree(fb); err_drm_gem_object_put: diff --git a/drivers/gpu/drm/gma500/framebuffer.c b/drivers/gpu/drm/gma500/framebuffer.c index 1a374702b696..e69b537ded6b 100644 --- a/drivers/gpu/drm/gma500/framebuffer.c +++ b/drivers/gpu/drm/gma500/framebuffer.c @@ -29,24 +29,23 @@ static const struct drm_framebuffer_funcs psb_fb_funcs = { */ static int psb_framebuffer_init(struct drm_device *dev, struct drm_framebuffer *fb, + const struct drm_format_info *info, const struct drm_mode_fb_cmd2 *mode_cmd, struct drm_gem_object *obj) { - const struct drm_format_info *info; int ret; /* * Reject unknown formats, YUV formats, and formats with more than * 4 bytes per pixel. 
*/ - info = drm_get_format_info(dev, mode_cmd); - if (!info || !info->depth || info->cpp[0] > 4) + if (!info->depth || info->cpp[0] > 4) return -EINVAL; if (mode_cmd->pitches[0] & 63) return -EINVAL; - drm_helper_mode_fill_fb_struct(dev, fb, mode_cmd); + drm_helper_mode_fill_fb_struct(dev, fb, info, mode_cmd); fb->obj[0] = obj; ret = drm_framebuffer_init(dev, fb, &psb_fb_funcs); if (ret) { @@ -59,6 +58,7 @@ static int psb_framebuffer_init(struct drm_device *dev, /** * psb_framebuffer_create - create a framebuffer backed by gt * @dev: our DRM device + * @info: pixel format information * @mode_cmd: the description of the requested mode * @obj: the backing object * @@ -68,6 +68,7 @@ static int psb_framebuffer_init(struct drm_device *dev, * TODO: review object references */ struct drm_framebuffer *psb_framebuffer_create(struct drm_device *dev, + const struct drm_format_info *info, const struct drm_mode_fb_cmd2 *mode_cmd, struct drm_gem_object *obj) { @@ -78,7 +79,7 @@ struct drm_framebuffer *psb_framebuffer_create(struct drm_device *dev, if (!fb) return ERR_PTR(-ENOMEM); - ret = psb_framebuffer_init(dev, fb, mode_cmd, obj); + ret = psb_framebuffer_init(dev, fb, info, mode_cmd, obj); if (ret) { kfree(fb); return ERR_PTR(ret); @@ -96,6 +97,7 @@ struct drm_framebuffer *psb_framebuffer_create(struct drm_device *dev, */ static struct drm_framebuffer *psb_user_framebuffer_create (struct drm_device *dev, struct drm_file *filp, + const struct drm_format_info *info, const struct drm_mode_fb_cmd2 *cmd) { struct drm_gem_object *obj; @@ -110,7 +112,7 @@ static struct drm_framebuffer *psb_user_framebuffer_create return ERR_PTR(-ENOENT); /* Let the core code do all the work */ - fb = psb_framebuffer_create(dev, cmd, obj); + fb = psb_framebuffer_create(dev, info, cmd, obj); if (IS_ERR(fb)) drm_gem_object_put(obj); diff --git a/drivers/gpu/drm/gma500/oaktrail_hdmi.c b/drivers/gpu/drm/gma500/oaktrail_hdmi.c index 1cf394369127..c0feca58511d 100644 --- a/drivers/gpu/drm/gma500/oaktrail_hdmi.c +++ b/drivers/gpu/drm/gma500/oaktrail_hdmi.c @@ -726,8 +726,8 @@ void oaktrail_hdmi_teardown(struct drm_device *dev) if (hdmi_dev) { pdev = hdmi_dev->dev; - pci_set_drvdata(pdev, NULL); oaktrail_hdmi_i2c_exit(pdev); + pci_set_drvdata(pdev, NULL); iounmap(hdmi_dev->regs); kfree(hdmi_dev); pci_dev_put(pdev); diff --git a/drivers/gpu/drm/gma500/psb_drv.h b/drivers/gpu/drm/gma500/psb_drv.h index 7f77cb2b2751..0b27112ec46f 100644 --- a/drivers/gpu/drm/gma500/psb_drv.h +++ b/drivers/gpu/drm/gma500/psb_drv.h @@ -594,6 +594,7 @@ extern void psb_modeset_cleanup(struct drm_device *dev); /* framebuffer */ struct drm_framebuffer *psb_framebuffer_create(struct drm_device *dev, + const struct drm_format_info *info, const struct drm_mode_fb_cmd2 *mode_cmd, struct drm_gem_object *obj); diff --git a/drivers/gpu/drm/gud/gud_connector.c b/drivers/gpu/drm/gud/gud_connector.c index 0f07d77c5d52..4a15695fa933 100644 --- a/drivers/gpu/drm/gud/gud_connector.c +++ b/drivers/gpu/drm/gud/gud_connector.c @@ -16,7 +16,6 @@ #include <drm/drm_modeset_helper_vtables.h> #include <drm/drm_print.h> #include <drm/drm_probe_helper.h> -#include <drm/drm_simple_kms_helper.h> #include <drm/gud.h> #include "gud_internal.h" @@ -607,13 +606,16 @@ int gud_connector_fill_properties(struct drm_connector_state *connector_state, return gconn->num_properties; } +static const struct drm_encoder_funcs gud_drm_simple_encoder_funcs_cleanup = { + .destroy = drm_encoder_cleanup, +}; + static int gud_connector_create(struct gud_device *gdrm, unsigned int index, struct 
gud_connector_descriptor_req *desc) { struct drm_device *drm = &gdrm->drm; struct gud_connector *gconn; struct drm_connector *connector; - struct drm_encoder *encoder; int ret, connector_type; u32 flags; @@ -681,20 +683,13 @@ static int gud_connector_create(struct gud_device *gdrm, unsigned int index, return ret; } - /* The first connector is attached to the existing simple pipe encoder */ - if (!connector->index) { - encoder = &gdrm->pipe.encoder; - } else { - encoder = &gconn->encoder; - - ret = drm_simple_encoder_init(drm, encoder, DRM_MODE_ENCODER_NONE); - if (ret) - return ret; - - encoder->possible_crtcs = 1; - } + gconn->encoder.possible_crtcs = drm_crtc_mask(&gdrm->crtc); + ret = drm_encoder_init(drm, &gconn->encoder, &gud_drm_simple_encoder_funcs_cleanup, + DRM_MODE_ENCODER_NONE, NULL); + if (ret) + return ret; - return drm_connector_attach_encoder(connector, encoder); + return drm_connector_attach_encoder(connector, &gconn->encoder); } int gud_get_connectors(struct gud_device *gdrm) diff --git a/drivers/gpu/drm/gud/gud_drv.c b/drivers/gpu/drm/gud/gud_drv.c index 5385a2126e45..b7345c8d823d 100644 --- a/drivers/gpu/drm/gud/gud_drv.c +++ b/drivers/gpu/drm/gud/gud_drv.c @@ -16,6 +16,7 @@ #include <drm/clients/drm_client_setup.h> #include <drm/drm_atomic_helper.h> #include <drm/drm_blend.h> +#include <drm/drm_crtc_helper.h> #include <drm/drm_damage_helper.h> #include <drm/drm_debugfs.h> #include <drm/drm_drv.h> @@ -27,7 +28,6 @@ #include <drm/drm_managed.h> #include <drm/drm_print.h> #include <drm/drm_probe_helper.h> -#include <drm/drm_simple_kms_helper.h> #include <drm/gud.h> #include "gud_internal.h" @@ -289,7 +289,7 @@ static int gud_get_properties(struct gud_device *gdrm) * but mask out any additions on future devices. */ val &= GUD_ROTATION_MASK; - ret = drm_plane_create_rotation_property(&gdrm->pipe.plane, + ret = drm_plane_create_rotation_property(&gdrm->plane, DRM_MODE_ROTATE_0, val); break; default: @@ -338,10 +338,30 @@ static int gud_stats_debugfs(struct seq_file *m, void *data) return 0; } -static const struct drm_simple_display_pipe_funcs gud_pipe_funcs = { - .check = gud_pipe_check, - .update = gud_pipe_update, - DRM_GEM_SIMPLE_DISPLAY_PIPE_SHADOW_PLANE_FUNCS +static const struct drm_crtc_helper_funcs gud_crtc_helper_funcs = { + .atomic_check = drm_crtc_helper_atomic_check +}; + +static const struct drm_crtc_funcs gud_crtc_funcs = { + .reset = drm_atomic_helper_crtc_reset, + .destroy = drm_crtc_cleanup, + .set_config = drm_atomic_helper_set_config, + .page_flip = drm_atomic_helper_page_flip, + .atomic_duplicate_state = drm_atomic_helper_crtc_duplicate_state, + .atomic_destroy_state = drm_atomic_helper_crtc_destroy_state, +}; + +static const struct drm_plane_helper_funcs gud_plane_helper_funcs = { + DRM_GEM_SHADOW_PLANE_HELPER_FUNCS, + .atomic_check = gud_plane_atomic_check, + .atomic_update = gud_plane_atomic_update, +}; + +static const struct drm_plane_funcs gud_plane_funcs = { + .update_plane = drm_atomic_helper_update_plane, + .disable_plane = drm_atomic_helper_disable_plane, + .destroy = drm_plane_cleanup, + DRM_GEM_SHADOW_PLANE_FUNCS, }; static const struct drm_mode_config_funcs gud_mode_config_funcs = { @@ -350,7 +370,7 @@ static const struct drm_mode_config_funcs gud_mode_config_funcs = { .atomic_commit = drm_atomic_helper_commit, }; -static const u64 gud_pipe_modifiers[] = { +static const u64 gud_plane_modifiers[] = { DRM_FORMAT_MOD_LINEAR, DRM_FORMAT_MOD_INVALID }; @@ -567,12 +587,17 @@ static int gud_probe(struct usb_interface *intf, const struct usb_device_id 
*id) return -ENOMEM; } - ret = drm_simple_display_pipe_init(drm, &gdrm->pipe, &gud_pipe_funcs, - formats, num_formats, - gud_pipe_modifiers, NULL); + ret = drm_universal_plane_init(drm, &gdrm->plane, 0, + &gud_plane_funcs, + formats, num_formats, + gud_plane_modifiers, + DRM_PLANE_TYPE_PRIMARY, NULL); if (ret) return ret; + drm_plane_helper_add(&gdrm->plane, &gud_plane_helper_funcs); + drm_plane_enable_fb_damage_clips(&gdrm->plane); + devm_kfree(dev, formats); devm_kfree(dev, formats_dev); @@ -582,7 +607,12 @@ static int gud_probe(struct usb_interface *intf, const struct usb_device_id *id) return ret; } - drm_plane_enable_fb_damage_clips(&gdrm->pipe.plane); + ret = drm_crtc_init_with_planes(drm, &gdrm->crtc, &gdrm->plane, NULL, + &gud_crtc_funcs, NULL); + if (ret) + return ret; + + drm_crtc_helper_add(&gdrm->crtc, &gud_crtc_helper_funcs); ret = gud_get_connectors(gdrm); if (ret) { @@ -620,8 +650,6 @@ static void gud_disconnect(struct usb_interface *interface) struct gud_device *gdrm = usb_get_intfdata(interface); struct drm_device *drm = &gdrm->drm; - drm_dbg(drm, "%s:\n", __func__); - drm_kms_helper_poll_fini(drm); drm_dev_unplug(drm); drm_atomic_helper_shutdown(drm); diff --git a/drivers/gpu/drm/gud/gud_internal.h b/drivers/gpu/drm/gud/gud_internal.h index d6fb25388722..d27c31648341 100644 --- a/drivers/gpu/drm/gud/gud_internal.h +++ b/drivers/gpu/drm/gud/gud_internal.h @@ -11,11 +11,11 @@ #include <uapi/drm/drm_fourcc.h> #include <drm/drm_modes.h> -#include <drm/drm_simple_kms_helper.h> struct gud_device { struct drm_device drm; - struct drm_simple_display_pipe pipe; + struct drm_plane plane; + struct drm_crtc crtc; struct work_struct work; u32 flags; const struct drm_format_info *xrgb8888_emulation_format; @@ -62,11 +62,10 @@ int gud_usb_set_u8(struct gud_device *gdrm, u8 request, u8 val); void gud_clear_damage(struct gud_device *gdrm); void gud_flush_work(struct work_struct *work); -int gud_pipe_check(struct drm_simple_display_pipe *pipe, - struct drm_plane_state *new_plane_state, - struct drm_crtc_state *new_crtc_state); -void gud_pipe_update(struct drm_simple_display_pipe *pipe, - struct drm_plane_state *old_state); +int gud_plane_atomic_check(struct drm_plane *plane, + struct drm_atomic_state *state); +void gud_plane_atomic_update(struct drm_plane *plane, + struct drm_atomic_state *atomic_state); int gud_connector_fill_properties(struct drm_connector_state *connector_state, struct gud_property_req *properties); int gud_get_connectors(struct gud_device *gdrm); diff --git a/drivers/gpu/drm/gud/gud_pipe.c b/drivers/gpu/drm/gud/gud_pipe.c index adadd526641d..54d9aa9998e5 100644 --- a/drivers/gpu/drm/gud/gud_pipe.c +++ b/drivers/gpu/drm/gud/gud_pipe.c @@ -20,7 +20,6 @@ #include <drm/drm_gem_framebuffer_helper.h> #include <drm/drm_print.h> #include <drm/drm_rect.h> -#include <drm/drm_simple_kms_helper.h> #include <drm/gud.h> #include "gud_internal.h" @@ -188,8 +187,13 @@ retry: } else if (format->format == DRM_FORMAT_RGB332) { drm_fb_xrgb8888_to_rgb332(&dst, NULL, src, fb, rect, fmtcnv_state); } else if (format->format == DRM_FORMAT_RGB565) { - drm_fb_xrgb8888_to_rgb565(&dst, NULL, src, fb, rect, fmtcnv_state, - gud_is_big_endian()); + if (gud_is_big_endian()) { + drm_fb_xrgb8888_to_rgb565be(&dst, NULL, src, fb, rect, + fmtcnv_state); + } else { + drm_fb_xrgb8888_to_rgb565(&dst, NULL, src, fb, rect, + fmtcnv_state); + } } else if (format->format == DRM_FORMAT_RGB888) { drm_fb_xrgb8888_to_rgb888(&dst, NULL, src, fb, rect, fmtcnv_state); } else { @@ -446,14 +450,15 @@ static void 
gud_fb_handle_damage(struct gud_device *gdrm, struct drm_framebuffer gud_flush_damage(gdrm, fb, src, !fb->obj[0]->import_attach, damage); } -int gud_pipe_check(struct drm_simple_display_pipe *pipe, - struct drm_plane_state *new_plane_state, - struct drm_crtc_state *new_crtc_state) +int gud_plane_atomic_check(struct drm_plane *plane, + struct drm_atomic_state *state) { - struct gud_device *gdrm = to_gud_device(pipe->crtc.dev); - struct drm_plane_state *old_plane_state = pipe->plane.state; - const struct drm_display_mode *mode = &new_crtc_state->mode; - struct drm_atomic_state *state = new_plane_state->state; + struct gud_device *gdrm = to_gud_device(plane->dev); + struct drm_plane_state *old_plane_state = drm_atomic_get_old_plane_state(state, plane); + struct drm_plane_state *new_plane_state = drm_atomic_get_new_plane_state(state, plane); + struct drm_crtc *crtc = new_plane_state->crtc; + struct drm_crtc_state *crtc_state; + const struct drm_display_mode *mode; struct drm_framebuffer *old_fb = old_plane_state->fb; struct drm_connector_state *connector_state = NULL; struct drm_framebuffer *fb = new_plane_state->fb; @@ -464,20 +469,37 @@ int gud_pipe_check(struct drm_simple_display_pipe *pipe, int idx, ret; size_t len; - if (WARN_ON_ONCE(!fb)) + if (drm_WARN_ON_ONCE(plane->dev, !fb)) + return -EINVAL; + + if (drm_WARN_ON_ONCE(plane->dev, !crtc)) return -EINVAL; + crtc_state = drm_atomic_get_new_crtc_state(state, crtc); + + mode = &crtc_state->mode; + + ret = drm_atomic_helper_check_plane_state(new_plane_state, crtc_state, + DRM_PLANE_NO_SCALING, + DRM_PLANE_NO_SCALING, + false, false); + if (ret) + return ret; + + if (!new_plane_state->visible) + return 0; + if (old_plane_state->rotation != new_plane_state->rotation) - new_crtc_state->mode_changed = true; + crtc_state->mode_changed = true; if (old_fb && old_fb->format != format) - new_crtc_state->mode_changed = true; + crtc_state->mode_changed = true; - if (!new_crtc_state->mode_changed && !new_crtc_state->connectors_changed) + if (!crtc_state->mode_changed && !crtc_state->connectors_changed) return 0; /* Only one connector is supported */ - if (hweight32(new_crtc_state->connector_mask) != 1) + if (hweight32(crtc_state->connector_mask) != 1) return -EINVAL; if (format->format == DRM_FORMAT_XRGB8888 && gdrm->xrgb8888_emulation_format) @@ -495,7 +517,7 @@ int gud_pipe_check(struct drm_simple_display_pipe *pipe, if (!connector_state) { struct drm_connector_list_iter conn_iter; - drm_connector_list_iter_begin(pipe->crtc.dev, &conn_iter); + drm_connector_list_iter_begin(plane->dev, &conn_iter); drm_for_each_connector_iter(connector, &conn_iter) { if (connector->state->crtc) { connector_state = connector->state; @@ -562,16 +584,18 @@ out: return ret; } -void gud_pipe_update(struct drm_simple_display_pipe *pipe, - struct drm_plane_state *old_state) +void gud_plane_atomic_update(struct drm_plane *plane, + struct drm_atomic_state *atomic_state) { - struct drm_device *drm = pipe->crtc.dev; + struct drm_device *drm = plane->dev; struct gud_device *gdrm = to_gud_device(drm); - struct drm_plane_state *state = pipe->plane.state; - struct drm_shadow_plane_state *shadow_plane_state = to_drm_shadow_plane_state(state); - struct drm_framebuffer *fb = state->fb; - struct drm_crtc *crtc = &pipe->crtc; + struct drm_plane_state *old_state = drm_atomic_get_old_plane_state(atomic_state, plane); + struct drm_plane_state *new_state = drm_atomic_get_new_plane_state(atomic_state, plane); + struct drm_shadow_plane_state *shadow_plane_state = 
to_drm_shadow_plane_state(new_state); + struct drm_framebuffer *fb = new_state->fb; + struct drm_crtc *crtc = new_state->crtc; struct drm_rect damage; + struct drm_atomic_helper_damage_iter iter; int ret, idx; if (crtc->state->mode_changed || !crtc->state->enable) { @@ -606,7 +630,8 @@ void gud_pipe_update(struct drm_simple_display_pipe *pipe, if (ret) goto ctrl_disable; - if (drm_atomic_helper_damage_merged(old_state, state, &damage)) + drm_atomic_helper_damage_iter_init(&iter, old_state, new_state); + drm_atomic_for_each_plane_damage(&iter, &damage) gud_fb_handle_damage(gdrm, fb, &shadow_plane_state->data[0], &damage); drm_gem_fb_end_cpu_access(fb, DMA_FROM_DEVICE); diff --git a/drivers/gpu/drm/hisilicon/hibmc/dp/dp_link.c b/drivers/gpu/drm/hisilicon/hibmc/dp/dp_link.c index 74f7832ea53e..0726cb5b736e 100644 --- a/drivers/gpu/drm/hisilicon/hibmc/dp/dp_link.c +++ b/drivers/gpu/drm/hisilicon/hibmc/dp/dp_link.c @@ -325,6 +325,17 @@ static int hibmc_dp_link_downgrade_training_eq(struct hibmc_dp_dev *dp) return hibmc_dp_link_reduce_rate(dp); } +static void hibmc_dp_update_caps(struct hibmc_dp_dev *dp) +{ + dp->link.cap.link_rate = dp->dpcd[DP_MAX_LINK_RATE]; + if (dp->link.cap.link_rate > DP_LINK_BW_8_1 || !dp->link.cap.link_rate) + dp->link.cap.link_rate = DP_LINK_BW_8_1; + + dp->link.cap.lanes = dp->dpcd[DP_MAX_LANE_COUNT] & DP_MAX_LANE_COUNT_MASK; + if (dp->link.cap.lanes > HIBMC_DP_LANE_NUM_MAX) + dp->link.cap.lanes = HIBMC_DP_LANE_NUM_MAX; +} + int hibmc_dp_link_training(struct hibmc_dp_dev *dp) { struct hibmc_dp_link *link = &dp->link; @@ -334,8 +345,7 @@ int hibmc_dp_link_training(struct hibmc_dp_dev *dp) if (ret) drm_err(dp->dev, "dp aux read dpcd failed, ret: %d\n", ret); - dp->link.cap.link_rate = dp->dpcd[DP_MAX_LINK_RATE]; - dp->link.cap.lanes = 0x2; + hibmc_dp_update_caps(dp); ret = hibmc_dp_get_serdes_rate_cfg(dp); if (ret < 0) diff --git a/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_drv.c b/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_drv.c index 768b97f9e74a..289304500ab0 100644 --- a/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_drv.c +++ b/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_drv.c @@ -32,7 +32,7 @@ DEFINE_DRM_GEM_FOPS(hibmc_fops); -static const char *g_irqs_names_map[HIBMC_MAX_VECTORS] = { "vblank", "hpd" }; +static const char *g_irqs_names_map[HIBMC_MAX_VECTORS] = { "hibmc-vblank", "hibmc-hpd" }; static irqreturn_t hibmc_interrupt(int irq, void *arg) { @@ -115,6 +115,8 @@ static const struct drm_mode_config_funcs hibmc_mode_funcs = { static int hibmc_kms_init(struct hibmc_drm_private *priv) { struct drm_device *dev = &priv->dev; + struct drm_encoder *encoder; + u32 clone_mask = 0; int ret; ret = drmm_mode_config_init(dev); @@ -154,6 +156,12 @@ static int hibmc_kms_init(struct hibmc_drm_private *priv) return ret; } + drm_for_each_encoder(encoder, dev) + clone_mask |= drm_encoder_mask(encoder); + + drm_for_each_encoder(encoder, dev) + encoder->possible_clones = clone_mask; + return 0; } @@ -277,7 +285,6 @@ static void hibmc_unload(struct drm_device *dev) static int hibmc_msi_init(struct drm_device *dev) { struct pci_dev *pdev = to_pci_dev(dev->dev); - char name[32] = {0}; int valid_irq_num; int irq; int ret; @@ -292,9 +299,6 @@ static int hibmc_msi_init(struct drm_device *dev) valid_irq_num = ret; for (int i = 0; i < valid_irq_num; i++) { - snprintf(name, ARRAY_SIZE(name) - 1, "%s-%s-%s", - dev->driver->name, pci_name(pdev), g_irqs_names_map[i]); - irq = pci_irq_vector(pdev, i); if (i) @@ -302,10 +306,10 @@ static int hibmc_msi_init(struct drm_device *dev) ret = 
devm_request_threaded_irq(&pdev->dev, irq, hibmc_dp_interrupt, hibmc_dp_hpd_isr, - IRQF_SHARED, name, dev); + IRQF_SHARED, g_irqs_names_map[i], dev); else ret = devm_request_irq(&pdev->dev, irq, hibmc_interrupt, - IRQF_SHARED, name, dev); + IRQF_SHARED, g_irqs_names_map[i], dev); if (ret) { drm_err(dev, "install irq failed: %d\n", ret); return ret; @@ -323,13 +327,13 @@ static int hibmc_load(struct drm_device *dev) ret = hibmc_hw_init(priv); if (ret) - goto err; + return ret; ret = drmm_vram_helper_init(dev, pci_resource_start(pdev, 0), pci_resource_len(pdev, 0)); if (ret) { drm_err(dev, "Error initializing VRAM MM; %d\n", ret); - goto err; + return ret; } ret = hibmc_kms_init(priv); diff --git a/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_drv.h b/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_drv.h index 274feabe7df0..ca8502e2760c 100644 --- a/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_drv.h +++ b/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_drv.h @@ -69,6 +69,7 @@ int hibmc_de_init(struct hibmc_drm_private *priv); int hibmc_vdac_init(struct hibmc_drm_private *priv); int hibmc_ddc_create(struct drm_device *drm_dev, struct hibmc_vdac *connector); +void hibmc_ddc_del(struct hibmc_vdac *vdac); int hibmc_dp_init(struct hibmc_drm_private *priv); diff --git a/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_i2c.c b/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_i2c.c index 99b3b77b5445..44860011855e 100644 --- a/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_i2c.c +++ b/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_i2c.c @@ -95,3 +95,8 @@ int hibmc_ddc_create(struct drm_device *drm_dev, struct hibmc_vdac *vdac) return i2c_bit_add_bus(&vdac->adapter); } + +void hibmc_ddc_del(struct hibmc_vdac *vdac) +{ + i2c_del_adapter(&vdac->adapter); +} diff --git a/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_vdac.c b/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_vdac.c index e8a527ede854..841e81f47b68 100644 --- a/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_vdac.c +++ b/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_vdac.c @@ -53,7 +53,7 @@ static void hibmc_connector_destroy(struct drm_connector *connector) { struct hibmc_vdac *vdac = to_hibmc_vdac(connector); - i2c_del_adapter(&vdac->adapter); + hibmc_ddc_del(vdac); drm_connector_cleanup(connector); } @@ -110,7 +110,7 @@ int hibmc_vdac_init(struct hibmc_drm_private *priv) ret = drmm_encoder_init(dev, encoder, NULL, DRM_MODE_ENCODER_DAC, NULL); if (ret) { drm_err(dev, "failed to init encoder: %d\n", ret); - return ret; + goto err; } drm_encoder_helper_add(encoder, &hibmc_encoder_helper_funcs); @@ -121,7 +121,7 @@ int hibmc_vdac_init(struct hibmc_drm_private *priv) &vdac->adapter); if (ret) { drm_err(dev, "failed to init connector: %d\n", ret); - return ret; + goto err; } drm_connector_helper_add(connector, &hibmc_connector_helper_funcs); @@ -131,4 +131,9 @@ int hibmc_vdac_init(struct hibmc_drm_private *priv) connector->polled = DRM_CONNECTOR_POLL_CONNECT | DRM_CONNECTOR_POLL_DISCONNECT; return 0; + +err: + hibmc_ddc_del(vdac); + + return ret; } diff --git a/drivers/gpu/drm/i915/Kconfig.debug b/drivers/gpu/drm/i915/Kconfig.debug index 1852e0804942..3562a02ef7ad 100644 --- a/drivers/gpu/drm/i915/Kconfig.debug +++ b/drivers/gpu/drm/i915/Kconfig.debug @@ -50,7 +50,7 @@ config DRM_I915_DEBUG select DRM_VGEM # used by igt/prime_vgem (dmabuf interop checks) select DRM_DEBUG_MM if DRM=y select DRM_EXPORT_FOR_TESTS if m - select DRM_DEBUG_SELFTEST + select DRM_KUNIT_TEST if KUNIT select DMABUF_SELFTESTS select SW_SYNC # signaling validation framework (igt/syncobj*) select DRM_I915_WERROR diff --git 
a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile index 7c6075bc483c..e58c0c158b3a 100644 --- a/drivers/gpu/drm/i915/Makefile +++ b/drivers/gpu/drm/i915/Makefile @@ -32,6 +32,7 @@ i915-y += \ i915_scatterlist.o \ i915_switcheroo.o \ i915_sysfs.o \ + i915_timer_util.o \ i915_utils.o \ intel_clock_gating.o \ intel_cpu_info.o \ @@ -218,12 +219,11 @@ i915-$(CONFIG_HWMON) += \ # modesetting core code i915-y += \ display/hsw_ips.o \ - display/i9xx_plane.o \ display/i9xx_display_sr.o \ + display/i9xx_plane.o \ display/i9xx_wm.o \ display/intel_alpm.o \ display/intel_atomic.o \ - display/intel_atomic_plane.o \ display/intel_audio.o \ display/intel_bios.o \ display/intel_bo.o \ @@ -265,6 +265,7 @@ i915-y += \ display/intel_fbc.o \ display/intel_fdi.o \ display/intel_fifo_underrun.o \ + display/intel_flipq.o \ display/intel_frontbuffer.o \ display/intel_global_state.o \ display/intel_hdcp.o \ @@ -280,9 +281,11 @@ i915-y += \ display/intel_modeset_setup.o \ display/intel_modeset_verify.o \ display/intel_overlay.o \ + display/intel_panic.o \ display/intel_pch.o \ display/intel_pch_display.o \ display/intel_pch_refclk.o \ + display/intel_plane.o \ display/intel_plane_initial.o \ display/intel_pmdemand.o \ display/intel_psr.o \ diff --git a/drivers/gpu/drm/i915/display/g4x_dp.c b/drivers/gpu/drm/i915/display/g4x_dp.c index 87f6b9602b16..aa159f9ce12f 100644 --- a/drivers/gpu/drm/i915/display/g4x_dp.c +++ b/drivers/gpu/drm/i915/display/g4x_dp.c @@ -424,17 +424,6 @@ intel_dp_link_down(struct intel_encoder *encoder, drm_dbg_kms(display->drm, "\n"); - if ((display->platform.ivybridge && port == PORT_A) || - (HAS_PCH_CPT(display) && port != PORT_A)) { - intel_dp->DP &= ~DP_LINK_TRAIN_MASK_CPT; - intel_dp->DP |= DP_LINK_TRAIN_PAT_IDLE_CPT; - } else { - intel_dp->DP &= ~DP_LINK_TRAIN_MASK; - intel_dp->DP |= DP_LINK_TRAIN_PAT_IDLE; - } - intel_de_write(display, intel_dp->output_reg, intel_dp->DP); - intel_de_posting_read(display, intel_dp->output_reg); - intel_dp->DP &= ~DP_PORT_EN; intel_de_write(display, intel_dp->output_reg, intel_dp->DP); intel_de_posting_read(display, intel_dp->output_reg); @@ -612,6 +601,19 @@ cpt_set_link_train(struct intel_dp *intel_dp, } static void +cpt_set_idle_link_train(struct intel_dp *intel_dp, + const struct intel_crtc_state *crtc_state) +{ + struct intel_display *display = to_intel_display(intel_dp); + + intel_dp->DP &= ~DP_LINK_TRAIN_MASK_CPT; + intel_dp->DP |= DP_LINK_TRAIN_PAT_IDLE_CPT; + + intel_de_write(display, intel_dp->output_reg, intel_dp->DP); + intel_de_posting_read(display, intel_dp->output_reg); +} + +static void g4x_set_link_train(struct intel_dp *intel_dp, const struct intel_crtc_state *crtc_state, u8 dp_train_pat) @@ -639,6 +641,19 @@ g4x_set_link_train(struct intel_dp *intel_dp, intel_de_posting_read(display, intel_dp->output_reg); } +static void +g4x_set_idle_link_train(struct intel_dp *intel_dp, + const struct intel_crtc_state *crtc_state) +{ + struct intel_display *display = to_intel_display(intel_dp); + + intel_dp->DP &= ~DP_LINK_TRAIN_MASK; + intel_dp->DP |= DP_LINK_TRAIN_PAT_IDLE; + + intel_de_write(display, intel_dp->output_reg, intel_dp->DP); + intel_de_posting_read(display, intel_dp->output_reg); +} + static void intel_dp_enable_port(struct intel_dp *intel_dp, const struct intel_crtc_state *crtc_state) { @@ -1285,12 +1300,10 @@ bool g4x_dp_init(struct intel_display *display, drm_dbg_kms(display->drm, "No VBT child device for DP-%c\n", port_name(port)); - dig_port = kzalloc(sizeof(*dig_port), GFP_KERNEL); + dig_port = 
intel_dig_port_alloc(); if (!dig_port) return false; - dig_port->aux_ch = AUX_CH_NONE; - intel_connector = intel_connector_alloc(); if (!intel_connector) goto err_connector_alloc; @@ -1300,8 +1313,6 @@ bool g4x_dp_init(struct intel_display *display, intel_encoder->devdata = devdata; - mutex_init(&dig_port->hdcp.mutex); - if (drm_encoder_init(display->drm, &intel_encoder->base, &intel_dp_enc_funcs, DRM_MODE_ENCODER_TMDS, "DP %c", port_name(port))) @@ -1342,10 +1353,13 @@ bool g4x_dp_init(struct intel_display *display, intel_encoder->audio_disable = g4x_dp_audio_disable; if ((display->platform.ivybridge && port == PORT_A) || - (HAS_PCH_CPT(display) && port != PORT_A)) + (HAS_PCH_CPT(display) && port != PORT_A)) { dig_port->dp.set_link_train = cpt_set_link_train; - else + dig_port->dp.set_idle_link_train = cpt_set_idle_link_train; + } else { dig_port->dp.set_link_train = g4x_set_link_train; + dig_port->dp.set_idle_link_train = g4x_set_idle_link_train; + } if (display->platform.cherryview) intel_encoder->set_signal_levels = chv_set_signal_levels; @@ -1368,7 +1382,6 @@ bool g4x_dp_init(struct intel_display *display, } dig_port->dp.output_reg = output_reg; - dig_port->max_lanes = 4; intel_encoder->type = INTEL_OUTPUT_DP; intel_encoder->power_domain = intel_display_power_ddi_lanes_domain(display, port); diff --git a/drivers/gpu/drm/i915/display/g4x_hdmi.c b/drivers/gpu/drm/i915/display/g4x_hdmi.c index 2610f5702fb9..f6e2d1ed5639 100644 --- a/drivers/gpu/drm/i915/display/g4x_hdmi.c +++ b/drivers/gpu/drm/i915/display/g4x_hdmi.c @@ -19,7 +19,7 @@ #include "intel_display_types.h" #include "intel_dp_aux.h" #include "intel_dpio_phy.h" -#include "intel_fdi.h" +#include "intel_encoder.h" #include "intel_fifo_underrun.h" #include "intel_hdmi.h" #include "intel_hotplug.h" @@ -135,11 +135,8 @@ static int g4x_hdmi_compute_config(struct intel_encoder *encoder, struct intel_atomic_state *state = to_intel_atomic_state(crtc_state->uapi.state); struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); - if (HAS_PCH_SPLIT(display)) { + if (HAS_PCH_SPLIT(display)) crtc_state->has_pch_encoder = true; - if (!intel_fdi_compute_pipe_bpp(crtc_state)) - return -EINVAL; - } if (display->platform.g4x) crtc_state->has_hdmi_sink = g4x_compute_has_hdmi_sink(state, crtc); @@ -690,12 +687,10 @@ bool g4x_hdmi_init(struct intel_display *display, drm_dbg_kms(display->drm, "No VBT child device for HDMI-%c\n", port_name(port)); - dig_port = kzalloc(sizeof(*dig_port), GFP_KERNEL); + dig_port = intel_dig_port_alloc(); if (!dig_port) return false; - dig_port->aux_ch = AUX_CH_NONE; - intel_connector = intel_connector_alloc(); if (!intel_connector) goto err_connector_alloc; @@ -704,8 +699,6 @@ bool g4x_hdmi_init(struct intel_display *display, intel_encoder->devdata = devdata; - mutex_init(&dig_port->hdcp.mutex); - if (drm_encoder_init(display->drm, &intel_encoder->base, &intel_hdmi_enc_funcs, DRM_MODE_ENCODER_TMDS, "HDMI %c", port_name(port))) @@ -767,8 +760,6 @@ bool g4x_hdmi_init(struct intel_display *display, intel_encoder->cloneable |= BIT(INTEL_OUTPUT_HDMI); dig_port->hdmi.hdmi_reg = hdmi_reg; - dig_port->dp.output_reg = INVALID_MMIO_REG; - dig_port->max_lanes = 4; intel_infoframe_init(dig_port); diff --git a/drivers/gpu/drm/i915/display/hsw_ips.c b/drivers/gpu/drm/i915/display/hsw_ips.c index 0d33782f11be..927fe56aec77 100644 --- a/drivers/gpu/drm/i915/display/hsw_ips.c +++ b/drivers/gpu/drm/i915/display/hsw_ips.c @@ -5,8 +5,9 @@ #include <linux/debugfs.h> +#include <drm/drm_print.h> + #include "hsw_ips.h" -#include 
"i915_drv.h" #include "i915_reg.h" #include "intel_color_regs.h" #include "intel_de.h" @@ -18,8 +19,6 @@ static void hsw_ips_enable(const struct intel_crtc_state *crtc_state) { struct intel_display *display = to_intel_display(crtc_state); - struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); - struct drm_i915_private *i915 = to_i915(crtc->base.dev); u32 val; if (!crtc_state->ips_enabled) @@ -40,8 +39,8 @@ static void hsw_ips_enable(const struct intel_crtc_state *crtc_state) if (display->platform.broadwell) { drm_WARN_ON(display->drm, - snb_pcode_write(&i915->uncore, DISPLAY_IPS_CONTROL, - val | IPS_PCODE_CONTROL)); + intel_pcode_write(display->drm, DISPLAY_IPS_CONTROL, + val | IPS_PCODE_CONTROL)); /* * Quoting Art Runyan: "its not safe to expect any particular * value in IPS_CTL bit 31 after enabling IPS through the @@ -66,8 +65,6 @@ static void hsw_ips_enable(const struct intel_crtc_state *crtc_state) bool hsw_ips_disable(const struct intel_crtc_state *crtc_state) { struct intel_display *display = to_intel_display(crtc_state); - struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); - struct drm_i915_private *i915 = to_i915(crtc->base.dev); bool need_vblank_wait = false; if (!crtc_state->ips_enabled) @@ -75,7 +72,7 @@ bool hsw_ips_disable(const struct intel_crtc_state *crtc_state) if (display->platform.broadwell) { drm_WARN_ON(display->drm, - snb_pcode_write(&i915->uncore, DISPLAY_IPS_CONTROL, 0)); + intel_pcode_write(display->drm, DISPLAY_IPS_CONTROL, 0)); /* * Wait for PCODE to finish disabling IPS. The BSpec specified * 42ms timeout value leads to occasional timeouts so use 100ms @@ -268,7 +265,7 @@ int hsw_ips_compute_config(struct intel_atomic_state *state, return PTR_ERR(cdclk_state); /* pixel rate mustn't exceed 95% of cdclk with IPS on BDW */ - if (crtc_state->pixel_rate > cdclk_state->logical.cdclk * 95 / 100) + if (crtc_state->pixel_rate > intel_cdclk_logical(cdclk_state) * 95 / 100) return 0; } diff --git a/drivers/gpu/drm/i915/display/i9xx_plane.c b/drivers/gpu/drm/i915/display/i9xx_plane.c index 8f15333a4b07..407deb5dfb57 100644 --- a/drivers/gpu/drm/i915/display/i9xx_plane.c +++ b/drivers/gpu/drm/i915/display/i9xx_plane.c @@ -15,7 +15,6 @@ #include "i9xx_plane.h" #include "i9xx_plane_regs.h" #include "intel_atomic.h" -#include "intel_atomic_plane.h" #include "intel_de.h" #include "intel_display_irq.h" #include "intel_display_regs.h" @@ -23,6 +22,8 @@ #include "intel_fb.h" #include "intel_fbc.h" #include "intel_frontbuffer.h" +#include "intel_panic.h" +#include "intel_plane.h" #include "intel_sprite.h" /* Primary plane formats for gen <= 3 */ @@ -154,8 +155,7 @@ static bool i9xx_plane_has_windowing(struct intel_plane *plane) i9xx_plane == PLANE_C; } -static u32 i9xx_plane_ctl(const struct intel_crtc_state *crtc_state, - const struct intel_plane_state *plane_state) +static u32 i9xx_plane_ctl(const struct intel_plane_state *plane_state) { struct intel_display *display = to_intel_display(plane_state); const struct drm_framebuffer *fb = plane_state->hw.fb; @@ -336,10 +336,10 @@ i9xx_plane_check(struct intel_crtc_state *crtc_state, if (ret) return ret; - ret = intel_atomic_plane_check_clipping(plane_state, crtc_state, - DRM_PLANE_NO_SCALING, - DRM_PLANE_NO_SCALING, - i9xx_plane_has_windowing(plane)); + ret = intel_plane_check_clipping(plane_state, crtc_state, + DRM_PLANE_NO_SCALING, + DRM_PLANE_NO_SCALING, + i9xx_plane_has_windowing(plane)); if (ret) return ret; @@ -354,11 +354,24 @@ i9xx_plane_check(struct intel_crtc_state *crtc_state, if (ret) return ret; - 
plane_state->ctl = i9xx_plane_ctl(crtc_state, plane_state); + plane_state->ctl = i9xx_plane_ctl(plane_state); return 0; } +static u32 i8xx_plane_surf_offset(const struct intel_plane_state *plane_state) +{ + int x = plane_state->view.color_plane[0].x; + int y = plane_state->view.color_plane[0].y; + + return intel_fb_xy_to_linear(x, y, plane_state, 0); +} + +u32 i965_plane_surf_offset(const struct intel_plane_state *plane_state) +{ + return plane_state->view.color_plane[0].offset; +} + static u32 i9xx_plane_ctl_crtc(const struct intel_crtc_state *crtc_state) { struct intel_display *display = to_intel_display(crtc_state); @@ -462,7 +475,7 @@ static void i9xx_plane_update_arm(struct intel_dsb *dsb, enum i9xx_plane_id i9xx_plane = plane->i9xx_plane; int x = plane_state->view.color_plane[0].x; int y = plane_state->view.color_plane[0].y; - u32 dspcntr, dspaddr_offset, linear_offset; + u32 dspcntr; dspcntr = plane_state->ctl | i9xx_plane_ctl_crtc(crtc_state); @@ -471,13 +484,6 @@ static void i9xx_plane_update_arm(struct intel_dsb *dsb, crtc_state->async_flip_planes & BIT(plane->id)) dspcntr |= DISP_ASYNC_FLIP; - linear_offset = intel_fb_xy_to_linear(x, y, plane_state, 0); - - if (DISPLAY_VER(display) >= 4) - dspaddr_offset = plane_state->view.color_plane[0].offset; - else - dspaddr_offset = linear_offset; - if (display->platform.cherryview && i9xx_plane == PLANE_B) { int crtc_x = plane_state->uapi.dst.x1; int crtc_y = plane_state->uapi.dst.y1; @@ -497,7 +503,7 @@ static void i9xx_plane_update_arm(struct intel_dsb *dsb, DISP_OFFSET_Y(y) | DISP_OFFSET_X(x)); } else if (DISPLAY_VER(display) >= 4) { intel_de_write_fw(display, DSPLINOFF(display, i9xx_plane), - linear_offset); + intel_fb_xy_to_linear(x, y, plane_state, 0)); intel_de_write_fw(display, DSPTILEOFF(display, i9xx_plane), DISP_OFFSET_Y(y) | DISP_OFFSET_X(x)); } @@ -510,11 +516,9 @@ static void i9xx_plane_update_arm(struct intel_dsb *dsb, intel_de_write_fw(display, DSPCNTR(display, i9xx_plane), dspcntr); if (DISPLAY_VER(display) >= 4) - intel_de_write_fw(display, DSPSURF(display, i9xx_plane), - intel_plane_ggtt_offset(plane_state) + dspaddr_offset); + intel_de_write_fw(display, DSPSURF(display, i9xx_plane), plane_state->surf); else - intel_de_write_fw(display, DSPADDR(display, i9xx_plane), - intel_plane_ggtt_offset(plane_state) + dspaddr_offset); + intel_de_write_fw(display, DSPADDR(display, i9xx_plane), plane_state->surf); } static void i830_plane_update_arm(struct intel_dsb *dsb, @@ -603,16 +607,13 @@ g4x_primary_async_flip(struct intel_dsb *dsb, { struct intel_display *display = to_intel_display(plane); u32 dspcntr = plane_state->ctl | i9xx_plane_ctl_crtc(crtc_state); - u32 dspaddr_offset = plane_state->view.color_plane[0].offset; enum i9xx_plane_id i9xx_plane = plane->i9xx_plane; if (async_flip) dspcntr |= DISP_ASYNC_FLIP; intel_de_write_fw(display, DSPCNTR(display, i9xx_plane), dspcntr); - - intel_de_write_fw(display, DSPSURF(display, i9xx_plane), - intel_plane_ggtt_offset(plane_state) + dspaddr_offset); + intel_de_write_fw(display, DSPSURF(display, i9xx_plane), plane_state->surf); } static void @@ -623,11 +624,9 @@ vlv_primary_async_flip(struct intel_dsb *dsb, bool async_flip) { struct intel_display *display = to_intel_display(plane); - u32 dspaddr_offset = plane_state->view.color_plane[0].offset; enum i9xx_plane_id i9xx_plane = plane->i9xx_plane; - intel_de_write_fw(display, DSPADDR_VLV(display, i9xx_plane), - intel_plane_ggtt_offset(plane_state) + dspaddr_offset); + intel_de_write_fw(display, DSPADDR_VLV(display, i9xx_plane), 
plane_state->surf); } static void @@ -905,6 +904,27 @@ static const struct drm_plane_funcs i8xx_plane_funcs = { .format_mod_supported_async = intel_plane_format_mod_supported_async, }; +static void i9xx_disable_tiling(struct intel_plane *plane) +{ + struct intel_display *display = to_intel_display(plane); + enum i9xx_plane_id i9xx_plane = plane->i9xx_plane; + u32 dspcntr; + u32 reg; + + dspcntr = intel_de_read_fw(display, DSPCNTR(display, i9xx_plane)); + dspcntr &= ~DISP_TILED; + intel_de_write_fw(display, DSPCNTR(display, i9xx_plane), dspcntr); + + if (DISPLAY_VER(display) >= 4) { + reg = intel_de_read_fw(display, DSPSURF(display, i9xx_plane)); + intel_de_write_fw(display, DSPSURF(display, i9xx_plane), reg); + + } else { + reg = intel_de_read_fw(display, DSPADDR(display, i9xx_plane)); + intel_de_write_fw(display, DSPADDR(display, i9xx_plane), reg); + } +} + struct intel_plane * intel_primary_plane_create(struct intel_display *display, enum pipe pipe) { @@ -1015,6 +1035,11 @@ intel_primary_plane_create(struct intel_display *display, enum pipe pipe) plane->get_hw_state = i9xx_plane_get_hw_state; plane->check_plane = i9xx_plane_check; + if (DISPLAY_VER(display) >= 4) + plane->surf_offset = i965_plane_surf_offset; + else + plane->surf_offset = i8xx_plane_surf_offset; + if (DISPLAY_VER(display) >= 5 || display->platform.g4x) plane->capture_error = g4x_primary_capture_error; else if (DISPLAY_VER(display) >= 4) @@ -1047,6 +1072,8 @@ intel_primary_plane_create(struct intel_display *display, enum pipe pipe) } } + plane->disable_tiling = i9xx_disable_tiling; + modifiers = intel_fb_plane_get_modifiers(display, INTEL_PLANE_CAP_TILING_X); if (DISPLAY_VER(display) >= 5 || display->platform.g4x) @@ -1151,7 +1178,7 @@ i9xx_get_initial_plane_config(struct intel_crtc *crtc, drm_WARN_ON(display->drm, pipe != crtc->pipe); - intel_fb = kzalloc(sizeof(*intel_fb), GFP_KERNEL); + intel_fb = intel_framebuffer_alloc(); if (!intel_fb) { drm_dbg_kms(display->drm, "failed to alloc fb\n"); return; @@ -1230,24 +1257,21 @@ bool i9xx_fixup_initial_plane_config(struct intel_crtc *crtc, const struct intel_plane_state *plane_state = to_intel_plane_state(plane->base.state); enum i9xx_plane_id i9xx_plane = plane->i9xx_plane; - u32 base; if (!plane_state->uapi.visible) return false; - base = intel_plane_ggtt_offset(plane_state); - /* * We may have moved the surface to a different * part of ggtt, make the plane aware of that. 
*/ - if (plane_config->base == base) + if (plane_config->base == plane_state->surf) return false; if (DISPLAY_VER(display) >= 4) - intel_de_write(display, DSPSURF(display, i9xx_plane), base); + intel_de_write(display, DSPSURF(display, i9xx_plane), plane_state->surf); else - intel_de_write(display, DSPADDR(display, i9xx_plane), base); + intel_de_write(display, DSPADDR(display, i9xx_plane), plane_state->surf); return true; } diff --git a/drivers/gpu/drm/i915/display/i9xx_plane.h b/drivers/gpu/drm/i915/display/i9xx_plane.h index d90546d60855..565dab751301 100644 --- a/drivers/gpu/drm/i915/display/i9xx_plane.h +++ b/drivers/gpu/drm/i915/display/i9xx_plane.h @@ -24,6 +24,7 @@ unsigned int vlv_plane_min_alignment(struct intel_plane *plane, const struct drm_framebuffer *fb, int colot_plane); int i9xx_check_plane_surface(struct intel_plane_state *plane_state); +u32 i965_plane_surf_offset(const struct intel_plane_state *plane_state); struct intel_plane * intel_primary_plane_create(struct intel_display *display, enum pipe pipe); diff --git a/drivers/gpu/drm/i915/display/i9xx_wm.c b/drivers/gpu/drm/i915/display/i9xx_wm.c index 1f9db5118777..fd3b7b35f351 100644 --- a/drivers/gpu/drm/i915/display/i9xx_wm.c +++ b/drivers/gpu/drm/i915/display/i9xx_wm.c @@ -3,6 +3,10 @@ * Copyright © 2023 Intel Corporation */ +#include <linux/iopoll.h> + +#include "soc/intel_dram.h" + #include "i915_drv.h" #include "i915_reg.h" #include "i9xx_wm.h" @@ -85,7 +89,8 @@ static const struct cxsr_latency cxsr_latency_table[] = { static const struct cxsr_latency *pnv_get_cxsr_latency(struct intel_display *display) { - struct drm_i915_private *i915 = to_i915(display->drm); + const struct dram_info *dram_info = intel_dram_info(display->drm); + bool is_ddr3 = dram_info->type == INTEL_DRAM_DDR3; int i; for (i = 0; i < ARRAY_SIZE(cxsr_latency_table); i++) { @@ -93,15 +98,16 @@ static const struct cxsr_latency *pnv_get_cxsr_latency(struct intel_display *dis bool is_desktop = !display->platform.mobile; if (is_desktop == latency->is_desktop && - i915->is_ddr3 == latency->is_ddr3 && - DIV_ROUND_CLOSEST(i915->fsb_freq, 1000) == latency->fsb_freq && - DIV_ROUND_CLOSEST(i915->mem_freq, 1000) == latency->mem_freq) + is_ddr3 == latency->is_ddr3 && + DIV_ROUND_CLOSEST(dram_info->fsb_freq, 1000) == latency->fsb_freq && + DIV_ROUND_CLOSEST(dram_info->mem_freq, 1000) == latency->mem_freq) return latency; } drm_dbg_kms(display->drm, - "Could not find CxSR latency for DDR%s, FSB %u kHz, MEM %u kHz\n", - i915->is_ddr3 ? 
"3" : "2", i915->fsb_freq, i915->mem_freq); + "Could not find CxSR latency for %s, FSB %u kHz, MEM %u kHz\n", + intel_dram_type_str(dram_info->type), + dram_info->fsb_freq, dram_info->mem_freq); return NULL; } @@ -109,6 +115,7 @@ static const struct cxsr_latency *pnv_get_cxsr_latency(struct intel_display *dis static void chv_set_memory_dvfs(struct intel_display *display, bool enable) { u32 val; + int ret; vlv_punit_get(display->drm); @@ -121,8 +128,10 @@ static void chv_set_memory_dvfs(struct intel_display *display, bool enable) val |= FORCE_DDR_FREQ_REQ_ACK; vlv_punit_write(display->drm, PUNIT_REG_DDR_SETUP2, val); - if (wait_for((vlv_punit_read(display->drm, PUNIT_REG_DDR_SETUP2) & - FORCE_DDR_FREQ_REQ_ACK) == 0, 3)) + ret = poll_timeout_us(val = vlv_punit_read(display->drm, PUNIT_REG_DDR_SETUP2), + (val & FORCE_DDR_FREQ_REQ_ACK) == 0, + 500, 3000, false); + if (ret) drm_err(display->drm, "timed out waiting for Punit DDR DVFS request\n"); @@ -3902,6 +3911,7 @@ static void vlv_wm_get_hw_state(struct intel_display *display) struct vlv_wm_values *wm = &display->wm.vlv; struct intel_crtc *crtc; u32 val; + int ret; vlv_read_wm_values(display, wm); @@ -3928,8 +3938,10 @@ static void vlv_wm_get_hw_state(struct intel_display *display) val |= FORCE_DDR_FREQ_REQ_ACK; vlv_punit_write(display->drm, PUNIT_REG_DDR_SETUP2, val); - if (wait_for((vlv_punit_read(display->drm, PUNIT_REG_DDR_SETUP2) & - FORCE_DDR_FREQ_REQ_ACK) == 0, 3)) { + ret = poll_timeout_us(val = vlv_punit_read(display->drm, PUNIT_REG_DDR_SETUP2), + (val & FORCE_DDR_FREQ_REQ_ACK) == 0, + 500, 3000, false); + if (ret) { drm_dbg_kms(display->drm, "Punit not acking DDR DVFS request, " "assuming DDR DVFS is disabled\n"); diff --git a/drivers/gpu/drm/i915/display/icl_dsi.c b/drivers/gpu/drm/i915/display/icl_dsi.c index 8d9cb73a93a7..37faa8f19f6e 100644 --- a/drivers/gpu/drm/i915/display/icl_dsi.c +++ b/drivers/gpu/drm/i915/display/icl_dsi.c @@ -25,6 +25,8 @@ * Jani Nikula <jani.nikula@intel.com> */ +#include <linux/iopoll.h> + #include <drm/display/drm_dsc_helper.h> #include <drm/drm_atomic_helper.h> #include <drm/drm_fixed.h> @@ -72,8 +74,12 @@ static int payload_credits_available(struct intel_display *display, static bool wait_for_header_credits(struct intel_display *display, enum transcoder dsi_trans, int hdr_credit) { - if (wait_for_us(header_credits_available(display, dsi_trans) >= - hdr_credit, 100)) { + int ret, available; + + ret = poll_timeout_us(available = header_credits_available(display, dsi_trans), + available >= hdr_credit, + 10, 100, false); + if (ret) { drm_err(display->drm, "DSI header credits not released\n"); return false; } @@ -84,8 +90,12 @@ static bool wait_for_header_credits(struct intel_display *display, static bool wait_for_payload_credits(struct intel_display *display, enum transcoder dsi_trans, int payld_credit) { - if (wait_for_us(payload_credits_available(display, dsi_trans) >= - payld_credit, 100)) { + int ret, available; + + ret = poll_timeout_us(available = payload_credits_available(display, dsi_trans), + available >= payld_credit, + 10, 100, false); + if (ret) { drm_err(display->drm, "DSI payload credits not released\n"); return false; } @@ -137,8 +147,11 @@ static void wait_for_cmds_dispatched_to_panel(struct intel_encoder *encoder) /* wait for LP TX in progress bit to be cleared */ for_each_dsi_port(port, intel_dsi->ports) { dsi_trans = dsi_port_to_transcoder(port); - if (wait_for_us(!(intel_de_read(display, DSI_LP_MSG(dsi_trans)) & - LPTX_IN_PROGRESS), 20)) + + ret = intel_de_wait_custom(display, 
DSI_LP_MSG(dsi_trans), + LPTX_IN_PROGRESS, 0, + 20, 0, NULL); + if (ret) drm_err(display->drm, "LPTX bit not cleared\n"); } } @@ -516,13 +529,15 @@ static void gen11_dsi_enable_ddi_buffer(struct intel_encoder *encoder) struct intel_display *display = to_intel_display(encoder); struct intel_dsi *intel_dsi = enc_to_intel_dsi(encoder); enum port port; + int ret; for_each_dsi_port(port, intel_dsi->ports) { intel_de_rmw(display, DDI_BUF_CTL(port), 0, DDI_BUF_CTL_ENABLE); - if (wait_for_us(!(intel_de_read(display, DDI_BUF_CTL(port)) & - DDI_BUF_IS_IDLE), - 500)) + ret = intel_de_wait_custom(display, DDI_BUF_CTL(port), + DDI_BUF_IS_IDLE, 0, + 500, 0, NULL); + if (ret) drm_err(display->drm, "DDI port:%c buffer idle\n", port_name(port)); } @@ -838,9 +853,14 @@ gen11_dsi_configure_transcoder(struct intel_encoder *encoder, /* wait for link ready */ for_each_dsi_port(port, intel_dsi->ports) { + int ret; + dsi_trans = dsi_port_to_transcoder(port); - if (wait_for_us((intel_de_read(display, DSI_TRANS_FUNC_CONF(dsi_trans)) & - LINK_READY), 2500)) + + ret = intel_de_wait_custom(display, DSI_TRANS_FUNC_CONF(dsi_trans), + LINK_READY, LINK_READY, + 2500, 0, NULL); + if (ret) drm_err(display->drm, "DSI link not ready\n"); } } @@ -1321,6 +1341,7 @@ static void gen11_dsi_deconfigure_trancoder(struct intel_encoder *encoder) enum port port; enum transcoder dsi_trans; u32 tmp; + int ret; /* disable periodic update mode */ if (is_cmd_mode(intel_dsi)) { @@ -1337,9 +1358,10 @@ static void gen11_dsi_deconfigure_trancoder(struct intel_encoder *encoder) tmp &= ~LINK_ULPS_TYPE_LP11; intel_de_write(display, DSI_LP_MSG(dsi_trans), tmp); - if (wait_for_us((intel_de_read(display, DSI_LP_MSG(dsi_trans)) & - LINK_IN_ULPS), - 10)) + ret = intel_de_wait_custom(display, DSI_LP_MSG(dsi_trans), + LINK_IN_ULPS, LINK_IN_ULPS, + 10, 0, NULL); + if (ret) drm_err(display->drm, "DSI link not in ULPS\n"); } @@ -1367,14 +1389,17 @@ static void gen11_dsi_disable_port(struct intel_encoder *encoder) struct intel_display *display = to_intel_display(encoder); struct intel_dsi *intel_dsi = enc_to_intel_dsi(encoder); enum port port; + int ret; gen11_dsi_ungate_clocks(encoder); for_each_dsi_port(port, intel_dsi->ports) { intel_de_rmw(display, DDI_BUF_CTL(port), DDI_BUF_CTL_ENABLE, 0); - if (wait_for_us((intel_de_read(display, DDI_BUF_CTL(port)) & - DDI_BUF_IS_IDLE), - 8)) + ret = intel_de_wait_custom(display, DDI_BUF_CTL(port), + DDI_BUF_IS_IDLE, DDI_BUF_IS_IDLE, + 8, 0, NULL); + + if (ret) drm_err(display->drm, "DDI port:%c buffer not idle\n", port_name(port)); diff --git a/drivers/gpu/drm/i915/display/intel_alpm.c b/drivers/gpu/drm/i915/display/intel_alpm.c index dfdde8e4eabe..ed7a7ed486b5 100644 --- a/drivers/gpu/drm/i915/display/intel_alpm.c +++ b/drivers/gpu/drm/i915/display/intel_alpm.c @@ -16,6 +16,14 @@ #include "intel_psr.h" #include "intel_psr_regs.h" +#define SILENCE_PERIOD_MIN_TIME 80 +#define SILENCE_PERIOD_MAX_TIME 180 +#define SILENCE_PERIOD_TIME (SILENCE_PERIOD_MIN_TIME + \ + (SILENCE_PERIOD_MAX_TIME - \ + SILENCE_PERIOD_MIN_TIME) / 2) + +#define LFPS_CYCLE_COUNT 10 + bool intel_alpm_aux_wake_supported(struct intel_dp *intel_dp) { return intel_dp->alpm_dpcd & DP_ALPM_CAP; @@ -44,72 +52,49 @@ void intel_alpm_init(struct intel_dp *intel_dp) mutex_init(&intel_dp->alpm_parameters.lock); } -/* - * See Bspec: 71632 for the table - * - * Silence_period = tSilence,Min + ((tSilence,Max - tSilence,Min) / 2) - * - * Half cycle duration: - * - * Link rates 1.62 - 4.32 and tLFPS_Cycle = 70 ns - * FLOOR( (Link Rate * tLFPS_Cycle) / (2 * 10) ) - * 
- * Link rates 5.4 - 8.1 - * PORT_ALPM_LFPS_CTL[ LFPS Cycle Count ] = 10 - * LFPS Period chosen is the mid-point of the min:max values from the table - * FLOOR( LFPS Period in Symbol clocks / - * (2 * PORT_ALPM_LFPS_CTL[ LFPS Cycle Count ]) ) - */ -static bool _lnl_get_silence_period_and_lfps_half_cycle(int link_rate, - int *silence_period, - int *lfps_half_cycle) +static int get_silence_period_symbols(const struct intel_crtc_state *crtc_state) { - switch (link_rate) { - case 162000: - *silence_period = 20; - *lfps_half_cycle = 5; - break; - case 216000: - *silence_period = 27; - *lfps_half_cycle = 7; - break; - case 243000: - *silence_period = 31; - *lfps_half_cycle = 8; - break; - case 270000: - *silence_period = 34; - *lfps_half_cycle = 9; - break; - case 324000: - *silence_period = 41; - *lfps_half_cycle = 11; - break; - case 432000: - *silence_period = 56; - *lfps_half_cycle = 15; - break; - case 540000: - *silence_period = 69; - *lfps_half_cycle = 12; - break; - case 648000: - *silence_period = 84; - *lfps_half_cycle = 15; - break; - case 675000: - *silence_period = 87; - *lfps_half_cycle = 15; - break; - case 810000: - *silence_period = 104; - *lfps_half_cycle = 19; - break; - default: - *silence_period = *lfps_half_cycle = -1; - return false; + return SILENCE_PERIOD_TIME * intel_dp_link_symbol_clock(crtc_state->port_clock) / + 1000 / 1000; +} + +static int get_lfps_cycle_min_max_time(const struct intel_crtc_state *crtc_state, + int *min, int *max) +{ + if (crtc_state->port_clock < 540000) { + *min = 65 * LFPS_CYCLE_COUNT; + *max = 75 * LFPS_CYCLE_COUNT; + } else if (crtc_state->port_clock <= 810000) { + *min = 140; + *max = 800; + } else { + *min = *max = -1; + return -1; } - return true; + + return 0; +} + +static int get_lfps_cycle_time(const struct intel_crtc_state *crtc_state) +{ + int tlfps_cycle_min, tlfps_cycle_max, ret; + + ret = get_lfps_cycle_min_max_time(crtc_state, &tlfps_cycle_min, + &tlfps_cycle_max); + if (ret) + return ret; + + return tlfps_cycle_min + (tlfps_cycle_max - tlfps_cycle_min) / 2; +} + +static int get_lfps_half_cycle_clocks(const struct intel_crtc_state *crtc_state) +{ + int lfps_cycle_time = get_lfps_cycle_time(crtc_state); + + if (lfps_cycle_time < 0) + return -1; + + return lfps_cycle_time * crtc_state->port_clock / 1000 / 1000 / (2 * LFPS_CYCLE_COUNT); } /* @@ -131,21 +116,19 @@ static bool _lnl_get_silence_period_and_lfps_half_cycle(int link_rate, * tML_PHY_LOCK = TPS4 Length * ( 10 / (Link Rate in MHz) ) * TPS4 Length = 252 Symbols */ -static int _lnl_compute_aux_less_wake_time(int port_clock) +static int _lnl_compute_aux_less_wake_time(const struct intel_crtc_state *crtc_state) { int tphy2_p2_to_p0 = 12 * 1000; - int tlfps_period_max = 800; - int tsilence_max = 180; int t1 = 50 * 1000; int tps4 = 252; /* port_clock is link rate in 10kbit/s units */ - int tml_phy_lock = 1000 * 1000 * tps4 / port_clock; + int tml_phy_lock = 1000 * 1000 * tps4 / crtc_state->port_clock; int num_ml_phy_lock = 7 + DIV_ROUND_UP(6500, tml_phy_lock) + 1; int t2 = num_ml_phy_lock * tml_phy_lock; int tcds = 1 * t2; - return DIV_ROUND_UP(tphy2_p2_to_p0 + tlfps_period_max + tsilence_max + - t1 + tcds, 1000); + return DIV_ROUND_UP(tphy2_p2_to_p0 + get_lfps_cycle_time(crtc_state) + + SILENCE_PERIOD_TIME + t1 + tcds, 1000); } static int @@ -157,13 +140,13 @@ _lnl_compute_aux_less_alpm_params(struct intel_dp *intel_dp, lfps_half_cycle; aux_less_wake_time = - _lnl_compute_aux_less_wake_time(crtc_state->port_clock); + _lnl_compute_aux_less_wake_time(crtc_state); aux_less_wake_lines 
= intel_usecs_to_scanlines(&crtc_state->hw.adjusted_mode, aux_less_wake_time); + silence_period = get_silence_period_symbols(crtc_state); - if (!_lnl_get_silence_period_and_lfps_half_cycle(crtc_state->port_clock, - &silence_period, - &lfps_half_cycle)) + lfps_half_cycle = get_lfps_half_cycle_clocks(crtc_state); + if (lfps_half_cycle < 0) return false; if (aux_less_wake_lines > ALPM_CTL_AUX_LESS_WAKE_TIME_MASK || @@ -406,7 +389,7 @@ void intel_alpm_port_configure(struct intel_dp *intel_dp, PORT_ALPM_CTL_MAX_PHY_SWING_HOLD(0) | PORT_ALPM_CTL_SILENCE_PERIOD( intel_dp->alpm_parameters.silence_period_sym_clocks); - lfps_ctl_val = PORT_ALPM_LFPS_CTL_LFPS_CYCLE_COUNT(10) | + lfps_ctl_val = PORT_ALPM_LFPS_CTL_LFPS_CYCLE_COUNT(LFPS_CYCLE_COUNT) | PORT_ALPM_LFPS_CTL_LFPS_HALF_CYCLE_DURATION( intel_dp->alpm_parameters.lfps_half_cycle_num_of_syms) | PORT_ALPM_LFPS_CTL_FIRST_LFPS_HALF_CYCLE_DURATION( diff --git a/drivers/gpu/drm/i915/display/intel_atomic.c b/drivers/gpu/drm/i915/display/intel_atomic.c index f85edb374c97..348b1655435e 100644 --- a/drivers/gpu/drm/i915/display/intel_atomic.c +++ b/drivers/gpu/drm/i915/display/intel_atomic.c @@ -26,7 +26,7 @@ * * The functions here implement the state management and hardware programming * dispatch required by the atomic modeset infrastructure. - * See intel_atomic_plane.c for the plane-specific atomic functionality. + * See intel_plane.c for the plane-specific atomic functionality. */ #include <drm/display/drm_dp_tunnel.h> diff --git a/drivers/gpu/drm/i915/display/intel_audio.c b/drivers/gpu/drm/i915/display/intel_audio.c index 55af3a553c58..5bdaef38f13d 100644 --- a/drivers/gpu/drm/i915/display/intel_audio.c +++ b/drivers/gpu/drm/i915/display/intel_audio.c @@ -951,7 +951,7 @@ static int glk_force_audio_cdclk_commit(struct intel_atomic_state *state, if (IS_ERR(cdclk_state)) return PTR_ERR(cdclk_state); - cdclk_state->force_min_cdclk = enable ? 2 * 96000 : 0; + intel_cdclk_force_min_cdclk(cdclk_state, enable ? 2 * 96000 : 0); return drm_atomic_commit(&state->base); } diff --git a/drivers/gpu/drm/i915/display/intel_backlight.c b/drivers/gpu/drm/i915/display/intel_backlight.c index e007380e9a63..3b14f929825a 100644 --- a/drivers/gpu/drm/i915/display/intel_backlight.c +++ b/drivers/gpu/drm/i915/display/intel_backlight.c @@ -236,7 +236,8 @@ static void i9xx_set_backlight(const struct drm_connector_state *conn_state, u32 struct intel_panel *panel = &connector->panel; u32 tmp, mask; - drm_WARN_ON(display->drm, panel->backlight.pwm_level_max == 0); + if (drm_WARN_ON(display->drm, panel->backlight.pwm_level_max == 0)) + return; if (panel->backlight.combination_mode) { struct pci_dev *pdev = to_pci_dev(display->drm->dev); diff --git a/drivers/gpu/drm/i915/display/intel_bios.c b/drivers/gpu/drm/i915/display/intel_bios.c index 9ce41e689d50..3596dce84c28 100644 --- a/drivers/gpu/drm/i915/display/intel_bios.c +++ b/drivers/gpu/drm/i915/display/intel_bios.c @@ -36,6 +36,7 @@ #include "soc/intel_rom.h" #include "i915_drv.h" +#include "i915_utils.h" #include "intel_display.h" #include "intel_display_core.h" #include "intel_display_rpm.h" @@ -1566,10 +1567,7 @@ parse_psr(struct intel_display *display, panel->vbt.psr.full_link = psr_table->full_link; panel->vbt.psr.require_aux_wakeup = psr_table->require_aux_to_wakeup; - - /* Allowed VBT values goes from 0 to 15 */ - panel->vbt.psr.idle_frames = psr_table->idle_frames < 0 ? 0 : - psr_table->idle_frames > 15 ? 
15 : psr_table->idle_frames; + panel->vbt.psr.idle_frames = psr_table->idle_frames; /* * New psr options 0=500us, 1=100us, 2=2500us, 3=0us @@ -1939,7 +1937,7 @@ static int get_init_otp_deassert_fragment_len(struct intel_display *display, int index, len; if (drm_WARN_ON(display->drm, - !data || panel->vbt.dsi.seq_version != 1)) + !data || panel->vbt.dsi.seq_version >= 3)) return 0; /* index = 1 to skip sequence byte */ @@ -1962,7 +1960,7 @@ static int get_init_otp_deassert_fragment_len(struct intel_display *display, } /* - * Some v1 VBT MIPI sequences do the deassert in the init OTP sequence. + * Some v1/v2 VBT MIPI sequences do the deassert in the init OTP sequence. * The deassert must be done before calling intel_dsi_device_ready, so for * these devices we split the init OTP sequence into a deassert sequence and * the actual init OTP part. @@ -1973,9 +1971,9 @@ static void vlv_fixup_mipi_sequences(struct intel_display *display, u8 *init_otp; int len; - /* Limit this to v1 vid-mode sequences */ + /* Limit this to v1/v2 vid-mode sequences */ if (panel->vbt.dsi.config->is_cmd_mode || - panel->vbt.dsi.seq_version != 1) + panel->vbt.dsi.seq_version >= 3) return; /* Only do this if there are otp and assert seqs and no deassert seq */ @@ -2480,6 +2478,25 @@ static int parse_bdb_216_dp_max_link_rate(const int vbt_max_link_rate) } } +static u32 edp_rate_override_mask(int rate) +{ + switch (rate) { + case 2000000: return BDB_263_VBT_EDP_LINK_RATE_20; + case 1350000: return BDB_263_VBT_EDP_LINK_RATE_13_5; + case 1000000: return BDB_263_VBT_EDP_LINK_RATE_10; + case 810000: return BDB_263_VBT_EDP_LINK_RATE_8_1; + case 675000: return BDB_263_VBT_EDP_LINK_RATE_6_75; + case 540000: return BDB_263_VBT_EDP_LINK_RATE_5_4; + case 432000: return BDB_263_VBT_EDP_LINK_RATE_4_32; + case 324000: return BDB_263_VBT_EDP_LINK_RATE_3_24; + case 270000: return BDB_263_VBT_EDP_LINK_RATE_2_7; + case 243000: return BDB_263_VBT_EDP_LINK_RATE_2_43; + case 216000: return BDB_263_VBT_EDP_LINK_RATE_2_16; + case 162000: return BDB_263_VBT_EDP_LINK_RATE_1_62; + default: return 0; + } +} + int intel_bios_dp_max_link_rate(const struct intel_bios_encoder_data *devdata) { if (!devdata || devdata->display->vbt.version < 216) @@ -2499,6 +2516,19 @@ int intel_bios_dp_max_lane_count(const struct intel_bios_encoder_data *devdata) return devdata->child.dp_max_lane_count + 1; } +bool +intel_bios_encoder_reject_edp_rate(const struct intel_bios_encoder_data *devdata, + int rate) +{ + if (!devdata || devdata->display->vbt.version < 263) + return false; + + if (devdata->child.edp_data_rate_override == BDB_263_VBT_EDP_RATES_MASK) + return false; + + return devdata->child.edp_data_rate_override & edp_rate_override_mask(rate); +} + static void sanitize_device_type(struct intel_bios_encoder_data *devdata, enum port port) { @@ -2747,8 +2777,10 @@ static int child_device_expected_size(u16 version) { BUILD_BUG_ON(sizeof(struct child_device_config) < 40); - if (version > 256) + if (version > 263) return -ENOENT; + else if (version >= 263) + return 44; else if (version >= 256) return 40; else if (version >= 216) @@ -3743,8 +3775,6 @@ DEFINE_SHOW_ATTRIBUTE(intel_bios_vbt); void intel_bios_debugfs_register(struct intel_display *display) { - struct drm_minor *minor = display->drm->primary; - - debugfs_create_file("i915_vbt", 0444, minor->debugfs_root, + debugfs_create_file("i915_vbt", 0444, display->drm->debugfs_root, display, &intel_bios_vbt_fops); } diff --git a/drivers/gpu/drm/i915/display/intel_bios.h b/drivers/gpu/drm/i915/display/intel_bios.h index 
6cd7a011b8c4..f9e438b2787b 100644 --- a/drivers/gpu/drm/i915/display/intel_bios.h +++ b/drivers/gpu/drm/i915/display/intel_bios.h @@ -50,180 +50,6 @@ enum intel_backlight_type { INTEL_BACKLIGHT_VESA_EDP_AUX_INTERFACE, }; -/* - * MIPI Sequence Block definitions - * - * Note the VBT spec has AssertReset / DeassertReset swapped from their - * usual naming, we use the proper names here to avoid confusion when - * reading the code. - */ -enum mipi_seq { - MIPI_SEQ_END = 0, - MIPI_SEQ_DEASSERT_RESET, /* Spec says MipiAssertResetPin */ - MIPI_SEQ_INIT_OTP, - MIPI_SEQ_DISPLAY_ON, - MIPI_SEQ_DISPLAY_OFF, - MIPI_SEQ_ASSERT_RESET, /* Spec says MipiDeassertResetPin */ - MIPI_SEQ_BACKLIGHT_ON, /* sequence block v2+ */ - MIPI_SEQ_BACKLIGHT_OFF, /* sequence block v2+ */ - MIPI_SEQ_TEAR_ON, /* sequence block v2+ */ - MIPI_SEQ_TEAR_OFF, /* sequence block v3+ */ - MIPI_SEQ_POWER_ON, /* sequence block v3+ */ - MIPI_SEQ_POWER_OFF, /* sequence block v3+ */ - MIPI_SEQ_MAX -}; - -enum mipi_seq_element { - MIPI_SEQ_ELEM_END = 0, - MIPI_SEQ_ELEM_SEND_PKT, - MIPI_SEQ_ELEM_DELAY, - MIPI_SEQ_ELEM_GPIO, - MIPI_SEQ_ELEM_I2C, /* sequence block v2+ */ - MIPI_SEQ_ELEM_SPI, /* sequence block v3+ */ - MIPI_SEQ_ELEM_PMIC, /* sequence block v3+ */ - MIPI_SEQ_ELEM_MAX -}; - -#define MIPI_DSI_UNDEFINED_PANEL_ID 0 -#define MIPI_DSI_GENERIC_PANEL_ID 1 - -struct mipi_config { - u16 panel_id; - - /* General Params */ - u32 enable_dithering:1; - u32 rsvd1:1; - u32 is_bridge:1; - - u32 panel_arch_type:2; - u32 is_cmd_mode:1; - -#define NON_BURST_SYNC_PULSE 0x1 -#define NON_BURST_SYNC_EVENTS 0x2 -#define BURST_MODE 0x3 - u32 video_transfer_mode:2; - - u32 cabc_supported:1; -#define PPS_BLC_PMIC 0 -#define PPS_BLC_SOC 1 - u32 pwm_blc:1; - - /* Bit 13:10 */ -#define PIXEL_FORMAT_RGB565 0x1 -#define PIXEL_FORMAT_RGB666 0x2 -#define PIXEL_FORMAT_RGB666_LOOSELY_PACKED 0x3 -#define PIXEL_FORMAT_RGB888 0x4 - u32 videomode_color_format:4; - - /* Bit 15:14 */ -#define ENABLE_ROTATION_0 0x0 -#define ENABLE_ROTATION_90 0x1 -#define ENABLE_ROTATION_180 0x2 -#define ENABLE_ROTATION_270 0x3 - u32 rotation:2; - u32 bta_enabled:1; - u32 rsvd2:15; - - /* 2 byte Port Description */ -#define DUAL_LINK_NOT_SUPPORTED 0 -#define DUAL_LINK_FRONT_BACK 1 -#define DUAL_LINK_PIXEL_ALT 2 - u16 dual_link:2; - u16 lane_cnt:2; - u16 pixel_overlap:3; - u16 rgb_flip:1; -#define DL_DCS_PORT_A 0x00 -#define DL_DCS_PORT_C 0x01 -#define DL_DCS_PORT_A_AND_C 0x02 - u16 dl_dcs_cabc_ports:2; - u16 dl_dcs_backlight_ports:2; - u16 rsvd3:4; - - u16 rsvd4; - - u8 rsvd5; - u32 target_burst_mode_freq; - u32 dsi_ddr_clk; - u32 bridge_ref_clk; - -#define BYTE_CLK_SEL_20MHZ 0 -#define BYTE_CLK_SEL_10MHZ 1 -#define BYTE_CLK_SEL_5MHZ 2 - u8 byte_clk_sel:2; - - u8 rsvd6:6; - - /* DPHY Flags */ - u16 dphy_param_valid:1; - u16 eot_pkt_disabled:1; - u16 enable_clk_stop:1; - u16 rsvd7:13; - - u32 hs_tx_timeout; - u32 lp_rx_timeout; - u32 turn_around_timeout; - u32 device_reset_timer; - u32 master_init_timer; - u32 dbi_bw_timer; - u32 lp_byte_clk_val; - - /* 4 byte Dphy Params */ - u32 prepare_cnt:6; - u32 rsvd8:2; - u32 clk_zero_cnt:8; - u32 trail_cnt:5; - u32 rsvd9:3; - u32 exit_zero_cnt:6; - u32 rsvd10:2; - - u32 clk_lane_switch_cnt; - u32 hl_switch_cnt; - - u32 rsvd11[6]; - - /* timings based on dphy spec */ - u8 tclk_miss; - u8 tclk_post; - u8 rsvd12; - u8 tclk_pre; - u8 tclk_prepare; - u8 tclk_settle; - u8 tclk_term_enable; - u8 tclk_trail; - u16 tclk_prepare_clkzero; - u8 rsvd13; - u8 td_term_enable; - u8 teot; - u8 ths_exit; - u8 ths_prepare; - u16 ths_prepare_hszero; - u8 rsvd14; - 
u8 ths_settle; - u8 ths_skip; - u8 ths_trail; - u8 tinit; - u8 tlpx; - u8 rsvd15[3]; - - /* GPIOs */ - u8 panel_enable; - u8 bl_enable; - u8 pwm_enable; - u8 reset_r_n; - u8 pwr_down_r; - u8 stdby_r_n; - -} __packed; - -/* all delays have a unit of 100us */ -struct mipi_pps_data { - u16 panel_on_delay; - u16 bl_enable_delay; - u16 bl_disable_delay; - u16 panel_off_delay; - u16 panel_power_cycle_delay; -} __packed; - void intel_bios_init(struct intel_display *display); void intel_bios_init_panel_early(struct intel_display *display, struct intel_panel *panel, @@ -259,6 +85,8 @@ bool intel_bios_encoder_is_lspcon(const struct intel_bios_encoder_data *devdata) bool intel_bios_encoder_lane_reversal(const struct intel_bios_encoder_data *devdata); bool intel_bios_encoder_hpd_invert(const struct intel_bios_encoder_data *devdata); enum port intel_bios_encoder_port(const struct intel_bios_encoder_data *devdata); +bool intel_bios_encoder_reject_edp_rate(const struct intel_bios_encoder_data *devdata, + int rate); enum aux_ch intel_bios_dp_aux_ch(const struct intel_bios_encoder_data *devdata); int intel_bios_dp_boost_level(const struct intel_bios_encoder_data *devdata); int intel_bios_dp_max_lane_count(const struct intel_bios_encoder_data *devdata); diff --git a/drivers/gpu/drm/i915/display/intel_bo.c b/drivers/gpu/drm/i915/display/intel_bo.c index fbd16d7b58d9..6ae1374d5c2b 100644 --- a/drivers/gpu/drm/i915/display/intel_bo.c +++ b/drivers/gpu/drm/i915/display/intel_bo.c @@ -1,6 +1,8 @@ // SPDX-License-Identifier: MIT /* Copyright © 2024 Intel Corporation */ +#include <drm/drm_panic.h> + #include "gem/i915_gem_mman.h" #include "gem/i915_gem_object.h" #include "gem/i915_gem_object_frontbuffer.h" diff --git a/drivers/gpu/drm/i915/display/intel_bo.h b/drivers/gpu/drm/i915/display/intel_bo.h index ea7a2253aaa5..48d87019e48a 100644 --- a/drivers/gpu/drm/i915/display/intel_bo.h +++ b/drivers/gpu/drm/i915/display/intel_bo.h @@ -7,6 +7,8 @@ #include <linux/types.h> struct drm_gem_object; +struct drm_scanout_buffer; +struct intel_framebuffer; struct seq_file; struct vm_area_struct; diff --git a/drivers/gpu/drm/i915/display/intel_bw.c b/drivers/gpu/drm/i915/display/intel_bw.c index 6c2ab2e0dc91..ac6da20d9529 100644 --- a/drivers/gpu/drm/i915/display/intel_bw.c +++ b/drivers/gpu/drm/i915/display/intel_bw.c @@ -18,8 +18,44 @@ #include "intel_display_types.h" #include "intel_mchbar_regs.h" #include "intel_pcode.h" +#include "intel_uncore.h" #include "skl_watermark.h" +struct intel_dbuf_bw { + unsigned int max_bw[I915_MAX_DBUF_SLICES]; + u8 active_planes[I915_MAX_DBUF_SLICES]; +}; + +struct intel_bw_state { + struct intel_global_state base; + struct intel_dbuf_bw dbuf_bw[I915_MAX_PIPES]; + + /* + * Contains a bit mask, used to determine, whether correspondent + * pipe allows SAGV or not. + */ + u8 pipe_sagv_reject; + + /* bitmask of active pipes */ + u8 active_pipes; + + /* + * From MTL onwards, to lock a QGV point, punit expects the peak BW of + * the selected QGV point as the parameter in multiples of 100MB/s + */ + u16 qgv_point_peakbw; + + /* + * Current QGV points mask, which restricts + * some particular SAGV states, not to confuse + * with pipe_sagv_mask. 
+ */ + u16 qgv_points_mask; + + unsigned int data_rate[I915_MAX_PIPES]; + u8 num_active_planes[I915_MAX_PIPES]; +}; + /* Parameters for Qclk Geyserville (QGV) */ struct intel_qgv_point { u16 dclk, t_rp, t_rdpre, t_rc, t_ras, t_rcd; @@ -82,14 +118,13 @@ static int icl_pcode_read_qgv_point_info(struct intel_display *display, struct intel_qgv_point *sp, int point) { - struct drm_i915_private *i915 = to_i915(display->drm); u32 val = 0, val2 = 0; u16 dclk; int ret; - ret = snb_pcode_read(&i915->uncore, ICL_PCODE_MEM_SUBSYSYSTEM_INFO | - ICL_PCODE_MEM_SS_READ_QGV_POINT_INFO(point), - &val, &val2); + ret = intel_pcode_read(display->drm, ICL_PCODE_MEM_SUBSYSYSTEM_INFO | + ICL_PCODE_MEM_SS_READ_QGV_POINT_INFO(point), + &val, &val2); if (ret) return ret; @@ -110,13 +145,12 @@ static int icl_pcode_read_qgv_point_info(struct intel_display *display, static int adls_pcode_read_psf_gv_point_info(struct intel_display *display, struct intel_psf_gv_point *points) { - struct drm_i915_private *i915 = to_i915(display->drm); u32 val = 0; int ret; int i; - ret = snb_pcode_read(&i915->uncore, ICL_PCODE_MEM_SUBSYSYSTEM_INFO | - ADL_PCODE_MEM_SS_READ_PSF_GV_INFO, &val, NULL); + ret = intel_pcode_read(display->drm, ICL_PCODE_MEM_SUBSYSYSTEM_INFO | + ADL_PCODE_MEM_SS_READ_PSF_GV_INFO, &val, NULL); if (ret) return ret; @@ -154,21 +188,20 @@ static bool is_sagv_enabled(struct intel_display *display, u16 points_mask) ICL_PCODE_REQ_QGV_PT_MASK); } -int icl_pcode_restrict_qgv_points(struct intel_display *display, - u32 points_mask) +static int icl_pcode_restrict_qgv_points(struct intel_display *display, + u32 points_mask) { - struct drm_i915_private *i915 = to_i915(display->drm); int ret; if (DISPLAY_VER(display) >= 14) return 0; /* bspec says to keep retrying for at least 1 ms */ - ret = skl_pcode_request(&i915->uncore, ICL_PCODE_SAGV_DE_MEM_SS_CONFIG, - points_mask, - ICL_PCODE_REP_QGV_MASK | ADLS_PCODE_REP_PSF_MASK, - ICL_PCODE_REP_QGV_SAFE | ADLS_PCODE_REP_PSF_SAFE, - 1); + ret = intel_pcode_request(display->drm, ICL_PCODE_SAGV_DE_MEM_SS_CONFIG, + points_mask, + ICL_PCODE_REP_QGV_MASK | ADLS_PCODE_REP_PSF_MASK, + ICL_PCODE_REP_QGV_SAFE | ADLS_PCODE_REP_PSF_SAFE, + 1); if (ret < 0) { drm_err(display->drm, @@ -326,7 +359,7 @@ static int icl_get_qgv_points(struct intel_display *display, for (i = 0; i < qi->num_psf_points; i++) drm_dbg_kms(display->drm, - "PSF GV %d: CLK=%d \n", + "PSF GV %d: CLK=%d\n", i, qi->psf_points[i].clk); } @@ -420,6 +453,13 @@ static const struct intel_sa_info xe3lpd_sa_info = { .derating = 10, }; +static const struct intel_sa_info xe3lpd_3002_sa_info = { + .deburst = 32, + .deprogbwlimit = 22, /* GB/s */ + .displayrtids = 256, + .derating = 10, +}; + static int icl_get_bw_info(struct intel_display *display, const struct dram_info *dram_info, const struct intel_sa_info *sa) @@ -771,7 +811,9 @@ void intel_bw_init_hw(struct intel_display *display) if (!HAS_DISPLAY(display)) return; - if (DISPLAY_VER(display) >= 30) + if (DISPLAY_VERx100(display) >= 3002) + tgl_get_bw_info(display, dram_info, &xe3lpd_3002_sa_info); + else if (DISPLAY_VER(display) >= 30) tgl_get_bw_info(display, dram_info, &xe3lpd_sa_info); else if (DISPLAY_VERx100(display) >= 1401 && display->platform.dgfx && dram_info->type == INTEL_DRAM_GDDR_ECC) @@ -865,6 +907,11 @@ static unsigned int intel_bw_data_rate(struct intel_display *display, return data_rate; } +struct intel_bw_state *to_intel_bw_state(struct intel_global_state *obj_state) +{ + return container_of(obj_state, struct intel_bw_state, base); +} + struct intel_bw_state * 
intel_atomic_get_old_bw_state(struct intel_atomic_state *state) { @@ -974,6 +1021,70 @@ static void icl_force_disable_sagv(struct intel_display *display, icl_pcode_restrict_qgv_points(display, bw_state->qgv_points_mask); } +void icl_sagv_pre_plane_update(struct intel_atomic_state *state) +{ + struct intel_display *display = to_intel_display(state); + const struct intel_bw_state *old_bw_state = + intel_atomic_get_old_bw_state(state); + const struct intel_bw_state *new_bw_state = + intel_atomic_get_new_bw_state(state); + u16 old_mask, new_mask; + + if (!new_bw_state) + return; + + old_mask = old_bw_state->qgv_points_mask; + new_mask = old_bw_state->qgv_points_mask | new_bw_state->qgv_points_mask; + + if (old_mask == new_mask) + return; + + WARN_ON(!new_bw_state->base.changed); + + drm_dbg_kms(display->drm, "Restricting QGV points: 0x%x -> 0x%x\n", + old_mask, new_mask); + + /* + * Restrict required qgv points before updating the configuration. + * According to BSpec we can't mask and unmask qgv points at the same + * time. Also masking should be done before updating the configuration + * and unmasking afterwards. + */ + icl_pcode_restrict_qgv_points(display, new_mask); +} + +void icl_sagv_post_plane_update(struct intel_atomic_state *state) +{ + struct intel_display *display = to_intel_display(state); + const struct intel_bw_state *old_bw_state = + intel_atomic_get_old_bw_state(state); + const struct intel_bw_state *new_bw_state = + intel_atomic_get_new_bw_state(state); + u16 old_mask, new_mask; + + if (!new_bw_state) + return; + + old_mask = old_bw_state->qgv_points_mask | new_bw_state->qgv_points_mask; + new_mask = new_bw_state->qgv_points_mask; + + if (old_mask == new_mask) + return; + + WARN_ON(!new_bw_state->base.changed); + + drm_dbg_kms(display->drm, "Relaxing QGV points: 0x%x -> 0x%x\n", + old_mask, new_mask); + + /* + * Allow required qgv points after updating the configuration. + * According to BSpec we can't mask and unmask qgv points at the same + * time. Also masking should be done before updating the configuration + * and unmasking afterwards. + */ + icl_pcode_restrict_qgv_points(display, new_mask); +} + static int mtl_find_qgv_points(struct intel_display *display, unsigned int data_rate, unsigned int num_active_planes, @@ -994,7 +1105,7 @@ static int mtl_find_qgv_points(struct intel_display *display, * for qgv peak bw in PM Demand request. So assign UINT_MAX if SAGV is * not enabled. PM Demand code will clamp the value for the register */ - if (!intel_can_enable_sagv(display, new_bw_state)) { + if (!intel_bw_can_enable_sagv(display, new_bw_state)) { new_bw_state->qgv_point_peakbw = U16_MAX; drm_dbg_kms(display->drm, "No SAGV, use UINT_MAX as peak bw."); return 0; @@ -1107,7 +1218,7 @@ static int icl_find_qgv_points(struct intel_display *display, * we can't enable SAGV due to the increased memory latency it may * cause. */ - if (!intel_can_enable_sagv(display, new_bw_state)) { + if (!intel_bw_can_enable_sagv(display, new_bw_state)) { qgv_points = icl_max_bw_qgv_point_mask(display, num_active_planes); drm_dbg_kms(display->drm, "No SAGV, using single QGV point mask 0x%x\n", qgv_points); @@ -1357,12 +1468,12 @@ int intel_bw_calc_min_cdclk(struct intel_atomic_state *state, * requirements. This can reduce back and forth * display blinking due to constant cdclk changes. 
*/ - if (new_min_cdclk <= cdclk_state->bw_min_cdclk) + if (new_min_cdclk <= intel_cdclk_bw_min_cdclk(cdclk_state)) return 0; drm_dbg_kms(display->drm, "new bandwidth min cdclk (%d kHz) > old min cdclk (%d kHz)\n", - new_min_cdclk, cdclk_state->bw_min_cdclk); + new_min_cdclk, intel_cdclk_bw_min_cdclk(cdclk_state)); *need_cdclk_calc = true; return 0; @@ -1474,8 +1585,8 @@ static int intel_bw_check_sagv_mask(struct intel_atomic_state *state) if (!new_bw_state) return 0; - if (intel_can_enable_sagv(display, new_bw_state) != - intel_can_enable_sagv(display, old_bw_state)) { + if (intel_bw_can_enable_sagv(display, new_bw_state) != + intel_bw_can_enable_sagv(display, old_bw_state)) { ret = intel_atomic_serialize_global_state(&new_bw_state->base); if (ret) return ret; @@ -1521,8 +1632,8 @@ int intel_bw_atomic_check(struct intel_atomic_state *state, bool any_ms) new_bw_state = intel_atomic_get_new_bw_state(state); if (new_bw_state && - intel_can_enable_sagv(display, old_bw_state) != - intel_can_enable_sagv(display, new_bw_state)) + intel_bw_can_enable_sagv(display, old_bw_state) != + intel_bw_can_enable_sagv(display, new_bw_state)) changed = true; /* @@ -1644,3 +1755,32 @@ int intel_bw_init(struct intel_display *display) return 0; } + +bool intel_bw_pmdemand_needs_update(struct intel_atomic_state *state) +{ + const struct intel_bw_state *new_bw_state, *old_bw_state; + + new_bw_state = intel_atomic_get_new_bw_state(state); + old_bw_state = intel_atomic_get_old_bw_state(state); + + if (new_bw_state && + new_bw_state->qgv_point_peakbw != old_bw_state->qgv_point_peakbw) + return true; + + return false; +} + +bool intel_bw_can_enable_sagv(struct intel_display *display, + const struct intel_bw_state *bw_state) +{ + if (DISPLAY_VER(display) < 11 && + bw_state->active_pipes && !is_power_of_2(bw_state->active_pipes)) + return false; + + return bw_state->pipe_sagv_reject == 0; +} + +int intel_bw_qgv_point_peakbw(const struct intel_bw_state *bw_state) +{ + return bw_state->qgv_point_peakbw; +} diff --git a/drivers/gpu/drm/i915/display/intel_bw.h b/drivers/gpu/drm/i915/display/intel_bw.h index eb2cc883e9c1..d51f50c9d302 100644 --- a/drivers/gpu/drm/i915/display/intel_bw.h +++ b/drivers/gpu/drm/i915/display/intel_bw.h @@ -8,52 +8,14 @@ #include <drm/drm_atomic.h> -#include "intel_display_limits.h" -#include "intel_display_power.h" -#include "intel_global_state.h" - struct intel_atomic_state; +struct intel_bw_state; struct intel_crtc; struct intel_crtc_state; struct intel_display; +struct intel_global_state; -struct intel_dbuf_bw { - unsigned int max_bw[I915_MAX_DBUF_SLICES]; - u8 active_planes[I915_MAX_DBUF_SLICES]; -}; - -struct intel_bw_state { - struct intel_global_state base; - struct intel_dbuf_bw dbuf_bw[I915_MAX_PIPES]; - - /* - * Contains a bit mask, used to determine, whether correspondent - * pipe allows SAGV or not. - */ - u8 pipe_sagv_reject; - - /* bitmask of active pipes */ - u8 active_pipes; - - /* - * From MTL onwards, to lock a QGV point, punit expects the peak BW of - * the selected QGV point as the parameter in multiples of 100MB/s - */ - u16 qgv_point_peakbw; - - /* - * Current QGV points mask, which restricts - * some particular SAGV states, not to confuse - * with pipe_sagv_mask. 
- */ - u16 qgv_points_mask; - - unsigned int data_rate[I915_MAX_PIPES]; - u8 num_active_planes[I915_MAX_PIPES]; -}; - -#define to_intel_bw_state(global_state) \ - container_of_const((global_state), struct intel_bw_state, base) +struct intel_bw_state *to_intel_bw_state(struct intel_global_state *obj_state); struct intel_bw_state * intel_atomic_get_old_bw_state(struct intel_atomic_state *state); @@ -67,8 +29,6 @@ intel_atomic_get_bw_state(struct intel_atomic_state *state); void intel_bw_init_hw(struct intel_display *display); int intel_bw_init(struct intel_display *display); int intel_bw_atomic_check(struct intel_atomic_state *state, bool any_ms); -int icl_pcode_restrict_qgv_points(struct intel_display *display, - u32 points_mask); int intel_bw_calc_min_cdclk(struct intel_atomic_state *state, bool *need_cdclk_calc); int intel_bw_min_cdclk(struct intel_display *display, @@ -76,4 +36,11 @@ int intel_bw_min_cdclk(struct intel_display *display, void intel_bw_update_hw_state(struct intel_display *display); void intel_bw_crtc_disable_noatomic(struct intel_crtc *crtc); +bool intel_bw_pmdemand_needs_update(struct intel_atomic_state *state); +bool intel_bw_can_enable_sagv(struct intel_display *display, + const struct intel_bw_state *bw_state); +void icl_sagv_pre_plane_update(struct intel_atomic_state *state); +void icl_sagv_post_plane_update(struct intel_atomic_state *state); +int intel_bw_qgv_point_peakbw(const struct intel_bw_state *bw_state); + #endif /* __INTEL_BW_H__ */ diff --git a/drivers/gpu/drm/i915/display/intel_cdclk.c b/drivers/gpu/drm/i915/display/intel_cdclk.c index 38b3094b37d7..9725eebe5706 100644 --- a/drivers/gpu/drm/i915/display/intel_cdclk.c +++ b/drivers/gpu/drm/i915/display/intel_cdclk.c @@ -22,6 +22,7 @@ */ #include <linux/debugfs.h> +#include <linux/iopoll.h> #include <linux/time.h> #include <drm/drm_fixed.h> @@ -31,8 +32,8 @@ #include "hsw_ips.h" #include "i915_drv.h" #include "i915_reg.h" +#include "i915_utils.h" #include "intel_atomic.h" -#include "intel_atomic_plane.h" #include "intel_audio.h" #include "intel_bw.h" #include "intel_cdclk.h" @@ -43,6 +44,7 @@ #include "intel_mchbar_regs.h" #include "intel_pci_config.h" #include "intel_pcode.h" +#include "intel_plane.h" #include "intel_psr.h" #include "intel_vdsc.h" #include "skl_watermark.h" @@ -114,6 +116,42 @@ * dividers can be programmed correctly. */ +struct intel_cdclk_state { + struct intel_global_state base; + + /* + * Logical configuration of cdclk (used for all scaling, + * watermark, etc. calculations and checks). This is + * computed as if all enabled crtcs were active. + */ + struct intel_cdclk_config logical; + + /* + * Actual configuration of cdclk, can be different from the + * logical configuration only when all crtc's are DPMS off. 
+ */ + struct intel_cdclk_config actual; + + /* minimum acceptable cdclk to satisfy bandwidth requirements */ + int bw_min_cdclk; + /* minimum acceptable cdclk for each pipe */ + int min_cdclk[I915_MAX_PIPES]; + /* minimum acceptable voltage level for each pipe */ + u8 min_voltage_level[I915_MAX_PIPES]; + + /* pipe to which cd2x update is synchronized */ + enum pipe pipe; + + /* forced minimum cdclk for glk+ audio w/a */ + int force_min_cdclk; + + /* bitmask of active pipes */ + u8 active_pipes; + + /* update cdclk with pipes disabled */ + bool disable_pipes; +}; + struct intel_cdclk_funcs { void (*get_cdclk)(struct intel_display *display, struct intel_cdclk_config *cdclk_config); @@ -636,6 +674,7 @@ static void vlv_set_cdclk(struct intel_display *display, int cdclk = cdclk_config->cdclk; u32 val, cmd = cdclk_config->voltage_level; intel_wakeref_t wakeref; + int ret; switch (cdclk) { case 400000: @@ -666,12 +705,12 @@ static void vlv_set_cdclk(struct intel_display *display, val &= ~DSPFREQGUAR_MASK; val |= (cmd << DSPFREQGUAR_SHIFT); vlv_punit_write(display->drm, PUNIT_REG_DSPSSPM, val); - if (wait_for((vlv_punit_read(display->drm, PUNIT_REG_DSPSSPM) & - DSPFREQSTAT_MASK) == (cmd << DSPFREQSTAT_SHIFT), - 50)) { - drm_err(display->drm, - "timed out waiting for CDclk change\n"); - } + + ret = poll_timeout_us(val = vlv_punit_read(display->drm, PUNIT_REG_DSPSSPM), + (val & DSPFREQSTAT_MASK) == (cmd << DSPFREQSTAT_SHIFT), + 500, 50 * 1000, false); + if (ret) + drm_err(display->drm, "timed out waiting for CDCLK change\n"); if (cdclk == 400000) { u32 divider; @@ -685,11 +724,11 @@ static void vlv_set_cdclk(struct intel_display *display, val |= divider; vlv_cck_write(display->drm, CCK_DISPLAY_CLOCK_CONTROL, val); - if (wait_for((vlv_cck_read(display->drm, CCK_DISPLAY_CLOCK_CONTROL) & - CCK_FREQUENCY_STATUS) == (divider << CCK_FREQUENCY_STATUS_SHIFT), - 50)) - drm_err(display->drm, - "timed out waiting for CDclk change\n"); + ret = poll_timeout_us(val = vlv_cck_read(display->drm, CCK_DISPLAY_CLOCK_CONTROL), + (val & CCK_FREQUENCY_STATUS) == (divider << CCK_FREQUENCY_STATUS_SHIFT), + 500, 50 * 1000, false); + if (ret) + drm_err(display->drm, "timed out waiting for CDCLK change\n"); } /* adjust self-refresh exit latency value */ @@ -725,6 +764,7 @@ static void chv_set_cdclk(struct intel_display *display, int cdclk = cdclk_config->cdclk; u32 val, cmd = cdclk_config->voltage_level; intel_wakeref_t wakeref; + int ret; switch (cdclk) { case 333333: @@ -750,12 +790,12 @@ static void chv_set_cdclk(struct intel_display *display, val &= ~DSPFREQGUAR_MASK_CHV; val |= (cmd << DSPFREQGUAR_SHIFT_CHV); vlv_punit_write(display->drm, PUNIT_REG_DSPSSPM, val); - if (wait_for((vlv_punit_read(display->drm, PUNIT_REG_DSPSSPM) & - DSPFREQSTAT_MASK_CHV) == (cmd << DSPFREQSTAT_SHIFT_CHV), - 50)) { - drm_err(display->drm, - "timed out waiting for CDclk change\n"); - } + + ret = poll_timeout_us(val = vlv_punit_read(display->drm, PUNIT_REG_DSPSSPM), + (val & DSPFREQSTAT_MASK_CHV) == (cmd << DSPFREQSTAT_SHIFT_CHV), + 500, 50 * 1000, false); + if (ret) + drm_err(display->drm, "timed out waiting for CDCLK change\n"); vlv_punit_put(display->drm); @@ -841,7 +881,6 @@ static void bdw_set_cdclk(struct intel_display *display, const struct intel_cdclk_config *cdclk_config, enum pipe pipe) { - struct drm_i915_private *dev_priv = to_i915(display->drm); int cdclk = cdclk_config->cdclk; int ret; @@ -854,7 +893,7 @@ static void bdw_set_cdclk(struct intel_display *display, "trying to change cdclk frequency with cdclk not enabled\n")) 
return; - ret = snb_pcode_write(&dev_priv->uncore, BDW_PCODE_DISPLAY_FREQ_CHANGE_REQ, 0x0); + ret = intel_pcode_write(display->drm, BDW_PCODE_DISPLAY_FREQ_CHANGE_REQ, 0x0); if (ret) { drm_err(display->drm, "failed to inform pcode about cdclk change\n"); @@ -868,8 +907,10 @@ static void bdw_set_cdclk(struct intel_display *display, * According to the spec, it should be enough to poll for this 1 us. * However, extensive testing shows that this can take longer. */ - if (wait_for_us(intel_de_read(display, LCPLL_CTL) & - LCPLL_CD_SOURCE_FCLK_DONE, 100)) + ret = intel_de_wait_custom(display, LCPLL_CTL, + LCPLL_CD_SOURCE_FCLK_DONE, LCPLL_CD_SOURCE_FCLK_DONE, + 100, 0, NULL); + if (ret) drm_err(display->drm, "Switching to FCLK failed\n"); intel_de_rmw(display, LCPLL_CTL, @@ -878,12 +919,14 @@ static void bdw_set_cdclk(struct intel_display *display, intel_de_rmw(display, LCPLL_CTL, LCPLL_CD_SOURCE_FCLK, 0); - if (wait_for_us((intel_de_read(display, LCPLL_CTL) & - LCPLL_CD_SOURCE_FCLK_DONE) == 0, 1)) + ret = intel_de_wait_custom(display, LCPLL_CTL, + LCPLL_CD_SOURCE_FCLK_DONE, 0, + 1, 0, NULL); + if (ret) drm_err(display->drm, "Switching back to LCPLL failed\n"); - snb_pcode_write(&dev_priv->uncore, HSW_PCODE_DE_WRITE_FREQ_REQ, - cdclk_config->voltage_level); + intel_pcode_write(display->drm, HSW_PCODE_DE_WRITE_FREQ_REQ, + cdclk_config->voltage_level); intel_de_write(display, CDCLK_FREQ, DIV_ROUND_CLOSEST(cdclk, 1000) - 1); @@ -1123,7 +1166,6 @@ static void skl_set_cdclk(struct intel_display *display, const struct intel_cdclk_config *cdclk_config, enum pipe pipe) { - struct drm_i915_private *dev_priv = to_i915(display->drm); int cdclk = cdclk_config->cdclk; int vco = cdclk_config->vco; u32 freq_select, cdclk_ctl; @@ -1140,10 +1182,10 @@ static void skl_set_cdclk(struct intel_display *display, drm_WARN_ON_ONCE(display->drm, display->platform.skylake && vco == 8640000); - ret = skl_pcode_request(&dev_priv->uncore, SKL_PCODE_CDCLK_CONTROL, - SKL_CDCLK_PREPARE_FOR_CHANGE, - SKL_CDCLK_READY_FOR_CHANGE, - SKL_CDCLK_READY_FOR_CHANGE, 3); + ret = intel_pcode_request(display->drm, SKL_PCODE_CDCLK_CONTROL, + SKL_CDCLK_PREPARE_FOR_CHANGE, + SKL_CDCLK_READY_FOR_CHANGE, + SKL_CDCLK_READY_FOR_CHANGE, 3); if (ret) { drm_err(display->drm, "Failed to inform PCU about cdclk change (%d)\n", ret); @@ -1186,8 +1228,8 @@ static void skl_set_cdclk(struct intel_display *display, intel_de_posting_read(display, CDCLK_CTL); /* inform PCU of the change */ - snb_pcode_write(&dev_priv->uncore, SKL_PCODE_CDCLK_CONTROL, - cdclk_config->voltage_level); + intel_pcode_write(display->drm, SKL_PCODE_CDCLK_CONTROL, + cdclk_config->voltage_level); intel_update_cdclk(display); } @@ -2123,7 +2165,6 @@ static void bxt_set_cdclk(struct intel_display *display, const struct intel_cdclk_config *cdclk_config, enum pipe pipe) { - struct drm_i915_private *dev_priv = to_i915(display->drm); struct intel_cdclk_config mid_cdclk_config; int cdclk = cdclk_config->cdclk; int ret = 0; @@ -2137,18 +2178,18 @@ static void bxt_set_cdclk(struct intel_display *display, if (DISPLAY_VER(display) >= 14 || display->platform.dg2) ; /* NOOP */ else if (DISPLAY_VER(display) >= 11) - ret = skl_pcode_request(&dev_priv->uncore, SKL_PCODE_CDCLK_CONTROL, - SKL_CDCLK_PREPARE_FOR_CHANGE, - SKL_CDCLK_READY_FOR_CHANGE, - SKL_CDCLK_READY_FOR_CHANGE, 3); + ret = intel_pcode_request(display->drm, SKL_PCODE_CDCLK_CONTROL, + SKL_CDCLK_PREPARE_FOR_CHANGE, + SKL_CDCLK_READY_FOR_CHANGE, + SKL_CDCLK_READY_FOR_CHANGE, 3); else /* * BSpec requires us to wait up to 150usec, but that leads 
to * timeouts; the 2ms used here is based on experiment. */ - ret = snb_pcode_write_timeout(&dev_priv->uncore, - HSW_PCODE_DE_WRITE_FREQ_REQ, - 0x80000000, 150, 2); + ret = intel_pcode_write_timeout(display->drm, + HSW_PCODE_DE_WRITE_FREQ_REQ, + 0x80000000, 2); if (ret) { drm_err(display->drm, @@ -2177,8 +2218,8 @@ static void bxt_set_cdclk(struct intel_display *display, * Display versions 14 and beyond */; else if (DISPLAY_VER(display) >= 11 && !display->platform.dg2) - ret = snb_pcode_write(&dev_priv->uncore, SKL_PCODE_CDCLK_CONTROL, - cdclk_config->voltage_level); + ret = intel_pcode_write(display->drm, SKL_PCODE_CDCLK_CONTROL, + cdclk_config->voltage_level); if (DISPLAY_VER(display) < 11) { /* * The timeout isn't specified, the 2ms used here is based on @@ -2186,10 +2227,9 @@ static void bxt_set_cdclk(struct intel_display *display, * FIXME: Waiting for the request completion could be delayed * until the next PCODE request based on BSpec. */ - ret = snb_pcode_write_timeout(&dev_priv->uncore, - HSW_PCODE_DE_WRITE_FREQ_REQ, - cdclk_config->voltage_level, - 150, 2); + ret = intel_pcode_write_timeout(display->drm, + HSW_PCODE_DE_WRITE_FREQ_REQ, + cdclk_config->voltage_level, 2); } if (ret) { drm_err(display->drm, @@ -2475,7 +2515,6 @@ static void intel_pcode_notify(struct intel_display *display, bool cdclk_update_valid, bool pipe_count_update_valid) { - struct drm_i915_private *i915 = to_i915(display->drm); int ret; u32 update_mask = 0; @@ -2490,11 +2529,11 @@ static void intel_pcode_notify(struct intel_display *display, if (pipe_count_update_valid) update_mask |= DISPLAY_TO_PCODE_PIPE_COUNT_VALID; - ret = skl_pcode_request(&i915->uncore, SKL_PCODE_CDCLK_CONTROL, - SKL_CDCLK_PREPARE_FOR_CHANGE | - update_mask, - SKL_CDCLK_READY_FOR_CHANGE, - SKL_CDCLK_READY_FOR_CHANGE, 3); + ret = intel_pcode_request(display->drm, SKL_PCODE_CDCLK_CONTROL, + SKL_CDCLK_PREPARE_FOR_CHANGE | + update_mask, + SKL_CDCLK_READY_FOR_CHANGE, + SKL_CDCLK_READY_FOR_CHANGE, 3); if (ret) drm_err(display->drm, "Failed to inform PCU about display config (err %d)\n", @@ -3386,7 +3425,9 @@ static int intel_compute_max_dotclk(struct intel_display *display) */ void intel_update_max_cdclk(struct intel_display *display) { - if (DISPLAY_VER(display) >= 30) { + if (DISPLAY_VERx100(display) >= 3002) { + display->cdclk.max_cdclk_freq = 480000; + } else if (DISPLAY_VER(display) >= 30) { display->cdclk.max_cdclk_freq = 691200; } else if (display->platform.jasperlake || display->platform.elkhartlake) { if (display->cdclk.hw.ref == 24000) @@ -3536,7 +3577,7 @@ static int i9xx_hrawclk(struct intel_display *display) struct drm_i915_private *i915 = to_i915(display->drm); /* hrawclock is 1/4 the FSB frequency */ - return DIV_ROUND_CLOSEST(i9xx_fsb_freq(i915), 4); + return DIV_ROUND_CLOSEST(intel_fsb_freq(i915), 4); } /** @@ -3589,9 +3630,7 @@ DEFINE_SHOW_ATTRIBUTE(i915_cdclk_info); void intel_cdclk_debugfs_register(struct intel_display *display) { - struct drm_minor *minor = display->drm->primary; - - debugfs_create_file("i915_cdclk_info", 0444, minor->debugfs_root, + debugfs_create_file("i915_cdclk_info", 0444, display->drm->debugfs_root, display, &i915_cdclk_info_fops); } @@ -3837,3 +3876,60 @@ void intel_init_cdclk_hooks(struct intel_display *display) "Unknown platform. 
Assuming i830\n")) display->funcs.cdclk = &i830_cdclk_funcs; } + +int intel_cdclk_logical(const struct intel_cdclk_state *cdclk_state) +{ + return cdclk_state->logical.cdclk; +} + +int intel_cdclk_actual(const struct intel_cdclk_state *cdclk_state) +{ + return cdclk_state->actual.cdclk; +} + +int intel_cdclk_actual_voltage_level(const struct intel_cdclk_state *cdclk_state) +{ + return cdclk_state->actual.voltage_level; +} + +int intel_cdclk_min_cdclk(const struct intel_cdclk_state *cdclk_state, enum pipe pipe) +{ + return cdclk_state->min_cdclk[pipe]; +} + +int intel_cdclk_bw_min_cdclk(const struct intel_cdclk_state *cdclk_state) +{ + return cdclk_state->bw_min_cdclk; +} + +bool intel_cdclk_pmdemand_needs_update(struct intel_atomic_state *state) +{ + const struct intel_cdclk_state *new_cdclk_state, *old_cdclk_state; + + new_cdclk_state = intel_atomic_get_new_cdclk_state(state); + old_cdclk_state = intel_atomic_get_old_cdclk_state(state); + + if (new_cdclk_state && + (new_cdclk_state->actual.cdclk != old_cdclk_state->actual.cdclk || + new_cdclk_state->actual.voltage_level != old_cdclk_state->actual.voltage_level)) + return true; + + return false; +} + +void intel_cdclk_force_min_cdclk(struct intel_cdclk_state *cdclk_state, int force_min_cdclk) +{ + cdclk_state->force_min_cdclk = force_min_cdclk; +} + +void intel_cdclk_read_hw(struct intel_display *display) +{ + struct intel_cdclk_state *cdclk_state; + + cdclk_state = to_intel_cdclk_state(display->cdclk.obj.state); + + intel_update_cdclk(display); + intel_cdclk_dump_config(display, &display->cdclk.hw, "Current CDCLK"); + cdclk_state->actual = display->cdclk.hw; + cdclk_state->logical = display->cdclk.hw; +} diff --git a/drivers/gpu/drm/i915/display/intel_cdclk.h b/drivers/gpu/drm/i915/display/intel_cdclk.h index a1cefd455d92..cacee598af0e 100644 --- a/drivers/gpu/drm/i915/display/intel_cdclk.h +++ b/drivers/gpu/drm/i915/display/intel_cdclk.h @@ -8,10 +8,9 @@ #include <linux/types.h> -#include "intel_display_limits.h" -#include "intel_global_state.h" - +enum pipe; struct intel_atomic_state; +struct intel_cdclk_state; struct intel_crtc; struct intel_crtc_state; struct intel_display; @@ -23,42 +22,6 @@ struct intel_cdclk_config { bool joined_mbus; }; -struct intel_cdclk_state { - struct intel_global_state base; - - /* - * Logical configuration of cdclk (used for all scaling, - * watermark, etc. calculations and checks). This is - * computed as if all enabled crtcs were active. - */ - struct intel_cdclk_config logical; - - /* - * Actual configuration of cdclk, can be different from the - * logical configuration only when all crtc's are DPMS off. 
- */ - struct intel_cdclk_config actual; - - /* minimum acceptable cdclk to satisfy bandwidth requirements */ - int bw_min_cdclk; - /* minimum acceptable cdclk for each pipe */ - int min_cdclk[I915_MAX_PIPES]; - /* minimum acceptable voltage level for each pipe */ - u8 min_voltage_level[I915_MAX_PIPES]; - - /* pipe to which cd2x update is synchronized */ - enum pipe pipe; - - /* forced minimum cdclk for glk+ audio w/a */ - int force_min_cdclk; - - /* bitmask of active pipes */ - u8 active_pipes; - - /* update cdclk with pipes disabled */ - bool disable_pipes; -}; - void intel_cdclk_init_hw(struct intel_display *display); void intel_cdclk_uninit_hw(struct intel_display *display); void intel_init_cdclk_hooks(struct intel_display *display); @@ -97,4 +60,13 @@ void intel_cdclk_crtc_disable_noatomic(struct intel_crtc *crtc); int intel_cdclk_init(struct intel_display *display); void intel_cdclk_debugfs_register(struct intel_display *display); +int intel_cdclk_logical(const struct intel_cdclk_state *cdclk_state); +int intel_cdclk_actual(const struct intel_cdclk_state *cdclk_state); +int intel_cdclk_actual_voltage_level(const struct intel_cdclk_state *cdclk_state); +int intel_cdclk_min_cdclk(const struct intel_cdclk_state *cdclk_state, enum pipe pipe); +int intel_cdclk_bw_min_cdclk(const struct intel_cdclk_state *cdclk_state); +bool intel_cdclk_pmdemand_needs_update(struct intel_atomic_state *state); +void intel_cdclk_force_min_cdclk(struct intel_cdclk_state *cdclk_state, int force_min_cdclk); +void intel_cdclk_read_hw(struct intel_display *display); + #endif /* __INTEL_CDCLK_H__ */ diff --git a/drivers/gpu/drm/i915/display/intel_connector.c b/drivers/gpu/drm/i915/display/intel_connector.c index 2867d76d1a5e..6a55854db5b6 100644 --- a/drivers/gpu/drm/i915/display/intel_connector.c +++ b/drivers/gpu/drm/i915/display/intel_connector.c @@ -64,10 +64,10 @@ static void intel_connector_modeset_retry_work_fn(struct work_struct *work) void intel_connector_queue_modeset_retry_work(struct intel_connector *connector) { - struct drm_i915_private *i915 = to_i915(connector->base.dev); + struct intel_display *display = to_intel_display(connector); drm_connector_get(&connector->base); - if (!queue_work(i915->unordered_wq, &connector->modeset_retry_work)) + if (!queue_work(display->wq.unordered, &connector->modeset_retry_work)) drm_connector_put(&connector->base); } @@ -77,7 +77,7 @@ void intel_connector_cancel_modeset_retry_work(struct intel_connector *connector drm_connector_put(&connector->base); } -int intel_connector_init(struct intel_connector *connector) +static int intel_connector_init(struct intel_connector *connector) { struct intel_digital_connector_state *conn_state; diff --git a/drivers/gpu/drm/i915/display/intel_connector.h b/drivers/gpu/drm/i915/display/intel_connector.h index aafb25a814fa..0aa86626e646 100644 --- a/drivers/gpu/drm/i915/display/intel_connector.h +++ b/drivers/gpu/drm/i915/display/intel_connector.h @@ -14,7 +14,6 @@ struct i2c_adapter; struct intel_connector; struct intel_encoder; -int intel_connector_init(struct intel_connector *connector); struct intel_connector *intel_connector_alloc(void); void intel_connector_free(struct intel_connector *connector); void intel_connector_destroy(struct drm_connector *connector); diff --git a/drivers/gpu/drm/i915/display/intel_crt.c b/drivers/gpu/drm/i915/display/intel_crt.c index 898c5d9e8f7a..31e68047f217 100644 --- a/drivers/gpu/drm/i915/display/intel_crt.c +++ b/drivers/gpu/drm/i915/display/intel_crt.c @@ -50,6 +50,7 @@ #include 
"intel_gmbus.h" #include "intel_hotplug.h" #include "intel_hotplug_irq.h" +#include "intel_link_bw.h" #include "intel_load_detect.h" #include "intel_pch_display.h" #include "intel_pch_refclk.h" @@ -421,7 +422,7 @@ static int pch_crt_compute_config(struct intel_encoder *encoder, return -EINVAL; crtc_state->has_pch_encoder = true; - if (!intel_fdi_compute_pipe_bpp(crtc_state)) + if (!intel_link_bw_compute_pipe_bpp(crtc_state)) return -EINVAL; crtc_state->output_format = INTEL_OUTPUT_FORMAT_RGB; @@ -446,7 +447,7 @@ static int hsw_crt_compute_config(struct intel_encoder *encoder, return -EINVAL; crtc_state->has_pch_encoder = true; - if (!intel_fdi_compute_pipe_bpp(crtc_state)) + if (!intel_link_bw_compute_pipe_bpp(crtc_state)) return -EINVAL; crtc_state->output_format = INTEL_OUTPUT_FORMAT_RGB; diff --git a/drivers/gpu/drm/i915/display/intel_crtc.c b/drivers/gpu/drm/i915/display/intel_crtc.c index a88317ea4e9c..a187db6df2d3 100644 --- a/drivers/gpu/drm/i915/display/intel_crtc.c +++ b/drivers/gpu/drm/i915/display/intel_crtc.c @@ -17,7 +17,6 @@ #include "i9xx_plane.h" #include "icl_dsi.h" #include "intel_atomic.h" -#include "intel_atomic_plane.h" #include "intel_color.h" #include "intel_crtc.h" #include "intel_cursor.h" @@ -29,6 +28,7 @@ #include "intel_dsi.h" #include "intel_fifo_underrun.h" #include "intel_pipe_crc.h" +#include "intel_plane.h" #include "intel_psr.h" #include "intel_sprite.h" #include "intel_vblank.h" diff --git a/drivers/gpu/drm/i915/display/intel_cursor.c b/drivers/gpu/drm/i915/display/intel_cursor.c index 6bd4f6a28cae..d4d181f9dca5 100644 --- a/drivers/gpu/drm/i915/display/intel_cursor.c +++ b/drivers/gpu/drm/i915/display/intel_cursor.c @@ -14,7 +14,6 @@ #include "i915_utils.h" #include "intel_atomic.h" -#include "intel_atomic_plane.h" #include "intel_cursor.h" #include "intel_cursor_regs.h" #include "intel_de.h" @@ -23,6 +22,7 @@ #include "intel_fb.h" #include "intel_fb_pin.h" #include "intel_frontbuffer.h" +#include "intel_plane.h" #include "intel_psr.h" #include "intel_psr_regs.h" #include "intel_vblank.h" @@ -33,17 +33,9 @@ static const u32 intel_cursor_formats[] = { DRM_FORMAT_ARGB8888, }; -static u32 intel_cursor_base(const struct intel_plane_state *plane_state) +static u32 intel_cursor_surf_offset(const struct intel_plane_state *plane_state) { - struct intel_display *display = to_intel_display(plane_state); - u32 base; - - if (DISPLAY_INFO(display)->cursor_needs_physical) - base = plane_state->phys_dma_addr; - else - base = intel_plane_ggtt_offset(plane_state); - - return base + plane_state->view.color_plane[0].offset; + return plane_state->view.color_plane[0].offset; } static u32 intel_cursor_position(const struct intel_crtc_state *crtc_state, @@ -158,10 +150,10 @@ static int intel_check_cursor(struct intel_crtc_state *crtc_state, return -EINVAL; } - ret = intel_atomic_plane_check_clipping(plane_state, crtc_state, - DRM_PLANE_NO_SCALING, - DRM_PLANE_NO_SCALING, - true); + ret = intel_plane_check_clipping(plane_state, crtc_state, + DRM_PLANE_NO_SCALING, + DRM_PLANE_NO_SCALING, + true); if (ret) return ret; @@ -213,8 +205,7 @@ static u32 i845_cursor_ctl_crtc(const struct intel_crtc_state *crtc_state) return cntl; } -static u32 i845_cursor_ctl(const struct intel_crtc_state *crtc_state, - const struct intel_plane_state *plane_state) +static u32 i845_cursor_ctl(const struct intel_plane_state *plane_state) { return CURSOR_ENABLE | CURSOR_FORMAT_ARGB | @@ -274,7 +265,7 @@ static int i845_check_cursor(struct intel_crtc_state *crtc_state, return -EINVAL; } - plane_state->ctl 
= i845_cursor_ctl(crtc_state, plane_state); + plane_state->ctl = i845_cursor_ctl(plane_state); return 0; } @@ -297,7 +288,7 @@ static void i845_cursor_update_arm(struct intel_dsb *dsb, size = CURSOR_HEIGHT(height) | CURSOR_WIDTH(width); - base = intel_cursor_base(plane_state); + base = plane_state->surf; pos = intel_cursor_position(crtc_state, plane_state, false); } @@ -406,8 +397,7 @@ static u32 i9xx_cursor_ctl_crtc(const struct intel_crtc_state *crtc_state) return cntl; } -static u32 i9xx_cursor_ctl(const struct intel_crtc_state *crtc_state, - const struct intel_plane_state *plane_state) +static u32 i9xx_cursor_ctl(const struct intel_plane_state *plane_state) { struct intel_display *display = to_intel_display(plane_state); u32 cntl = 0; @@ -534,7 +524,7 @@ static int i9xx_check_cursor(struct intel_crtc_state *crtc_state, return -EINVAL; } - plane_state->ctl = i9xx_cursor_ctl(crtc_state, plane_state); + plane_state->ctl = i9xx_cursor_ctl(plane_state); return 0; } @@ -675,7 +665,7 @@ static void i9xx_cursor_update_arm(struct intel_dsb *dsb, if (width != height) fbc_ctl = CUR_FBC_EN | CUR_FBC_HEIGHT(height - 1); - base = intel_cursor_base(plane_state); + base = plane_state->surf; pos = intel_cursor_position(crtc_state, plane_state, false); } @@ -1051,6 +1041,8 @@ intel_cursor_plane_create(struct intel_display *display, cursor->check_plane = i9xx_check_cursor; } + cursor->surf_offset = intel_cursor_surf_offset; + if (DISPLAY_VER(display) >= 5 || display->platform.g4x) cursor->capture_error = g4x_cursor_capture_error; else diff --git a/drivers/gpu/drm/i915/display/intel_cx0_phy.c b/drivers/gpu/drm/i915/display/intel_cx0_phy.c index 83c8df9dbc0c..801235a5bc0a 100644 --- a/drivers/gpu/drm/i915/display/intel_cx0_phy.c +++ b/drivers/gpu/drm/i915/display/intel_cx0_phy.c @@ -39,7 +39,13 @@ bool intel_encoder_is_c10phy(struct intel_encoder *encoder) struct intel_display *display = to_intel_display(encoder); enum phy phy = intel_encoder_to_phy(encoder); - if (display->platform.pantherlake && phy == PHY_A) + /* PTL doesn't have a PHY connected to PORT B; as such, + * there will never be a case where PTL uses PHY B. + * WCL uses PORT A and B with the C10 PHY. + * Reusing the condition for WCL and extending it for PORT B + * should not cause any issues for PTL. + */ + if (display->platform.pantherlake && phy < PHY_C) return true; if ((display->platform.lunarlake || display->platform.meteorlake) && phy < PHY_C) @@ -3233,14 +3239,22 @@ void intel_lnl_mac_transmit_lfps(struct intel_encoder *encoder, const struct intel_crtc_state *crtc_state) { struct intel_display *display = to_intel_display(encoder); - u8 owned_lane_mask = intel_cx0_get_owned_lane_mask(encoder); - bool enable = intel_alpm_is_alpm_aux_less(enc_to_intel_dp(encoder), - crtc_state); + intel_wakeref_t wakeref; int i; + u8 owned_lane_mask; - if (DISPLAY_VER(display) < 20) + if (DISPLAY_VER(display) < 20 || + !intel_alpm_is_alpm_aux_less(enc_to_intel_dp(encoder), crtc_state)) return; + owned_lane_mask = intel_cx0_get_owned_lane_mask(encoder); + + wakeref = intel_cx0_phy_transaction_begin(encoder); + + if (intel_encoder_is_c10phy(encoder)) + intel_cx0_rmw(encoder, owned_lane_mask, PHY_C10_VDR_CONTROL(1), 0, + C10_VDR_CTRL_MSGBUS_ACCESS, MB_WRITE_COMMITTED); + for (i = 0; i < 4; i++) { int tx = i % 2 + 1; u8 lane_mask = i < 2 ? INTEL_CX0_LANE0 : INTEL_CX0_LANE1; @@ -3250,9 +3264,10 @@ void intel_lnl_mac_transmit_lfps(struct intel_encoder *encoder, intel_cx0_rmw(encoder, lane_mask, PHY_CMN1_CONTROL(tx, 0), CONTROL0_MAC_TRANSMIT_LFPS, - enable ? 
CONTROL0_MAC_TRANSMIT_LFPS : 0, - MB_WRITE_COMMITTED); + CONTROL0_MAC_TRANSMIT_LFPS, MB_WRITE_COMMITTED); } + + intel_cx0_phy_transaction_end(encoder, wakeref); } static u8 cx0_power_control_disable_val(struct intel_encoder *encoder) diff --git a/drivers/gpu/drm/i915/display/intel_ddi.c b/drivers/gpu/drm/i915/display/intel_ddi.c index cbd1060e9664..c09aa759f4d4 100644 --- a/drivers/gpu/drm/i915/display/intel_ddi.c +++ b/drivers/gpu/drm/i915/display/intel_ddi.c @@ -26,6 +26,7 @@ */ #include <linux/iopoll.h> +#include <linux/seq_buf.h> #include <linux/string_helpers.h> #include <drm/display/drm_dp_helper.h> @@ -73,11 +74,13 @@ #include "intel_lspcon.h" #include "intel_mg_phy_regs.h" #include "intel_modeset_lock.h" +#include "intel_panel.h" #include "intel_pfit.h" #include "intel_pps.h" #include "intel_psr.h" #include "intel_quirks.h" #include "intel_snps_phy.h" +#include "intel_step.h" #include "intel_tc.h" #include "intel_vdsc.h" #include "intel_vdsc_regs.h" @@ -594,8 +597,9 @@ intel_ddi_transcoder_func_reg_val_get(struct intel_encoder *encoder, enum transcoder master; master = crtc_state->mst_master_transcoder; - drm_WARN_ON(display->drm, - master == INVALID_TRANSCODER); + if (drm_WARN_ON(display->drm, + master == INVALID_TRANSCODER)) + master = TRANSCODER_A; temp |= TRANS_DDI_MST_TRANSPORT_SELECT(master); } } else { @@ -1394,6 +1398,21 @@ static void tgl_dkl_phy_set_signal_levels(struct intel_encoder *encoder, for (ln = 0; ln < 2; ln++) { int level; + /* Wa_16011342517:adl-p */ + if (display->platform.alderlake_p && + IS_DISPLAY_STEP(display, STEP_A0, STEP_D0)) { + if ((intel_encoder_is_hdmi(encoder) && + crtc_state->port_clock == 594000) || + (intel_encoder_is_dp(encoder) && + crtc_state->port_clock == 162000)) { + intel_dkl_phy_rmw(display, DKL_TX_DPCNTL2(tc_port, ln), + LOADGEN_SHARING_PMD_DISABLE, 1); + } else { + intel_dkl_phy_rmw(display, DKL_TX_DPCNTL2(tc_port, ln), + LOADGEN_SHARING_PMD_DISABLE, 0); + } + } + intel_dkl_phy_write(display, DKL_TX_PMD_LANE_SUS(tc_port, ln), 0); level = intel_ddi_level(encoder, crtc_state, 2*ln+0); @@ -2149,7 +2168,8 @@ icl_program_mg_dp_mode(struct intel_digital_port *dig_port, { struct intel_display *display = to_intel_display(crtc_state); enum tc_port tc_port = intel_encoder_to_tc(&dig_port->base); - u32 ln0, ln1, pin_assignment; + enum intel_tc_pin_assignment pin_assignment; + u32 ln0, ln1; u8 width; if (DISPLAY_VER(display) >= 14) @@ -2171,11 +2191,11 @@ icl_program_mg_dp_mode(struct intel_digital_port *dig_port, ln1 &= ~(MG_DP_MODE_CFG_DP_X1_MODE | MG_DP_MODE_CFG_DP_X2_MODE); /* DPPATC */ - pin_assignment = intel_tc_port_get_pin_assignment_mask(dig_port); + pin_assignment = intel_tc_port_get_pin_assignment(dig_port); width = crtc_state->lane_count; switch (pin_assignment) { - case 0x0: + case INTEL_TC_PIN_ASSIGNMENT_NONE: drm_WARN_ON(display->drm, !intel_tc_port_in_legacy_mode(dig_port)); if (width == 1) { @@ -2185,20 +2205,20 @@ icl_program_mg_dp_mode(struct intel_digital_port *dig_port, ln1 |= MG_DP_MODE_CFG_DP_X2_MODE; } break; - case 0x1: + case INTEL_TC_PIN_ASSIGNMENT_A: if (width == 4) { ln0 |= MG_DP_MODE_CFG_DP_X2_MODE; ln1 |= MG_DP_MODE_CFG_DP_X2_MODE; } break; - case 0x2: + case INTEL_TC_PIN_ASSIGNMENT_B: if (width == 2) { ln0 |= MG_DP_MODE_CFG_DP_X2_MODE; ln1 |= MG_DP_MODE_CFG_DP_X2_MODE; } break; - case 0x3: - case 0x5: + case INTEL_TC_PIN_ASSIGNMENT_C: + case INTEL_TC_PIN_ASSIGNMENT_E: if (width == 1) { ln0 |= MG_DP_MODE_CFG_DP_X1_MODE; ln1 |= MG_DP_MODE_CFG_DP_X1_MODE; @@ -2207,8 +2227,8 @@ icl_program_mg_dp_mode(struct 
intel_digital_port *dig_port, ln1 |= MG_DP_MODE_CFG_DP_X2_MODE; } break; - case 0x4: - case 0x6: + case INTEL_TC_PIN_ASSIGNMENT_D: + case INTEL_TC_PIN_ASSIGNMENT_F: if (width == 1) { ln0 |= MG_DP_MODE_CFG_DP_X1_MODE; ln1 |= MG_DP_MODE_CFG_DP_X1_MODE; @@ -2322,34 +2342,24 @@ static void intel_dp_sink_set_fec_ready(struct intel_dp *intel_dp, drm_dbg_kms(display->drm, "Failed to clear FEC detected flags\n"); } -static int read_fec_detected_status(struct drm_dp_aux *aux) -{ - int ret; - u8 status; - - ret = drm_dp_dpcd_readb(aux, DP_FEC_STATUS, &status); - if (ret < 0) - return ret; - - return status; -} - static int wait_for_fec_detected(struct drm_dp_aux *aux, bool enabled) { struct intel_display *display = to_intel_display(aux->drm_dev); int mask = enabled ? DP_FEC_DECODE_EN_DETECTED : DP_FEC_DECODE_DIS_DETECTED; - int status; - int err; + u8 status = 0; + int ret, err; - err = readx_poll_timeout(read_fec_detected_status, aux, status, - status & mask || status < 0, - 10000, 200000); + ret = poll_timeout_us(err = drm_dp_dpcd_read_byte(aux, DP_FEC_STATUS, &status), + err || (status & mask), + 10 * 1000, 200 * 1000, false); - if (err || status < 0) { + /* Either can be non-zero, but not both */ + ret = ret ?: err; + if (ret) { drm_dbg_kms(display->drm, - "Failed waiting for FEC %s to get detected: %d (status %d)\n", - str_enabled_disabled(enabled), err, status); - return err ? err : status; + "Failed waiting for FEC %s to get detected: %d (status 0x%02x)\n", + str_enabled_disabled(enabled), ret, status); + return ret; } return 0; @@ -2544,6 +2554,7 @@ mtl_ddi_enable_d2d(struct intel_encoder *encoder) enum port port = encoder->port; i915_reg_t reg; u32 set_bits, wait_bits; + int ret; if (DISPLAY_VER(display) < 14) return; @@ -2559,7 +2570,11 @@ mtl_ddi_enable_d2d(struct intel_encoder *encoder) } intel_de_rmw(display, reg, 0, set_bits); - if (wait_for_us(intel_de_read(display, reg) & wait_bits, 100)) { + + ret = intel_de_wait_custom(display, reg, + wait_bits, wait_bits, + 100, 0, NULL); + if (ret) { drm_err(display->drm, "Timeout waiting for D2D Link enable for DDI/PORT_BUF_CTL %c\n", port_name(port)); } @@ -3041,6 +3056,7 @@ mtl_ddi_disable_d2d(struct intel_encoder *encoder) enum port port = encoder->port; i915_reg_t reg; u32 clr_bits, wait_bits; + int ret; if (DISPLAY_VER(display) < 14) return; @@ -3056,7 +3072,11 @@ mtl_ddi_disable_d2d(struct intel_encoder *encoder) } intel_de_rmw(display, reg, clr_bits, 0); - if (wait_for_us(!(intel_de_read(display, reg) & wait_bits), 100)) + + ret = intel_de_wait_custom(display, reg, + wait_bits, 0, + 100, 0, NULL); + if (ret) drm_err(display->drm, "Timeout waiting for D2D Link disable for DDI/PORT_BUF_CTL %c\n", port_name(port)); } @@ -3355,6 +3375,8 @@ static void intel_ddi_enable_dp(struct intel_atomic_state *state, drm_connector_update_privacy_screen(conn_state); intel_edp_backlight_on(crtc_state, conn_state); + intel_panel_prepare(crtc_state, conn_state); + if (!intel_lspcon_active(dig_port) || intel_dp_has_hdmi_sink(&dig_port->dp)) intel_dp_set_infoframes(encoder, true, crtc_state, conn_state); @@ -3552,6 +3574,7 @@ static void intel_ddi_disable_dp(struct intel_atomic_state *state, intel_dp->link.active = false; + intel_panel_unprepare(old_conn_state); intel_psr_disable(intel_dp, old_crtc_state); intel_alpm_disable(intel_dp); intel_edp_backlight_off(old_conn_state); @@ -5046,11 +5069,45 @@ static bool port_in_use(struct intel_display *display, enum port port) return false; } +static const char *intel_ddi_encoder_name(struct intel_display *display, + 
enum port port, enum phy phy, + struct seq_buf *s) +{ + if (DISPLAY_VER(display) >= 13 && port >= PORT_D_XELPD) { + seq_buf_printf(s, "DDI %c/PHY %c", + port_name(port - PORT_D_XELPD + PORT_D), + phy_name(phy)); + } else if (DISPLAY_VER(display) >= 12) { + enum tc_port tc_port = intel_port_to_tc(display, port); + + seq_buf_printf(s, "DDI %s%c/PHY %s%c", + port >= PORT_TC1 ? "TC" : "", + port >= PORT_TC1 ? port_tc_name(port) : port_name(port), + tc_port != TC_PORT_NONE ? "TC" : "", + tc_port != TC_PORT_NONE ? tc_port_name(tc_port) : phy_name(phy)); + } else if (DISPLAY_VER(display) >= 11) { + enum tc_port tc_port = intel_port_to_tc(display, port); + + seq_buf_printf(s, "DDI %c%s/PHY %s%c", + port_name(port), + port >= PORT_C ? " (TC)" : "", + tc_port != TC_PORT_NONE ? "TC" : "", + tc_port != TC_PORT_NONE ? tc_port_name(tc_port) : phy_name(phy)); + } else { + seq_buf_printf(s, "DDI %c/PHY %c", port_name(port), phy_name(phy)); + } + + drm_WARN_ON(display->drm, seq_buf_has_overflowed(s)); + + return seq_buf_str(s); +} + void intel_ddi_init(struct intel_display *display, const struct intel_bios_encoder_data *devdata) { struct intel_digital_port *dig_port; struct intel_encoder *encoder; + DECLARE_SEQ_BUF(encoder_name, 20); bool init_hdmi, init_dp; enum port port; enum phy phy; @@ -5128,52 +5185,19 @@ void intel_ddi_init(struct intel_display *display, phy_name(phy)); } - dig_port = kzalloc(sizeof(*dig_port), GFP_KERNEL); + dig_port = intel_dig_port_alloc(); if (!dig_port) return; - dig_port->aux_ch = AUX_CH_NONE; - encoder = &dig_port->base; encoder->devdata = devdata; - if (DISPLAY_VER(display) >= 13 && port >= PORT_D_XELPD) { - drm_encoder_init(display->drm, &encoder->base, &intel_ddi_funcs, - DRM_MODE_ENCODER_TMDS, - "DDI %c/PHY %c", - port_name(port - PORT_D_XELPD + PORT_D), - phy_name(phy)); - } else if (DISPLAY_VER(display) >= 12) { - enum tc_port tc_port = intel_port_to_tc(display, port); - - drm_encoder_init(display->drm, &encoder->base, &intel_ddi_funcs, - DRM_MODE_ENCODER_TMDS, - "DDI %s%c/PHY %s%c", - port >= PORT_TC1 ? "TC" : "", - port >= PORT_TC1 ? port_tc_name(port) : port_name(port), - tc_port != TC_PORT_NONE ? "TC" : "", - tc_port != TC_PORT_NONE ? tc_port_name(tc_port) : phy_name(phy)); - } else if (DISPLAY_VER(display) >= 11) { - enum tc_port tc_port = intel_port_to_tc(display, port); - - drm_encoder_init(display->drm, &encoder->base, &intel_ddi_funcs, - DRM_MODE_ENCODER_TMDS, - "DDI %c%s/PHY %s%c", - port_name(port), - port >= PORT_C ? " (TC)" : "", - tc_port != TC_PORT_NONE ? "TC" : "", - tc_port != TC_PORT_NONE ? 
tc_port_name(tc_port) : phy_name(phy)); - } else { - drm_encoder_init(display->drm, &encoder->base, &intel_ddi_funcs, - DRM_MODE_ENCODER_TMDS, - "DDI %c/PHY %c", port_name(port), phy_name(phy)); - } + drm_encoder_init(display->drm, &encoder->base, &intel_ddi_funcs, + DRM_MODE_ENCODER_TMDS, "%s", + intel_ddi_encoder_name(display, port, phy, &encoder_name)); intel_encoder_link_check_init(encoder, intel_ddi_link_check); - mutex_init(&dig_port->hdcp.mutex); - dig_port->hdcp.num_streams = 0; - encoder->hotplug = intel_ddi_hotplug; encoder->compute_output_type = intel_ddi_compute_output_type; encoder->compute_config = intel_ddi_compute_config; @@ -5311,7 +5335,6 @@ void intel_ddi_init(struct intel_display *display, dig_port->ddi_a_4_lanes = DISPLAY_VER(display) < 11 && ddi_buf_ctl & DDI_A_4_LANES; - dig_port->dp.output_reg = INVALID_MMIO_REG; dig_port->max_lanes = intel_ddi_max_lanes(dig_port); if (need_aux_ch(encoder, init_dp)) { diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index 6ec786198f43..5dca7f96b425 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -57,7 +57,6 @@ #include "i9xx_wm.h" #include "intel_alpm.h" #include "intel_atomic.h" -#include "intel_atomic_plane.h" #include "intel_audio.h" #include "intel_bo.h" #include "intel_bw.h" @@ -77,6 +76,7 @@ #include "intel_display_regs.h" #include "intel_display_rpm.h" #include "intel_display_types.h" +#include "intel_display_wa.h" #include "intel_dmc.h" #include "intel_dp.h" #include "intel_dp_link_training.h" @@ -94,6 +94,7 @@ #include "intel_fbc.h" #include "intel_fdi.h" #include "intel_fifo_underrun.h" +#include "intel_flipq.h" #include "intel_frontbuffer.h" #include "intel_hdmi.h" #include "intel_hotplug.h" @@ -108,6 +109,7 @@ #include "intel_pch_refclk.h" #include "intel_pfit.h" #include "intel_pipe_crc.h" +#include "intel_plane.h" #include "intel_plane_initial.h" #include "intel_pmdemand.h" #include "intel_pps.h" @@ -1080,6 +1082,11 @@ static void intel_post_plane_update(struct intel_atomic_state *state, if (audio_enabling(old_crtc_state, new_crtc_state)) intel_encoders_audio_enable(state, crtc); + if (intel_display_wa(display, 14011503117)) { + if (old_crtc_state->pch_pfit.enabled != new_crtc_state->pch_pfit.enabled) + adl_scaler_ecc_unmask(new_crtc_state); + } + intel_alpm_post_plane_update(state, crtc); intel_psr_post_plane_update(state, crtc); @@ -1659,8 +1666,12 @@ static void hsw_crtc_enable(struct intel_atomic_state *state, if (drm_WARN_ON(display->drm, crtc->active)) return; - for_each_pipe_crtc_modeset_enable(display, pipe_crtc, new_crtc_state, i) - intel_dmc_enable_pipe(display, pipe_crtc->pipe); + for_each_pipe_crtc_modeset_enable(display, pipe_crtc, new_crtc_state, i) { + const struct intel_crtc_state *new_pipe_crtc_state = + intel_atomic_get_new_crtc_state(state, pipe_crtc); + + intel_dmc_enable_pipe(new_pipe_crtc_state); + } intel_encoders_pre_pll_enable(state, crtc); @@ -1798,8 +1809,12 @@ static void hsw_crtc_disable(struct intel_atomic_state *state, intel_encoders_post_pll_disable(state, crtc); - for_each_pipe_crtc_modeset_disable(display, pipe_crtc, old_crtc_state, i) - intel_dmc_disable_pipe(display, pipe_crtc->pipe); + for_each_pipe_crtc_modeset_disable(display, pipe_crtc, old_crtc_state, i) { + const struct intel_crtc_state *old_pipe_crtc_state = + intel_atomic_get_old_crtc_state(state, pipe_crtc); + + intel_dmc_disable_pipe(old_pipe_crtc_state); + } } /* Prefer intel_encoder_is_combo() */ @@ 
-4160,7 +4175,7 @@ static u16 hsw_ips_linetime_wm(const struct intel_crtc_state *crtc_state, return 0; linetime_wm = DIV_ROUND_CLOSEST(pipe_mode->crtc_htotal * 1000 * 8, - cdclk_state->logical.cdclk); + intel_cdclk_logical(cdclk_state)); return min(linetime_wm, 0x1ff); } @@ -5479,7 +5494,7 @@ static int intel_modeset_pipe(struct intel_atomic_state *state, if (ret) return ret; - ret = intel_atomic_add_affected_planes(state, crtc); + ret = intel_plane_add_affected(state, crtc); if (ret) return ret; @@ -6195,7 +6210,7 @@ static int intel_joiner_add_affected_crtcs(struct intel_atomic_state *state) if (ret) return ret; - ret = intel_atomic_add_affected_planes(state, crtc); + ret = intel_plane_add_affected(state, crtc); if (ret) return ret; } @@ -6447,7 +6462,7 @@ int intel_atomic_check(struct drm_device *dev, goto fail; } - ret = intel_atomic_check_planes(state); + ret = intel_plane_atomic_check(state); if (ret) goto fail; @@ -6611,7 +6626,7 @@ static void commit_pipe_pre_planes(struct intel_atomic_state *state, intel_atomic_get_new_crtc_state(state, crtc); bool modeset = intel_crtc_needs_modeset(new_crtc_state); - drm_WARN_ON(display->drm, new_crtc_state->use_dsb); + drm_WARN_ON(display->drm, new_crtc_state->use_dsb || new_crtc_state->use_flipq); /* * During modesets pipe configuration was programmed as the @@ -6641,7 +6656,7 @@ static void commit_pipe_post_planes(struct intel_atomic_state *state, intel_atomic_get_new_crtc_state(state, crtc); bool modeset = intel_crtc_needs_modeset(new_crtc_state); - drm_WARN_ON(display->drm, new_crtc_state->use_dsb); + drm_WARN_ON(display->drm, new_crtc_state->use_dsb || new_crtc_state->use_flipq); /* * Disable the scaler(s) after the plane(s) so that we don't @@ -6730,10 +6745,10 @@ static void intel_pre_update_crtc(struct intel_atomic_state *state, if (!modeset && intel_crtc_needs_color_update(new_crtc_state) && - !new_crtc_state->use_dsb) + !new_crtc_state->use_dsb && !new_crtc_state->use_flipq) intel_color_commit_noarm(NULL, new_crtc_state); - if (!new_crtc_state->use_dsb) + if (!new_crtc_state->use_dsb && !new_crtc_state->use_flipq) intel_crtc_planes_update_noarm(NULL, state, crtc); } @@ -6745,7 +6760,14 @@ static void intel_update_crtc(struct intel_atomic_state *state, struct intel_crtc_state *new_crtc_state = intel_atomic_get_new_crtc_state(state, crtc); - if (new_crtc_state->use_dsb) { + if (new_crtc_state->use_flipq) { + intel_flipq_enable(new_crtc_state); + + intel_crtc_prepare_vblank_event(new_crtc_state, &crtc->flipq_event); + + intel_flipq_add(crtc, INTEL_FLIPQ_PLANE_1, 0, INTEL_DSB_0, + new_crtc_state->dsb_commit); + } else if (new_crtc_state->use_dsb) { intel_crtc_prepare_vblank_event(new_crtc_state, &crtc->dsb_event); intel_dsb_commit(new_crtc_state->dsb_commit); @@ -7076,7 +7098,8 @@ static void intel_atomic_commit_fence_wait(struct intel_atomic_state *intel_stat struct drm_i915_private *i915 = to_i915(intel_state->base.dev); struct drm_plane *plane; struct drm_plane_state *new_plane_state; - int ret, i; + long ret; + int i; for_each_new_plane_in_state(&intel_state->base, plane, new_plane_state, i) { if (new_plane_state->fence) { @@ -7183,7 +7206,17 @@ static void intel_atomic_dsb_prepare(struct intel_atomic_state *state, return; /* FIXME deal with everything */ + new_crtc_state->use_flipq = + intel_flipq_supported(display) && + !new_crtc_state->do_async_flip && + !new_crtc_state->vrr.enable && + !new_crtc_state->has_psr && + !intel_crtc_needs_modeset(new_crtc_state) && + !intel_crtc_needs_fastset(new_crtc_state) && + 
!intel_crtc_needs_color_update(new_crtc_state); + new_crtc_state->use_dsb = + !new_crtc_state->use_flipq && !new_crtc_state->do_async_flip && (DISPLAY_VER(display) >= 20 || !new_crtc_state->has_psr) && !intel_crtc_needs_modeset(new_crtc_state) && @@ -7199,7 +7232,9 @@ static void intel_atomic_dsb_finish(struct intel_atomic_state *state, struct intel_crtc_state *new_crtc_state = intel_atomic_get_new_crtc_state(state, crtc); - if (!new_crtc_state->use_dsb && !new_crtc_state->dsb_color) + if (!new_crtc_state->use_flipq && + !new_crtc_state->use_dsb && + !new_crtc_state->dsb_color) return; /* @@ -7208,14 +7243,20 @@ static void intel_atomic_dsb_finish(struct intel_atomic_state *state, * Double that for pipe stuff and other overhead. */ new_crtc_state->dsb_commit = intel_dsb_prepare(state, crtc, INTEL_DSB_0, - new_crtc_state->use_dsb ? 1024 : 16); + new_crtc_state->use_dsb || + new_crtc_state->use_flipq ? 1024 : 16); if (!new_crtc_state->dsb_commit) { + new_crtc_state->use_flipq = false; new_crtc_state->use_dsb = false; intel_color_cleanup_commit(new_crtc_state); return; } - if (new_crtc_state->use_dsb) { + if (new_crtc_state->use_flipq || new_crtc_state->use_dsb) { + /* Wa_18034343758 */ + if (new_crtc_state->use_flipq) + intel_flipq_wait_dmc_halt(new_crtc_state->dsb_commit, crtc); + if (intel_crtc_needs_color_update(new_crtc_state)) intel_color_commit_noarm(new_crtc_state->dsb_commit, new_crtc_state); @@ -7230,7 +7271,11 @@ static void intel_atomic_dsb_finish(struct intel_atomic_state *state, intel_psr_trigger_frame_change_event(new_crtc_state->dsb_commit, state, crtc); - intel_dsb_vblank_evade(state, new_crtc_state->dsb_commit); + intel_psr_wait_for_idle_dsb(new_crtc_state->dsb_commit, + new_crtc_state); + + if (new_crtc_state->use_dsb) + intel_dsb_vblank_evade(state, new_crtc_state->dsb_commit); if (intel_crtc_needs_color_update(new_crtc_state)) intel_color_commit_arm(new_crtc_state->dsb_commit, @@ -7245,6 +7290,10 @@ static void intel_atomic_dsb_finish(struct intel_atomic_state *state, if (DISPLAY_VER(display) >= 9) skl_detach_scalers(new_crtc_state->dsb_commit, new_crtc_state); + + /* Wa_18034343758 */ + if (new_crtc_state->use_flipq) + intel_flipq_unhalt_dmc(new_crtc_state->dsb_commit, crtc); } if (intel_color_uses_chained_dsb(new_crtc_state)) @@ -7385,6 +7434,7 @@ static void intel_atomic_commit_tail(struct intel_atomic_state *state) /* Now enable the clocks, plane, pipe, and connectors that we set up. 
*/ display->funcs.display->commit_modeset_enables(state); + /* FIXME probably need to sequence this properly */ intel_program_dpkgc_latency(state); intel_wait_for_vblank_workers(state); @@ -7408,6 +7458,9 @@ static void intel_atomic_commit_tail(struct intel_atomic_state *state) if (!state->base.legacy_cursor_update && !new_crtc_state->use_dsb) intel_vrr_check_push_sent(NULL, new_crtc_state); + + if (new_crtc_state->use_flipq) + intel_flipq_disable(new_crtc_state); } /* diff --git a/drivers/gpu/drm/i915/display/intel_display_conversion.c b/drivers/gpu/drm/i915/display/intel_display_conversion.c index 4d565935e2cc..d56065f22655 100644 --- a/drivers/gpu/drm/i915/display/intel_display_conversion.c +++ b/drivers/gpu/drm/i915/display/intel_display_conversion.c @@ -4,7 +4,7 @@ #include "i915_drv.h" #include "intel_display_conversion.h" -struct intel_display *__i915_to_display(struct drm_i915_private *i915) +static struct intel_display *__i915_to_display(struct drm_i915_private *i915) { return i915->display; } diff --git a/drivers/gpu/drm/i915/display/intel_display_conversion.h b/drivers/gpu/drm/i915/display/intel_display_conversion.h index 46c7208d42ba..d497bc58a73f 100644 --- a/drivers/gpu/drm/i915/display/intel_display_conversion.h +++ b/drivers/gpu/drm/i915/display/intel_display_conversion.h @@ -9,20 +9,8 @@ #define __INTEL_DISPLAY_CONVERSION__ struct drm_device; -struct drm_i915_private; struct intel_display; -struct intel_display *__i915_to_display(struct drm_i915_private *i915); struct intel_display *__drm_to_display(struct drm_device *drm); -/* - * Transitional macro to optionally convert struct drm_i915_private * to struct - * intel_display *, also accepting the latter. - */ -#define __to_intel_display(p) \ - _Generic(p, \ - const struct drm_i915_private *: __i915_to_display((struct drm_i915_private *)(p)), \ - struct drm_i915_private *: __i915_to_display((struct drm_i915_private *)(p)), \ - const struct intel_display *: (p), \ - struct intel_display *: (p)) #endif /* __INTEL_DISPLAY_CONVERSION__ */ diff --git a/drivers/gpu/drm/i915/display/intel_display_core.h b/drivers/gpu/drm/i915/display/intel_display_core.h index 32cb0e59c81e..8c226406c5cd 100644 --- a/drivers/gpu/drm/i915/display/intel_display_core.h +++ b/drivers/gpu/drm/i915/display/intel_display_core.h @@ -480,6 +480,12 @@ struct intel_display { } irq; struct { + /* protected by wm.wm_mutex */ + u16 linetime[I915_MAX_PIPES]; + bool disable[I915_MAX_PIPES]; + } pkgc; + + struct { wait_queue_head_t waitqueue; /* mutex to protect pmdemand programming sequence */ @@ -570,6 +576,9 @@ struct intel_display { /* hipri wq for commit cleanups */ struct workqueue_struct *cleanup; + + /* unordered workqueue for all display unordered work */ + struct workqueue_struct *unordered; } wq; /* Grouping using named structs. Keep sorted. 
*/ diff --git a/drivers/gpu/drm/i915/display/intel_display_debugfs.c b/drivers/gpu/drm/i915/display/intel_display_debugfs.c index ce3f9810c42d..10dddec3796f 100644 --- a/drivers/gpu/drm/i915/display/intel_display_debugfs.c +++ b/drivers/gpu/drm/i915/display/intel_display_debugfs.c @@ -820,14 +820,14 @@ static const struct drm_info_list intel_display_debugfs_list[] = { void intel_display_debugfs_register(struct intel_display *display) { - struct drm_minor *minor = display->drm->primary; + struct dentry *debugfs_root = display->drm->debugfs_root; - debugfs_create_file("i915_fifo_underrun_reset", 0644, minor->debugfs_root, + debugfs_create_file("i915_fifo_underrun_reset", 0644, debugfs_root, display, &i915_fifo_underrun_reset_ops); drm_debugfs_create_files(intel_display_debugfs_list, ARRAY_SIZE(intel_display_debugfs_list), - minor->debugfs_root, minor); + debugfs_root, display->drm->primary); intel_bios_debugfs_register(display); intel_cdclk_debugfs_register(display); diff --git a/drivers/gpu/drm/i915/display/intel_display_debugfs_params.c b/drivers/gpu/drm/i915/display/intel_display_debugfs_params.c index 88914a1f3f62..de62b774272d 100644 --- a/drivers/gpu/drm/i915/display/intel_display_debugfs_params.c +++ b/drivers/gpu/drm/i915/display/intel_display_debugfs_params.c @@ -7,7 +7,6 @@ #include <linux/kernel.h> #include <drm/drm_drv.h> -#include <drm/drm_file.h> #include "intel_display_core.h" #include "intel_display_debugfs_params.h" @@ -154,14 +153,14 @@ intel_display_debugfs_create_uint(const char *name, umode_t mode, /* add a subdirectory with files for each intel display param */ void intel_display_debugfs_params(struct intel_display *display) { - struct drm_minor *minor = display->drm->primary; + struct dentry *debugfs_root = display->drm->debugfs_root; struct dentry *dir; char dirname[16]; snprintf(dirname, sizeof(dirname), "%s_params", display->drm->driver->name); - dir = debugfs_lookup(dirname, minor->debugfs_root); + dir = debugfs_lookup(dirname, debugfs_root); if (!dir) - dir = debugfs_create_dir(dirname, minor->debugfs_root); + dir = debugfs_create_dir(dirname, debugfs_root); if (IS_ERR(dir)) return; diff --git a/drivers/gpu/drm/i915/display/intel_display_device.c b/drivers/gpu/drm/i915/display/intel_display_device.c index a4070f40e26f..a002bc6ce7b0 100644 --- a/drivers/gpu/drm/i915/display/intel_display_device.c +++ b/drivers/gpu/drm/i915/display/intel_display_device.c @@ -1354,6 +1354,19 @@ static const struct intel_display_device_info xe2_lpd_display = { .__runtime_defaults.has_dbuf_overlap_detection = true, }; +static const struct intel_display_device_info wcl_display = { + XE_LPDP_FEATURES, + + .__runtime_defaults.cpu_transcoder_mask = + BIT(TRANSCODER_A) | BIT(TRANSCODER_B) | BIT(TRANSCODER_C), + .__runtime_defaults.pipe_mask = + BIT(PIPE_A) | BIT(PIPE_B) | BIT(PIPE_C), + .__runtime_defaults.fbc_mask = + BIT(INTEL_FBC_A) | BIT(INTEL_FBC_B) | BIT(INTEL_FBC_C), + .__runtime_defaults.port_mask = + BIT(PORT_A) | BIT(PORT_B) | BIT(PORT_TC1) | BIT(PORT_TC2), +}; + static const struct intel_display_device_info xe2_hpd_display = { XE_LPDP_FEATURES, .__runtime_defaults.port_mask = BIT(PORT_A) | @@ -1480,6 +1493,7 @@ static const struct { { 14, 1, &xe2_hpd_display }, { 20, 0, &xe2_lpd_display }, { 30, 0, &xe2_lpd_display }, + { 30, 2, &wcl_display }, }; static const struct intel_display_device_info * @@ -1930,6 +1944,11 @@ void intel_display_device_info_print(const struct intel_display_device_info *inf drm_printf(p, "rawclk rate: %u kHz\n", runtime->rawclk_freq); } +bool 
intel_display_device_present(struct intel_display *display) +{ + return display && HAS_DISPLAY(display); +} + /* * Assuming the device has display hardware, should it be enabled? * diff --git a/drivers/gpu/drm/i915/display/intel_display_device.h b/drivers/gpu/drm/i915/display/intel_display_device.h index 0ac5484c0043..f329f1beafef 100644 --- a/drivers/gpu/drm/i915/display/intel_display_device.h +++ b/drivers/gpu/drm/i915/display/intel_display_device.h @@ -9,7 +9,6 @@ #include <linux/bitops.h> #include <linux/types.h> -#include "intel_display_conversion.h" #include "intel_display_limits.h" struct drm_printer; @@ -192,9 +191,8 @@ struct intel_display_platforms { #define HAS_TRANSCODER(__display, trans) ((DISPLAY_RUNTIME_INFO(__display)->cpu_transcoder_mask & \ BIT(trans)) != 0) #define HAS_UNCOMPRESSED_JOINER(__display) (DISPLAY_VER(__display) >= 13) -#define HAS_ULTRAJOINER(__display) ((DISPLAY_VER(__display) >= 20 || \ - ((__display)->platform.dgfx && DISPLAY_VER(__display) == 14)) && \ - HAS_DSC(__display)) +#define HAS_ULTRAJOINER(__display) (((__display)->platform.dgfx && \ + DISPLAY_VER(__display) == 14) && HAS_DSC(__display)) #define HAS_VRR(__display) (DISPLAY_VER(__display) >= 11) #define INTEL_NUM_PIPES(__display) (hweight8(DISPLAY_RUNTIME_INFO(__display)->pipe_mask)) #define OVERLAY_NEEDS_PHYSICAL(__display) (DISPLAY_INFO(__display)->overlay_needs_physical) @@ -225,8 +223,8 @@ struct intel_display_platforms { (IS_DISPLAY_VERx100((__display), (ipver), (ipver)) && \ IS_DISPLAY_STEP((__display), (from), (until))) -#define DISPLAY_INFO(__display) (__to_intel_display(__display)->info.__device_info) -#define DISPLAY_RUNTIME_INFO(__display) (&__to_intel_display(__display)->info.__runtime_info) +#define DISPLAY_INFO(__display) ((__display)->info.__device_info) +#define DISPLAY_RUNTIME_INFO(__display) (&(__display)->info.__runtime_info) #define DISPLAY_VER(__display) (DISPLAY_RUNTIME_INFO(__display)->ip.ver) #define DISPLAY_VERx100(__display) (DISPLAY_RUNTIME_INFO(__display)->ip.ver * 100 + \ @@ -237,7 +235,7 @@ struct intel_display_platforms { #define INTEL_DISPLAY_STEP(__display) (DISPLAY_RUNTIME_INFO(__display)->step) #define IS_DISPLAY_STEP(__display, since, until) \ - (drm_WARN_ON(__to_intel_display(__display)->drm, INTEL_DISPLAY_STEP(__display) == STEP_NONE), \ + (drm_WARN_ON((__display)->drm, INTEL_DISPLAY_STEP(__display) == STEP_NONE), \ INTEL_DISPLAY_STEP(__display) >= (since) && INTEL_DISPLAY_STEP(__display) < (until)) #define ARLS_HOST_BRIDGE_PCI_ID1 0x7D1C @@ -308,6 +306,7 @@ struct intel_display_device_info { } color; }; +bool intel_display_device_present(struct intel_display *display); bool intel_display_device_enabled(struct intel_display *display); struct intel_display *intel_display_device_probe(struct pci_dev *pdev); void intel_display_device_remove(struct intel_display *display); diff --git a/drivers/gpu/drm/i915/display/intel_display_driver.c b/drivers/gpu/drm/i915/display/intel_display_driver.c index ec799a1773e4..cf1c14412abe 100644 --- a/drivers/gpu/drm/i915/display/intel_display_driver.c +++ b/drivers/gpu/drm/i915/display/intel_display_driver.c @@ -18,6 +18,7 @@ #include <drm/drm_vblank.h> #include "i915_drv.h" +#include "i915_utils.h" #include "i9xx_wm.h" #include "intel_acpi.h" #include "intel_atomic.h" @@ -44,6 +45,7 @@ #include "intel_fbc.h" #include "intel_fbdev.h" #include "intel_fdi.h" +#include "intel_flipq.h" #include "intel_gmbus.h" #include "intel_hdcp.h" #include "intel_hotplug.h" @@ -84,16 +86,10 @@ bool intel_display_driver_probe_defer(struct pci_dev 
*pdev) void intel_display_driver_init_hw(struct intel_display *display) { - struct intel_cdclk_state *cdclk_state; - if (!HAS_DISPLAY(display)) return; - cdclk_state = to_intel_cdclk_state(display->cdclk.obj.state); - - intel_update_cdclk(display); - intel_cdclk_dump_config(display, &display->cdclk.hw, "Current CDCLK"); - cdclk_state->logical = cdclk_state->actual = display->cdclk.hw; + intel_cdclk_read_hw(display); intel_display_wa_apply(display); } @@ -242,8 +238,6 @@ int intel_display_driver_probe_noirq(struct intel_display *display) if (!HAS_DISPLAY(display)) return 0; - intel_dmc_init(display); - display->hotplug.dp_wq = alloc_ordered_workqueue("intel-dp", 0); if (!display->hotplug.dp_wq) { ret = -ENOMEM; @@ -269,27 +263,35 @@ int intel_display_driver_probe_noirq(struct intel_display *display) goto cleanup_wq_flip; } + display->wq.unordered = alloc_workqueue("display_unordered", 0, 0); + if (!display->wq.unordered) { + ret = -ENOMEM; + goto cleanup_wq_cleanup; + } + + intel_dmc_init(display); + intel_mode_config_init(display); ret = intel_cdclk_init(display); if (ret) - goto cleanup_wq_cleanup; + goto cleanup_wq_unordered; ret = intel_color_init(display); if (ret) - goto cleanup_wq_cleanup; + goto cleanup_wq_unordered; ret = intel_dbuf_init(display); if (ret) - goto cleanup_wq_cleanup; + goto cleanup_wq_unordered; ret = intel_bw_init(display); if (ret) - goto cleanup_wq_cleanup; + goto cleanup_wq_unordered; ret = intel_pmdemand_init(display); if (ret) - goto cleanup_wq_cleanup; + goto cleanup_wq_unordered; intel_init_quirks(display); @@ -297,6 +299,8 @@ int intel_display_driver_probe_noirq(struct intel_display *display) return 0; +cleanup_wq_unordered: + destroy_workqueue(display->wq.unordered); cleanup_wq_cleanup: destroy_workqueue(display->wq.cleanup); cleanup_wq_flip: @@ -535,6 +539,8 @@ int intel_display_driver_probe(struct intel_display *display) */ intel_hdcp_component_init(display); + intel_flipq_init(display); + /* * Force all active planes to recompute their states. 
So that on * mode_setcrtc after probe, all the intel_plane_state variables @@ -600,6 +606,7 @@ void intel_display_driver_remove(struct intel_display *display) flush_workqueue(display->wq.flip); flush_workqueue(display->wq.modeset); flush_workqueue(display->wq.cleanup); + flush_workqueue(display->wq.unordered); /* * MST topology needs to be suspended so we don't have any calls to @@ -612,8 +619,6 @@ void intel_display_driver_remove(struct intel_display *display) /* part #2: call after irq uninstall */ void intel_display_driver_remove_noirq(struct intel_display *display) { - struct drm_i915_private *i915 = to_i915(display->drm); - if (!HAS_DISPLAY(display)) return; @@ -628,7 +633,7 @@ void intel_display_driver_remove_noirq(struct intel_display *display) intel_unregister_dsm_handler(); /* flush any delayed tasks or pending work */ - flush_workqueue(i915->unordered_wq); + flush_workqueue(display->wq.unordered); intel_hdcp_component_fini(display); @@ -644,6 +649,7 @@ void intel_display_driver_remove_noirq(struct intel_display *display) destroy_workqueue(display->wq.flip); destroy_workqueue(display->wq.modeset); destroy_workqueue(display->wq.cleanup); + destroy_workqueue(display->wq.unordered); intel_fbc_cleanup(display); } diff --git a/drivers/gpu/drm/i915/display/intel_display_irq.c b/drivers/gpu/drm/i915/display/intel_display_irq.c index 8d0dcf252bed..123e054affbe 100644 --- a/drivers/gpu/drm/i915/display/intel_display_irq.c +++ b/drivers/gpu/drm/i915/display/intel_display_irq.c @@ -9,7 +9,6 @@ #include "i915_irq.h" #include "i915_reg.h" #include "icl_dsi_regs.h" -#include "intel_atomic_plane.h" #include "intel_crtc.h" #include "intel_de.h" #include "intel_display_irq.h" @@ -27,6 +26,7 @@ #include "intel_gmbus.h" #include "intel_hotplug_irq.h" #include "intel_pipe_crc_regs.h" +#include "intel_plane.h" #include "intel_pmdemand.h" #include "intel_psr.h" #include "intel_psr_regs.h" @@ -1506,10 +1506,14 @@ u32 gen11_gu_misc_irq_ack(struct intel_display *display, const u32 master_ctl) if (!(master_ctl & GEN11_GU_MISC_IRQ)) return 0; + intel_display_rpm_assert_block(display); + iir = intel_de_read(display, GEN11_GU_MISC_IIR); if (likely(iir)) intel_de_write(display, GEN11_GU_MISC_IIR, iir); + intel_display_rpm_assert_unblock(display); + return iir; } @@ -1986,20 +1990,17 @@ void vlv_display_irq_postinstall(struct intel_display *display) void ibx_display_irq_reset(struct intel_display *display) { - struct drm_i915_private *i915 = to_i915(display->drm); - - if (HAS_PCH_NOP(i915)) + if (HAS_PCH_NOP(display)) return; gen2_irq_reset(to_intel_uncore(display->drm), SDE_IRQ_REGS); - if (HAS_PCH_CPT(i915) || HAS_PCH_LPT(i915)) + if (HAS_PCH_CPT(display) || HAS_PCH_LPT(display)) intel_de_write(display, SERR_INT, 0xffffffff); } void gen8_display_irq_reset(struct intel_display *display) { - struct drm_i915_private *i915 = to_i915(display->drm); enum pipe pipe; if (!HAS_DISPLAY(display)) @@ -2016,7 +2017,7 @@ void gen8_display_irq_reset(struct intel_display *display) intel_display_irq_regs_reset(display, GEN8_DE_PORT_IRQ_REGS); intel_display_irq_regs_reset(display, GEN8_DE_MISC_IRQ_REGS); - if (HAS_PCH_SPLIT(i915)) + if (HAS_PCH_SPLIT(display)) ibx_display_irq_reset(display); } diff --git a/drivers/gpu/drm/i915/display/intel_display_params.c b/drivers/gpu/drm/i915/display/intel_display_params.c index c4f1ab43fc0c..2aed110c5b09 100644 --- a/drivers/gpu/drm/i915/display/intel_display_params.c +++ b/drivers/gpu/drm/i915/display/intel_display_params.c @@ -62,6 +62,9 @@ intel_display_param_named_unsafe(enable_dpt, 
bool, 0400, intel_display_param_named_unsafe(enable_dsb, bool, 0400, "Enable display state buffer (DSB) (default: true)"); +intel_display_param_named_unsafe(enable_flipq, bool, 0400, + "Enable DMC flip queue (default: false)"); + intel_display_param_named_unsafe(enable_sagv, bool, 0400, "Enable system agent voltage/frequency scaling (SAGV) (default: true)"); @@ -117,6 +120,9 @@ intel_display_param_named_unsafe(enable_psr, int, 0400, "(0=disabled, 1=enable up to PSR1, 2=enable up to PSR2) " "Default: -1 (use per-chip default)"); +intel_display_param_named_unsafe(enable_panel_replay, int, 0400, + "Enable Panel Replay (0=disabled, 1=enabled). Default: -1 (use per-chip default)"); + intel_display_param_named(psr_safest_params, bool, 0400, "Replace PSR VBT parameters by the safest and not optimal ones. This " "is helpful to detect if PSR issues are related to bad values set in " diff --git a/drivers/gpu/drm/i915/display/intel_display_params.h b/drivers/gpu/drm/i915/display/intel_display_params.h index 5317138e6044..b01bc5700c52 100644 --- a/drivers/gpu/drm/i915/display/intel_display_params.h +++ b/drivers/gpu/drm/i915/display/intel_display_params.h @@ -31,6 +31,7 @@ struct drm_printer; param(int, enable_dc, -1, 0400) \ param(bool, enable_dpt, true, 0400) \ param(bool, enable_dsb, true, 0600) \ + param(bool, enable_flipq, false, 0600) \ param(bool, enable_sagv, true, 0600) \ param(int, disable_power_well, -1, 0400) \ param(bool, enable_ips, true, 0600) \ @@ -45,6 +46,7 @@ struct drm_printer; param(bool, enable_dp_mst, true, 0600) \ param(int, enable_fbc, -1, 0600) \ param(int, enable_psr, -1, 0600) \ + param(int, enable_panel_replay, -1, 0600) \ param(bool, psr_safest_params, false, 0400) \ param(bool, enable_psr2_sel_fetch, true, 0400) \ param(int, enable_dmc_wl, -1, 0400) \ diff --git a/drivers/gpu/drm/i915/display/intel_display_power.c b/drivers/gpu/drm/i915/display/intel_display_power.c index fe3a8e90b97a..da4babfd6bcb 100644 --- a/drivers/gpu/drm/i915/display/intel_display_power.c +++ b/drivers/gpu/drm/i915/display/intel_display_power.c @@ -3,6 +3,7 @@ * Copyright © 2019 Intel Corporation */ +#include <linux/iopoll.h> #include <linux/string_helpers.h> #include "soc/intel_dram.h" @@ -10,6 +11,7 @@ #include "i915_drv.h" #include "i915_irq.h" #include "i915_reg.h" +#include "i915_utils.h" #include "intel_backlight_regs.h" #include "intel_cdclk.h" #include "intel_clock_gating.h" @@ -1172,7 +1174,7 @@ static void icl_mbus_init(struct intel_display *display) if (DISPLAY_VER(display) == 12) abox_regs |= BIT(0); - for_each_set_bit(i, &abox_regs, sizeof(abox_regs)) + for_each_set_bit(i, &abox_regs, BITS_PER_TYPE(abox_regs)) intel_de_rmw(display, MBUS_ABOX_CTL(i), mask, val); } @@ -1257,10 +1259,8 @@ static u32 hsw_read_dcomp(struct intel_display *display) static void hsw_write_dcomp(struct intel_display *display, u32 val) { - struct drm_i915_private *dev_priv = to_i915(display->drm); - if (display->platform.haswell) { - if (snb_pcode_write(&dev_priv->uncore, GEN6_PCODE_WRITE_D_COMP, val)) + if (intel_pcode_write(display->drm, GEN6_PCODE_WRITE_D_COMP, val)) drm_dbg_kms(display->drm, "Failed to write to D_COMP\n"); } else { intel_de_write(display, D_COMP_BDW, val); @@ -1280,6 +1280,7 @@ static void hsw_disable_lcpll(struct intel_display *display, bool switch_to_fclk, bool allow_power_down) { u32 val; + int ret; assert_can_disable_lcpll(display); @@ -1289,8 +1290,10 @@ static void hsw_disable_lcpll(struct intel_display *display, val |= LCPLL_CD_SOURCE_FCLK; intel_de_write(display, LCPLL_CTL, val); - if 
(wait_for_us(intel_de_read(display, LCPLL_CTL) & - LCPLL_CD_SOURCE_FCLK_DONE, 1)) + ret = intel_de_wait_custom(display, LCPLL_CTL, + LCPLL_CD_SOURCE_FCLK_DONE, LCPLL_CD_SOURCE_FCLK_DONE, + 1, 0, NULL); + if (ret) drm_err(display->drm, "Switching to FCLK failed\n"); val = intel_de_read(display, LCPLL_CTL); @@ -1308,8 +1311,10 @@ static void hsw_disable_lcpll(struct intel_display *display, hsw_write_dcomp(display, val); ndelay(100); - if (wait_for((hsw_read_dcomp(display) & - D_COMP_RCOMP_IN_PROGRESS) == 0, 1)) + ret = poll_timeout_us(val = hsw_read_dcomp(display), + (val & D_COMP_RCOMP_IN_PROGRESS) == 0, + 100, 1000, false); + if (ret) drm_err(display->drm, "D_COMP RCOMP still in progress\n"); if (allow_power_down) { @@ -1326,6 +1331,7 @@ static void hsw_restore_lcpll(struct intel_display *display) { struct drm_i915_private __maybe_unused *dev_priv = to_i915(display->drm); u32 val; + int ret; val = intel_de_read(display, LCPLL_CTL); @@ -1360,8 +1366,10 @@ static void hsw_restore_lcpll(struct intel_display *display) if (val & LCPLL_CD_SOURCE_FCLK) { intel_de_rmw(display, LCPLL_CTL, LCPLL_CD_SOURCE_FCLK, 0); - if (wait_for_us((intel_de_read(display, LCPLL_CTL) & - LCPLL_CD_SOURCE_FCLK_DONE) == 0, 1)) + ret = intel_de_wait_custom(display, LCPLL_CTL, + LCPLL_CD_SOURCE_FCLK_DONE, 0, + 1, 0, NULL); + if (ret) drm_err(display->drm, "Switching back to LCPLL failed\n"); } @@ -1631,11 +1639,11 @@ static void tgl_bw_buddy_init(struct intel_display *display) if (table[config].page_mask == 0) { drm_dbg_kms(display->drm, "Unknown memory configuration; disabling address buddy logic.\n"); - for_each_set_bit(i, &abox_mask, sizeof(abox_mask)) + for_each_set_bit(i, &abox_mask, BITS_PER_TYPE(abox_mask)) intel_de_write(display, BW_BUDDY_CTL(i), BW_BUDDY_DISABLE); } else { - for_each_set_bit(i, &abox_mask, sizeof(abox_mask)) { + for_each_set_bit(i, &abox_mask, BITS_PER_TYPE(abox_mask)) { intel_de_write(display, BW_BUDDY_PAGE_MASK(i), table[config].page_mask); @@ -2157,8 +2165,6 @@ void intel_power_domains_resume(struct intel_display *display) power_domains->init_wakeref = intel_display_power_get(display, POWER_DOMAIN_INIT); } - - intel_power_domains_verify_state(display); } #if IS_ENABLED(CONFIG_DRM_I915_DEBUG_RUNTIME_PM) diff --git a/drivers/gpu/drm/i915/display/intel_display_power_map.c b/drivers/gpu/drm/i915/display/intel_display_power_map.c index 77268802b55e..39b71fffa2cd 100644 --- a/drivers/gpu/drm/i915/display/intel_display_power_map.c +++ b/drivers/gpu/drm/i915/display/intel_display_power_map.c @@ -1717,6 +1717,59 @@ static const struct i915_power_well_desc_list xe3lpd_power_wells[] = { I915_PW_DESCRIPTORS(xe2lpd_power_wells_pica), }; +static const struct i915_power_well_desc wcl_power_wells_main[] = { + { + .instances = &I915_PW_INSTANCES( + I915_PW("PW_2", &xe3lpd_pwdoms_pw_2, + .hsw.idx = ICL_PW_CTL_IDX_PW_2, + .id = SKL_DISP_PW_2), + ), + .ops = &hsw_power_well_ops, + .has_vga = true, + .has_fuses = true, + }, { + .instances = &I915_PW_INSTANCES( + I915_PW("PW_A", &xelpd_pwdoms_pw_a, + .hsw.idx = XELPD_PW_CTL_IDX_PW_A), + ), + .ops = &hsw_power_well_ops, + .irq_pipe_mask = BIT(PIPE_A), + .has_fuses = true, + }, { + .instances = &I915_PW_INSTANCES( + I915_PW("PW_B", &xe3lpd_pwdoms_pw_b, + .hsw.idx = XELPD_PW_CTL_IDX_PW_B), + ), + .ops = &hsw_power_well_ops, + .irq_pipe_mask = BIT(PIPE_B), + .has_fuses = true, + }, { + .instances = &I915_PW_INSTANCES( + I915_PW("PW_C", &xe3lpd_pwdoms_pw_c, + .hsw.idx = XELPD_PW_CTL_IDX_PW_C), + ), + .ops = &hsw_power_well_ops, + .irq_pipe_mask = BIT(PIPE_C), + 
.has_fuses = true, + }, { + .instances = &I915_PW_INSTANCES( + I915_PW("AUX_A", &icl_pwdoms_aux_a, .xelpdp.aux_ch = AUX_CH_A), + I915_PW("AUX_B", &icl_pwdoms_aux_b, .xelpdp.aux_ch = AUX_CH_B), + I915_PW("AUX_TC1", &xelpdp_pwdoms_aux_tc1, .xelpdp.aux_ch = AUX_CH_USBC1), + I915_PW("AUX_TC2", &xelpdp_pwdoms_aux_tc2, .xelpdp.aux_ch = AUX_CH_USBC2), + ), + .ops = &xelpdp_aux_power_well_ops, + }, +}; + +static const struct i915_power_well_desc_list wcl_power_wells[] = { + I915_PW_DESCRIPTORS(i9xx_power_wells_always_on), + I915_PW_DESCRIPTORS(icl_power_wells_pw_1), + I915_PW_DESCRIPTORS(xe3lpd_power_wells_dcoff), + I915_PW_DESCRIPTORS(wcl_power_wells_main), + I915_PW_DESCRIPTORS(xe2lpd_power_wells_pica), +}; + static void init_power_well_domains(const struct i915_power_well_instance *inst, struct i915_power_well *power_well) { @@ -1824,7 +1877,9 @@ int intel_display_power_map_init(struct i915_power_domains *power_domains) return 0; } - if (DISPLAY_VER(display) >= 30) + if (DISPLAY_VERx100(display) == 3002) + return set_power_wells(power_domains, wcl_power_wells); + else if (DISPLAY_VER(display) >= 30) return set_power_wells(power_domains, xe3lpd_power_wells); else if (DISPLAY_VER(display) >= 20) return set_power_wells(power_domains, xe2lpd_power_wells); diff --git a/drivers/gpu/drm/i915/display/intel_display_power_well.c b/drivers/gpu/drm/i915/display/intel_display_power_well.c index cba96f920fd2..5e88b930f5aa 100644 --- a/drivers/gpu/drm/i915/display/intel_display_power_well.c +++ b/drivers/gpu/drm/i915/display/intel_display_power_well.c @@ -3,6 +3,8 @@ * Copyright © 2022 Intel Corporation */ +#include <linux/iopoll.h> + #include "i915_drv.h" #include "i915_irq.h" #include "i915_reg.h" @@ -34,6 +36,18 @@ #include "vlv_iosf_sb_reg.h" #include "vlv_sideband.h" +/* + * PG0 is HW controlled, so doesn't have a corresponding power well control knob + * + * {ICL,SKL}_DISP_PW1_IDX..{ICL,SKL}_DISP_PW4_IDX -> PG1..PG4 + */ +static enum skl_power_gate pw_idx_to_pg(struct intel_display *display, int pw_idx) +{ + int pw1_idx = DISPLAY_VER(display) >= 11 ? ICL_PW_CTL_IDX_PW_1 : SKL_PW_CTL_IDX_PW_1; + + return pw_idx - pw1_idx + SKL_PG1; +} + struct i915_power_well_regs { i915_reg_t bios; i915_reg_t driver; @@ -308,8 +322,8 @@ static void hsw_wait_for_power_well_disable(struct intel_display *display, { const struct i915_power_well_regs *regs = power_well->desc->ops->regs; int pw_idx = i915_power_well_instance(power_well)->hsw.idx; - bool disabled; u32 reqs; + int ret; /* * Bspec doesn't require waiting for PWs to get disabled, but still do @@ -320,12 +334,18 @@ static void hsw_wait_for_power_well_disable(struct intel_display *display, * Skip the wait in case any of the request bits are set and print a * diagnostic message. */ - wait_for((disabled = !(intel_de_read(display, regs->driver) & - HSW_PWR_WELL_CTL_STATE(pw_idx))) || - (reqs = hsw_power_well_requesters(display, regs, pw_idx)), 1); - if (disabled) + reqs = hsw_power_well_requesters(display, regs, pw_idx); + + ret = intel_de_wait_for_clear(display, regs->driver, + HSW_PWR_WELL_CTL_STATE(pw_idx), + reqs ? 0 : 1); + if (!ret) return; + /* Refresh requesters in case they popped up during the wait. 
*/ + if (!reqs) + reqs = hsw_power_well_requesters(display, regs, pw_idx); + drm_dbg_kms(display->drm, "%s forced on (bios:%d driver:%d kvmr:%d debug:%d)\n", intel_power_well_name(power_well), @@ -350,8 +370,7 @@ static void hsw_power_well_enable(struct intel_display *display, if (power_well->desc->has_fuses) { enum skl_power_gate pg; - pg = DISPLAY_VER(display) >= 11 ? ICL_PW_CTL_IDX_TO_PG(pw_idx) : - SKL_PW_CTL_IDX_TO_PG(pw_idx); + pg = pw_idx_to_pg(display, pw_idx); /* Wa_16013190616:adlp */ if (display->platform.alderlake_p && pg == SKL_PG1) @@ -375,8 +394,8 @@ static void hsw_power_well_enable(struct intel_display *display, if (power_well->desc->has_fuses) { enum skl_power_gate pg; - pg = DISPLAY_VER(display) >= 11 ? ICL_PW_CTL_IDX_TO_PG(pw_idx) : - SKL_PW_CTL_IDX_TO_PG(pw_idx); + pg = pw_idx_to_pg(display, pw_idx); + gen9_wait_for_power_well_fuses(display, pg); } @@ -482,12 +501,10 @@ static void icl_tc_port_assert_ref_held(struct intel_display *display, static void icl_tc_cold_exit(struct intel_display *display) { - struct drm_i915_private *i915 = to_i915(display->drm); int ret, tries = 0; while (1) { - ret = snb_pcode_write_timeout(&i915->uncore, ICL_PCODE_EXIT_TCCOLD, 0, - 250, 1); + ret = intel_pcode_write(display->drm, ICL_PCODE_EXIT_TCCOLD, 0); if (ret != -EAGAIN || ++tries == 3) break; msleep(1); @@ -498,7 +515,7 @@ static void icl_tc_cold_exit(struct intel_display *display) msleep(1); /* TODO: turn failure into a error as soon i915 CI updates ICL IFWI */ - drm_dbg_kms(&i915->drm, "TC cold block %s\n", ret ? "failed" : + drm_dbg_kms(display->drm, "TC cold block %s\n", ret ? "failed" : "succeeded"); } @@ -511,6 +528,8 @@ icl_tc_phy_aux_power_well_enable(struct intel_display *display, const struct i915_power_well_regs *regs = power_well->desc->ops->regs; bool is_tbt = power_well->desc->is_tc_tbt; bool timeout_expected; + u32 val; + int ret; icl_tc_port_assert_ref_held(display, power_well, dig_port); @@ -537,10 +556,11 @@ icl_tc_phy_aux_power_well_enable(struct intel_display *display, tc_port = TGL_AUX_PW_TO_TC_PORT(i915_power_well_instance(power_well)->hsw.idx); - if (wait_for(intel_dkl_phy_read(display, DKL_CMN_UC_DW_27(tc_port)) & - DKL_CMN_UC_DW27_UC_HEALTH, 1)) - drm_warn(display->drm, - "Timeout waiting TC uC health\n"); + ret = poll_timeout_us(val = intel_dkl_phy_read(display, DKL_CMN_UC_DW_27(tc_port)), + val & DKL_CMN_UC_DW27_UC_HEALTH, + 100, 1000, false); + if (ret) + drm_warn(display->drm, "Timeout waiting TC uC health\n"); } } @@ -829,7 +849,7 @@ static void assert_can_enable_dc5(struct intel_display *display) assert_display_rpm_held(display); - assert_dmc_loaded(display); + assert_main_dmc_loaded(display); } void gen9_enable_dc5(struct intel_display *display) @@ -860,7 +880,7 @@ static void assert_can_enable_dc6(struct intel_display *display) DC_STATE_EN_UPTO_DC6), "DC6 already programmed to be enabled.\n"); - assert_dmc_loaded(display); + assert_main_dmc_loaded(display); } void skl_enable_dc6(struct intel_display *display) @@ -1106,6 +1126,8 @@ static void vlv_set_power_well(struct intel_display *display, u32 mask; u32 state; u32 ctrl; + u32 val; + int ret; mask = PUNIT_PWRGT_MASK(pw_idx); state = enable ? 
PUNIT_PWRGT_PWR_ON(pw_idx) : @@ -1113,10 +1135,8 @@ static void vlv_set_power_well(struct intel_display *display, vlv_punit_get(display->drm); -#define COND \ - ((vlv_punit_read(display->drm, PUNIT_REG_PWRGT_STATUS) & mask) == state) - - if (COND) + val = vlv_punit_read(display->drm, PUNIT_REG_PWRGT_STATUS); + if ((val & mask) == state) goto out; ctrl = vlv_punit_read(display->drm, PUNIT_REG_PWRGT_CTRL); @@ -1124,14 +1144,15 @@ static void vlv_set_power_well(struct intel_display *display, ctrl |= state; vlv_punit_write(display->drm, PUNIT_REG_PWRGT_CTRL, ctrl); - if (wait_for(COND, 100)) + ret = poll_timeout_us(val = vlv_punit_read(display->drm, PUNIT_REG_PWRGT_STATUS), + (val & mask) == state, + 500, 100 * 1000, false); + if (ret) drm_err(display->drm, "timeout setting power well state %08x (%08x)\n", state, vlv_punit_read(display->drm, PUNIT_REG_PWRGT_CTRL)); -#undef COND - out: vlv_punit_put(display->drm); } @@ -1192,7 +1213,7 @@ static void vlv_init_display_clock_gating(struct intel_display *display) * (and never recovering) in this case. intel_dsi_post_disable() will * clear it when we turn off the display. */ - intel_de_rmw(display, DSPCLK_GATE_D(display), + intel_de_rmw(display, VLV_DSPCLK_GATE_D, ~DPOUNIT_CLOCK_GATE_DISABLE, VRHUNIT_CLOCK_GATE_DISABLE); /* @@ -1695,23 +1716,24 @@ static void chv_set_pipe_power_well(struct intel_display *display, enum pipe pipe = PIPE_A; u32 state; u32 ctrl; + int ret; state = enable ? DP_SSS_PWR_ON(pipe) : DP_SSS_PWR_GATE(pipe); vlv_punit_get(display->drm); -#define COND \ - ((vlv_punit_read(display->drm, PUNIT_REG_DSPSSPM) & DP_SSS_MASK(pipe)) == state) - - if (COND) + ctrl = vlv_punit_read(display->drm, PUNIT_REG_DSPSSPM); + if ((ctrl & DP_SSS_MASK(pipe)) == state) goto out; - ctrl = vlv_punit_read(display->drm, PUNIT_REG_DSPSSPM); ctrl &= ~DP_SSC_MASK(pipe); ctrl |= enable ? DP_SSC_PWR_ON(pipe) : DP_SSC_PWR_GATE(pipe); vlv_punit_write(display->drm, PUNIT_REG_DSPSSPM, ctrl); - if (wait_for(COND, 100)) + ret = poll_timeout_us(ctrl = vlv_punit_read(display->drm, PUNIT_REG_DSPSSPM), + (ctrl & DP_SSS_MASK(pipe)) == state, + 500, 100 * 1000, false); + if (ret) drm_err(display->drm, "timeout setting power well state %08x (%08x)\n", state, @@ -1749,7 +1771,6 @@ static void chv_pipe_power_well_disable(struct intel_display *display, static void tgl_tc_cold_request(struct intel_display *display, bool block) { - struct drm_i915_private *i915 = to_i915(display->drm); u8 tries = 0; int ret; @@ -1766,7 +1787,7 @@ tgl_tc_cold_request(struct intel_display *display, bool block) * Spec states that we should timeout the request after 200us * but the function below will timeout after 500us */ - ret = snb_pcode_read(&i915->uncore, TGL_PCODE_TCCOLD, &low_val, &high_val); + ret = intel_pcode_read(display->drm, TGL_PCODE_TCCOLD, &low_val, &high_val); if (ret == 0) { if (block && (low_val & TGL_PCODE_EXIT_TCCOLD_DATA_L_EXIT_FAILED)) @@ -1782,10 +1803,9 @@ tgl_tc_cold_request(struct intel_display *display, bool block) } if (ret) - drm_err(&i915->drm, "TC cold %sblock failed\n", - block ? "" : "un"); + drm_err(display->drm, "TC cold %sblock failed\n", block ? "" : "un"); else - drm_dbg_kms(&i915->drm, "TC cold %sblock succeeded\n", + drm_dbg_kms(display->drm, "TC cold %sblock succeeded\n", block ? 
"" : "un"); } diff --git a/drivers/gpu/drm/i915/display/intel_display_regs.h b/drivers/gpu/drm/i915/display/intel_display_regs.h index e101105da4af..9d71e26a4fa2 100644 --- a/drivers/gpu/drm/i915/display/intel_display_regs.h +++ b/drivers/gpu/drm/i915/display/intel_display_regs.h @@ -2195,20 +2195,17 @@ #define HSW_PWR_WELL_FORCE_ON (1 << 19) #define HSW_PWR_WELL_CTL6 _MMIO(0x45414) +/* SKL Fuse Status */ +enum skl_power_gate { + SKL_PG0, + SKL_PG1, + SKL_PG2, + ICL_PG3, + ICL_PG4, +}; + #define SKL_FUSE_STATUS _MMIO(0x42000) #define SKL_FUSE_DOWNLOAD_STATUS (1 << 31) -/* - * PG0 is HW controlled, so doesn't have a corresponding power well control knob - * SKL_DISP_PW1_IDX..SKL_DISP_PW2_IDX -> PG1..PG2 - */ -#define SKL_PW_CTL_IDX_TO_PG(pw_idx) \ - ((pw_idx) - SKL_PW_CTL_IDX_PW_1 + SKL_PG1) -/* - * PG0 is HW controlled, so doesn't have a corresponding power well control knob - * ICL_DISP_PW1_IDX..ICL_DISP_PW4_IDX -> PG1..PG4 - */ -#define ICL_PW_CTL_IDX_TO_PG(pw_idx) \ - ((pw_idx) - ICL_PW_CTL_IDX_PW_1 + SKL_PG1) #define SKL_FUSE_PG_DIST_STATUS(pg) (1 << (27 - (pg))) /* Per-pipe DDI Function Control */ @@ -2893,6 +2890,7 @@ #define DP_PIN_ASSIGNMENT_SHIFT(idx) ((idx) * 4) #define DP_PIN_ASSIGNMENT_MASK(idx) (0xf << ((idx) * 4)) #define DP_PIN_ASSIGNMENT(idx, x) ((x) << ((idx) * 4)) +/* See enum intel_tc_pin_assignment for the pin assignment field values. */ #define _TCSS_DDI_STATUS_1 0x161500 #define _TCSS_DDI_STATUS_2 0x161504 @@ -2900,6 +2898,7 @@ _TCSS_DDI_STATUS_1, \ _TCSS_DDI_STATUS_2)) #define TCSS_DDI_STATUS_PIN_ASSIGNMENT_MASK REG_GENMASK(28, 25) +/* See enum intel_tc_pin_assignment for the pin assignment field values. */ #define TCSS_DDI_STATUS_READY REG_BIT(2) #define TCSS_DDI_STATUS_HPD_LIVE_STATUS_TBT REG_BIT(1) #define TCSS_DDI_STATUS_HPD_LIVE_STATUS_ALT REG_BIT(0) diff --git a/drivers/gpu/drm/i915/display/intel_display_types.h b/drivers/gpu/drm/i915/display/intel_display_types.h index 30c7315fc25e..358ab922d7a7 100644 --- a/drivers/gpu/drm/i915/display/intel_display_types.h +++ b/drivers/gpu/drm/i915/display/intel_display_types.h @@ -50,15 +50,17 @@ #include "intel_display_limits.h" #include "intel_display_power.h" #include "intel_dpll_mgr.h" +#include "intel_dsi_vbt_defs.h" #include "intel_wm_types.h" struct cec_notifier; struct drm_printer; -struct __intel_global_objs_state; struct intel_connector; struct intel_ddi_buf_trans; struct intel_fbc; +struct intel_global_objs_state; struct intel_hdcp_shim; +struct intel_panic; struct intel_tc_port; /* @@ -146,6 +148,9 @@ struct intel_framebuffer { unsigned int min_alignment; unsigned int vtd_guard; + + unsigned int (*panic_tiling)(unsigned int x, unsigned int y, unsigned int width); + struct intel_panic *panic; }; enum intel_hotplug_state { @@ -591,7 +596,7 @@ struct intel_atomic_state { struct ref_tracker *wakeref; - struct __intel_global_objs_state *global_objs; + struct intel_global_objs_state *global_objs; int num_global_objs; /* Internal commit, as opposed to userspace/client initiated one */ @@ -640,7 +645,6 @@ struct intel_plane_state { #define PLANE_HAS_FENCE BIT(0) struct intel_fb_view view; - u32 phys_dma_addr; /* for cursor_needs_physical */ /* for legacy cursor fb unpin */ struct drm_vblank_work unpin_work; @@ -663,6 +667,9 @@ struct intel_plane_state { /* chroma upsampler control register */ u32 cus_ctl; + /* surface address register */ + u32 surf; + /* * scaler_id * = -1 : not using a scaler @@ -939,10 +946,6 @@ struct intel_csc_matrix { u16 postoff[3]; }; -void intel_io_mmio_fw_write(void *ctx, i915_reg_t reg, u32 
val); - -typedef void (*intel_io_reg_write)(void *ctx, i915_reg_t reg, u32 val); - struct intel_crtc_state { /* * uapi (drm) state. This is the software state shown to userspace. @@ -1120,6 +1123,7 @@ struct intel_crtc_state { bool req_psr2_sdp_prior_scanline; bool has_panel_replay; bool wm_level_disabled; + bool pkg_c_latency_used; u32 dc3co_exitline; u16 su_y_granularity; u8 active_non_psr_pipes; @@ -1303,6 +1307,7 @@ struct intel_crtc_state { /* For DSB based pipe updates */ struct intel_dsb *dsb_color, *dsb_commit; bool use_dsb; + bool use_flipq; u32 psr2_man_track_ctl; @@ -1369,6 +1374,21 @@ struct intel_pipe_crc { enum intel_pipe_crc_source source; }; +enum intel_flipq_id { + INTEL_FLIPQ_PLANE_1, + INTEL_FLIPQ_PLANE_2, + INTEL_FLIPQ_PLANE_3, + INTEL_FLIPQ_GENERAL, + INTEL_FLIPQ_FAST, + MAX_INTEL_FLIPQ, +}; + +struct intel_flipq { + u32 start_mmioaddr; + enum intel_flipq_id flipq_id; + u8 tail; +}; + struct intel_crtc { struct drm_crtc base; enum pipe pipe; @@ -1395,11 +1415,15 @@ struct intel_crtc { struct drm_pending_vblank_event *flip_done_event; /* armed event for DSB based updates */ struct drm_pending_vblank_event *dsb_event; + /* armed event for flip queue based updates */ + struct drm_pending_vblank_event *flipq_event; /* Access to these should be protected by display->irq.lock. */ bool cpu_fifo_underrun_disabled; bool pch_fifo_underrun_disabled; + struct intel_flipq flipq[MAX_INTEL_FLIPQ]; + /* per-pipe watermark state */ struct { /* watermarks currently being used */ @@ -1512,6 +1536,7 @@ struct intel_plane { bool (*get_hw_state)(struct intel_plane *plane, enum pipe *pipe); int (*check_plane)(struct intel_crtc_state *crtc_state, struct intel_plane_state *plane_state); + u32 (*surf_offset)(const struct intel_plane_state *plane_state); int (*min_cdclk)(const struct intel_crtc_state *crtc_state, const struct intel_plane_state *plane_state); void (*async_flip)(struct intel_dsb *dsb, @@ -1521,6 +1546,8 @@ struct intel_plane { bool async_flip); void (*enable_flip_done)(struct intel_plane *plane); void (*disable_flip_done)(struct intel_plane *plane); + /* For drm_panic */ + void (*disable_tiling)(struct intel_plane *plane); }; #define to_intel_atomic_state(x) container_of(x, struct intel_atomic_state, base) @@ -1659,6 +1686,7 @@ struct intel_psr { u8 entry_setup_frames; bool link_ok; + bool pkg_c_latency_used; u8 active_non_psr_pipes; }; diff --git a/drivers/gpu/drm/i915/display/intel_display_wa.c b/drivers/gpu/drm/i915/display/intel_display_wa.c index f57280e9d041..31cd2c9cd488 100644 --- a/drivers/gpu/drm/i915/display/intel_display_wa.c +++ b/drivers/gpu/drm/i915/display/intel_display_wa.c @@ -3,6 +3,8 @@ * Copyright © 2023 Intel Corporation */ +#include <drm/drm_print.h> + #include "i915_reg.h" #include "intel_de.h" #include "intel_display_core.h" @@ -39,3 +41,36 @@ void intel_display_wa_apply(struct intel_display *display) else if (DISPLAY_VER(display) == 11) gen11_display_wa_apply(display); } + +/* + * Wa_16025573575: + * Fixes: Issue with bitbashing on Xe3 based platforms. + * Workaround: Set masks bits in GPIO CTL and preserve it during bitbashing sequence. + */ +static bool intel_display_needs_wa_16025573575(struct intel_display *display) +{ + return DISPLAY_VERx100(display) == 3000 || DISPLAY_VERx100(display) == 3002; +} + +/* + * Wa_14011503117: + * Fixes: Before enabling the scaler DE fatal error is masked + * Workaround: Unmask the DE fatal error register after enabling the scaler + * and after waiting of at least 1 frame. 
+ */ +bool __intel_display_wa(struct intel_display *display, enum intel_display_wa wa, const char *name) +{ + switch (wa) { + case INTEL_DISPLAY_WA_16023588340: + return intel_display_needs_wa_16023588340(display); + case INTEL_DISPLAY_WA_16025573575: + return intel_display_needs_wa_16025573575(display); + case INTEL_DISPLAY_WA_14011503117: + return DISPLAY_VER(display) == 13; + default: + drm_WARN(display->drm, 1, "Missing Wa number: %s\n", name); + break; + } + + return false; +} diff --git a/drivers/gpu/drm/i915/display/intel_display_wa.h b/drivers/gpu/drm/i915/display/intel_display_wa.h index babd9d16603d..abc1df83f066 100644 --- a/drivers/gpu/drm/i915/display/intel_display_wa.h +++ b/drivers/gpu/drm/i915/display/intel_display_wa.h @@ -21,4 +21,15 @@ static inline bool intel_display_needs_wa_16023588340(struct intel_display *disp bool intel_display_needs_wa_16023588340(struct intel_display *display); #endif +enum intel_display_wa { + INTEL_DISPLAY_WA_16023588340, + INTEL_DISPLAY_WA_16025573575, + INTEL_DISPLAY_WA_14011503117, +}; + +bool __intel_display_wa(struct intel_display *display, enum intel_display_wa wa, const char *name); + +#define intel_display_wa(__display, __wa) \ + __intel_display_wa((__display), INTEL_DISPLAY_WA_##__wa, __stringify(__wa)) + #endif diff --git a/drivers/gpu/drm/i915/display/intel_dkl_phy_regs.h b/drivers/gpu/drm/i915/display/intel_dkl_phy_regs.h index 3d8fa667cc73..f8ffeec29e93 100644 --- a/drivers/gpu/drm/i915/display/intel_dkl_phy_regs.h +++ b/drivers/gpu/drm/i915/display/intel_dkl_phy_regs.h @@ -153,6 +153,7 @@ struct intel_dkl_phy_reg { #define DKL_TX_DPCNTL2_CFG_LOADGENSELECT_TX1(val) REG_FIELD_PREP(DKL_TX_DPCNTL2_CFG_LOADGENSELECT_TX1_MASK, (val)) #define DKL_TX_DPCNTL2_CFG_LOADGENSELECT_TX2_MASK REG_GENMASK(6, 5) #define DKL_TX_DPCNTL2_CFG_LOADGENSELECT_TX2(val) REG_FIELD_PREP(DKL_TX_DPCNTL2_CFG_LOADGENSELECT_TX2_MASK, (val)) +#define LOADGEN_SHARING_PMD_DISABLE REG_BIT(12) #define _DKL_TX_FW_CALIB_LN0 0x02F8 #define _DKL_TX_FW_CALIB_LN1 0x12F8 diff --git a/drivers/gpu/drm/i915/display/intel_dmc.c b/drivers/gpu/drm/i915/display/intel_dmc.c index a10e56e7cf31..77a0199f9ea5 100644 --- a/drivers/gpu/drm/i915/display/intel_dmc.c +++ b/drivers/gpu/drm/i915/display/intel_dmc.c @@ -24,9 +24,13 @@ #include <linux/debugfs.h> #include <linux/firmware.h> +#include <drm/drm_vblank.h> + +#include <drm/drm_file.h> +#include <drm/drm_print.h> -#include "i915_drv.h" #include "i915_reg.h" +#include "i915_utils.h" #include "intel_crtc.h" #include "intel_de.h" #include "intel_display_power_well.h" @@ -35,6 +39,7 @@ #include "intel_display_types.h" #include "intel_dmc.h" #include "intel_dmc_regs.h" +#include "intel_flipq.h" #include "intel_step.h" /** @@ -179,7 +184,8 @@ static const char *dmc_firmware_default(struct intel_display *display, u32 *size const char *fw_path = NULL; u32 max_fw_size = 0; - if (DISPLAY_VERx100(display) == 3000) { + if (DISPLAY_VERx100(display) == 3002 || + DISPLAY_VERx100(display) == 3000) { fw_path = XE3LPD_DMC_PATH; max_fw_size = XE2LPD_DMC_MAX_FW_SIZE; } else if (DISPLAY_VERx100(display) == 2000) { @@ -432,25 +438,22 @@ static void disable_event_handler(struct intel_display *display, intel_de_write(display, htp_reg, 0); } -static void disable_all_event_handlers(struct intel_display *display) +static void disable_all_event_handlers(struct intel_display *display, + enum intel_dmc_id dmc_id) { - enum intel_dmc_id dmc_id; + int handler; /* TODO: disable the event handlers on pre-GEN12 platforms as well */ if (DISPLAY_VER(display) < 12) return; 
- for_each_dmc_id(dmc_id) { - int handler; - - if (!has_dmc_id_fw(display, dmc_id)) - continue; + if (!has_dmc_id_fw(display, dmc_id)) + return; - for (handler = 0; handler < DMC_EVENT_HANDLER_COUNT_GEN12; handler++) - disable_event_handler(display, - DMC_EVT_CTL(display, dmc_id, handler), - DMC_EVT_HTP(display, dmc_id, handler)); - } + for (handler = 0; handler < DMC_EVENT_HANDLER_COUNT_GEN12; handler++) + disable_event_handler(display, + DMC_EVT_CTL(display, dmc_id, handler), + DMC_EVT_HTP(display, dmc_id, handler)); } static void adlp_pipedmc_clock_gating_wa(struct intel_display *display, bool enable) @@ -482,12 +485,13 @@ static void mtl_pipedmc_clock_gating_wa(struct intel_display *display) * for pipe A and B. */ intel_de_rmw(display, GEN9_CLKGATE_DIS_0, 0, - MTL_PIPEDMC_GATING_DIS_A | MTL_PIPEDMC_GATING_DIS_B); + MTL_PIPEDMC_GATING_DIS(PIPE_A) | + MTL_PIPEDMC_GATING_DIS(PIPE_B)); } static void pipedmc_clock_gating_wa(struct intel_display *display, bool enable) { - if (DISPLAY_VER(display) >= 14 && enable) + if (display->platform.meteorlake && enable) mtl_pipedmc_clock_gating_wa(display); else if (DISPLAY_VER(display) == 13) adlp_pipedmc_clock_gating_wa(display, enable); @@ -500,46 +504,11 @@ static u32 pipedmc_interrupt_mask(struct intel_display *display) * triggering it during the first DC state transition. Figure * out what is going on... */ - return PIPEDMC_GTT_FAULT | + return PIPEDMC_FLIPQ_PROG_DONE | + PIPEDMC_GTT_FAULT | PIPEDMC_ATS_FAULT; } -void intel_dmc_enable_pipe(struct intel_display *display, enum pipe pipe) -{ - enum intel_dmc_id dmc_id = PIPE_TO_DMC_ID(pipe); - - if (!is_valid_dmc_id(dmc_id) || !has_dmc_id_fw(display, dmc_id)) - return; - - if (DISPLAY_VER(display) >= 20) { - intel_de_write(display, PIPEDMC_INTERRUPT(pipe), pipedmc_interrupt_mask(display)); - intel_de_write(display, PIPEDMC_INTERRUPT_MASK(pipe), ~pipedmc_interrupt_mask(display)); - } - - if (DISPLAY_VER(display) >= 14) - intel_de_rmw(display, MTL_PIPEDMC_CONTROL, 0, PIPEDMC_ENABLE_MTL(pipe)); - else - intel_de_rmw(display, PIPEDMC_CONTROL(pipe), 0, PIPEDMC_ENABLE); -} - -void intel_dmc_disable_pipe(struct intel_display *display, enum pipe pipe) -{ - enum intel_dmc_id dmc_id = PIPE_TO_DMC_ID(pipe); - - if (!is_valid_dmc_id(dmc_id) || !has_dmc_id_fw(display, dmc_id)) - return; - - if (DISPLAY_VER(display) >= 14) - intel_de_rmw(display, MTL_PIPEDMC_CONTROL, PIPEDMC_ENABLE_MTL(pipe), 0); - else - intel_de_rmw(display, PIPEDMC_CONTROL(pipe), PIPEDMC_ENABLE, 0); - - if (DISPLAY_VER(display) >= 20) { - intel_de_write(display, PIPEDMC_INTERRUPT_MASK(pipe), ~0); - intel_de_write(display, PIPEDMC_INTERRUPT(pipe), pipedmc_interrupt_mask(display)); - } -} - static u32 dmc_evt_ctl_disable(void) { return REG_FIELD_PREP(DMC_EVT_CTL_TYPE_MASK, @@ -577,6 +546,238 @@ static bool is_event_handler(struct intel_display *display, REG_FIELD_GET(DMC_EVT_CTL_EVENT_ID_MASK, data) == event_id; } +static bool disable_dmc_evt(struct intel_display *display, + enum intel_dmc_id dmc_id, + i915_reg_t reg, u32 data) +{ + if (!is_dmc_evt_ctl_reg(display, dmc_id, reg)) + return false; + + /* keep all pipe DMC events disabled by default */ + if (dmc_id != DMC_FW_MAIN) + return true; + + /* also disable the flip queue event on the main DMC on TGL */ + if (display->platform.tigerlake && + is_event_handler(display, dmc_id, MAINDMC_EVENT_CLK_MSEC, reg, data)) + return true; + + /* also disable the HRR event on the main DMC on TGL/ADLS */ + if ((display->platform.tigerlake || display->platform.alderlake_s) && + is_event_handler(display, 
dmc_id, MAINDMC_EVENT_VBLANK_A, reg, data)) + return true; + + return false; +} + +static u32 dmc_mmiodata(struct intel_display *display, + struct intel_dmc *dmc, + enum intel_dmc_id dmc_id, int i) +{ + if (disable_dmc_evt(display, dmc_id, + dmc->dmc_info[dmc_id].mmioaddr[i], + dmc->dmc_info[dmc_id].mmiodata[i])) + return dmc_evt_ctl_disable(); + else + return dmc->dmc_info[dmc_id].mmiodata[i]; +} + +static void dmc_load_mmio(struct intel_display *display, enum intel_dmc_id dmc_id) +{ + struct intel_dmc *dmc = display_to_dmc(display); + int i; + + for (i = 0; i < dmc->dmc_info[dmc_id].mmio_count; i++) { + intel_de_write(display, dmc->dmc_info[dmc_id].mmioaddr[i], + dmc_mmiodata(display, dmc, dmc_id, i)); + } +} + +static void dmc_load_program(struct intel_display *display, enum intel_dmc_id dmc_id) +{ + struct intel_dmc *dmc = display_to_dmc(display); + int i; + + disable_all_event_handlers(display, dmc_id); + + preempt_disable(); + + for (i = 0; i < dmc->dmc_info[dmc_id].dmc_fw_size; i++) { + intel_de_write_fw(display, + DMC_PROGRAM(dmc->dmc_info[dmc_id].start_mmioaddr, i), + dmc->dmc_info[dmc_id].payload[i]); + } + + preempt_enable(); + + dmc_load_mmio(display, dmc_id); +} + +static void assert_dmc_loaded(struct intel_display *display, + enum intel_dmc_id dmc_id) +{ + struct intel_dmc *dmc = display_to_dmc(display); + u32 expected, found; + int i; + + if (!is_valid_dmc_id(dmc_id) || !has_dmc_id_fw(display, dmc_id)) + return; + + found = intel_de_read(display, DMC_PROGRAM(dmc->dmc_info[dmc_id].start_mmioaddr, 0)); + expected = dmc->dmc_info[dmc_id].payload[0]; + + drm_WARN(display->drm, found != expected, + "DMC %d program storage start incorrect (expected 0x%x, current 0x%x)\n", + dmc_id, expected, found); + + for (i = 0; i < dmc->dmc_info[dmc_id].mmio_count; i++) { + i915_reg_t reg = dmc->dmc_info[dmc_id].mmioaddr[i]; + + found = intel_de_read(display, reg); + expected = dmc_mmiodata(display, dmc, dmc_id, i); + + /* once set DMC_EVT_CTL_ENABLE can't be cleared :/ */ + if (is_dmc_evt_ctl_reg(display, dmc_id, reg)) { + found &= ~DMC_EVT_CTL_ENABLE; + expected &= ~DMC_EVT_CTL_ENABLE; + } + + drm_WARN(display->drm, found != expected, + "DMC %d mmio[%d]/0x%x incorrect (expected 0x%x, current 0x%x)\n", + dmc_id, i, i915_mmio_reg_offset(reg), expected, found); + } +} + +void assert_main_dmc_loaded(struct intel_display *display) +{ + assert_dmc_loaded(display, DMC_FW_MAIN); +} + +static bool need_pipedmc_load_program(struct intel_display *display) +{ + /* On TGL/derivatives pipe DMC state is lost when PG1 is disabled */ + return DISPLAY_VER(display) == 12; +} + +static bool need_pipedmc_load_mmio(struct intel_display *display, enum pipe pipe) +{ + /* + * PTL: + * - pipe A/B DMC doesn't need save/restore + * - pipe C/D DMC is in PG0, needs manual save/restore + */ + if (DISPLAY_VER(display) == 30) + return pipe >= PIPE_C; + + /* + * FIXME LNL unclear, main DMC firmware has the pipe DMC A/B PG0 + * save/restore, but so far unable to see the loss of pipe DMC state + * in action. Are we just failing to turn off PG0 due to some other + * SoC level stuff? + */ + if (DISPLAY_VER(display) == 20) + return false; + + /* + * FIXME BMG untested, main DMC firmware has the + * pipe DMC A/B PG0 save/restore... + */ + if (display->platform.battlemage) + return false; + + /* + * DG2: + * - Pipe DMCs presumably in PG0? 
+ * - No DC6, and even DC9 doesn't seem to result + * in loss of DMC state for whatever reason + */ + if (display->platform.dg2) + return false; + + /* + * ADL/MTL: + * - pipe A/B DMC is in PG0, saved/restored by the main DMC + * - pipe C/D DMC is in PG0, needs manual save/restore + */ + if (IS_DISPLAY_VER(display, 13, 14)) + return pipe >= PIPE_C; + + return false; +} + +static bool can_enable_pipedmc(const struct intel_crtc_state *crtc_state) +{ + struct intel_display *display = to_intel_display(crtc_state); + + /* + * On TGL/derivatives pipe DMC state is lost when PG1 is disabled. + * Do not even enable the pipe DMC when that can happen outside + * of driver control (PSR+DC5/6). + */ + if (DISPLAY_VER(display) == 12 && crtc_state->has_psr) + return false; + + return true; +} + +void intel_dmc_enable_pipe(const struct intel_crtc_state *crtc_state) +{ + struct intel_display *display = to_intel_display(crtc_state); + struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); + enum pipe pipe = crtc->pipe; + enum intel_dmc_id dmc_id = PIPE_TO_DMC_ID(pipe); + + if (!is_valid_dmc_id(dmc_id) || !has_dmc_id_fw(display, dmc_id)) + return; + + if (!can_enable_pipedmc(crtc_state)) { + intel_dmc_disable_pipe(crtc_state); + return; + } + + if (need_pipedmc_load_program(display)) + dmc_load_program(display, dmc_id); + else if (need_pipedmc_load_mmio(display, pipe)) + dmc_load_mmio(display, dmc_id); + + assert_dmc_loaded(display, dmc_id); + + if (DISPLAY_VER(display) >= 20) { + intel_flipq_reset(display, pipe); + + intel_de_write(display, PIPEDMC_INTERRUPT(pipe), pipedmc_interrupt_mask(display)); + intel_de_write(display, PIPEDMC_INTERRUPT_MASK(pipe), ~pipedmc_interrupt_mask(display)); + } + + if (DISPLAY_VER(display) >= 14) + intel_de_rmw(display, MTL_PIPEDMC_CONTROL, 0, PIPEDMC_ENABLE_MTL(pipe)); + else + intel_de_rmw(display, PIPEDMC_CONTROL(pipe), 0, PIPEDMC_ENABLE); +} + +void intel_dmc_disable_pipe(const struct intel_crtc_state *crtc_state) +{ + struct intel_display *display = to_intel_display(crtc_state); + struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); + enum pipe pipe = crtc->pipe; + enum intel_dmc_id dmc_id = PIPE_TO_DMC_ID(pipe); + + if (!is_valid_dmc_id(dmc_id) || !has_dmc_id_fw(display, dmc_id)) + return; + + if (DISPLAY_VER(display) >= 14) + intel_de_rmw(display, MTL_PIPEDMC_CONTROL, PIPEDMC_ENABLE_MTL(pipe), 0); + else + intel_de_rmw(display, PIPEDMC_CONTROL(pipe), PIPEDMC_ENABLE, 0); + + if (DISPLAY_VER(display) >= 20) { + intel_de_write(display, PIPEDMC_INTERRUPT_MASK(pipe), ~0); + intel_de_write(display, PIPEDMC_INTERRUPT(pipe), pipedmc_interrupt_mask(display)); + + intel_flipq_reset(display, pipe); + } +} + static void dmc_configure_event(struct intel_display *display, enum intel_dmc_id dmc_id, unsigned int event_id, @@ -637,42 +838,6 @@ void intel_dmc_start_pkgc_exit_at_start_of_undelayed_vblank(struct intel_display dmc_configure_event(display, dmc_id, PIPEDMC_EVENT_VBLANK, enable); } -static bool disable_dmc_evt(struct intel_display *display, - enum intel_dmc_id dmc_id, - i915_reg_t reg, u32 data) -{ - if (!is_dmc_evt_ctl_reg(display, dmc_id, reg)) - return false; - - /* keep all pipe DMC events disabled by default */ - if (dmc_id != DMC_FW_MAIN) - return true; - - /* also disable the flip queue event on the main DMC on TGL */ - if (display->platform.tigerlake && - is_event_handler(display, dmc_id, MAINDMC_EVENT_CLK_MSEC, reg, data)) - return true; - - /* also disable the HRR event on the main DMC on TGL/ADLS */ - if ((display->platform.tigerlake || 
display->platform.alderlake_s) && - is_event_handler(display, dmc_id, MAINDMC_EVENT_VBLANK_A, reg, data)) - return true; - - return false; -} - -static u32 dmc_mmiodata(struct intel_display *display, - struct intel_dmc *dmc, - enum intel_dmc_id dmc_id, int i) -{ - if (disable_dmc_evt(display, dmc_id, - dmc->dmc_info[dmc_id].mmioaddr[i], - dmc->dmc_info[dmc_id].mmiodata[i])) - return dmc_evt_ctl_disable(); - else - return dmc->dmc_info[dmc_id].mmiodata[i]; -} - /** * intel_dmc_load_program() - write the firmware from memory to register. * @display: display instance @@ -684,37 +849,26 @@ static u32 dmc_mmiodata(struct intel_display *display, void intel_dmc_load_program(struct intel_display *display) { struct i915_power_domains *power_domains = &display->power.domains; - struct intel_dmc *dmc = display_to_dmc(display); enum intel_dmc_id dmc_id; - u32 i; if (!intel_dmc_has_payload(display)) return; - pipedmc_clock_gating_wa(display, true); - - disable_all_event_handlers(display); - assert_display_rpm_held(display); - preempt_disable(); + pipedmc_clock_gating_wa(display, true); for_each_dmc_id(dmc_id) { - for (i = 0; i < dmc->dmc_info[dmc_id].dmc_fw_size; i++) { - intel_de_write_fw(display, - DMC_PROGRAM(dmc->dmc_info[dmc_id].start_mmioaddr, i), - dmc->dmc_info[dmc_id].payload[i]); - } + dmc_load_program(display, dmc_id); + assert_dmc_loaded(display, dmc_id); } - preempt_enable(); - - for_each_dmc_id(dmc_id) { - for (i = 0; i < dmc->dmc_info[dmc_id].mmio_count; i++) { - intel_de_write(display, dmc->dmc_info[dmc_id].mmioaddr[i], - dmc_mmiodata(display, dmc, dmc_id, i)); - } - } + if (DISPLAY_VER(display) >= 20) + intel_de_write(display, DMC_FQ_W2_PTS_CFG_SEL, + PIPE_D_DMC_W2_PTS_CONFIG_SELECT(PIPE_D) | + PIPE_C_DMC_W2_PTS_CONFIG_SELECT(PIPE_C) | + PIPE_B_DMC_W2_PTS_CONFIG_SELECT(PIPE_B) | + PIPE_A_DMC_W2_PTS_CONFIG_SELECT(PIPE_A)); power_domains->dc_state = 0; @@ -732,26 +886,17 @@ void intel_dmc_load_program(struct intel_display *display) */ void intel_dmc_disable_program(struct intel_display *display) { + enum intel_dmc_id dmc_id; + if (!intel_dmc_has_payload(display)) return; pipedmc_clock_gating_wa(display, true); - disable_all_event_handlers(display); - pipedmc_clock_gating_wa(display, false); -} -void assert_dmc_loaded(struct intel_display *display) -{ - struct intel_dmc *dmc = display_to_dmc(display); + for_each_dmc_id(dmc_id) + disable_all_event_handlers(display, dmc_id); - drm_WARN_ONCE(display->drm, !dmc, "DMC not initialized\n"); - drm_WARN_ONCE(display->drm, dmc && - !intel_de_read(display, DMC_PROGRAM(dmc->dmc_info[DMC_FW_MAIN].start_mmioaddr, 0)), - "DMC program storage start is NULL\n"); - drm_WARN_ONCE(display->drm, !intel_de_read(display, DMC_SSP_BASE), - "DMC SSP Base Not fine\n"); - drm_WARN_ONCE(display->drm, !intel_de_read(display, DMC_HTP_SKL), - "DMC HTP Not fine\n"); + pipedmc_clock_gating_wa(display, false); } static bool fw_info_matches_stepping(const struct intel_fw_info *fw_info, @@ -1170,7 +1315,6 @@ out: */ void intel_dmc_init(struct intel_display *display) { - struct drm_i915_private *i915 = to_i915(display->drm); struct intel_dmc *dmc; if (!HAS_DMC(display)) @@ -1213,7 +1357,7 @@ void intel_dmc_init(struct intel_display *display) display->dmc.dmc = dmc; drm_dbg_kms(display->drm, "Loading %s\n", dmc->fw_path); - queue_work(i915->unordered_wq, &dmc->work); + queue_work(display->wq.unordered, &dmc->work); return; @@ -1244,6 +1388,17 @@ void intel_dmc_suspend(struct intel_display *display) intel_dmc_runtime_pm_put(display); } +void intel_dmc_wait_fw_load(struct 
intel_display *display) +{ + struct intel_dmc *dmc = display_to_dmc(display); + + if (!HAS_DMC(display)) + return; + + if (dmc) + flush_work(&dmc->work); +} + /** * intel_dmc_resume() - init DMC firmware during system resume * @display: display instance @@ -1448,21 +1603,36 @@ DEFINE_SHOW_ATTRIBUTE(intel_dmc_debugfs_status); void intel_dmc_debugfs_register(struct intel_display *display) { - struct drm_minor *minor = display->drm->primary; - - debugfs_create_file("i915_dmc_info", 0444, minor->debugfs_root, + debugfs_create_file("i915_dmc_info", 0444, display->drm->debugfs_root, display, &intel_dmc_debugfs_status_fops); } void intel_pipedmc_irq_handler(struct intel_display *display, enum pipe pipe) { struct intel_crtc *crtc = intel_crtc_for_pipe(display, pipe); - u32 tmp; + u32 tmp = 0, int_vector; if (DISPLAY_VER(display) >= 20) { tmp = intel_de_read(display, PIPEDMC_INTERRUPT(pipe)); intel_de_write(display, PIPEDMC_INTERRUPT(pipe), tmp); + if (tmp & PIPEDMC_FLIPQ_PROG_DONE) { + spin_lock(&display->drm->event_lock); + + if (crtc->flipq_event) { + /* + * Update vblank counter/timestamp in case it + * hasn't been done yet for this frame. + */ + drm_crtc_accurate_vblank_count(&crtc->base); + + drm_crtc_send_vblank_event(&crtc->base, crtc->flipq_event); + crtc->flipq_event = NULL; + } + + spin_unlock(&display->drm->event_lock); + } + if (tmp & PIPEDMC_ATS_FAULT) drm_err_ratelimited(display->drm, "[CRTC:%d:%s] PIPEDMC ATS fault\n", crtc->base.base.id, crtc->base.name); @@ -1474,8 +1644,35 @@ void intel_pipedmc_irq_handler(struct intel_display *display, enum pipe pipe) crtc->base.base.id, crtc->base.name); } - tmp = intel_de_read(display, PIPEDMC_STATUS(pipe)) & PIPEDMC_INT_VECTOR_MASK; - if (tmp) + int_vector = intel_de_read(display, PIPEDMC_STATUS(pipe)) & PIPEDMC_INT_VECTOR_MASK; + if (tmp == 0 && int_vector != 0) drm_err(display->drm, "[CRTC:%d:%s]] PIPEDMC interrupt vector 0x%x\n", crtc->base.base.id, crtc->base.name, tmp); } + +void intel_pipedmc_enable_event(struct intel_crtc *crtc, + enum pipedmc_event_id event) +{ + struct intel_display *display = to_intel_display(crtc); + enum intel_dmc_id dmc_id = PIPE_TO_DMC_ID(crtc->pipe); + + dmc_configure_event(display, dmc_id, event, true); +} + +void intel_pipedmc_disable_event(struct intel_crtc *crtc, + enum pipedmc_event_id event) +{ + struct intel_display *display = to_intel_display(crtc); + enum intel_dmc_id dmc_id = PIPE_TO_DMC_ID(crtc->pipe); + + dmc_configure_event(display, dmc_id, event, false); +} + +u32 intel_pipedmc_start_mmioaddr(struct intel_crtc *crtc) +{ + struct intel_display *display = to_intel_display(crtc); + struct intel_dmc *dmc = display_to_dmc(display); + enum intel_dmc_id dmc_id = PIPE_TO_DMC_ID(crtc->pipe); + + return dmc ? 
dmc->dmc_info[dmc_id].start_mmioaddr : 0; +} diff --git a/drivers/gpu/drm/i915/display/intel_dmc.h b/drivers/gpu/drm/i915/display/intel_dmc.h index a98e8deff13a..40e9dcb033cc 100644 --- a/drivers/gpu/drm/i915/display/intel_dmc.h +++ b/drivers/gpu/drm/i915/display/intel_dmc.h @@ -9,15 +9,19 @@ #include <linux/types.h> enum pipe; +enum pipedmc_event_id; struct drm_printer; +struct intel_crtc; +struct intel_crtc_state; struct intel_display; struct intel_dmc_snapshot; void intel_dmc_init(struct intel_display *display); void intel_dmc_load_program(struct intel_display *display); +void intel_dmc_wait_fw_load(struct intel_display *display); void intel_dmc_disable_program(struct intel_display *display); -void intel_dmc_enable_pipe(struct intel_display *display, enum pipe pipe); -void intel_dmc_disable_pipe(struct intel_display *display, enum pipe pipe); +void intel_dmc_enable_pipe(const struct intel_crtc_state *crtc_state); +void intel_dmc_disable_pipe(const struct intel_crtc_state *crtc_state); void intel_dmc_block_pkgc(struct intel_display *display, enum pipe pipe, bool block); void intel_dmc_start_pkgc_exit_at_start_of_undelayed_vblank(struct intel_display *display, @@ -32,7 +36,15 @@ struct intel_dmc_snapshot *intel_dmc_snapshot_capture(struct intel_display *disp void intel_dmc_snapshot_print(const struct intel_dmc_snapshot *snapshot, struct drm_printer *p); void intel_dmc_update_dc6_allowed_count(struct intel_display *display, bool start_tracking); -void assert_dmc_loaded(struct intel_display *display); +void assert_main_dmc_loaded(struct intel_display *display); + +void intel_pipedmc_irq_handler(struct intel_display *display, enum pipe pipe); + +u32 intel_pipedmc_start_mmioaddr(struct intel_crtc *crtc); +void intel_pipedmc_enable_event(struct intel_crtc *crtc, + enum pipedmc_event_id event); +void intel_pipedmc_disable_event(struct intel_crtc *crtc, + enum pipedmc_event_id event); void intel_pipedmc_irq_handler(struct intel_display *display, enum pipe pipe); diff --git a/drivers/gpu/drm/i915/display/intel_dmc_regs.h b/drivers/gpu/drm/i915/display/intel_dmc_regs.h index 6f406315dd65..c5aa49921cb9 100644 --- a/drivers/gpu/drm/i915/display/intel_dmc_regs.h +++ b/drivers/gpu/drm/i915/display/intel_dmc_regs.h @@ -287,6 +287,17 @@ enum pipedmc_event_id { #define MTL_PIPEDMC_CONTROL _MMIO(0x45250) #define PIPEDMC_ENABLE_MTL(pipe) REG_BIT(((pipe) - PIPE_A) * 4) +#define _PIPEDMC_LOAD_HTP_A 0x5f000 +#define _PIPEDMC_LOAD_HTP_B 0x5f400 +#define PIPEDMC_LOAD_HTP(pipe) _MMIO_PIPE((pipe), _PIPEDMC_LOAD_HTP_A, _PIPEDMC_LOAD_HTP_B) + +#define _PIPEDMC_CTL_A 0x5f064 +#define _PIPEDMC_CTL_B 0x5f464 +#define PIPEDMC_CTL(pipe) _MMIO_PIPE((pipe), _PIPEDMC_CTL_A, _PIPEDMC_CTL_B) +#define PIPEDMC_HALT REG_BIT(31) +#define PIPEDMC_STEP REG_BIT(27) +#define PIPEDMC_CLOCKGATE REG_BIT(23) + #define _PIPEDMC_STATUS_A 0x5f06c #define _PIPEDMC_STATUS_B 0x5f46c #define PIPEDMC_STATUS(pipe) _MMIO_PIPE((pipe), _PIPEDMC_STATUS_A, _PIPEDMC_STATUS_B) @@ -298,6 +309,138 @@ enum pipedmc_event_id { #define PIPEDMC_INT_VECTOR_FLIPQ_PROG_DONE REG_FIELD_PREP(PIPEDMC_INT_VECTOR_MASK, 0xff) /* Wa_16018781658:lnl[a0] */ #define PIPEDMC_EVT_PENDING REG_GENMASK(7, 0) +#define _PIPEDMC_FQ_CTRL_A 0x5f078 +#define _PIPEDMC_FQ_CTRL_B 0x5f478 +#define PIPEDMC_FQ_CTRL(pipe) _MMIO_PIPE((pipe), _PIPEDMC_FQ_CTRL_A, _PIPEDMC_FQ_CTRL_B) +#define PIPEDMC_FQ_CTRL_ENABLE REG_BIT(31) +#define PIPEDMC_FQ_CTRL_ASYNC REG_BIT(29) +#define PIPEDMC_FQ_CTRL_PREEMPT REG_BIT(0) + +#define _PIPEDMC_FQ_STATUS_A 0x5f098 +#define _PIPEDMC_FQ_STATUS_B 0x5f498 
+#define PIPEDMC_FQ_STATUS(pipe) _MMIO_PIPE((pipe), _PIPEDMC_FQ_STATUS_A, _PIPEDMC_FQ_STATUS_B) +#define PIPEDMC_FQ_STATUS_BUSY REG_BIT(31) +#define PIPEDMC_FQ_STATUS_W2_LIVE_STATUS REG_BIT(1) +#define PIPEDMC_FQ_STATUS_W1_LIVE_STATUS REG_BIT(0) + +#define _PIPEDMC_FPQ_ATOMIC_TP_A 0x5f0a0 +#define _PIPEDMC_FPQ_ATOMIC_TP_B 0x5f4a0 +#define PIPEDMC_FPQ_ATOMIC_TP(pipe) _MMIO_PIPE((pipe), _PIPEDMC_FPQ_ATOMIC_TP_A, _PIPEDMC_FPQ_ATOMIC_TP_B) +#define PIPEDMC_FPQ_PLANEQ_3_TP_MASK REG_GENMASK(31, 26) +#define PIPEDMC_FPQ_PLANEQ_3_TP(tail) REG_FIELD_PREP(PIPEDMC_FPQ_PLANEQ_3_TP_MASK, (tail)) +#define PIPEDMC_FPQ_PLANEQ_2_TP_MASK REG_GENMASK(24, 19) +#define PIPEDMC_FPQ_PLANEQ_2_TP(tail) REG_FIELD_PREP(PIPEDMC_FPQ_PLANEQ_2_TP_MASK, (tail)) +#define PIPEDMC_FPQ_PLANEQ_1_TP_MASK REG_GENMASK(17, 12) +#define PIPEDMC_FPQ_PLANEQ_1_TP(tail) REG_FIELD_PREP(PIPEDMC_FPQ_PLANEQ_1_TP_MASK, (tail)) +#define PIPEDMC_FPQ_FASTQ_TP_MASK REG_GENMASK(10, 6) +#define PIPEDMC_FPQ_FASTQ_TP(tail) REG_FIELD_PREP(PIPEDMC_FPQ_FASTQ_TP_MASK, (tail)) +#define PIPEDMC_FPQ_GENERALQ_TP_MASK REG_GENMASK(4, 0) +#define PIPEDMC_FPQ_GENERALQ_TP(tail) REG_FIELD_PREP(PIPEDMC_FPQ_GENERALQ_TP_MASK, (tail)) + +#define _PIPEDMC_FPQ_LINES_TO_W1_A 0x5f0a4 +#define _PIPEDMC_FPQ_LINES_TO_W1_B 0x5f4a4 +#define PIPEDMC_FPQ_LINES_TO_W1 _MMIO_PIPE((pipe), _PIPEDMC_FPQ_LINES_TO_W1_A, _PIPEDMC_FPQ_LINES_TO_W1_B) + +#define _PIPEDMC_FPQ_LINES_TO_W2_A 0x5f0a8 +#define _PIPEDMC_FPQ_LINES_TO_W2_B 0x5f4a8 +#define PIPEDMC_FPQ_LINES_TO_W2 _MMIO_PIPE((pipe), _PIPEDMC_FPQ_LINES_TO_W2_A, _PIPEDMC_FPQ_LINES_TO_W2_B) + +#define _PIPEDMC_SCANLINECMP_A 0x5f11c +#define _PIPEDMC_SCANLINECMP_B 0x5f51c +#define PIPEDMC_SCANLINECMP(pipe) _MMIO_PIPE((pipe), _PIPEDMC_SCANLINECMP_A, _PIPEDMC_SCANLINECMP_B) +#define PIPEDMC_SCANLINECMP_EN REG_BIT(31) +#define PIPEDMC_SCANLINE_NUMBER REG_GENMASK(20, 0) + +#define _PIPEDMC_SCANLINECMPLOWER_A 0x5f120 +#define _PIPEDMC_SCANLINECMPLOWER_B 0x5f520 +#define PIPEDMC_SCANLINECMPLOWER(pipe) _MMIO_PIPE((pipe), _PIPEDMC_SCANLINECMPLOWER_A, _PIPEDMC_SCANLINECMPLOWER_B) +#define PIPEDMC_SCANLINEINRANGECMP_EN REG_BIT(31) +#define PIPEDMC_SCANLINEOUTRANGECMP_EN REG_BIT(30) +#define PIPEDMC_SCANLINE_LOWER_MASK REG_GENMASK(20, 0) +#define PIPEDMC_SCANLINE_LOWER(scanline) REG_FIELD_PREP(PIPEDMC_SCANLINE_LOWER_MASK, (scanline)) + +#define _PIPEDMC_SCANLINECMPUPPER_A 0x5f124 +#define _PIPEDMC_SCANLINECMPUPPER_B 0x5f524 +#define PIPEDMC_SCANLINECMPUPPER(pipe) _MMIO_PIPE((pipe), _PIPEDMC_SCANLINECMPUPPER_A, _PIPEDMC_SCANLINECMPUPPER_B) +#define PIPEDMC_SCANLINE_UPPER_MASK REG_GENMASK(20, 0) +#define PIPEDMC_SCANLINE_UPPER(scanline) REG_FIELD_PREP(PIPEDMC_SCANLINE_UPPER_MASK, (scanline)) + +#define _MMIO_PIPEDMC_FPQ(pipe, fq_id, \ + reg_fpq1_a, reg_fpq2_a, reg_fpq3_a, reg_fpq4_a, \ + reg_fpq1_b, reg_fpq2_b, reg_fpq3_b, reg_fpq4_b) \ + _MMIO(_PICK_EVEN_2RANGES((fq_id), INTEL_FLIPQ_PLANE_3, \ + _PIPE((pipe), (reg_fpq1_a), (reg_fpq1_b)), \ + _PIPE((pipe), (reg_fpq2_a), (reg_fpq2_b)), \ + _PIPE((pipe), (reg_fpq3_a), (reg_fpq3_b)), \ + _PIPE((pipe), (reg_fpq4_a), (reg_fpq4_b)))) + +#define _PIPEDMC_FPQ1_HP_A 0x5f128 +#define _PIPEDMC_FPQ2_HP_A 0x5f138 +#define _PIPEDMC_FPQ3_HP_A 0x5f168 +#define _PIPEDMC_FPQ4_HP_A 0x5f174 +#define _PIPEDMC_FPQ5_HP_A 0x5f180 +#define _PIPEDMC_FPQ1_HP_B 0x5f528 +#define _PIPEDMC_FPQ2_HP_B 0x5f538 +#define _PIPEDMC_FPQ3_HP_B 0x5f568 +#define _PIPEDMC_FPQ4_HP_B 0x5f574 +#define _PIPEDMC_FPQ5_HP_B 0x5f580 +#define PIPEDMC_FPQ_HP(pipe, fq_id) _MMIO_PIPEDMC_FPQ((pipe), (fq_id), \ + _PIPEDMC_FPQ1_HP_A, _PIPEDMC_FPQ2_HP_A, 
\ + _PIPEDMC_FPQ3_HP_A, _PIPEDMC_FPQ4_HP_A, \ + _PIPEDMC_FPQ1_HP_B, _PIPEDMC_FPQ2_HP_B, \ + _PIPEDMC_FPQ3_HP_B, _PIPEDMC_FPQ4_HP_B) + +#define _PIPEDMC_FPQ1_TP_A 0x5f12c +#define _PIPEDMC_FPQ2_TP_A 0x5f13c +#define _PIPEDMC_FPQ3_TP_A 0x5f16c +#define _PIPEDMC_FPQ4_TP_A 0x5f178 +#define _PIPEDMC_FPQ5_TP_A 0x5f184 +#define _PIPEDMC_FPQ1_TP_B 0x5f52c +#define _PIPEDMC_FPQ2_TP_B 0x5f53c +#define _PIPEDMC_FPQ3_TP_B 0x5f56c +#define _PIPEDMC_FPQ4_TP_B 0x5f578 +#define _PIPEDMC_FPQ5_TP_B 0x5f584 +#define PIPEDMC_FPQ_TP(pipe, fq_id) _MMIO_PIPEDMC_FPQ((pipe), (fq_id), \ + _PIPEDMC_FPQ1_TP_A, _PIPEDMC_FPQ2_TP_A, \ + _PIPEDMC_FPQ3_TP_A, _PIPEDMC_FPQ4_TP_A, \ + _PIPEDMC_FPQ1_TP_B, _PIPEDMC_FPQ2_TP_B, \ + _PIPEDMC_FPQ3_TP_B, _PIPEDMC_FPQ4_TP_B) + +#define _PIPEDMC_FPQ1_CHP_A 0x5f130 +#define _PIPEDMC_FPQ2_CHP_A 0x5f140 +#define _PIPEDMC_FPQ3_CHP_A 0x5f170 +#define _PIPEDMC_FPQ4_CHP_A 0x5f17c +#define _PIPEDMC_FPQ5_CHP_A 0x5f188 +#define _PIPEDMC_FPQ1_CHP_B 0x5f530 +#define _PIPEDMC_FPQ2_CHP_B 0x5f540 +#define _PIPEDMC_FPQ3_CHP_B 0x5f570 +#define _PIPEDMC_FPQ4_CHP_B 0x5f57c +#define _PIPEDMC_FPQ5_CHP_B 0x5f588 +#define PIPEDMC_FPQ_CHP(pipe, fq_id) _MMIO_PIPEDMC_FPQ((pipe), (fq_id), \ + _PIPEDMC_FPQ1_CHP_A, _PIPEDMC_FPQ2_CHP_A, \ + _PIPEDMC_FPQ3_CHP_A, _PIPEDMC_FPQ4_CHP_A, \ + _PIPEDMC_FPQ1_CHP_B, _PIPEDMC_FPQ2_CHP_B, \ + _PIPEDMC_FPQ3_CHP_B, _PIPEDMC_FPQ4_CHP_B) + +#define _PIPEDMC_FPQ_TS_A 0x5f134 +#define _PIPEDMC_FPQ_TS_B 0x5f534 +#define PIPEDMC_FPQ_TS(pipe) _MMIO_PIPE((pipe), _PIPEDMC_FPQ_TS_A, _PIPEDMC_FPQ_TS_B) + +#define _PIPEDMC_SCANLINE_RO_A 0x5f144 +#define _PIPEDMC_SCANLINE_RO_B 0x5f544 +#define PIPEDMC_SCANLINE_RO(pipe) _MMIO_PIPE((pipe), _PIPEDMC_SCANLINE_RO_A, _PIPEDMC_SCANLINE_RO_B) + +#define _PIPEDMC_FPQ_CTL1_A 0x5f160 +#define _PIPEDMC_FPQ_CTL1_B 0x5f560 +#define PIPEDMC_FPQ_CTL1(pipe) _MMIO_PIPE((pipe), _PIPEDMC_FPQ_CTL1_A, _PIPEDMC_FPQ_CTL1_B) +#define PIPEDMC_SW_DMC_WAKE REG_BIT(0) + +#define _PIPEDMC_FPQ_CTL2_A 0x5f164 +#define _PIPEDMC_FPQ_CTL2_B 0x5f564 +#define PIPEDMC_FPQ_CTL2(pipe) _MMIO_PIPE((pipe), _PIPEDMC_FPQ_CTL2_A, _PIPEDMC_FPQ_CTL2_B) +#define PIPEDMC_DMC_INT_AT_DELAYED_VBLANK REG_BIT(1) +#define PIPEDMC_W1_DMC_WAKE REG_BIT(0) + #define _PIPEDMC_INTERRUPT_A 0x5f190 /* lnl+ */ #define _PIPEDMC_INTERRUPT_B 0x5f590 /* lnl+ */ #define PIPEDMC_INTERRUPT(pipe) _MMIO_PIPE((pipe), _PIPEDMC_INTERRUPT_A, _PIPEDMC_INTERRUPT_B) @@ -394,4 +537,51 @@ enum pipedmc_event_id { #define DMC_WAKELOCK_CTL_REQ REG_BIT(31) #define DMC_WAKELOCK_CTL_ACK REG_BIT(15) +#define DMC_FQ_W2_PTS_CFG_SEL _MMIO(0x8f240) +#define PIPE_D_DMC_W2_PTS_CONFIG_SELECT_MASK REG_GENMASK(26, 24) +#define PIPE_D_DMC_W2_PTS_CONFIG_SELECT(pipe) REG_FIELD_PREP(PIPE_D_DMC_W2_PTS_CONFIG_SELECT_MASK, (pipe)) +#define PIPE_C_DMC_W2_PTS_CONFIG_SELECT_MASK REG_GENMASK(18, 16) +#define PIPE_C_DMC_W2_PTS_CONFIG_SELECT(pipe) REG_FIELD_PREP(PIPE_C_DMC_W2_PTS_CONFIG_SELECT_MASK, (pipe)) +#define PIPE_B_DMC_W2_PTS_CONFIG_SELECT_MASK REG_GENMASK(10, 8) +#define PIPE_B_DMC_W2_PTS_CONFIG_SELECT(pipe) REG_FIELD_PREP(PIPE_B_DMC_W2_PTS_CONFIG_SELECT_MASK, (pipe)) +#define PIPE_A_DMC_W2_PTS_CONFIG_SELECT_MASK REG_GENMASK(2, 0) +#define PIPE_A_DMC_W2_PTS_CONFIG_SELECT(pipe) REG_FIELD_PREP(PIPE_A_DMC_W2_PTS_CONFIG_SELECT_MASK, (pipe)) + +/* plane/general flip queue entries */ +#define PIPEDMC_FQ_RAM(start_mmioaddr, i) _MMIO((start_mmioaddr) + (i) * 4) +/* LNL */ +/* DW0 pts */ +/* DW1 head */ +/* DW2 size/etc. 
*/ +#define LNL_FQ_INTERRUPT REG_BIT(31) +#define LNL_FQ_DSB_ID_MASK REG_GENMASK(30, 29) +#define LNL_FQ_DSB_ID(dsb_id) REG_FIELD_PREP(LNL_FQ_DSB_ID_MASK, (dsb_id)) +#define LNL_FQ_EXECUTED REG_BIT(28) +#define LNL_FQ_DSB_SIZE_MASK REG_GENMASK(15, 0) +#define LNL_FQ_DSB_SIZE(size) REG_FIELD_PREP(LNL_FQ_DSB_SIZE_MASK, (size)) +/* DW3 reserved (plane queues) */ +/* DW3 second DSB head (general queue) */ +/* DW4 second DSB size/etc. (general queue) */ +/* DW5 reserved (general queue) */ + +/* PTL+ */ +/* DW0 pts */ +/* DW1 reserved */ +/* DW2 size/etc. */ +#define PTL_FQ_INTERRUPT REG_BIT(31) +#define PTL_FQ_NEED_PUSH REG_BIT(30) +#define PTL_FQ_BLOCK_PUSH REG_BIT(29) +#define PTL_FQ_EXECUTED REG_BIT(28) +#define PTL_FQ_DSB_ID_MASK REG_GENMASK(25, 24) +#define PTL_FQ_DSB_ID(dsb_id) REG_FIELD_PREP(PTL_FQ_DSB_ID_MASK, (dsb_id)) +#define PTL_FQ_DSB_SIZE_MASK REG_GENMASK(15, 0) +#define PTL_FQ_DSB_SIZE(size) REG_FIELD_PREP(PTL_FQ_DSB_SIZE_MASK, (size)) +/* DW3 head */ +/* DW4 second DSB size/etc. (general queue) */ +/* DW5 second DSB head (general queue) */ + +/* undocumented magic DMC variables */ +#define PTL_PIPEDMC_EXEC_TIME_LINES(start_mmioaddr) _MMIO((start_mmioaddr) + 0x6b8) +#define PTL_PIPEDMC_END_OF_EXEC_GB(start_mmioaddr) _MMIO((start_mmioaddr) + 0x6c0) + #endif /* __INTEL_DMC_REGS_H__ */ diff --git a/drivers/gpu/drm/i915/display/intel_dmc_wl.c b/drivers/gpu/drm/i915/display/intel_dmc_wl.c index 44b3ee5c9be4..b3bb89ba34f9 100644 --- a/drivers/gpu/drm/i915/display/intel_dmc_wl.c +++ b/drivers/gpu/drm/i915/display/intel_dmc_wl.c @@ -7,7 +7,6 @@ #include <drm/drm_print.h> -#include "i915_drv.h" #include "intel_de.h" #include "intel_display_regs.h" #include "intel_dmc_regs.h" @@ -155,12 +154,11 @@ static const struct intel_dmc_wl_range xe3lpd_dc3co_dmc_ranges[] = { static void __intel_dmc_wl_release(struct intel_display *display) { - struct drm_i915_private *i915 = to_i915(display->drm); struct intel_dmc_wl *wl = &display->wl; WARN_ON(refcount_read(&wl->refcount)); - queue_delayed_work(i915->unordered_wq, &wl->work, + queue_delayed_work(display->wq.unordered, &wl->work, msecs_to_jiffies(DMC_WAKELOCK_HOLD_TIME)); } diff --git a/drivers/gpu/drm/i915/display/intel_dp.c b/drivers/gpu/drm/i915/display/intel_dp.c index 277b40b13948..2eab591a8ef5 100644 --- a/drivers/gpu/drm/i915/display/intel_dp.c +++ b/drivers/gpu/drm/i915/display/intel_dp.c @@ -27,6 +27,7 @@ #include <linux/export.h> #include <linux/i2c.h> +#include <linux/iopoll.h> #include <linux/log2.h> #include <linux/math.h> #include <linux/notifier.h> @@ -174,7 +175,6 @@ int intel_dp_link_symbol_clock(int rate) static int max_dprx_rate(struct intel_dp *intel_dp) { struct intel_display *display = to_intel_display(intel_dp); - struct intel_encoder *encoder = &dp_to_dig_port(intel_dp)->base; int max_rate; if (intel_dp_tunnel_bw_alloc_is_enabled(intel_dp)) @@ -183,16 +183,13 @@ static int max_dprx_rate(struct intel_dp *intel_dp) max_rate = drm_dp_bw_code_to_link_rate(intel_dp->dpcd[DP_MAX_LINK_RATE]); /* - * Some broken eDP sinks illegally declare support for - * HBR3 without TPS4, and are unable to produce a stable - * output. Reject HBR3 when TPS4 is not available. + * Some platforms + eDP panels may not reliably support HBR3 + * due to signal integrity limitations, despite advertising it. + * Cap the link rate to HBR2 to avoid unstable configurations for the + * known machines. 
*/ - if (max_rate >= 810000 && !drm_dp_tps4_supported(intel_dp->dpcd)) { - drm_dbg_kms(display->drm, - "[ENCODER:%d:%s] Rejecting HBR3 due to missing TPS4 support\n", - encoder->base.base.id, encoder->base.name); - max_rate = 540000; - } + if (intel_dp_is_edp(intel_dp) && intel_has_quirk(display, QUIRK_EDP_LIMIT_RATE_HBR2)) + max_rate = min(max_rate, 540000); return max_rate; } @@ -1418,6 +1415,7 @@ intel_dp_mode_valid(struct drm_connector *_connector, struct intel_display *display = to_intel_display(_connector->dev); struct intel_connector *connector = to_intel_connector(_connector); struct intel_dp *intel_dp = intel_attached_dp(connector); + enum intel_output_format sink_format, output_format; const struct drm_display_mode *fixed_mode; int target_clock = mode->clock; int max_rate, mode_rate, max_lanes, max_link_clock; @@ -1451,6 +1449,13 @@ intel_dp_mode_valid(struct drm_connector *_connector, mode->hdisplay, target_clock); max_dotclk *= num_joined_pipes; + sink_format = intel_dp_sink_format(connector, mode); + output_format = intel_dp_output_format(connector, sink_format); + + status = intel_pfit_mode_valid(display, mode, output_format, num_joined_pipes); + if (status != MODE_OK) + return status; + if (target_clock > max_dotclk) return MODE_CLOCK_HIGH; @@ -1466,11 +1471,8 @@ intel_dp_mode_valid(struct drm_connector *_connector, intel_dp_mode_min_output_bpp(connector, mode)); if (intel_dp_has_dsc(connector)) { - enum intel_output_format sink_format, output_format; int pipe_bpp; - sink_format = intel_dp_sink_format(connector, mode); - output_format = intel_dp_output_format(connector, sink_format); /* * TBD pass the connector BPC, * for now U8_MAX so that max BPC on that platform would be picked @@ -1606,6 +1608,12 @@ int intel_dp_rate_select(struct intel_dp *intel_dp, int rate) void intel_dp_compute_rate(struct intel_dp *intel_dp, int port_clock, u8 *link_bw, u8 *rate_select) { + struct intel_display *display = to_intel_display(intel_dp); + + /* FIXME g4x can't generate an exact 2.7GHz with the 96MHz non-SSC refclk */ + if (display->platform.g4x && port_clock == 268800) + port_clock = 270000; + /* eDP 1.4 rate select method. */ if (intel_dp->use_rate_select) { *link_bw = 0; @@ -2529,13 +2537,15 @@ intel_dp_dsc_compute_pipe_bpp_limits(struct intel_dp *intel_dp, bool intel_dp_compute_config_limits(struct intel_dp *intel_dp, - struct intel_connector *connector, + struct drm_connector_state *conn_state, struct intel_crtc_state *crtc_state, bool respect_downstream_limits, bool dsc, struct link_config_limits *limits) { bool is_mst = intel_crtc_has_type(crtc_state, INTEL_OUTPUT_DP_MST); + struct intel_connector *connector = + to_intel_connector(conn_state->connector); limits->min_rate = intel_dp_min_link_rate(intel_dp); limits->max_rate = intel_dp_max_link_rate(intel_dp); @@ -2545,7 +2555,8 @@ intel_dp_compute_config_limits(struct intel_dp *intel_dp, limits->min_lane_count = intel_dp_min_lane_count(intel_dp); limits->max_lane_count = intel_dp_max_lane_count(intel_dp); - limits->pipe.min_bpp = intel_dp_min_bpp(crtc_state->output_format); + limits->pipe.min_bpp = intel_dp_in_hdr_mode(conn_state) ? 
30 : + intel_dp_min_bpp(crtc_state->output_format); if (is_mst) { /* * FIXME: If all the streams can't fit into the link with their @@ -2644,7 +2655,7 @@ intel_dp_compute_link_config(struct intel_encoder *encoder, joiner_needs_dsc = intel_dp_joiner_needs_dsc(display, num_joined_pipes); dsc_needed = joiner_needs_dsc || intel_dp->force_dsc_en || - !intel_dp_compute_config_limits(intel_dp, connector, pipe_config, + !intel_dp_compute_config_limits(intel_dp, conn_state, pipe_config, respect_downstream_limits, false, &limits); @@ -2678,7 +2689,7 @@ intel_dp_compute_link_config(struct intel_encoder *encoder, str_yes_no(ret), str_yes_no(joiner_needs_dsc), str_yes_no(intel_dp->force_dsc_en)); - if (!intel_dp_compute_config_limits(intel_dp, connector, pipe_config, + if (!intel_dp_compute_config_limits(intel_dp, conn_state, pipe_config, respect_downstream_limits, true, &limits)) @@ -2910,6 +2921,19 @@ static void intel_dp_compute_vsc_sdp(struct intel_dp *intel_dp, } } +bool +intel_dp_in_hdr_mode(const struct drm_connector_state *conn_state) +{ + struct hdr_output_metadata *hdr_metadata; + + if (!conn_state->hdr_output_metadata) + return false; + + hdr_metadata = conn_state->hdr_output_metadata->data; + + return hdr_metadata->hdmi_metadata_type1.eotf == HDMI_EOTF_SMPTE_ST2084; +} + static void intel_dp_compute_hdr_metadata_infoframe_sdp(struct intel_dp *intel_dp, struct intel_crtc_state *crtc_state, @@ -3175,7 +3199,26 @@ int intel_dp_compute_min_hblank(struct intel_crtc_state *crtc_state, */ min_hblank = min_hblank - 2; - min_hblank = min(10, min_hblank); + /* + * min_hblank formula is undergoing a change, to avoid underrun use the + * recomended value in spec to compare with the calculated one and use the + * minimum value + */ + if (intel_dp_is_uhbr(crtc_state)) { + /* + * Note: Bspec requires a min_hblank of 2 for YCBCR420 + * with compressed bpp 6, but the minimum compressed bpp + * supported by the driver is 8. + */ + drm_WARN_ON(display->drm, + (crtc_state->dsc.compression_enable && + crtc_state->output_format == INTEL_OUTPUT_FORMAT_YCBCR420 && + crtc_state->dsc.compressed_bpp_x16 < fxp_q4_from_int(8))); + min_hblank = min(3, min_hblank); + } else { + min_hblank = min(10, min_hblank); + } + crtc_state->min_hblank = min_hblank; return 0; @@ -3726,6 +3769,9 @@ static void intel_dp_get_pcon_dsc_cap(struct intel_dp *intel_dp) memset(intel_dp->pcon_dsc_dpcd, 0, sizeof(intel_dp->pcon_dsc_dpcd)); + if (!drm_dp_is_branch(intel_dp->dpcd)) + return; + if (drm_dp_dpcd_read(&intel_dp->aux, DP_PCON_DSC_ENCODER, intel_dp->pcon_dsc_dpcd, sizeof(intel_dp->pcon_dsc_dpcd)) < 0) @@ -3833,10 +3879,11 @@ static int intel_dp_pcon_start_frl_training(struct intel_dp *intel_dp) if (ret < 0) return ret; /* Wait for PCON to be FRL Ready */ - wait_for(is_active = drm_dp_pcon_is_frl_ready(&intel_dp->aux) == true, TIMEOUT_FRL_READY_MS); - - if (!is_active) - return -ETIMEDOUT; + ret = poll_timeout_us(is_active = drm_dp_pcon_is_frl_ready(&intel_dp->aux), + is_active, + 1000, TIMEOUT_FRL_READY_MS * 1000, false); + if (ret) + return ret; ret = drm_dp_pcon_frl_configure_1(&intel_dp->aux, max_frl_bw, DP_PCON_ENABLE_SEQUENTIAL_LINK); @@ -3853,12 +3900,11 @@ static int intel_dp_pcon_start_frl_training(struct intel_dp *intel_dp) * Wait for FRL to be completed * Check if the HDMI Link is up and active. 
*/ - wait_for(is_active = - intel_dp_pcon_is_frl_trained(intel_dp, max_frl_bw_mask, &frl_trained_mask), - TIMEOUT_HDMI_LINK_ACTIVE_MS); - - if (!is_active) - return -ETIMEDOUT; + ret = poll_timeout_us(is_active = intel_dp_pcon_is_frl_trained(intel_dp, max_frl_bw_mask, &frl_trained_mask), + is_active, + 1000, TIMEOUT_HDMI_LINK_ACTIVE_MS * 1000, false); + if (ret) + return ret; frl_trained: drm_dbg(display->drm, "FRL_TRAINED_MASK = %u\n", frl_trained_mask); @@ -4268,10 +4314,26 @@ static void intel_edp_mso_init(struct intel_dp *intel_dp) } static void +intel_edp_set_data_override_rates(struct intel_dp *intel_dp) +{ + struct intel_encoder *encoder = &dp_to_dig_port(intel_dp)->base; + int *sink_rates = intel_dp->sink_rates; + int i, count = 0; + + for (i = 0; i < intel_dp->num_sink_rates; i++) { + if (intel_bios_encoder_reject_edp_rate(encoder->devdata, + intel_dp->sink_rates[i])) + continue; + + sink_rates[count++] = intel_dp->sink_rates[i]; + } + intel_dp->num_sink_rates = count; +} + +static void intel_edp_set_sink_rates(struct intel_dp *intel_dp) { struct intel_display *display = to_intel_display(intel_dp); - struct intel_encoder *encoder = &dp_to_dig_port(intel_dp)->base; intel_dp->num_sink_rates = 0; @@ -4297,16 +4359,13 @@ intel_edp_set_sink_rates(struct intel_dp *intel_dp) break; /* - * Some broken eDP sinks illegally declare support for - * HBR3 without TPS4, and are unable to produce a stable - * output. Reject HBR3 when TPS4 is not available. + * Some platforms cannot reliably drive HBR3 rates due to PHY limitations, + * even if the sink advertises support. Reject any sink rates above HBR2 on + * the known machines for stable output. */ - if (rate >= 810000 && !drm_dp_tps4_supported(intel_dp->dpcd)) { - drm_dbg_kms(display->drm, - "[ENCODER:%d:%s] Rejecting HBR3 due to missing TPS4 support\n", - encoder->base.base.id, encoder->base.name); + if (rate > 540000 && + intel_has_quirk(display, QUIRK_EDP_LIMIT_RATE_HBR2)) break; - } intel_dp->sink_rates[i] = rate; } @@ -4321,6 +4380,8 @@ intel_edp_set_sink_rates(struct intel_dp *intel_dp) intel_dp->use_rate_select = true; else intel_dp_set_sink_rates(intel_dp); + + intel_edp_set_data_override_rates(intel_dp); } static bool @@ -5602,14 +5663,9 @@ bool intel_digital_port_connected_locked(struct intel_encoder *encoder) intel_wakeref_t wakeref; with_intel_display_power(display, POWER_DOMAIN_DISPLAY_CORE, wakeref) { - unsigned long wait_expires = jiffies + msecs_to_jiffies_timeout(4); - - do { - is_connected = dig_port->connected(encoder); - if (is_connected || is_glitch_free) - break; - usleep_range(10, 30); - } while (time_before(jiffies, wait_expires)); + poll_timeout_us(is_connected = dig_port->connected(encoder), + is_connected || is_glitch_free, + 30, 4000, false); } return is_connected; diff --git a/drivers/gpu/drm/i915/display/intel_dp.h b/drivers/gpu/drm/i915/display/intel_dp.h index 0657f5681196..f90cfd1dbbd0 100644 --- a/drivers/gpu/drm/i915/display/intel_dp.h +++ b/drivers/gpu/drm/i915/display/intel_dp.h @@ -193,7 +193,7 @@ void intel_dp_wait_source_oui(struct intel_dp *intel_dp); int intel_dp_output_bpp(enum intel_output_format output_format, int bpp); bool intel_dp_compute_config_limits(struct intel_dp *intel_dp, - struct intel_connector *connector, + struct drm_connector_state *conn_state, struct intel_crtc_state *crtc_state, bool respect_downstream_limits, bool dsc, @@ -214,5 +214,6 @@ int intel_dp_compute_min_hblank(struct intel_crtc_state *crtc_state, int intel_dp_dsc_bpp_step_x16(const struct intel_connector *connector); void 
intel_dp_dpcd_set_probe(struct intel_dp *intel_dp, bool force_on_external); +bool intel_dp_in_hdr_mode(const struct drm_connector_state *conn_state); #endif /* __INTEL_DP_H__ */ diff --git a/drivers/gpu/drm/i915/display/intel_dp_aux_backlight.c b/drivers/gpu/drm/i915/display/intel_dp_aux_backlight.c index 5537136c367a..eb05ef4bd9f6 100644 --- a/drivers/gpu/drm/i915/display/intel_dp_aux_backlight.c +++ b/drivers/gpu/drm/i915/display/intel_dp_aux_backlight.c @@ -225,19 +225,6 @@ intel_dp_aux_hdr_set_aux_backlight(const struct drm_connector_state *conn_state, connector->base.base.id, connector->base.name); } -static bool -intel_dp_in_hdr_mode(const struct drm_connector_state *conn_state) -{ - struct hdr_output_metadata *hdr_metadata; - - if (!conn_state->hdr_output_metadata) - return false; - - hdr_metadata = conn_state->hdr_output_metadata->data; - - return hdr_metadata->hdmi_metadata_type1.eotf == HDMI_EOTF_SMPTE_ST2084; -} - static void intel_dp_aux_hdr_set_backlight(const struct drm_connector_state *conn_state, u32 level) { @@ -475,31 +462,6 @@ static u32 intel_dp_aux_vesa_get_backlight(struct intel_connector *connector, en return connector->panel.backlight.level; } -static int -intel_dp_aux_vesa_set_luminance(struct intel_connector *connector, u32 level) -{ - struct intel_dp *intel_dp = enc_to_intel_dp(connector->encoder); - u8 buf[3]; - int ret; - - level = level * 1000; - level &= 0xffffff; - buf[0] = (level & 0x0000ff); - buf[1] = (level & 0x00ff00) >> 8; - buf[2] = (level & 0xff0000) >> 16; - - ret = drm_dp_dpcd_write(&intel_dp->aux, DP_EDP_PANEL_TARGET_LUMINANCE_VALUE, - buf, sizeof(buf)); - if (ret != sizeof(buf)) { - drm_err(intel_dp->aux.drm_dev, - "%s: Failed to set VESA Aux Luminance: %d\n", - intel_dp->aux.name, ret); - return -EINVAL; - } else { - return 0; - } -} - static void intel_dp_aux_vesa_set_backlight(const struct drm_connector_state *conn_state, u32 level) { @@ -507,11 +469,6 @@ intel_dp_aux_vesa_set_backlight(const struct drm_connector_state *conn_state, u3 struct intel_panel *panel = &connector->panel; struct intel_dp *intel_dp = enc_to_intel_dp(connector->encoder); - if (panel->backlight.edp.vesa.luminance_control_support) { - if (!intel_dp_aux_vesa_set_luminance(connector, level)) - return; - } - if (!panel->backlight.edp.vesa.info.aux_set) { const u32 pwm_level = intel_backlight_level_to_pwm(connector, level); @@ -528,18 +485,6 @@ intel_dp_aux_vesa_enable_backlight(const struct intel_crtc_state *crtc_state, struct intel_connector *connector = to_intel_connector(conn_state->connector); struct intel_panel *panel = &connector->panel; struct intel_dp *intel_dp = enc_to_intel_dp(connector->encoder); - int ret; - - if (panel->backlight.edp.vesa.luminance_control_support) { - ret = drm_dp_dpcd_writeb(&intel_dp->aux, DP_EDP_BACKLIGHT_MODE_SET_REGISTER, - DP_EDP_PANEL_LUMINANCE_CONTROL_ENABLE); - - if (ret == 1) - return; - - if (!intel_dp_aux_vesa_set_luminance(connector, level)) - return; - } if (!panel->backlight.edp.vesa.info.aux_enable) { u32 pwm_level; @@ -563,9 +508,6 @@ static void intel_dp_aux_vesa_disable_backlight(const struct drm_connector_state struct intel_panel *panel = &connector->panel; struct intel_dp *intel_dp = enc_to_intel_dp(connector->encoder); - if (panel->backlight.edp.vesa.luminance_control_support) - return; - drm_edp_backlight_disable(&intel_dp->aux, &panel->backlight.edp.vesa.info); if (!panel->backlight.edp.vesa.info.aux_enable) @@ -580,13 +522,41 @@ static int intel_dp_aux_vesa_setup_backlight(struct intel_connector *connector, 
&connector->base.display_info.luminance_range; struct intel_dp *intel_dp = intel_attached_dp(connector); struct intel_panel *panel = &connector->panel; - u16 current_level; + u32 current_level; u8 current_mode; int ret; - if (panel->backlight.edp.vesa.luminance_control_support) { + ret = drm_edp_backlight_init(&intel_dp->aux, &panel->backlight.edp.vesa.info, + luminance_range->max_luminance, + panel->vbt.backlight.pwm_freq_hz, + intel_dp->edp_dpcd, ¤t_level, ¤t_mode, + panel->backlight.edp.vesa.luminance_control_support); + if (ret < 0) + return ret; + + drm_dbg_kms(display->drm, + "[CONNECTOR:%d:%s] AUX VESA backlight enable is controlled through %s\n", + connector->base.base.id, connector->base.name, + dpcd_vs_pwm_str(panel->backlight.edp.vesa.info.aux_enable)); + drm_dbg_kms(display->drm, + "[CONNECTOR:%d:%s] AUX VESA backlight level is controlled through %s\n", + connector->base.base.id, connector->base.name, + dpcd_vs_pwm_str(panel->backlight.edp.vesa.info.aux_set)); + + if (!panel->backlight.edp.vesa.info.aux_set || + !panel->backlight.edp.vesa.info.aux_enable) { + ret = panel->backlight.pwm_funcs->setup(connector, pipe); + if (ret < 0) { + drm_err(display->drm, + "[CONNECTOR:%d:%s] Failed to setup PWM backlight controls for eDP backlight: %d\n", + connector->base.base.id, connector->base.name, ret); + return ret; + } + } + + if (panel->backlight.edp.vesa.info.luminance_set) { if (luminance_range->max_luminance) { - panel->backlight.max = luminance_range->max_luminance; + panel->backlight.max = panel->backlight.edp.vesa.info.max; panel->backlight.min = luminance_range->min_luminance; } else { panel->backlight.max = 512; @@ -597,54 +567,26 @@ static int intel_dp_aux_vesa_setup_backlight(struct intel_connector *connector, drm_dbg_kms(display->drm, "[CONNECTOR:%d:%s] AUX VESA Nits backlight level is controlled through DPCD\n", connector->base.base.id, connector->base.name); - } else { - ret = drm_edp_backlight_init(&intel_dp->aux, &panel->backlight.edp.vesa.info, - panel->vbt.backlight.pwm_freq_hz, intel_dp->edp_dpcd, - ¤t_level, ¤t_mode); - if (ret < 0) - return ret; - - drm_dbg_kms(display->drm, - "[CONNECTOR:%d:%s] AUX VESA backlight enable is controlled through %s\n", - connector->base.base.id, connector->base.name, - dpcd_vs_pwm_str(panel->backlight.edp.vesa.info.aux_enable)); - drm_dbg_kms(display->drm, - "[CONNECTOR:%d:%s] AUX VESA backlight level is controlled through %s\n", - connector->base.base.id, connector->base.name, - dpcd_vs_pwm_str(panel->backlight.edp.vesa.info.aux_set)); - - if (!panel->backlight.edp.vesa.info.aux_set || - !panel->backlight.edp.vesa.info.aux_enable) { - ret = panel->backlight.pwm_funcs->setup(connector, pipe); - if (ret < 0) { - drm_err(display->drm, - "[CONNECTOR:%d:%s] Failed to setup PWM backlight controls for eDP backlight: %d\n", - connector->base.base.id, connector->base.name, ret); - return ret; - } + } else if (panel->backlight.edp.vesa.info.aux_set) { + panel->backlight.max = panel->backlight.edp.vesa.info.max; + panel->backlight.min = 0; + if (current_mode == DP_EDP_BACKLIGHT_CONTROL_MODE_DPCD) { + panel->backlight.level = current_level; + panel->backlight.enabled = panel->backlight.level != 0; + } else { + panel->backlight.level = panel->backlight.max; + panel->backlight.enabled = false; } - - if (panel->backlight.edp.vesa.info.aux_set) { - panel->backlight.max = panel->backlight.edp.vesa.info.max; - panel->backlight.min = 0; - if (current_mode == DP_EDP_BACKLIGHT_CONTROL_MODE_DPCD) { - panel->backlight.level = current_level; - 
panel->backlight.enabled = panel->backlight.level != 0; - } else { - panel->backlight.level = panel->backlight.max; - panel->backlight.enabled = false; - } + } else { + panel->backlight.max = panel->backlight.pwm_level_max; + panel->backlight.min = panel->backlight.pwm_level_min; + if (current_mode == DP_EDP_BACKLIGHT_CONTROL_MODE_PWM) { + panel->backlight.level = + panel->backlight.pwm_funcs->get(connector, pipe); + panel->backlight.enabled = panel->backlight.pwm_enabled; } else { - panel->backlight.max = panel->backlight.pwm_level_max; - panel->backlight.min = panel->backlight.pwm_level_min; - if (current_mode == DP_EDP_BACKLIGHT_CONTROL_MODE_PWM) { - panel->backlight.level = - panel->backlight.pwm_funcs->get(connector, pipe); - panel->backlight.enabled = panel->backlight.pwm_enabled; - } else { - panel->backlight.level = panel->backlight.max; - panel->backlight.enabled = false; - } + panel->backlight.level = panel->backlight.max; + panel->backlight.enabled = false; } } diff --git a/drivers/gpu/drm/i915/display/intel_dp_hdcp.c b/drivers/gpu/drm/i915/display/intel_dp_hdcp.c index 7bd775fb65a0..bd757db85927 100644 --- a/drivers/gpu/drm/i915/display/intel_dp_hdcp.c +++ b/drivers/gpu/drm/i915/display/intel_dp_hdcp.c @@ -805,10 +805,16 @@ intel_dp_mst_hdcp2_stream_encryption(struct intel_connector *connector, enum pipe pipe = (enum pipe)cpu_transcoder; enum port port = dig_port->base.port; int ret; - - drm_WARN_ON(display->drm, enable && - !!(intel_de_read(display, HDCP2_AUTH_STREAM(display, cpu_transcoder, port)) - & AUTH_STREAM_TYPE) != data->streams[0].stream_type); + u32 val; + u8 stream_type; + + if (DISPLAY_VER(display) < 30) { + val = intel_de_read(display, + HDCP2_AUTH_STREAM(display, cpu_transcoder, port)); + stream_type = REG_FIELD_GET(AUTH_STREAM_TYPE_MASK, val); + drm_WARN_ON(display->drm, enable && + stream_type != data->streams[0].stream_type); + } ret = intel_dp_mst_toggle_hdcp_stream_select(connector, enable); if (ret) @@ -824,6 +830,14 @@ intel_dp_mst_hdcp2_stream_encryption(struct intel_connector *connector, return -ETIMEDOUT; } + if (DISPLAY_VER(display) >= 30) { + val = intel_de_read(display, + HDCP2_STREAM_STATUS(display, cpu_transcoder, port)); + stream_type = REG_FIELD_GET(STREAM_TYPE_STATUS_MASK, val); + drm_WARN_ON(display->drm, enable && + stream_type != data->streams[0].stream_type); + } + return 0; } diff --git a/drivers/gpu/drm/i915/display/intel_dp_link_training.c b/drivers/gpu/drm/i915/display/intel_dp_link_training.c index a479b63112ea..27f3716bdc1f 100644 --- a/drivers/gpu/drm/i915/display/intel_dp_link_training.c +++ b/drivers/gpu/drm/i915/display/intel_dp_link_training.c @@ -22,6 +22,7 @@ */ #include <linux/debugfs.h> +#include <linux/iopoll.h> #include <drm/display/drm_dp_helper.h> #include <drm/drm_print.h> @@ -478,12 +479,13 @@ static u8 intel_dp_get_lane_adjust_train(struct intel_dp *intel_dp, _TRAIN_REQ_TX_FFE_ARGS(link_status, 2), \ _TRAIN_REQ_TX_FFE_ARGS(link_status, 3) -void +bool intel_dp_get_adjust_train(struct intel_dp *intel_dp, const struct intel_crtc_state *crtc_state, enum drm_dp_phy dp_phy, const u8 link_status[DP_LINK_STATUS_SIZE]) { + bool changed = false; int lane; if (intel_dp_is_uhbr(crtc_state)) { @@ -502,10 +504,17 @@ intel_dp_get_adjust_train(struct intel_dp *intel_dp, TRAIN_REQ_PREEMPH_ARGS(link_status)); } - for (lane = 0; lane < 4; lane++) - intel_dp->train_set[lane] = - intel_dp_get_lane_adjust_train(intel_dp, crtc_state, - dp_phy, link_status, lane); + for (lane = 0; lane < 4; lane++) { + u8 new = 
intel_dp_get_lane_adjust_train(intel_dp, crtc_state, + dp_phy, link_status, lane); + if (intel_dp->train_set[lane] == new) + continue; + + intel_dp->train_set[lane] = new; + changed = true; + } + + return changed; } static int intel_dp_training_pattern_set_reg(struct intel_dp *intel_dp, @@ -758,6 +767,63 @@ void intel_dp_link_training_set_bw(struct intel_dp *intel_dp, } } +/* + * Pick Training Pattern Sequence (TPS) for channel equalization. 128b/132b TPS2 + * for UHBR+, TPS4 for HBR3 or for 1.4 devices that support it, TPS3 for HBR2 or + * 1.2 devices that support it, TPS2 otherwise. + */ +static u32 intel_dp_training_pattern(struct intel_dp *intel_dp, + const struct intel_crtc_state *crtc_state, + enum drm_dp_phy dp_phy) +{ + struct intel_display *display = to_intel_display(intel_dp); + bool source_tps3, sink_tps3, source_tps4, sink_tps4; + + /* UHBR+ use separate 128b/132b TPS2 */ + if (intel_dp_is_uhbr(crtc_state)) + return DP_TRAINING_PATTERN_2; + + /* + * TPS4 support is mandatory for all downstream devices that + * support HBR3. There are no known eDP panels that support + * TPS4 as of Feb 2018 as per VESA eDP_v1.4b_E1 specification. + * LTTPRs must support TPS4. + */ + source_tps4 = intel_dp_source_supports_tps4(display); + sink_tps4 = dp_phy != DP_PHY_DPRX || + drm_dp_tps4_supported(intel_dp->dpcd); + if (source_tps4 && sink_tps4) { + return DP_TRAINING_PATTERN_4; + } else if (crtc_state->port_clock == 810000) { + if (!source_tps4) + lt_dbg(intel_dp, dp_phy, + "8.1 Gbps link rate without source TPS4 support\n"); + if (!sink_tps4) + lt_dbg(intel_dp, dp_phy, + "8.1 Gbps link rate without sink TPS4 support\n"); + } + + /* + * TPS3 support is mandatory for downstream devices that + * support HBR2. However, not all sinks follow the spec. + */ + source_tps3 = intel_dp_source_supports_tps3(display); + sink_tps3 = dp_phy != DP_PHY_DPRX || + drm_dp_tps3_supported(intel_dp->dpcd); + if (source_tps3 && sink_tps3) { + return DP_TRAINING_PATTERN_3; + } else if (crtc_state->port_clock >= 540000) { + if (!source_tps3) + lt_dbg(intel_dp, dp_phy, + ">=5.4/6.48 Gbps link rate without source TPS3 support\n"); + if (!sink_tps3) + lt_dbg(intel_dp, dp_phy, + ">=5.4/6.48 Gbps link rate without sink TPS3 support\n"); + } + + return DP_TRAINING_PATTERN_2; +} + static void intel_dp_update_link_bw_set(struct intel_dp *intel_dp, const struct intel_crtc_state *crtc_state, u8 link_bw, u8 rate_select) @@ -950,63 +1016,6 @@ intel_dp_link_training_clock_recovery(struct intel_dp *intel_dp, } /* - * Pick Training Pattern Sequence (TPS) for channel equalization. 128b/132b TPS2 - * for UHBR+, TPS4 for HBR3 or for 1.4 devices that support it, TPS3 for HBR2 or - * 1.2 devices that support it, TPS2 otherwise. - */ -static u32 intel_dp_training_pattern(struct intel_dp *intel_dp, - const struct intel_crtc_state *crtc_state, - enum drm_dp_phy dp_phy) -{ - struct intel_display *display = to_intel_display(intel_dp); - bool source_tps3, sink_tps3, source_tps4, sink_tps4; - - /* UHBR+ use separate 128b/132b TPS2 */ - if (intel_dp_is_uhbr(crtc_state)) - return DP_TRAINING_PATTERN_2; - - /* - * TPS4 support is mandatory for all downstream devices that - * support HBR3. There are no known eDP panels that support - * TPS4 as of Feb 2018 as per VESA eDP_v1.4b_E1 specification. - * LTTPRs must support TPS4. 
- */ - source_tps4 = intel_dp_source_supports_tps4(display); - sink_tps4 = dp_phy != DP_PHY_DPRX || - drm_dp_tps4_supported(intel_dp->dpcd); - if (source_tps4 && sink_tps4) { - return DP_TRAINING_PATTERN_4; - } else if (crtc_state->port_clock == 810000) { - if (!source_tps4) - lt_dbg(intel_dp, dp_phy, - "8.1 Gbps link rate without source TPS4 support\n"); - if (!sink_tps4) - lt_dbg(intel_dp, dp_phy, - "8.1 Gbps link rate without sink TPS4 support\n"); - } - - /* - * TPS3 support is mandatory for downstream devices that - * support HBR2. However, not all sinks follow the spec. - */ - source_tps3 = intel_dp_source_supports_tps3(display); - sink_tps3 = dp_phy != DP_PHY_DPRX || - drm_dp_tps3_supported(intel_dp->dpcd); - if (source_tps3 && sink_tps3) { - return DP_TRAINING_PATTERN_3; - } else if (crtc_state->port_clock >= 540000) { - if (!source_tps3) - lt_dbg(intel_dp, dp_phy, - ">=5.4/6.48 Gbps link rate without source TPS3 support\n"); - if (!sink_tps3) - lt_dbg(intel_dp, dp_phy, - ">=5.4/6.48 Gbps link rate without sink TPS3 support\n"); - } - - return DP_TRAINING_PATTERN_2; -} - -/* * Perform the link training channel equalization phase on the given DP PHY * using one of training pattern 2, 3 or 4 depending on the source and * sink capabilities. @@ -1127,16 +1136,19 @@ void intel_dp_stop_link_train(struct intel_dp *intel_dp, { struct intel_display *display = to_intel_display(intel_dp); struct intel_encoder *encoder = &dp_to_dig_port(intel_dp)->base; + int ret; intel_dp->link.active = true; - intel_dp_disable_dpcd_training_pattern(intel_dp, DP_PHY_DPRX); intel_dp_program_link_training_pattern(intel_dp, crtc_state, DP_PHY_DPRX, DP_TRAINING_PATTERN_DISABLE); - if (intel_dp_is_uhbr(crtc_state) && - wait_for(intel_dp_128b132b_intra_hop(intel_dp, crtc_state) == 0, 500)) { - lt_dbg(intel_dp, DP_PHY_DPRX, "128b/132b intra-hop not clearing\n"); + if (intel_dp_is_uhbr(crtc_state)) { + ret = poll_timeout_us(ret = intel_dp_128b132b_intra_hop(intel_dp, crtc_state), + ret == 0, + 500, 500 * 1000, false); + if (ret) + lt_dbg(intel_dp, DP_PHY_DPRX, "128b/132b intra-hop not clearing\n"); } intel_hpd_unblock(encoder); @@ -1371,8 +1383,8 @@ intel_dp_link_train_all_phys(struct intel_dp *intel_dp, if (ret) ret = intel_dp_link_train_phy(intel_dp, crtc_state, DP_PHY_DPRX); - if (intel_dp->set_idle_link_train) - intel_dp->set_idle_link_train(intel_dp, crtc_state); + intel_dp_disable_dpcd_training_pattern(intel_dp, DP_PHY_DPRX); + intel_dp->set_idle_link_train(intel_dp, crtc_state); return ret; } @@ -1574,8 +1586,12 @@ intel_dp_128b132b_link_train(struct intel_dp *intel_dp, int lttpr_count) { bool passed = false; + int ret; - if (wait_for(intel_dp_128b132b_intra_hop(intel_dp, crtc_state) == 0, 500)) { + ret = poll_timeout_us(ret = intel_dp_128b132b_intra_hop(intel_dp, crtc_state), + ret == 0, + 500, 500 * 1000, false); + if (ret) { lt_err(intel_dp, DP_PHY_DPRX, "128b/132b intra-hop not clear\n"); goto out; } @@ -1602,6 +1618,8 @@ out: intel_dp_program_link_training_pattern(intel_dp, crtc_state, DP_PHY_DPRX, DP_TRAINING_PATTERN_2); + intel_dp_disable_dpcd_training_pattern(intel_dp, DP_PHY_DPRX); + return passed; } diff --git a/drivers/gpu/drm/i915/display/intel_dp_link_training.h b/drivers/gpu/drm/i915/display/intel_dp_link_training.h index 46614124569f..1ba22ed6db08 100644 --- a/drivers/gpu/drm/i915/display/intel_dp_link_training.h +++ b/drivers/gpu/drm/i915/display/intel_dp_link_training.h @@ -23,7 +23,7 @@ void intel_dp_link_training_set_bw(struct intel_dp *intel_dp, int link_bw, int rate_select, int lane_count, 
bool enhanced_framing); -void intel_dp_get_adjust_train(struct intel_dp *intel_dp, +bool intel_dp_get_adjust_train(struct intel_dp *intel_dp, const struct intel_crtc_state *crtc_state, enum drm_dp_phy dp_phy, const u8 link_status[DP_LINK_STATUS_SIZE]); diff --git a/drivers/gpu/drm/i915/display/intel_dp_mst.c b/drivers/gpu/drm/i915/display/intel_dp_mst.c index 74497c9a0554..352f7ef29c28 100644 --- a/drivers/gpu/drm/i915/display/intel_dp_mst.c +++ b/drivers/gpu/drm/i915/display/intel_dp_mst.c @@ -611,12 +611,15 @@ adjust_limits_for_dsc_hblank_expansion_quirk(struct intel_dp *intel_dp, static bool mst_stream_compute_config_limits(struct intel_dp *intel_dp, - struct intel_connector *connector, + struct drm_connector_state *conn_state, struct intel_crtc_state *crtc_state, bool dsc, struct link_config_limits *limits) { - if (!intel_dp_compute_config_limits(intel_dp, connector, + struct intel_connector *connector = + to_intel_connector(conn_state->connector); + + if (!intel_dp_compute_config_limits(intel_dp, conn_state, crtc_state, false, dsc, limits)) return false; @@ -665,7 +668,7 @@ static int mst_stream_compute_config(struct intel_encoder *encoder, joiner_needs_dsc = intel_dp_joiner_needs_dsc(display, num_joined_pipes); dsc_needed = joiner_needs_dsc || intel_dp->force_dsc_en || - !mst_stream_compute_config_limits(intel_dp, connector, + !mst_stream_compute_config_limits(intel_dp, conn_state, pipe_config, false, &limits); if (!dsc_needed) { @@ -691,7 +694,7 @@ static int mst_stream_compute_config(struct intel_encoder *encoder, str_yes_no(intel_dp->force_dsc_en)); - if (!mst_stream_compute_config_limits(intel_dp, connector, + if (!mst_stream_compute_config_limits(intel_dp, conn_state, pipe_config, true, &limits)) return -EINVAL; diff --git a/drivers/gpu/drm/i915/display/intel_dp_test.c b/drivers/gpu/drm/i915/display/intel_dp_test.c index 6ed5012c5fac..5cfa1dd411da 100644 --- a/drivers/gpu/drm/i915/display/intel_dp_test.c +++ b/drivers/gpu/drm/i915/display/intel_dp_test.c @@ -6,7 +6,6 @@ #include <drm/display/drm_dp.h> #include <drm/display/drm_dp_helper.h> #include <drm/drm_edid.h> -#include <drm/drm_file.h> #include <drm/drm_print.h> #include <drm/drm_probe_helper.h> @@ -753,13 +752,12 @@ static const struct { void intel_dp_test_debugfs_register(struct intel_display *display) { - struct drm_minor *minor = display->drm->primary; int i; for (i = 0; i < ARRAY_SIZE(intel_display_debugfs_files); i++) { debugfs_create_file(intel_display_debugfs_files[i].name, 0644, - minor->debugfs_root, + display->drm->debugfs_root, display, intel_display_debugfs_files[i].fops); } diff --git a/drivers/gpu/drm/i915/display/intel_dpll_mgr.c b/drivers/gpu/drm/i915/display/intel_dpll_mgr.c index 33e0398120c8..8ea96cc524a1 100644 --- a/drivers/gpu/drm/i915/display/intel_dpll_mgr.c +++ b/drivers/gpu/drm/i915/display/intel_dpll_mgr.c @@ -2046,6 +2046,7 @@ static void bxt_ddi_pll_enable(struct intel_display *display, enum dpio_phy phy = DPIO_PHY0; enum dpio_channel ch = DPIO_CH0; u32 temp; + int ret; bxt_port_to_phy_channel(display, port, &phy, &ch); @@ -2056,8 +2057,10 @@ static void bxt_ddi_pll_enable(struct intel_display *display, intel_de_rmw(display, BXT_PORT_PLL_ENABLE(port), 0, PORT_PLL_POWER_ENABLE); - if (wait_for_us((intel_de_read(display, BXT_PORT_PLL_ENABLE(port)) & - PORT_PLL_POWER_STATE), 200)) + ret = intel_de_wait_custom(display, BXT_PORT_PLL_ENABLE(port), + PORT_PLL_POWER_STATE, PORT_PLL_POWER_STATE, + 200, 0, NULL); + if (ret) drm_err(display->drm, "Power state not set for PLL:%d\n", port); } @@ -2119,8 
+2122,10 @@ static void bxt_ddi_pll_enable(struct intel_display *display, intel_de_rmw(display, BXT_PORT_PLL_ENABLE(port), 0, PORT_PLL_ENABLE); intel_de_posting_read(display, BXT_PORT_PLL_ENABLE(port)); - if (wait_for_us((intel_de_read(display, BXT_PORT_PLL_ENABLE(port)) & PORT_PLL_LOCK), - 200)) + ret = intel_de_wait_custom(display, BXT_PORT_PLL_ENABLE(port), + PORT_PLL_LOCK, PORT_PLL_LOCK, + 200, 0, NULL); + if (ret) drm_err(display->drm, "PLL %d not locked\n", port); if (display->platform.geminilake) { @@ -2144,6 +2149,7 @@ static void bxt_ddi_pll_disable(struct intel_display *display, struct intel_dpll *pll) { enum port port = (enum port)pll->info->id; /* 1:1 port->PLL mapping */ + int ret; intel_de_rmw(display, BXT_PORT_PLL_ENABLE(port), PORT_PLL_ENABLE, 0); intel_de_posting_read(display, BXT_PORT_PLL_ENABLE(port)); @@ -2152,8 +2158,10 @@ static void bxt_ddi_pll_disable(struct intel_display *display, intel_de_rmw(display, BXT_PORT_PLL_ENABLE(port), PORT_PLL_POWER_ENABLE, 0); - if (wait_for_us(!(intel_de_read(display, BXT_PORT_PLL_ENABLE(port)) & - PORT_PLL_POWER_STATE), 200)) + ret = intel_de_wait_custom(display, BXT_PORT_PLL_ENABLE(port), + PORT_PLL_POWER_STATE, 0, + 200, 0, NULL); + if (ret) drm_err(display->drm, "Power state not reset for PLL:%d\n", port); } diff --git a/drivers/gpu/drm/i915/display/intel_dpt.c b/drivers/gpu/drm/i915/display/intel_dpt.c index aea249e2699f..c0a817018d08 100644 --- a/drivers/gpu/drm/i915/display/intel_dpt.c +++ b/drivers/gpu/drm/i915/display/intel_dpt.c @@ -33,8 +33,6 @@ i915_vm_to_dpt(struct i915_address_space *vm) return container_of(vm, struct i915_dpt, vm); } -#define dpt_total_entries(dpt) ((dpt)->vm.total >> PAGE_SHIFT) - static void gen8_set_pte(void __iomem *addr, gen8_pte_t pte) { writeq(pte, addr); @@ -322,5 +320,5 @@ void intel_dpt_destroy(struct i915_address_space *vm) u64 intel_dpt_offset(struct i915_vma *dpt_vma) { - return dpt_vma->node.start; + return i915_vma_offset(dpt_vma); } diff --git a/drivers/gpu/drm/i915/display/intel_drrs.c b/drivers/gpu/drm/i915/display/intel_drrs.c index 3fa94510458d..0fdb32ef241c 100644 --- a/drivers/gpu/drm/i915/display/intel_drrs.c +++ b/drivers/gpu/drm/i915/display/intel_drrs.c @@ -5,7 +5,8 @@ #include <linux/debugfs.h> -#include "i915_drv.h" +#include <drm/drm_print.h> + #include "intel_atomic.h" #include "intel_de.h" #include "intel_display_regs.h" @@ -123,9 +124,9 @@ static void intel_drrs_set_state(struct intel_crtc *crtc, static void intel_drrs_schedule_work(struct intel_crtc *crtc) { - struct drm_i915_private *i915 = to_i915(crtc->base.dev); + struct intel_display *display = to_intel_display(crtc); - mod_delayed_work(i915->unordered_wq, &crtc->drrs.work, msecs_to_jiffies(1000)); + mod_delayed_work(display->wq.unordered, &crtc->drrs.work, msecs_to_jiffies(1000)); } static unsigned int intel_drrs_frontbuffer_bits(const struct intel_crtc_state *crtc_state) diff --git a/drivers/gpu/drm/i915/display/intel_dsb.c b/drivers/gpu/drm/i915/display/intel_dsb.c index 53d8ae3a70e9..dee44d45b668 100644 --- a/drivers/gpu/drm/i915/display/intel_dsb.c +++ b/drivers/gpu/drm/i915/display/intel_dsb.c @@ -4,10 +4,11 @@ * */ +#include <linux/iopoll.h> + #include <drm/drm_print.h> #include <drm/drm_vblank.h> -#include "i915_utils.h" #include "intel_crtc.h" #include "intel_de.h" #include "intel_display_regs.h" @@ -871,8 +872,13 @@ void intel_dsb_wait(struct intel_dsb *dsb) struct intel_crtc *crtc = dsb->crtc; struct intel_display *display = to_intel_display(crtc->base.dev); enum pipe pipe = crtc->pipe; + bool is_busy; + 
int ret; - if (wait_for(!is_dsb_busy(display, pipe, dsb->id), 1)) { + ret = poll_timeout_us(is_busy = is_dsb_busy(display, pipe, dsb->id), + !is_busy, + 100, 1000, false); + if (ret) { u32 offset = intel_dsb_buffer_ggtt_offset(&dsb->dsb_buf); intel_de_write_fw(display, DSB_CTRL(pipe, dsb->id), diff --git a/drivers/gpu/drm/i915/display/intel_dsi_vbt.c b/drivers/gpu/drm/i915/display/intel_dsi_vbt.c index e6a851d276f8..23402408e172 100644 --- a/drivers/gpu/drm/i915/display/intel_dsi_vbt.c +++ b/drivers/gpu/drm/i915/display/intel_dsi_vbt.c @@ -777,7 +777,7 @@ bool intel_dsi_vbt_init(struct intel_dsi *intel_dsi, u16 panel_id) intel_dsi->init_count = mipi_config->master_init_timer; intel_dsi->bw_timer = mipi_config->dbi_bw_timer; intel_dsi->video_frmt_cfg_bits = - mipi_config->bta_enabled ? DISABLE_VIDEO_BTA : 0; + mipi_config->bta_disable ? DISABLE_VIDEO_BTA : 0; intel_dsi->bgr_enabled = mipi_config->rgb_flip; /* Starting point, adjusted depending on dual link and burst mode */ diff --git a/drivers/gpu/drm/i915/display/intel_dsi_vbt_defs.h b/drivers/gpu/drm/i915/display/intel_dsi_vbt_defs.h new file mode 100644 index 000000000000..edc7331dcca2 --- /dev/null +++ b/drivers/gpu/drm/i915/display/intel_dsi_vbt_defs.h @@ -0,0 +1,197 @@ +/* SPDX-License-Identifier: MIT */ +/* Copyright © 2025 Intel Corporation */ + +#ifndef __INTEL_DSI_VBT_DEFS_H__ +#define __INTEL_DSI_VBT_DEFS_H__ + +#include <linux/types.h> + +/* + * MIPI Sequence Block definitions + * + * Note the VBT spec has AssertReset / DeassertReset swapped from their + * usual naming, we use the proper names here to avoid confusion when + * reading the code. + */ +enum mipi_seq { + MIPI_SEQ_END = 0, + MIPI_SEQ_DEASSERT_RESET, /* Spec says MipiAssertResetPin */ + MIPI_SEQ_INIT_OTP, + MIPI_SEQ_DISPLAY_ON, + MIPI_SEQ_DISPLAY_OFF, + MIPI_SEQ_ASSERT_RESET, /* Spec says MipiDeassertResetPin */ + MIPI_SEQ_BACKLIGHT_ON, /* sequence block v2+ */ + MIPI_SEQ_BACKLIGHT_OFF, /* sequence block v2+ */ + MIPI_SEQ_TEAR_ON, /* sequence block v2+ */ + MIPI_SEQ_TEAR_OFF, /* sequence block v3+ */ + MIPI_SEQ_POWER_ON, /* sequence block v3+ */ + MIPI_SEQ_POWER_OFF, /* sequence block v3+ */ + MIPI_SEQ_MAX +}; + +enum mipi_seq_element { + MIPI_SEQ_ELEM_END = 0, + MIPI_SEQ_ELEM_SEND_PKT, + MIPI_SEQ_ELEM_DELAY, + MIPI_SEQ_ELEM_GPIO, + MIPI_SEQ_ELEM_I2C, /* sequence block v2+ */ + MIPI_SEQ_ELEM_SPI, /* sequence block v3+ */ + MIPI_SEQ_ELEM_PMIC, /* sequence block v3+ */ + MIPI_SEQ_ELEM_MAX +}; + +#define MIPI_DSI_UNDEFINED_PANEL_ID 0 +#define MIPI_DSI_GENERIC_PANEL_ID 1 + +struct mipi_config { + u16 panel_id; + + /* General Params */ + struct { + u32 enable_dithering:1; + u32 rsvd1:1; + u32 is_bridge:1; + + u32 panel_arch_type:2; + u32 is_cmd_mode:1; + +#define NON_BURST_SYNC_PULSE 0x1 +#define NON_BURST_SYNC_EVENTS 0x2 +#define BURST_MODE 0x3 + u32 video_transfer_mode:2; + + u32 cabc_supported:1; +#define PPS_BLC_PMIC 0 +#define PPS_BLC_SOC 1 + u32 pwm_blc:1; + +#define PIXEL_FORMAT_RGB565 0x1 +#define PIXEL_FORMAT_RGB666 0x2 +#define PIXEL_FORMAT_RGB666_LOOSELY_PACKED 0x3 +#define PIXEL_FORMAT_RGB888 0x4 + u32 videomode_color_format:4; + +#define ENABLE_ROTATION_0 0x0 +#define ENABLE_ROTATION_90 0x1 +#define ENABLE_ROTATION_180 0x2 +#define ENABLE_ROTATION_270 0x3 + u32 rotation:2; + u32 bta_disable:1; + u32 rsvd2:15; + } __packed; + + /* Port Desc */ + struct { +#define DUAL_LINK_NOT_SUPPORTED 0 +#define DUAL_LINK_FRONT_BACK 1 +#define DUAL_LINK_PIXEL_ALT 2 + u16 dual_link:2; + u16 lane_cnt:2; + u16 pixel_overlap:3; + u16 rgb_flip:1; +#define DL_DCS_PORT_A 0x00 
+#define DL_DCS_PORT_C 0x01 +#define DL_DCS_PORT_A_AND_C 0x02 + u16 dl_dcs_cabc_ports:2; + u16 dl_dcs_backlight_ports:2; + u16 port_sync:1; /* 219-230 */ + u16 rsvd3:3; + } __packed; + + /* DSI Controller Parameters */ + struct { + u16 dsi_usage:1; + u16 rsvd4:15; + } __packed; + + u8 rsvd5; + u32 target_burst_mode_freq; + u32 dsi_ddr_clk; + u32 bridge_ref_clk; + + /* LP Byte Clock */ + struct { +#define BYTE_CLK_SEL_20MHZ 0 +#define BYTE_CLK_SEL_10MHZ 1 +#define BYTE_CLK_SEL_5MHZ 2 + u8 byte_clk_sel:2; + u8 rsvd6:6; + } __packed; + + /* DPhy Flags */ + struct { + u16 dphy_param_valid:1; + u16 eot_pkt_disabled:1; + u16 enable_clk_stop:1; + u16 blanking_packets_during_bllp:1; /* 219+ */ + u16 lp_clock_during_lpm:1; /* 219+ */ + u16 rsvd7:11; + } __packed; + + u32 hs_tx_timeout; + u32 lp_rx_timeout; + u32 turn_around_timeout; + u32 device_reset_timer; + u32 master_init_timer; + u32 dbi_bw_timer; + u32 lp_byte_clk_val; + + /* DPhy Params */ + struct { + u32 prepare_cnt:6; + u32 rsvd8:2; + u32 clk_zero_cnt:8; + u32 trail_cnt:5; + u32 rsvd9:3; + u32 exit_zero_cnt:6; + u32 rsvd10:2; + } __packed; + + u32 clk_lane_switch_cnt; + u32 hl_switch_cnt; + + u32 rsvd11[6]; + + /* timings based on dphy spec */ + u8 tclk_miss; + u8 tclk_post; + u8 rsvd12; + u8 tclk_pre; + u8 tclk_prepare; + u8 tclk_settle; + u8 tclk_term_enable; + u8 tclk_trail; + u16 tclk_prepare_clkzero; + u8 rsvd13; + u8 td_term_enable; + u8 teot; + u8 ths_exit; + u8 ths_prepare; + u16 ths_prepare_hszero; + u8 rsvd14; + u8 ths_settle; + u8 ths_skip; + u8 ths_trail; + u8 tinit; + u8 tlpx; + u8 rsvd15[3]; + + /* GPIOs */ + u8 panel_enable; + u8 bl_enable; + u8 pwm_enable; + u8 reset_r_n; + u8 pwr_down_r; + u8 stdby_r_n; +} __packed; + +/* all delays have a unit of 100us */ +struct mipi_pps_data { + u16 panel_on_delay; + u16 bl_enable_delay; + u16 bl_disable_delay; + u16 panel_off_delay; + u16 panel_power_cycle_delay; +} __packed; + +#endif /* __INTEL_DSI_VBT_DEFS_H__ */ diff --git a/drivers/gpu/drm/i915/display/intel_encoder.c b/drivers/gpu/drm/i915/display/intel_encoder.c index bad452ad979a..2ffe1f251ef8 100644 --- a/drivers/gpu/drm/i915/display/intel_encoder.c +++ b/drivers/gpu/drm/i915/display/intel_encoder.c @@ -5,10 +5,10 @@ #include <linux/workqueue.h> -#include "i915_drv.h" #include "intel_display_core.h" #include "intel_display_types.h" #include "intel_encoder.h" +#include "intel_hotplug.h" static void intel_encoder_link_check_work_fn(struct work_struct *work) { @@ -32,12 +32,34 @@ void intel_encoder_link_check_flush_work(struct intel_encoder *encoder) void intel_encoder_link_check_queue_work(struct intel_encoder *encoder, int delay_ms) { - struct drm_i915_private *i915 = to_i915(encoder->base.dev); + struct intel_display *display = to_intel_display(encoder); - mod_delayed_work(i915->unordered_wq, + mod_delayed_work(display->wq.unordered, &encoder->link_check_work, msecs_to_jiffies(delay_ms)); } +void intel_encoder_unblock_all_hpds(struct intel_display *display) +{ + struct intel_encoder *encoder; + + if (!HAS_DISPLAY(display)) + return; + + for_each_intel_encoder(display->drm, encoder) + intel_hpd_unblock(encoder); +} + +void intel_encoder_block_all_hpds(struct intel_display *display) +{ + struct intel_encoder *encoder; + + if (!HAS_DISPLAY(display)) + return; + + for_each_intel_encoder(display->drm, encoder) + intel_hpd_block(encoder); +} + void intel_encoder_suspend_all(struct intel_display *display) { struct intel_encoder *encoder; @@ -81,3 +103,21 @@ void intel_encoder_shutdown_all(struct intel_display *display) if 
(encoder->shutdown_complete) encoder->shutdown_complete(encoder); } + +struct intel_digital_port *intel_dig_port_alloc(void) +{ + struct intel_digital_port *dig_port; + + dig_port = kzalloc(sizeof(*dig_port), GFP_KERNEL); + if (!dig_port) + return NULL; + + dig_port->hdmi.hdmi_reg = INVALID_MMIO_REG; + dig_port->dp.output_reg = INVALID_MMIO_REG; + dig_port->aux_ch = AUX_CH_NONE; + dig_port->max_lanes = 4; + + mutex_init(&dig_port->hdcp.mutex); + + return dig_port; +} diff --git a/drivers/gpu/drm/i915/display/intel_encoder.h b/drivers/gpu/drm/i915/display/intel_encoder.h index 3fa5589f0b1c..ace0fe1a8f27 100644 --- a/drivers/gpu/drm/i915/display/intel_encoder.h +++ b/drivers/gpu/drm/i915/display/intel_encoder.h @@ -6,6 +6,7 @@ #ifndef __INTEL_ENCODER_H__ #define __INTEL_ENCODER_H__ +struct intel_digital_port; struct intel_display; struct intel_encoder; @@ -17,4 +18,9 @@ void intel_encoder_link_check_flush_work(struct intel_encoder *encoder); void intel_encoder_suspend_all(struct intel_display *display); void intel_encoder_shutdown_all(struct intel_display *display); +void intel_encoder_block_all_hpds(struct intel_display *display); +void intel_encoder_unblock_all_hpds(struct intel_display *display); + +struct intel_digital_port *intel_dig_port_alloc(void); + #endif /* __INTEL_ENCODER_H__ */ diff --git a/drivers/gpu/drm/i915/display/intel_fb.c b/drivers/gpu/drm/i915/display/intel_fb.c index a5906cb4900c..c48384e58ea1 100644 --- a/drivers/gpu/drm/i915/display/intel_fb.c +++ b/drivers/gpu/drm/i915/display/intel_fb.c @@ -11,7 +11,7 @@ #include <drm/drm_modeset_helper.h> #include "i915_drv.h" -#include "intel_atomic_plane.h" +#include "i915_utils.h" #include "intel_bo.h" #include "intel_display.h" #include "intel_display_core.h" @@ -20,6 +20,8 @@ #include "intel_fb.h" #include "intel_fb_bo.h" #include "intel_frontbuffer.h" +#include "intel_panic.h" +#include "intel_plane.h" #define check_array_bounds(display, a, i) drm_WARN_ON((display)->drm, (i) >= ARRAY_SIZE(a)) @@ -422,21 +424,22 @@ unsigned int intel_fb_modifier_to_tiling(u64 fb_modifier) /** * intel_fb_get_format_info: Get a modifier specific format information - * @cmd: FB add command structure + * @pixel_format: pixel format + * @modifier: modifier * * Returns: - * Returns the format information for @cmd->pixel_format specific to @cmd->modifier[0], + * Returns the format information for @pixel_format specific to @modifier, * or %NULL if the modifier doesn't override the format. 
*/ const struct drm_format_info * -intel_fb_get_format_info(const struct drm_mode_fb_cmd2 *cmd) +intel_fb_get_format_info(u32 pixel_format, u64 modifier) { - const struct intel_modifier_desc *md = lookup_modifier_or_null(cmd->modifier[0]); + const struct intel_modifier_desc *md = lookup_modifier_or_null(modifier); if (!md || !md->formats) return NULL; - return lookup_format_info(md->formats, md->format_count, cmd->pixel_format); + return lookup_format_info(md->formats, md->format_count, pixel_format); } static bool plane_caps_contain_any(u8 caps, u8 mask) @@ -1286,10 +1289,10 @@ bool intel_fb_needs_pot_stride_remap(const struct intel_framebuffer *fb) bool intel_plane_uses_fence(const struct intel_plane_state *plane_state) { + struct intel_display *display = to_intel_display(plane_state); struct intel_plane *plane = to_intel_plane(plane_state->uapi.plane); - struct drm_i915_private *dev_priv = to_i915(plane->base.dev); - return DISPLAY_VER(dev_priv) < 4 || + return DISPLAY_VER(display) < 4 || (plane->fbc && !plane_state->no_fbc_reason && plane_state->view.gtt.type == I915_GTT_VIEW_NORMAL); } @@ -2110,10 +2113,11 @@ static void intel_user_framebuffer_destroy(struct drm_framebuffer *fb) if (intel_fb_uses_dpt(fb)) intel_dpt_destroy(intel_fb->dpt_vm); - intel_frontbuffer_put(intel_fb->frontbuffer); - intel_fb_bo_framebuffer_fini(intel_fb_bo(fb)); + intel_frontbuffer_put(intel_fb->frontbuffer); + + kfree(intel_fb->panic); kfree(intel_fb); } @@ -2206,24 +2210,33 @@ static const struct drm_framebuffer_funcs intel_fb_funcs = { int intel_framebuffer_init(struct intel_framebuffer *intel_fb, struct drm_gem_object *obj, + const struct drm_format_info *info, struct drm_mode_fb_cmd2 *mode_cmd) { struct intel_display *display = to_intel_display(obj->dev); struct drm_framebuffer *fb = &intel_fb->base; u32 max_stride; - int ret = -EINVAL; + int ret; int i; - ret = intel_fb_bo_framebuffer_init(fb, obj, mode_cmd); - if (ret) - return ret; + intel_fb->panic = intel_panic_alloc(); + if (!intel_fb->panic) + return -ENOMEM; + /* + * intel_frontbuffer_get() must be done before + * intel_fb_bo_framebuffer_init() to avoid set_tiling vs. addfb race. + */ intel_fb->frontbuffer = intel_frontbuffer_get(obj); if (!intel_fb->frontbuffer) { ret = -ENOMEM; - goto err; + goto err_free_panic; } + ret = intel_fb_bo_framebuffer_init(fb, obj, mode_cmd); + if (ret) + goto err_frontbuffer_put; + ret = -EINVAL; if (!drm_any_plane_has_format(display->drm, mode_cmd->pixel_format, @@ -2231,7 +2244,7 @@ int intel_framebuffer_init(struct intel_framebuffer *intel_fb, drm_dbg_kms(display->drm, "unsupported pixel format %p4cc / modifier 0x%llx\n", &mode_cmd->pixel_format, mode_cmd->modifier[0]); - goto err_frontbuffer_put; + goto err_bo_framebuffer_fini; } max_stride = intel_fb_max_stride(display, mode_cmd->pixel_format, @@ -2242,7 +2255,7 @@ int intel_framebuffer_init(struct intel_framebuffer *intel_fb, mode_cmd->modifier[0] != DRM_FORMAT_MOD_LINEAR ? "tiled" : "linear", mode_cmd->pitches[0], max_stride); - goto err_frontbuffer_put; + goto err_bo_framebuffer_fini; } /* FIXME need to adjust LINOFF/TILEOFF accordingly. 
*/ @@ -2250,17 +2263,17 @@ int intel_framebuffer_init(struct intel_framebuffer *intel_fb, drm_dbg_kms(display->drm, "plane 0 offset (0x%08x) must be 0\n", mode_cmd->offsets[0]); - goto err_frontbuffer_put; + goto err_bo_framebuffer_fini; } - drm_helper_mode_fill_fb_struct(display->drm, fb, mode_cmd); + drm_helper_mode_fill_fb_struct(display->drm, fb, info, mode_cmd); for (i = 0; i < fb->format->num_planes; i++) { unsigned int stride_alignment; if (mode_cmd->handles[i] != mode_cmd->handles[0]) { drm_dbg_kms(display->drm, "bad plane %d handle\n", i); - goto err_frontbuffer_put; + goto err_bo_framebuffer_fini; } stride_alignment = intel_fb_stride_alignment(fb, i); @@ -2268,7 +2281,7 @@ int intel_framebuffer_init(struct intel_framebuffer *intel_fb, drm_dbg_kms(display->drm, "plane %d pitch (%d) must be at least %u byte aligned\n", i, fb->pitches[i], stride_alignment); - goto err_frontbuffer_put; + goto err_bo_framebuffer_fini; } if (intel_fb_is_gen12_ccs_aux_plane(fb, i)) { @@ -2278,7 +2291,7 @@ int intel_framebuffer_init(struct intel_framebuffer *intel_fb, drm_dbg_kms(display->drm, "ccs aux plane %d pitch (%d) must be %d\n", i, fb->pitches[i], ccs_aux_stride); - goto err_frontbuffer_put; + goto err_bo_framebuffer_fini; } } @@ -2287,7 +2300,7 @@ int intel_framebuffer_init(struct intel_framebuffer *intel_fb, ret = intel_fill_fb_info(display, intel_fb); if (ret) - goto err_frontbuffer_put; + goto err_bo_framebuffer_fini; if (intel_fb_uses_dpt(fb)) { struct i915_address_space *vm; @@ -2313,16 +2326,20 @@ int intel_framebuffer_init(struct intel_framebuffer *intel_fb, err_free_dpt: if (intel_fb_uses_dpt(fb)) intel_dpt_destroy(intel_fb->dpt_vm); +err_bo_framebuffer_fini: + intel_fb_bo_framebuffer_fini(obj); err_frontbuffer_put: intel_frontbuffer_put(intel_fb->frontbuffer); -err: - intel_fb_bo_framebuffer_fini(obj); +err_free_panic: + kfree(intel_fb->panic); + return ret; } struct drm_framebuffer * intel_user_framebuffer_create(struct drm_device *dev, struct drm_file *filp, + const struct drm_format_info *info, const struct drm_mode_fb_cmd2 *user_mode_cmd) { struct drm_framebuffer *fb; @@ -2333,24 +2350,36 @@ intel_user_framebuffer_create(struct drm_device *dev, if (IS_ERR(obj)) return ERR_CAST(obj); - fb = intel_framebuffer_create(obj, &mode_cmd); + fb = intel_framebuffer_create(obj, info, &mode_cmd); drm_gem_object_put(obj); return fb; } +struct intel_framebuffer *intel_framebuffer_alloc(void) +{ + struct intel_framebuffer *intel_fb; + + intel_fb = kzalloc(sizeof(*intel_fb), GFP_KERNEL); + if (!intel_fb) + return NULL; + + return intel_fb; +} + struct drm_framebuffer * intel_framebuffer_create(struct drm_gem_object *obj, + const struct drm_format_info *info, struct drm_mode_fb_cmd2 *mode_cmd) { struct intel_framebuffer *intel_fb; int ret; - intel_fb = kzalloc(sizeof(*intel_fb), GFP_KERNEL); + intel_fb = intel_framebuffer_alloc(); if (!intel_fb) return ERR_PTR(-ENOMEM); - ret = intel_framebuffer_init(intel_fb, obj, mode_cmd); + ret = intel_framebuffer_init(intel_fb, obj, info, mode_cmd); if (ret) goto err; diff --git a/drivers/gpu/drm/i915/display/intel_fb.h b/drivers/gpu/drm/i915/display/intel_fb.h index bdd76b372957..22514d5f2bb6 100644 --- a/drivers/gpu/drm/i915/display/intel_fb.h +++ b/drivers/gpu/drm/i915/display/intel_fb.h @@ -47,7 +47,7 @@ u64 *intel_fb_plane_get_modifiers(struct intel_display *display, bool intel_fb_plane_supports_modifier(struct intel_plane *plane, u64 modifier); const struct drm_format_info * -intel_fb_get_format_info(const struct drm_mode_fb_cmd2 *cmd); 
+intel_fb_get_format_info(u32 pixel_format, u64 modifier); bool intel_format_info_is_yuv_semiplanar(const struct drm_format_info *info, @@ -102,13 +102,19 @@ void intel_add_fb_offsets(int *x, int *y, int intel_framebuffer_init(struct intel_framebuffer *ifb, struct drm_gem_object *obj, + const struct drm_format_info *info, struct drm_mode_fb_cmd2 *mode_cmd); + +struct intel_framebuffer *intel_framebuffer_alloc(void); + struct drm_framebuffer * intel_framebuffer_create(struct drm_gem_object *obj, + const struct drm_format_info *info, struct drm_mode_fb_cmd2 *mode_cmd); struct drm_framebuffer * intel_user_framebuffer_create(struct drm_device *dev, struct drm_file *filp, + const struct drm_format_info *info, const struct drm_mode_fb_cmd2 *user_mode_cmd); bool intel_fb_modifier_uses_dpt(struct intel_display *display, u64 modifier); diff --git a/drivers/gpu/drm/i915/display/intel_fb_pin.c b/drivers/gpu/drm/i915/display/intel_fb_pin.c index 98a61a7b0b93..45af04cb0fb2 100644 --- a/drivers/gpu/drm/i915/display/intel_fb_pin.c +++ b/drivers/gpu/drm/i915/display/intel_fb_pin.c @@ -11,13 +11,14 @@ #include "gem/i915_gem_object.h" #include "i915_drv.h" -#include "intel_atomic_plane.h" +#include "i915_vma.h" #include "intel_display_core.h" #include "intel_display_rpm.h" #include "intel_display_types.h" #include "intel_dpt.h" #include "intel_fb.h" #include "intel_fb_pin.h" +#include "intel_plane.h" static struct i915_vma * intel_fb_pin_to_dpt(const struct drm_framebuffer *fb, @@ -151,7 +152,7 @@ intel_fb_pin_to_ggtt(const struct drm_framebuffer *fb, * happy to scanout from anywhere within its global aperture. */ pinctl = 0; - if (HAS_GMCH(dev_priv)) + if (HAS_GMCH(display)) pinctl |= PIN_MAPPABLE; i915_gem_ww_ctx_init(&ww, true); @@ -192,7 +193,7 @@ retry: * mode that matches the user configuration. */ ret = i915_vma_pin_fence(vma); - if (ret != 0 && DISPLAY_VER(dev_priv) < 4) { + if (ret != 0 && DISPLAY_VER(display) < 4) { i915_vma_unpin(vma); goto err_unpin; } @@ -260,6 +261,7 @@ intel_plane_fb_vtd_guard(const struct intel_plane_state *plane_state) int intel_plane_pin_fb(struct intel_plane_state *plane_state, const struct intel_plane_state *old_plane_state) { + struct intel_display *display = to_intel_display(plane_state); struct intel_plane *plane = to_intel_plane(plane_state->uapi.plane); const struct intel_framebuffer *fb = to_intel_framebuffer(plane_state->hw.fb); @@ -277,17 +279,6 @@ int intel_plane_pin_fb(struct intel_plane_state *plane_state, plane_state->ggtt_vma = vma; - /* - * Pre-populate the dma address before we enter the vblank - * evade critical section as i915_gem_object_get_dma_address() - * will trigger might_sleep() even if it won't actually sleep, - * which is the case when the fb has already been pinned. - */ - if (intel_plane_needs_physical(plane)) { - struct drm_i915_gem_object *obj = to_intel_bo(intel_fb_bo(&fb->base)); - - plane_state->phys_dma_addr = i915_gem_object_get_dma_address(obj, 0); - } } else { unsigned int alignment = intel_plane_fb_min_alignment(plane_state); @@ -309,6 +300,28 @@ int intel_plane_pin_fb(struct intel_plane_state *plane_state, plane_state->dpt_vma = vma; WARN_ON(plane_state->ggtt_vma == plane_state->dpt_vma); + + /* + * The DPT object contains only one vma, and there is no VT-d + * guard, so the VMA's offset within the DPT is always 0. 
+ */ + drm_WARN_ON(display->drm, intel_dpt_offset(plane_state->dpt_vma)); + } + + /* + * Pre-populate the dma address before we enter the vblank + * evade critical section as i915_gem_object_get_dma_address() + * will trigger might_sleep() even if it won't actually sleep, + * which is the case when the fb has already been pinned. + */ + if (intel_plane_needs_physical(plane)) { + struct drm_i915_gem_object *obj = to_intel_bo(intel_fb_bo(&fb->base)); + + plane_state->surf = i915_gem_object_get_dma_address(obj, 0) + + plane->surf_offset(plane_state); + } else { + plane_state->surf = i915_ggtt_offset(plane_state->ggtt_vma) + + plane->surf_offset(plane_state); } return 0; @@ -334,3 +347,8 @@ void intel_plane_unpin_fb(struct intel_plane_state *old_plane_state) intel_dpt_unpin_from_ggtt(fb->dpt_vm); } } + +void intel_fb_get_map(struct i915_vma *vma, struct iosys_map *map) +{ + iosys_map_set_vaddr_iomem(map, i915_vma_get_iomap(vma)); +} diff --git a/drivers/gpu/drm/i915/display/intel_fb_pin.h b/drivers/gpu/drm/i915/display/intel_fb_pin.h index 01770dbba2e0..81ab79da1af7 100644 --- a/drivers/gpu/drm/i915/display/intel_fb_pin.h +++ b/drivers/gpu/drm/i915/display/intel_fb_pin.h @@ -12,6 +12,7 @@ struct drm_framebuffer; struct i915_vma; struct intel_plane_state; struct i915_gtt_view; +struct iosys_map; struct i915_vma * intel_fb_pin_to_ggtt(const struct drm_framebuffer *fb, @@ -27,5 +28,6 @@ void intel_fb_unpin_vma(struct i915_vma *vma, unsigned long flags); int intel_plane_pin_fb(struct intel_plane_state *new_plane_state, const struct intel_plane_state *old_plane_state); void intel_plane_unpin_fb(struct intel_plane_state *old_plane_state); +void intel_fb_get_map(struct i915_vma *vma, struct iosys_map *map); #endif diff --git a/drivers/gpu/drm/i915/display/intel_fbc.c b/drivers/gpu/drm/i915/display/intel_fbc.c index ec1ef8694c35..0d380c825791 100644 --- a/drivers/gpu/drm/i915/display/intel_fbc.c +++ b/drivers/gpu/drm/i915/display/intel_fbc.c @@ -98,11 +98,7 @@ struct intel_fbc { struct intel_display *display; const struct intel_fbc_funcs *funcs; - /* - * This is always the inner lock when overlapping with - * struct_mutex and it's the outer lock when overlapping - * with stolen_lock. 
- */ + /* This is always the outer lock when overlapping with stolen_lock */ struct mutex lock; unsigned int busy_bits; @@ -383,11 +379,11 @@ static void i8xx_fbc_program_cfb(struct intel_fbc *fbc) struct drm_i915_private *i915 = to_i915(display->drm); drm_WARN_ON(display->drm, - range_overflows_end_t(u64, i915_gem_stolen_area_address(i915), + range_end_overflows_t(u64, i915_gem_stolen_area_address(i915), i915_gem_stolen_node_offset(&fbc->compressed_fb), U32_MAX)); drm_WARN_ON(display->drm, - range_overflows_end_t(u64, i915_gem_stolen_area_address(i915), + range_end_overflows_t(u64, i915_gem_stolen_area_address(i915), i915_gem_stolen_node_offset(&fbc->compressed_llb), U32_MAX)); intel_de_write(display, FBC_CFB_BASE, @@ -552,10 +548,6 @@ static void ilk_fbc_deactivate(struct intel_fbc *fbc) if (dpfc_ctl & DPFC_CTL_EN) { dpfc_ctl &= ~DPFC_CTL_EN; intel_de_write(display, ILK_DPFC_CONTROL(fbc->id), dpfc_ctl); - - /* wa_18038517565 Enable DPFC clock gating after FBC disable */ - if (display->platform.dg2 || DISPLAY_VER(display) >= 14) - fbc_compressor_clkgate_disable_wa(fbc, false); } } @@ -1464,7 +1456,7 @@ static int intel_fbc_check_plane(struct intel_atomic_state *state, return 0; } - if (intel_display_needs_wa_16023588340(display)) { + if (intel_display_wa(display, 16023588340)) { plane_state->no_fbc_reason = "Wa_16023588340"; return 0; } @@ -1554,14 +1546,14 @@ static int intel_fbc_check_plane(struct intel_atomic_state *state, * having a Y offset that isn't divisible by 4 causes FIFO underrun * and screen flicker. */ - if (DISPLAY_VER(display) >= 9 && + if (IS_DISPLAY_VER(display, 9, 12) && plane_state->view.color_plane[0].y & 3) { plane_state->no_fbc_reason = "plane start Y offset misaligned"; return 0; } /* Wa_22010751166: icl, ehl, tgl, dg1, rkl */ - if (DISPLAY_VER(display) >= 11 && + if (IS_DISPLAY_VER(display, 9, 12) && (plane_state->view.color_plane[0].y + (drm_rect_height(&plane_state->uapi.src) >> 16)) & 3) { plane_state->no_fbc_reason = "plane end Y offset misaligned"; @@ -1576,7 +1568,7 @@ static int intel_fbc_check_plane(struct intel_atomic_state *state, if (IS_ERR(cdclk_state)) return PTR_ERR(cdclk_state); - if (crtc_state->pixel_rate >= cdclk_state->logical.cdclk * 95 / 100) { + if (crtc_state->pixel_rate >= intel_cdclk_logical(cdclk_state) * 95 / 100) { plane_state->no_fbc_reason = "pixel rate too high"; return 0; } @@ -1710,6 +1702,10 @@ static void __intel_fbc_disable(struct intel_fbc *fbc) __intel_fbc_cleanup_cfb(fbc); + /* wa_18038517565 Enable DPFC clock gating after FBC disable */ + if (display->platform.dg2 || DISPLAY_VER(display) >= 14) + fbc_compressor_clkgate_disable_wa(fbc, false); + fbc->state.plane = NULL; fbc->flip_pending = false; fbc->busy_bits = 0; @@ -2011,7 +2007,7 @@ void intel_fbc_reset_underrun(struct intel_display *display) static void __intel_fbc_handle_fifo_underrun_irq(struct intel_fbc *fbc) { - struct drm_i915_private *i915 = to_i915(fbc->display->drm); + struct intel_display *display = fbc->display; /* * There's no guarantee that underrun_detected won't be set to true @@ -2024,7 +2020,7 @@ static void __intel_fbc_handle_fifo_underrun_irq(struct intel_fbc *fbc) if (READ_ONCE(fbc->underrun_detected)) return; - queue_work(i915->unordered_wq, &fbc->underrun_work); + queue_work(display->wq.unordered, &fbc->underrun_work); } /** @@ -2240,10 +2236,9 @@ void intel_fbc_crtc_debugfs_add(struct intel_crtc *crtc) /* FIXME: remove this once igt is on board with per-crtc stuff */ void intel_fbc_debugfs_register(struct intel_display *display) { - struct 
drm_minor *minor = display->drm->primary; struct intel_fbc *fbc; fbc = display->fbc[INTEL_FBC_A]; if (fbc) - intel_fbc_debugfs_add(fbc, minor->debugfs_root); + intel_fbc_debugfs_add(fbc, display->drm->debugfs_root); } diff --git a/drivers/gpu/drm/i915/display/intel_fbdev.c b/drivers/gpu/drm/i915/display/intel_fbdev.c index 2dc4029d71ed..7c4709d58aa3 100644 --- a/drivers/gpu/drm/i915/display/intel_fbdev.c +++ b/drivers/gpu/drm/i915/display/intel_fbdev.c @@ -512,3 +512,8 @@ struct i915_vma *intel_fbdev_vma_pointer(struct intel_fbdev *fbdev) { return fbdev ? fbdev->vma : NULL; } + +void intel_fbdev_get_map(struct intel_fbdev *fbdev, struct iosys_map *map) +{ + intel_fb_get_map(fbdev->vma, map); +} diff --git a/drivers/gpu/drm/i915/display/intel_fbdev.h b/drivers/gpu/drm/i915/display/intel_fbdev.h index a15e3e222a0c..150cc5f45bb3 100644 --- a/drivers/gpu/drm/i915/display/intel_fbdev.h +++ b/drivers/gpu/drm/i915/display/intel_fbdev.h @@ -13,6 +13,7 @@ struct drm_fb_helper_surface_size; struct intel_display; struct intel_fbdev; struct intel_framebuffer; +struct iosys_map; #ifdef CONFIG_DRM_FBDEV_EMULATION int intel_fbdev_driver_fbdev_probe(struct drm_fb_helper *helper, @@ -22,7 +23,7 @@ int intel_fbdev_driver_fbdev_probe(struct drm_fb_helper *helper, void intel_fbdev_setup(struct intel_display *display); struct intel_framebuffer *intel_fbdev_framebuffer(struct intel_fbdev *fbdev); struct i915_vma *intel_fbdev_vma_pointer(struct intel_fbdev *fbdev); - +void intel_fbdev_get_map(struct intel_fbdev *fbdev, struct iosys_map *map); #else #define INTEL_FBDEV_DRIVER_OPS \ .fbdev_probe = NULL @@ -39,6 +40,9 @@ static inline struct i915_vma *intel_fbdev_vma_pointer(struct intel_fbdev *fbdev return NULL; } +static inline void intel_fbdev_get_map(struct intel_fbdev *fbdev, struct iosys_map *map) +{ +} #endif #endif /* __INTEL_FBDEV_H__ */ diff --git a/drivers/gpu/drm/i915/display/intel_fbdev_fb.c b/drivers/gpu/drm/i915/display/intel_fbdev_fb.c index 8db3af36b2f2..210aee9ae88b 100644 --- a/drivers/gpu/drm/i915/display/intel_fbdev_fb.c +++ b/drivers/gpu/drm/i915/display/intel_fbdev_fb.c @@ -62,7 +62,11 @@ struct intel_framebuffer *intel_fbdev_fb_alloc(struct drm_fb_helper *helper, return ERR_PTR(-ENOMEM); } - fb = intel_framebuffer_create(intel_bo_to_drm_bo(obj), &mode_cmd); + fb = intel_framebuffer_create(intel_bo_to_drm_bo(obj), + drm_get_format_info(display->drm, + mode_cmd.pixel_format, + mode_cmd.modifier[0]), + &mode_cmd); i915_gem_object_put(obj); return to_intel_framebuffer(fb); diff --git a/drivers/gpu/drm/i915/display/intel_fdi.c b/drivers/gpu/drm/i915/display/intel_fdi.c index 8039a84671cc..59a36b3a22c1 100644 --- a/drivers/gpu/drm/i915/display/intel_fdi.c +++ b/drivers/gpu/drm/i915/display/intel_fdi.c @@ -292,34 +292,6 @@ int intel_fdi_link_freq(struct intel_display *display, return display->fdi.pll_freq; } -/** - * intel_fdi_compute_pipe_bpp - compute pipe bpp limited by max link bpp - * @crtc_state: the crtc state - * - * Compute the pipe bpp limited by the CRTC's maximum link bpp. Encoders can - * call this function during state computation in the simple case where the - * link bpp will always match the pipe bpp. This is the case for all non-DP - * encoders, while DP encoders will use a link bpp lower than pipe bpp in case - * of DSC compression. - * - * Returns %true in case of success, %false if pipe bpp would need to be - * reduced below its valid range. 
- */ -bool intel_fdi_compute_pipe_bpp(struct intel_crtc_state *crtc_state) -{ - int pipe_bpp = min(crtc_state->pipe_bpp, - fxp_q4_to_int(crtc_state->max_link_bpp_x16)); - - pipe_bpp = rounddown(pipe_bpp, 2 * 3); - - if (pipe_bpp < 6 * 3) - return false; - - crtc_state->pipe_bpp = pipe_bpp; - - return true; -} - int ilk_fdi_compute_config(struct intel_crtc *crtc, struct intel_crtc_state *pipe_config) { diff --git a/drivers/gpu/drm/i915/display/intel_fdi.h b/drivers/gpu/drm/i915/display/intel_fdi.h index ad5e103c38a8..1cd08df9b0c2 100644 --- a/drivers/gpu/drm/i915/display/intel_fdi.h +++ b/drivers/gpu/drm/i915/display/intel_fdi.h @@ -20,7 +20,6 @@ struct intel_link_bw_limits; int intel_fdi_add_affected_crtcs(struct intel_atomic_state *state); int intel_fdi_link_freq(struct intel_display *display, const struct intel_crtc_state *pipe_config); -bool intel_fdi_compute_pipe_bpp(struct intel_crtc_state *crtc_state); int ilk_fdi_compute_config(struct intel_crtc *intel_crtc, struct intel_crtc_state *pipe_config); int intel_fdi_atomic_check_link(struct intel_atomic_state *state, diff --git a/drivers/gpu/drm/i915/display/intel_flipq.c b/drivers/gpu/drm/i915/display/intel_flipq.c new file mode 100644 index 000000000000..6ab2272ab2df --- /dev/null +++ b/drivers/gpu/drm/i915/display/intel_flipq.c @@ -0,0 +1,472 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2025 Intel Corporation + */ + +#include <linux/pci.h> + +#include <drm/drm_print.h> + +#include "i915_utils.h" +#include "intel_step.h" +#include "intel_crtc.h" +#include "intel_de.h" +#include "intel_display_core.h" +#include "intel_display_types.h" +#include "intel_flipq.h" +#include "intel_dmc.h" +#include "intel_dmc_regs.h" +#include "intel_dsb.h" +#include "intel_vblank.h" +#include "intel_vrr.h" + +/** + * DOC: DMC Flip Queue + * + * A flip queue is a ring buffer implemented by the pipe DMC firmware. + * The driver inserts entries into the queues to be executed by the + * pipe DMC at a specified presentation timestamp (PTS). 
+ * + * Each pipe DMC provides several queues: + * + * - 1 general queue (two DSB buffers executed per entry) + * - 3 plane queues (one DSB buffer executed per entry) + * - 1 fast queue (deprecated) + */ + +#define for_each_flipq(flipq_id) \ + for ((flipq_id) = INTEL_FLIPQ_PLANE_1; (flipq_id) < MAX_INTEL_FLIPQ; (flipq_id)++) + +static int intel_flipq_offset(enum intel_flipq_id flipq_id) +{ + switch (flipq_id) { + case INTEL_FLIPQ_PLANE_1: + return 0x008; + case INTEL_FLIPQ_PLANE_2: + return 0x108; + case INTEL_FLIPQ_PLANE_3: + return 0x208; + case INTEL_FLIPQ_GENERAL: + return 0x308; + case INTEL_FLIPQ_FAST: + return 0x3c8; + default: + MISSING_CASE(flipq_id); + return 0; + } +} + +static int intel_flipq_size_dw(enum intel_flipq_id flipq_id) +{ + switch (flipq_id) { + case INTEL_FLIPQ_PLANE_1: + case INTEL_FLIPQ_PLANE_2: + case INTEL_FLIPQ_PLANE_3: + return 64; + case INTEL_FLIPQ_GENERAL: + case INTEL_FLIPQ_FAST: + return 48; + default: + MISSING_CASE(flipq_id); + return 1; + } +} + +static int intel_flipq_elem_size_dw(enum intel_flipq_id flipq_id) +{ + switch (flipq_id) { + case INTEL_FLIPQ_PLANE_1: + case INTEL_FLIPQ_PLANE_2: + case INTEL_FLIPQ_PLANE_3: + return 4; + case INTEL_FLIPQ_GENERAL: + case INTEL_FLIPQ_FAST: + return 6; + default: + MISSING_CASE(flipq_id); + return 1; + } +} + +static int intel_flipq_size_entries(enum intel_flipq_id flipq_id) +{ + return intel_flipq_size_dw(flipq_id) / intel_flipq_elem_size_dw(flipq_id); +} + +static void intel_flipq_crtc_init(struct intel_crtc *crtc) +{ + struct intel_display *display = to_intel_display(crtc); + enum intel_flipq_id flipq_id; + + for_each_flipq(flipq_id) { + struct intel_flipq *flipq = &crtc->flipq[flipq_id]; + + flipq->start_mmioaddr = intel_pipedmc_start_mmioaddr(crtc) + intel_flipq_offset(flipq_id); + flipq->flipq_id = flipq_id; + + drm_dbg_kms(display->drm, "[CRTC:%d:%s] FQ %d: start 0x%x\n", + crtc->base.base.id, crtc->base.name, + flipq_id, flipq->start_mmioaddr); + } +} + +bool intel_flipq_supported(struct intel_display *display) +{ + if (!display->params.enable_flipq) + return false; + + if (!display->dmc.dmc) + return false; + + if (DISPLAY_VER(display) == 20) + return true; + + /* DMC firmware expects VRR timing generator to be used */ + return DISPLAY_VER(display) >= 30 && intel_vrr_always_use_vrr_tg(display); +} + +void intel_flipq_init(struct intel_display *display) +{ + struct intel_crtc *crtc; + + intel_dmc_wait_fw_load(display); + + for_each_intel_crtc(display->drm, crtc) + intel_flipq_crtc_init(crtc); +} + +static int cdclk_factor(struct intel_display *display) +{ + if (DISPLAY_VER(display) >= 30) + return 120; + else + return 280; +} + +int intel_flipq_exec_time_us(struct intel_display *display) +{ + return intel_dsb_exec_time_us() + + DIV_ROUND_UP(display->cdclk.hw.cdclk * cdclk_factor(display), 540000) + + display->sagv.block_time_us; +} + +static int intel_flipq_preempt_timeout_ms(struct intel_display *display) +{ + return DIV_ROUND_UP(intel_flipq_exec_time_us(display), 1000); +} + +static void intel_flipq_preempt(struct intel_crtc *crtc, bool preempt) +{ + struct intel_display *display = to_intel_display(crtc); + + intel_de_rmw(display, PIPEDMC_FQ_CTRL(crtc->pipe), + PIPEDMC_FQ_CTRL_PREEMPT, preempt ? 
PIPEDMC_FQ_CTRL_PREEMPT : 0); + + if (preempt && + intel_de_wait_for_clear(display, + PIPEDMC_FQ_STATUS(crtc->pipe), + PIPEDMC_FQ_STATUS_BUSY, + intel_flipq_preempt_timeout_ms(display))) + drm_err(display->drm, "[CRTC:%d:%s] flip queue preempt timeout\n", + crtc->base.base.id, crtc->base.name); +} + +static int intel_flipq_current_head(struct intel_crtc *crtc, enum intel_flipq_id flipq_id) +{ + struct intel_display *display = to_intel_display(crtc); + + return intel_de_read(display, PIPEDMC_FPQ_CHP(crtc->pipe, flipq_id)); +} + +static void intel_flipq_write_tail(struct intel_crtc *crtc) +{ + struct intel_display *display = to_intel_display(crtc); + + intel_de_write(display, PIPEDMC_FPQ_ATOMIC_TP(crtc->pipe), + PIPEDMC_FPQ_PLANEQ_3_TP(crtc->flipq[INTEL_FLIPQ_PLANE_3].tail) | + PIPEDMC_FPQ_PLANEQ_2_TP(crtc->flipq[INTEL_FLIPQ_PLANE_2].tail) | + PIPEDMC_FPQ_PLANEQ_1_TP(crtc->flipq[INTEL_FLIPQ_PLANE_1].tail) | + PIPEDMC_FPQ_FASTQ_TP(crtc->flipq[INTEL_FLIPQ_FAST].tail) | + PIPEDMC_FPQ_GENERALQ_TP(crtc->flipq[INTEL_FLIPQ_GENERAL].tail)); +} + +static void intel_flipq_sw_dmc_wake(struct intel_crtc *crtc) +{ + struct intel_display *display = to_intel_display(crtc); + + intel_de_write(display, PIPEDMC_FPQ_CTL1(crtc->pipe), PIPEDMC_SW_DMC_WAKE); +} + +static int intel_flipq_exec_time_lines(const struct intel_crtc_state *crtc_state) +{ + struct intel_display *display = to_intel_display(crtc_state); + + return intel_usecs_to_scanlines(&crtc_state->hw.adjusted_mode, + intel_flipq_exec_time_us(display)); +} + +void intel_flipq_dump(struct intel_crtc *crtc, + enum intel_flipq_id flipq_id) +{ + struct intel_display *display = to_intel_display(crtc); + struct intel_flipq *flipq = &crtc->flipq[flipq_id]; + u32 tmp; + + drm_dbg_kms(display->drm, + "[CRTC:%d:%s] FQ %d @ 0x%x: ", + crtc->base.base.id, crtc->base.name, flipq_id, + flipq->start_mmioaddr); + for (int i = 0 ; i < intel_flipq_size_dw(flipq_id); i++) { + printk(KERN_CONT " 0x%08x", + intel_de_read(display, PIPEDMC_FQ_RAM(flipq->start_mmioaddr, i))); + if (i % intel_flipq_elem_size_dw(flipq_id) == intel_flipq_elem_size_dw(flipq_id) - 1) + printk(KERN_CONT "\n"); + } + + drm_dbg_kms(display->drm, + "[CRTC:%d:%s] FQ %d: chp=0x%x, hp=0x%x\n", + crtc->base.base.id, crtc->base.name, flipq_id, + intel_de_read(display, PIPEDMC_FPQ_CHP(crtc->pipe, flipq_id)), + intel_de_read(display, PIPEDMC_FPQ_HP(crtc->pipe, flipq_id))); + + drm_dbg_kms(display->drm, + "[CRTC:%d:%s] FQ %d: current head %d\n", + crtc->base.base.id, crtc->base.name, flipq_id, + intel_flipq_current_head(crtc, flipq_id)); + + drm_dbg_kms(display->drm, + "[CRTC:%d:%s] flip queue timestamp: 0x%x\n", + crtc->base.base.id, crtc->base.name, + intel_de_read(display, PIPEDMC_FPQ_TS(crtc->pipe))); + + tmp = intel_de_read(display, PIPEDMC_FPQ_ATOMIC_TP(crtc->pipe)); + + drm_dbg_kms(display->drm, + "[CRTC:%d:%s] flip queue atomic tails: P3 %d, P2 %d, P1 %d, G %d, F %d\n", + crtc->base.base.id, crtc->base.name, + REG_FIELD_GET(PIPEDMC_FPQ_PLANEQ_3_TP_MASK, tmp), + REG_FIELD_GET(PIPEDMC_FPQ_PLANEQ_2_TP_MASK, tmp), + REG_FIELD_GET(PIPEDMC_FPQ_PLANEQ_1_TP_MASK, tmp), + REG_FIELD_GET(PIPEDMC_FPQ_GENERALQ_TP_MASK, tmp), + REG_FIELD_GET(PIPEDMC_FPQ_FASTQ_TP_MASK, tmp)); +} + +void intel_flipq_reset(struct intel_display *display, enum pipe pipe) +{ + struct intel_crtc *crtc = intel_crtc_for_pipe(display, pipe); + enum intel_flipq_id flipq_id; + + intel_de_write(display, PIPEDMC_FQ_CTRL(pipe), 0); + + intel_de_write(display, PIPEDMC_SCANLINECMPLOWER(pipe), 0); + intel_de_write(display, 
PIPEDMC_SCANLINECMPUPPER(pipe), 0); + + for_each_flipq(flipq_id) { + struct intel_flipq *flipq = &crtc->flipq[flipq_id]; + + intel_de_write(display, PIPEDMC_FPQ_HP(pipe, flipq_id), 0); + intel_de_write(display, PIPEDMC_FPQ_CHP(pipe, flipq_id), 0); + + flipq->tail = 0; + } + + intel_de_write(display, PIPEDMC_FPQ_ATOMIC_TP(pipe), 0); +} + +static enum pipedmc_event_id flipq_event_id(struct intel_display *display) +{ + if (DISPLAY_VER(display) >= 30) + return PIPEDMC_EVENT_FULL_FQ_WAKE_TRIGGER; + else + return PIPEDMC_EVENT_SCANLINE_INRANGE_FQ_TRIGGER; +} + +void intel_flipq_enable(const struct intel_crtc_state *crtc_state) +{ + struct intel_display *display = to_intel_display(crtc_state); + struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); + /* FIXME what to do with VRR? */ + int scanline = intel_mode_vblank_start(&crtc_state->hw.adjusted_mode) - + intel_flipq_exec_time_lines(crtc_state); + + if (DISPLAY_VER(display) >= 30) { + u32 start_mmioaddr = intel_pipedmc_start_mmioaddr(crtc); + + /* undocumented magic DMC variables */ + intel_de_write(display, PTL_PIPEDMC_EXEC_TIME_LINES(start_mmioaddr), + intel_flipq_exec_time_lines(crtc_state)); + intel_de_write(display, PTL_PIPEDMC_END_OF_EXEC_GB(start_mmioaddr), + 100); + } + + intel_de_write(display, PIPEDMC_SCANLINECMPUPPER(crtc->pipe), + PIPEDMC_SCANLINE_UPPER(scanline)); + intel_de_write(display, PIPEDMC_SCANLINECMPLOWER(crtc->pipe), + PIPEDMC_SCANLINEINRANGECMP_EN | + PIPEDMC_SCANLINE_LOWER(scanline - 2)); + + intel_pipedmc_enable_event(crtc, flipq_event_id(display)); + + intel_de_write(display, PIPEDMC_FQ_CTRL(crtc->pipe), PIPEDMC_FQ_CTRL_ENABLE); +} + +void intel_flipq_disable(const struct intel_crtc_state *crtc_state) +{ + struct intel_display *display = to_intel_display(crtc_state); + struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); + + intel_flipq_preempt(crtc, true); + + intel_de_write(display, PIPEDMC_FQ_CTRL(crtc->pipe), 0); + + intel_pipedmc_disable_event(crtc, flipq_event_id(display)); + + intel_de_write(display, PIPEDMC_SCANLINECMPLOWER(crtc->pipe), 0); + intel_de_write(display, PIPEDMC_SCANLINECMPUPPER(crtc->pipe), 0); +} + +static bool assert_flipq_has_room(struct intel_crtc *crtc, + enum intel_flipq_id flipq_id) +{ + struct intel_display *display = to_intel_display(crtc); + struct intel_flipq *flipq = &crtc->flipq[flipq_id]; + int head, size = intel_flipq_size_entries(flipq_id); + + head = intel_flipq_current_head(crtc, flipq_id); + + return !drm_WARN(display->drm, + (flipq->tail + size - head) % size >= size - 1, + "[CRTC:%d:%s] FQ %d overflow (head %d, tail %d, size %d)\n", + crtc->base.base.id, crtc->base.name, flipq_id, + head, flipq->tail, size); +} + +static void intel_flipq_write(struct intel_display *display, + struct intel_flipq *flipq, u32 data, int i) +{ + intel_de_write(display, PIPEDMC_FQ_RAM(flipq->start_mmioaddr, flipq->tail * + intel_flipq_elem_size_dw(flipq->flipq_id) + i), data); +} + +static void lnl_flipq_add(struct intel_display *display, + struct intel_flipq *flipq, + unsigned int pts, + enum intel_dsb_id dsb_id, + struct intel_dsb *dsb) +{ + int i = 0; + + switch (flipq->flipq_id) { + case INTEL_FLIPQ_GENERAL: + intel_flipq_write(display, flipq, pts, i++); + intel_flipq_write(display, flipq, intel_dsb_head(dsb), i++); + intel_flipq_write(display, flipq, LNL_FQ_INTERRUPT | + LNL_FQ_DSB_ID(dsb_id) | + LNL_FQ_DSB_SIZE(intel_dsb_size(dsb) / 64), i++); + intel_flipq_write(display, flipq, 0, i++); + intel_flipq_write(display, flipq, 0, i++); /* head for second DSB */ + 
intel_flipq_write(display, flipq, 0, i++); /* DSB engine + size for second DSB */ + break; + case INTEL_FLIPQ_PLANE_1: + case INTEL_FLIPQ_PLANE_2: + case INTEL_FLIPQ_PLANE_3: + intel_flipq_write(display, flipq, pts, i++); + intel_flipq_write(display, flipq, intel_dsb_head(dsb), i++); + intel_flipq_write(display, flipq, LNL_FQ_INTERRUPT | + LNL_FQ_DSB_ID(dsb_id) | + LNL_FQ_DSB_SIZE(intel_dsb_size(dsb) / 64), i++); + intel_flipq_write(display, flipq, 0, i++); + break; + default: + MISSING_CASE(flipq->flipq_id); + return; + } +} + +static void ptl_flipq_add(struct intel_display *display, + struct intel_flipq *flipq, + unsigned int pts, + enum intel_dsb_id dsb_id, + struct intel_dsb *dsb) +{ + int i = 0; + + switch (flipq->flipq_id) { + case INTEL_FLIPQ_GENERAL: + intel_flipq_write(display, flipq, pts, i++); + intel_flipq_write(display, flipq, 0, i++); + intel_flipq_write(display, flipq, PTL_FQ_INTERRUPT | + PTL_FQ_DSB_ID(dsb_id) | + PTL_FQ_DSB_SIZE(intel_dsb_size(dsb) / 64), i++); + intel_flipq_write(display, flipq, intel_dsb_head(dsb), i++); + intel_flipq_write(display, flipq, 0, i++); /* DSB engine + size for second DSB */ + intel_flipq_write(display, flipq, 0, i++); /* head for second DSB */ + break; + case INTEL_FLIPQ_PLANE_1: + case INTEL_FLIPQ_PLANE_2: + case INTEL_FLIPQ_PLANE_3: + intel_flipq_write(display, flipq, pts, i++); + intel_flipq_write(display, flipq, 0, i++); + intel_flipq_write(display, flipq, PTL_FQ_INTERRUPT | + PTL_FQ_DSB_ID(dsb_id) | + PTL_FQ_DSB_SIZE(intel_dsb_size(dsb) / 64), i++); + intel_flipq_write(display, flipq, intel_dsb_head(dsb), i++); + break; + default: + MISSING_CASE(flipq->flipq_id); + return; + } +} + +void intel_flipq_add(struct intel_crtc *crtc, + enum intel_flipq_id flipq_id, + unsigned int pts, + enum intel_dsb_id dsb_id, + struct intel_dsb *dsb) +{ + struct intel_display *display = to_intel_display(crtc); + struct intel_flipq *flipq = &crtc->flipq[flipq_id]; + + if (!assert_flipq_has_room(crtc, flipq_id)) + return; + + pts += intel_de_read(display, PIPEDMC_FPQ_TS(crtc->pipe)); + + intel_flipq_preempt(crtc, true); + + if (DISPLAY_VER(display) >= 30) + ptl_flipq_add(display, flipq, pts, dsb_id, dsb); + else + lnl_flipq_add(display, flipq, pts, dsb_id, dsb); + + flipq->tail = (flipq->tail + 1) % intel_flipq_size_entries(flipq->flipq_id); + intel_flipq_write_tail(crtc); + + intel_flipq_preempt(crtc, false); + + intel_flipq_sw_dmc_wake(crtc); +} + +/* Wa_18034343758 */ +static bool need_dmc_halt_wa(struct intel_display *display) +{ + return DISPLAY_VER(display) == 20 || + (display->platform.pantherlake && + IS_DISPLAY_STEP(display, STEP_A0, STEP_B0)); +} + +void intel_flipq_wait_dmc_halt(struct intel_dsb *dsb, struct intel_crtc *crtc) +{ + struct intel_display *display = to_intel_display(crtc); + + if (need_dmc_halt_wa(display)) + intel_dsb_wait_usec(dsb, 2); +} + +void intel_flipq_unhalt_dmc(struct intel_dsb *dsb, struct intel_crtc *crtc) +{ + struct intel_display *display = to_intel_display(crtc); + + if (need_dmc_halt_wa(display)) + intel_dsb_reg_write(dsb, PIPEDMC_CTL(crtc->pipe), 0); +} diff --git a/drivers/gpu/drm/i915/display/intel_flipq.h b/drivers/gpu/drm/i915/display/intel_flipq.h new file mode 100644 index 000000000000..012e3e9a6bcb --- /dev/null +++ b/drivers/gpu/drm/i915/display/intel_flipq.h @@ -0,0 +1,37 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2025 Intel Corporation + */ + +#ifndef __INTEL_FLIPQ_H__ +#define __INTEL_FLIPQ_H__ + +#include <linux/types.h> + +enum intel_dsb_id; +enum intel_flipq_id; +enum pipe; +struct 
intel_crtc; +struct intel_crtc_state; +struct intel_display; +struct intel_dsb; + +bool intel_flipq_supported(struct intel_display *display); +void intel_flipq_init(struct intel_display *display); +void intel_flipq_reset(struct intel_display *display, enum pipe pipe); + +void intel_flipq_enable(const struct intel_crtc_state *crtc_state); +void intel_flipq_disable(const struct intel_crtc_state *old_crtc_state); + +void intel_flipq_add(struct intel_crtc *crtc, + enum intel_flipq_id flip_queue_id, + unsigned int pts, + enum intel_dsb_id dsb_id, + struct intel_dsb *dsb); +int intel_flipq_exec_time_us(struct intel_display *display); +void intel_flipq_wait_dmc_halt(struct intel_dsb *dsb, struct intel_crtc *crtc); +void intel_flipq_unhalt_dmc(struct intel_dsb *dsb, struct intel_crtc *crtc); +void intel_flipq_dump(struct intel_crtc *crtc, + enum intel_flipq_id flip_queue_id); + +#endif /* __INTEL_FLIPQ_H__ */ diff --git a/drivers/gpu/drm/i915/display/intel_frontbuffer.c b/drivers/gpu/drm/i915/display/intel_frontbuffer.c index 43be5377ddc1..73ed28ac9573 100644 --- a/drivers/gpu/drm/i915/display/intel_frontbuffer.c +++ b/drivers/gpu/drm/i915/display/intel_frontbuffer.c @@ -270,6 +270,8 @@ static void frontbuffer_release(struct kref *ref) spin_unlock(&display->fb_tracking.lock); i915_active_fini(&front->write); + + drm_gem_object_put(obj); kfree_rcu(front, rcu); } @@ -287,6 +289,8 @@ intel_frontbuffer_get(struct drm_gem_object *obj) if (!front) return NULL; + drm_gem_object_get(obj); + front->obj = obj; kref_init(&front->ref); atomic_set(&front->bits, 0); @@ -299,8 +303,12 @@ intel_frontbuffer_get(struct drm_gem_object *obj) spin_lock(&display->fb_tracking.lock); cur = intel_bo_set_frontbuffer(obj, front); spin_unlock(&display->fb_tracking.lock); - if (cur != front) + + if (cur != front) { + drm_gem_object_put(obj); kfree(front); + } + return cur; } diff --git a/drivers/gpu/drm/i915/display/intel_global_state.c b/drivers/gpu/drm/i915/display/intel_global_state.c index 000a898c9480..30eff6009e87 100644 --- a/drivers/gpu/drm/i915/display/intel_global_state.c +++ b/drivers/gpu/drm/i915/display/intel_global_state.c @@ -13,6 +13,36 @@ #include "intel_display_types.h" #include "intel_global_state.h" +#define for_each_new_global_obj_in_state(__state, obj, new_obj_state, __i) \ + for ((__i) = 0; \ + (__i) < (__state)->num_global_objs && \ + ((obj) = (__state)->global_objs[__i].ptr, \ + (new_obj_state) = (__state)->global_objs[__i].new_state, 1); \ + (__i)++) \ + for_each_if(obj) + +#define for_each_old_global_obj_in_state(__state, obj, old_obj_state, __i) \ + for ((__i) = 0; \ + (__i) < (__state)->num_global_objs && \ + ((obj) = (__state)->global_objs[__i].ptr, \ + (old_obj_state) = (__state)->global_objs[__i].old_state, 1); \ + (__i)++) \ + for_each_if(obj) + +#define for_each_oldnew_global_obj_in_state(__state, obj, old_obj_state, new_obj_state, __i) \ + for ((__i) = 0; \ + (__i) < (__state)->num_global_objs && \ + ((obj) = (__state)->global_objs[__i].ptr, \ + (old_obj_state) = (__state)->global_objs[__i].old_state, \ + (new_obj_state) = (__state)->global_objs[__i].new_state, 1); \ + (__i)++) \ + for_each_if(obj) + +struct intel_global_objs_state { + struct intel_global_obj *ptr; + struct intel_global_state *state, *old_state, *new_state; +}; + struct intel_global_commit { struct kref ref; struct completion done; @@ -148,7 +178,7 @@ intel_atomic_get_global_obj_state(struct intel_atomic_state *state, struct intel_display *display = to_intel_display(state); int index, num_objs, i; size_t size; - struct 
__intel_global_objs_state *arr; + struct intel_global_objs_state *arr; struct intel_global_state *obj_state; for (i = 0; i < state->num_global_objs; i++) diff --git a/drivers/gpu/drm/i915/display/intel_global_state.h b/drivers/gpu/drm/i915/display/intel_global_state.h index d42fb2547ee9..e1efa530cc86 100644 --- a/drivers/gpu/drm/i915/display/intel_global_state.h +++ b/drivers/gpu/drm/i915/display/intel_global_state.h @@ -11,6 +11,7 @@ struct intel_atomic_state; struct intel_display; +struct intel_global_commit; struct intel_global_obj; struct intel_global_state; @@ -26,36 +27,6 @@ struct intel_global_obj { const struct intel_global_state_funcs *funcs; }; -#define intel_for_each_global_obj(obj, dev_priv) \ - list_for_each_entry(obj, &(dev_priv)->display.global.obj_list, head) - -#define for_each_new_global_obj_in_state(__state, obj, new_obj_state, __i) \ - for ((__i) = 0; \ - (__i) < (__state)->num_global_objs && \ - ((obj) = (__state)->global_objs[__i].ptr, \ - (new_obj_state) = (__state)->global_objs[__i].new_state, 1); \ - (__i)++) \ - for_each_if(obj) - -#define for_each_old_global_obj_in_state(__state, obj, old_obj_state, __i) \ - for ((__i) = 0; \ - (__i) < (__state)->num_global_objs && \ - ((obj) = (__state)->global_objs[__i].ptr, \ - (old_obj_state) = (__state)->global_objs[__i].old_state, 1); \ - (__i)++) \ - for_each_if(obj) - -#define for_each_oldnew_global_obj_in_state(__state, obj, old_obj_state, new_obj_state, __i) \ - for ((__i) = 0; \ - (__i) < (__state)->num_global_objs && \ - ((obj) = (__state)->global_objs[__i].ptr, \ - (old_obj_state) = (__state)->global_objs[__i].old_state, \ - (new_obj_state) = (__state)->global_objs[__i].new_state, 1); \ - (__i)++) \ - for_each_if(obj) - -struct intel_global_commit; - struct intel_global_state { struct intel_global_obj *obj; struct intel_atomic_state *state; @@ -64,11 +35,6 @@ struct intel_global_state { bool changed, serialized; }; -struct __intel_global_objs_state { - struct intel_global_obj *ptr; - struct intel_global_state *state, *old_state, *new_state; -}; - void intel_atomic_global_obj_init(struct intel_display *display, struct intel_global_obj *obj, struct intel_global_state *state, diff --git a/drivers/gpu/drm/i915/display/intel_gmbus.c b/drivers/gpu/drm/i915/display/intel_gmbus.c index 0d73f32fe7f1..358210adb8f8 100644 --- a/drivers/gpu/drm/i915/display/intel_gmbus.c +++ b/drivers/gpu/drm/i915/display/intel_gmbus.c @@ -30,6 +30,7 @@ #include <linux/export.h> #include <linux/i2c-algo-bit.h> #include <linux/i2c.h> +#include <linux/iopoll.h> #include <drm/display/drm_hdcp_helper.h> @@ -39,6 +40,7 @@ #include "intel_de.h" #include "intel_display_regs.h" #include "intel_display_types.h" +#include "intel_display_wa.h" #include "intel_gmbus.h" #include "intel_gmbus_regs.h" @@ -217,7 +219,7 @@ static void pnv_gmbus_clock_gating(struct intel_display *display, bool enable) { /* When using bit bashing for I2C, this bit needs to be set to 1 */ - intel_de_rmw(display, DSPCLK_GATE_D(display), + intel_de_rmw(display, DSPCLK_GATE_D, PNV_GMBUSUNIT_CLOCK_GATE_DISABLE, !enable ? PNV_GMBUSUNIT_CLOCK_GATE_DISABLE : 0); } @@ -240,14 +242,20 @@ static void bxt_gmbus_clock_gating(struct intel_display *display, static u32 get_reserved(struct intel_gmbus *bus) { struct intel_display *display = bus->display; - u32 reserved = 0; + u32 preserve_bits = 0; + + if (display->platform.i830 || display->platform.i845g) + return 0; /* On most chips, these bits must be preserved in software. 
*/ - if (!display->platform.i830 && !display->platform.i845g) - reserved = intel_de_read_notrace(display, bus->gpio_reg) & - (GPIO_DATA_PULLUP_DISABLE | GPIO_CLOCK_PULLUP_DISABLE); + preserve_bits |= GPIO_DATA_PULLUP_DISABLE | GPIO_CLOCK_PULLUP_DISABLE; + + /* Wa_16025573575: the mask bits need to be preserved throughout */ + if (intel_display_wa(display, 16025573575)) + preserve_bits |= GPIO_CLOCK_DIR_MASK | GPIO_CLOCK_VAL_MASK | + GPIO_DATA_DIR_MASK | GPIO_DATA_VAL_MASK; - return reserved; + return intel_de_read_notrace(display, bus->gpio_reg) & preserve_bits; } static int get_clock(void *data) @@ -308,6 +316,22 @@ static void set_data(void *data, int state_high) intel_de_posting_read(display, bus->gpio_reg); } +static void +ptl_handle_mask_bits(struct intel_gmbus *bus, bool set) +{ + struct intel_display *display = bus->display; + u32 reg_val = intel_de_read_notrace(display, bus->gpio_reg); + u32 mask_bits = GPIO_CLOCK_DIR_MASK | GPIO_CLOCK_VAL_MASK | + GPIO_DATA_DIR_MASK | GPIO_DATA_VAL_MASK; + if (set) + reg_val |= mask_bits; + else + reg_val &= ~mask_bits; + + intel_de_write_notrace(display, bus->gpio_reg, reg_val); + intel_de_posting_read(display, bus->gpio_reg); +} + static int intel_gpio_pre_xfer(struct i2c_adapter *adapter) { @@ -319,6 +343,9 @@ intel_gpio_pre_xfer(struct i2c_adapter *adapter) if (display->platform.pineview) pnv_gmbus_clock_gating(display, false); + if (intel_display_wa(display, 16025573575)) + ptl_handle_mask_bits(bus, true); + set_data(bus, 1); set_clock(bus, 1); udelay(I2C_RISEFALL_TIME); @@ -336,6 +363,9 @@ intel_gpio_post_xfer(struct i2c_adapter *adapter) if (display->platform.pineview) pnv_gmbus_clock_gating(display, true); + + if (intel_display_wa(display, 16025573575)) + ptl_handle_mask_bits(bus, false); } static void @@ -385,11 +415,14 @@ static int gmbus_wait(struct intel_display *display, u32 status, u32 irq_en) intel_de_write_fw(display, GMBUS4(display), irq_en); status |= GMBUS_SATOER; - ret = wait_for_us((gmbus2 = intel_de_read_fw(display, GMBUS2(display))) & status, - 2); + + ret = poll_timeout_us_atomic(gmbus2 = intel_de_read_fw(display, GMBUS2(display)), + gmbus2 & status, + 0, 2, false); if (ret) - ret = wait_for((gmbus2 = intel_de_read_fw(display, GMBUS2(display))) & status, - 50); + ret = poll_timeout_us(gmbus2 = intel_de_read_fw(display, GMBUS2(display)), + gmbus2 & status, + 500, 50 * 1000, false); intel_de_write_fw(display, GMBUS4(display), 0); remove_wait_queue(&display->gmbus.wait_queue, &wait); diff --git a/drivers/gpu/drm/i915/display/intel_hdcp.c b/drivers/gpu/drm/i915/display/intel_hdcp.c index 5235e4162555..531ee122bf82 100644 --- a/drivers/gpu/drm/i915/display/intel_hdcp.c +++ b/drivers/gpu/drm/i915/display/intel_hdcp.c @@ -11,13 +11,15 @@ #include <linux/component.h> #include <linux/debugfs.h> #include <linux/i2c.h> +#include <linux/iopoll.h> #include <linux/random.h> #include <drm/display/drm_hdcp_helper.h> +#include <drm/drm_print.h> #include <drm/intel/i915_component.h> -#include "i915_drv.h" #include "i915_reg.h" +#include "i915_utils.h" #include "intel_connector.h" #include "intel_de.h" #include "intel_display_power.h" @@ -32,6 +34,7 @@ #include "intel_hdcp_regs.h" #include "intel_hdcp_shim.h" #include "intel_pcode.h" +#include "intel_step.h" #define USE_HDCP_GSC(__display) (DISPLAY_VER(__display) >= 14) @@ -324,16 +327,13 @@ static int intel_hdcp_poll_ksv_fifo(struct intel_digital_port *dig_port, bool ksv_ready; /* Poll for ksv list ready (spec says max time allowed is 5s) */ - ret = __wait_for(read_ret = 
shim->read_ksv_ready(dig_port, - &ksv_ready), - read_ret || ksv_ready, 5 * 1000 * 1000, 1000, - 100 * 1000); + ret = poll_timeout_us(read_ret = shim->read_ksv_ready(dig_port, &ksv_ready), + read_ret || ksv_ready, + 100 * 1000, 5 * 1000 * 1000, false); if (ret) return ret; if (read_ret) return read_ret; - if (!ksv_ready) - return -ETIMEDOUT; return 0; } @@ -374,7 +374,6 @@ static void intel_hdcp_clear_keys(struct intel_display *display) static int intel_hdcp_load_keys(struct intel_display *display) { - struct drm_i915_private *i915 = to_i915(display->drm); int ret; u32 val; @@ -399,7 +398,7 @@ static int intel_hdcp_load_keys(struct intel_display *display) * Mailbox interface. */ if (DISPLAY_VER(display) == 9 && !display->platform.broxton) { - ret = snb_pcode_write(&i915->uncore, SKL_PCODE_LOAD_HDCP_KEYS, 1); + ret = intel_pcode_write(display->drm, SKL_PCODE_LOAD_HDCP_KEYS, 1); if (ret) { drm_err(display->drm, "Failed to initiate HDCP key load (%d)\n", @@ -816,6 +815,7 @@ static int intel_hdcp_auth(struct intel_connector *connector) enum port port = dig_port->base.port; unsigned long r0_prime_gen_start; int ret, i, tries = 2; + u32 val; union { u32 reg[2]; u8 shim[DRM_HDCP_AN_LEN]; @@ -904,8 +904,10 @@ static int intel_hdcp_auth(struct intel_connector *connector) HDCP_CONF_AUTH_AND_ENC); /* Wait for R0 ready */ - if (wait_for(intel_de_read(display, HDCP_STATUS(display, cpu_transcoder, port)) & - (HDCP_STATUS_R0_READY | HDCP_STATUS_ENC), 1)) { + ret = poll_timeout_us(val = intel_de_read(display, HDCP_STATUS(display, cpu_transcoder, port)), + val & (HDCP_STATUS_R0_READY | HDCP_STATUS_ENC), + 100, 1000, false); + if (ret) { drm_err(display->drm, "Timed out waiting for R0 ready\n"); return -ETIMEDOUT; } @@ -937,16 +939,16 @@ static int intel_hdcp_auth(struct intel_connector *connector) ri.reg); /* Wait for Ri prime match */ - if (!wait_for(intel_de_read(display, HDCP_STATUS(display, cpu_transcoder, port)) & - (HDCP_STATUS_RI_MATCH | HDCP_STATUS_ENC), 1)) + ret = poll_timeout_us(val = intel_de_read(display, HDCP_STATUS(display, cpu_transcoder, port)), + val & (HDCP_STATUS_RI_MATCH | HDCP_STATUS_ENC), + 100, 1000, false); + if (!ret) break; } if (i == tries) { drm_dbg_kms(display->drm, - "Timed out waiting for Ri prime match (%x)\n", - intel_de_read(display, - HDCP_STATUS(display, cpu_transcoder, port))); + "Timed out waiting for Ri prime match (%x)\n", val); return -ETIMEDOUT; } @@ -1089,7 +1091,6 @@ static void intel_hdcp_update_value(struct intel_connector *connector, u64 value, bool update_property) { struct intel_display *display = to_intel_display(connector); - struct drm_i915_private *i915 = to_i915(display->drm); struct intel_digital_port *dig_port = intel_attached_dig_port(connector); struct intel_hdcp *hdcp = &connector->hdcp; @@ -1110,7 +1111,7 @@ static void intel_hdcp_update_value(struct intel_connector *connector, hdcp->value = value; if (update_property) { drm_connector_get(&connector->base); - if (!queue_work(i915->unordered_wq, &hdcp->prop_work)) + if (!queue_work(display->wq.unordered, &hdcp->prop_work)) drm_connector_put(&connector->base); } } @@ -2237,16 +2238,15 @@ static void intel_hdcp_check_work(struct work_struct *work) check_work); struct intel_connector *connector = intel_hdcp_to_connector(hdcp); struct intel_display *display = to_intel_display(connector); - struct drm_i915_private *i915 = to_i915(display->drm); if (drm_connector_is_unregistered(&connector->base)) return; if (!intel_hdcp2_check_link(connector)) - queue_delayed_work(i915->unordered_wq, &hdcp->check_work, 
+ queue_delayed_work(display->wq.unordered, &hdcp->check_work, DRM_HDCP2_CHECK_PERIOD_MS); else if (!intel_hdcp_check_link(connector)) - queue_delayed_work(i915->unordered_wq, &hdcp->check_work, + queue_delayed_work(display->wq.unordered, &hdcp->check_work, DRM_HDCP_CHECK_PERIOD_MS); } @@ -2437,7 +2437,6 @@ static int _intel_hdcp_enable(struct intel_atomic_state *state, const struct drm_connector_state *conn_state) { struct intel_display *display = to_intel_display(encoder); - struct drm_i915_private *i915 = to_i915(display->drm); struct intel_connector *connector = to_intel_connector(conn_state->connector); struct intel_digital_port *dig_port = intel_attached_dig_port(connector); @@ -2448,12 +2447,6 @@ static int _intel_hdcp_enable(struct intel_atomic_state *state, if (!hdcp->shim) return -ENOENT; - if (!connector->encoder) { - drm_err(display->drm, "[CONNECTOR:%d:%s] encoder is not initialized\n", - connector->base.base.id, connector->base.name); - return -ENODEV; - } - mutex_lock(&hdcp->mutex); mutex_lock(&dig_port->hdcp.mutex); drm_WARN_ON(display->drm, @@ -2496,7 +2489,7 @@ static int _intel_hdcp_enable(struct intel_atomic_state *state, } if (!ret) { - queue_delayed_work(i915->unordered_wq, &hdcp->check_work, + queue_delayed_work(display->wq.unordered, &hdcp->check_work, check_link_interval); intel_hdcp_update_value(connector, DRM_MODE_CONTENT_PROTECTION_ENABLED, @@ -2567,7 +2560,7 @@ void intel_hdcp_update_pipe(struct intel_atomic_state *state, to_intel_connector(conn_state->connector); struct intel_hdcp *hdcp = &connector->hdcp; bool content_protection_type_changed, desired_and_not_enabled = false; - struct drm_i915_private *i915 = to_i915(connector->base.dev); + struct intel_display *display = to_intel_display(connector); if (!connector->hdcp.shim) return; @@ -2594,7 +2587,7 @@ void intel_hdcp_update_pipe(struct intel_atomic_state *state, mutex_lock(&hdcp->mutex); hdcp->value = DRM_MODE_CONTENT_PROTECTION_DESIRED; drm_connector_get(&connector->base); - if (!queue_work(i915->unordered_wq, &hdcp->prop_work)) + if (!queue_work(display->wq.unordered, &hdcp->prop_work)) drm_connector_put(&connector->base); mutex_unlock(&hdcp->mutex); } @@ -2612,7 +2605,7 @@ void intel_hdcp_update_pipe(struct intel_atomic_state *state, */ if (!desired_and_not_enabled && !content_protection_type_changed) { drm_connector_get(&connector->base); - if (!queue_work(i915->unordered_wq, &hdcp->prop_work)) + if (!queue_work(display->wq.unordered, &hdcp->prop_work)) drm_connector_put(&connector->base); } @@ -2736,7 +2729,6 @@ void intel_hdcp_handle_cp_irq(struct intel_connector *connector) { struct intel_hdcp *hdcp = &connector->hdcp; struct intel_display *display = to_intel_display(connector); - struct drm_i915_private *i915 = to_i915(display->drm); if (!hdcp->shim) return; @@ -2744,7 +2736,7 @@ void intel_hdcp_handle_cp_irq(struct intel_connector *connector) atomic_inc(&connector->hdcp.cp_irq_count); wake_up_all(&connector->hdcp.cp_irq_queue); - queue_delayed_work(i915->unordered_wq, &hdcp->check_work, 0); + queue_delayed_work(display->wq.unordered, &hdcp->check_work, 0); } static void __intel_hdcp_info(struct seq_file *m, struct intel_connector *connector, diff --git a/drivers/gpu/drm/i915/display/intel_hdcp_regs.h b/drivers/gpu/drm/i915/display/intel_hdcp_regs.h index f590d7f48ba7..112ce8c896d6 100644 --- a/drivers/gpu/drm/i915/display/intel_hdcp_regs.h +++ b/drivers/gpu/drm/i915/display/intel_hdcp_regs.h @@ -247,7 +247,7 @@ _TRANSA_HDCP2_STREAM_STATUS, \ _TRANSB_HDCP2_STREAM_STATUS) #define 
STREAM_ENCRYPTION_STATUS REG_BIT(31) -#define STREAM_TYPE_STATUS REG_BIT(30) +#define STREAM_TYPE_STATUS_MASK REG_GENMASK(30, 30) #define HDCP2_STREAM_STATUS(dev_priv, trans, port) \ (TRANS_HDCP(dev_priv) ? \ TRANS_HDCP2_STREAM_STATUS(trans) : \ @@ -263,7 +263,7 @@ #define TRANS_HDCP2_AUTH_STREAM(trans) _MMIO_TRANS(trans, \ _TRANSA_HDCP2_AUTH_STREAM, \ _TRANSB_HDCP2_AUTH_STREAM) -#define AUTH_STREAM_TYPE REG_BIT(31) +#define AUTH_STREAM_TYPE_MASK REG_GENMASK(31, 31) #define HDCP2_AUTH_STREAM(dev_priv, trans, port) \ (TRANS_HDCP(dev_priv) ? \ TRANS_HDCP2_AUTH_STREAM(trans) : \ diff --git a/drivers/gpu/drm/i915/display/intel_hdmi.c b/drivers/gpu/drm/i915/display/intel_hdmi.c index 9961ff259298..4ab7e2e3bfd4 100644 --- a/drivers/gpu/drm/i915/display/intel_hdmi.c +++ b/drivers/gpu/drm/i915/display/intel_hdmi.c @@ -29,6 +29,7 @@ #include <linux/delay.h> #include <linux/hdmi.h> #include <linux/i2c.h> +#include <linux/iopoll.h> #include <linux/slab.h> #include <linux/string_helpers.h> @@ -60,6 +61,7 @@ #include "intel_hdcp_regs.h" #include "intel_hdcp_shim.h" #include "intel_hdmi.h" +#include "intel_link_bw.h" #include "intel_lspcon.h" #include "intel_panel.h" #include "intel_pfit.h" @@ -1582,9 +1584,9 @@ bool intel_hdmi_hdcp_check_link_once(struct intel_digital_port *dig_port, intel_de_write(display, HDCP_RPRIME(display, cpu_transcoder, port), ri.reg); /* Wait for Ri prime match */ - if (wait_for((intel_de_read(display, HDCP_STATUS(display, cpu_transcoder, port)) & - (HDCP_STATUS_RI_MATCH | HDCP_STATUS_ENC)) == - (HDCP_STATUS_RI_MATCH | HDCP_STATUS_ENC), 1)) { + ret = intel_de_wait_for_set(display, HDCP_STATUS(display, cpu_transcoder, port), + HDCP_STATUS_RI_MATCH | HDCP_STATUS_ENC, 1); + if (ret) { drm_dbg_kms(display->drm, "Ri' mismatch detected (%x)\n", intel_de_read(display, HDCP_STATUS(display, cpu_transcoder, port))); @@ -1689,11 +1691,10 @@ intel_hdmi_hdcp2_wait_for_msg(struct intel_digital_port *dig_port, if (timeout < 0) return timeout; - ret = __wait_for(ret = hdcp2_detect_msg_availability(dig_port, - msg_id, &msg_ready, - &msg_sz), - !ret && msg_ready && msg_sz, timeout * 1000, - 1000, 5 * 1000); + ret = poll_timeout_us(ret = hdcp2_detect_msg_availability(dig_port, msg_id, + &msg_ready, &msg_sz), + !ret && msg_ready && msg_sz, + 4000, timeout * 1000, false); if (ret) drm_dbg_kms(display->drm, "msg_id: %d, ret: %d, timeout: %d\n", @@ -2053,6 +2054,10 @@ intel_hdmi_mode_valid(struct drm_connector *_connector, else sink_format = INTEL_OUTPUT_FORMAT_RGB; + status = intel_pfit_mode_valid(display, mode, sink_format, 0); + if (status != MODE_OK) + return status; + status = intel_hdmi_mode_clock_valid(&connector->base, clock, has_hdmi_sink, sink_format); if (status != MODE_OK) { if (ycbcr_420_only || @@ -2341,6 +2346,9 @@ int intel_hdmi_compute_config(struct intel_encoder *encoder, if (adjusted_mode->flags & DRM_MODE_FLAG_DBLCLK) pipe_config->pixel_multiplier = 2; + if (!intel_link_bw_compute_pipe_bpp(pipe_config)) + return -EINVAL; + pipe_config->has_audio = intel_hdmi_has_audio(encoder, pipe_config, conn_state) && intel_audio_compute_config(encoder, pipe_config, conn_state); diff --git a/drivers/gpu/drm/i915/display/intel_hotplug.c b/drivers/gpu/drm/i915/display/intel_hotplug.c index 901fda434af1..4451a792600a 100644 --- a/drivers/gpu/drm/i915/display/intel_hotplug.c +++ b/drivers/gpu/drm/i915/display/intel_hotplug.c @@ -28,6 +28,7 @@ #include "i915_drv.h" #include "i915_irq.h" +#include "i915_utils.h" #include "intel_connector.h" #include "intel_display_power.h" #include 
"intel_display_core.h" @@ -193,40 +194,34 @@ static bool detection_work_enabled(struct intel_display *display) static bool mod_delayed_detection_work(struct intel_display *display, struct delayed_work *work, int delay) { - struct drm_i915_private *i915 = to_i915(display->drm); - lockdep_assert_held(&display->irq.lock); if (!detection_work_enabled(display)) return false; - return mod_delayed_work(i915->unordered_wq, work, delay); + return mod_delayed_work(display->wq.unordered, work, delay); } static bool queue_delayed_detection_work(struct intel_display *display, struct delayed_work *work, int delay) { - struct drm_i915_private *i915 = to_i915(display->drm); - lockdep_assert_held(&display->irq.lock); if (!detection_work_enabled(display)) return false; - return queue_delayed_work(i915->unordered_wq, work, delay); + return queue_delayed_work(display->wq.unordered, work, delay); } static bool queue_detection_work(struct intel_display *display, struct work_struct *work) { - struct drm_i915_private *i915 = to_i915(display->drm); - lockdep_assert_held(&display->irq.lock); if (!detection_work_enabled(display)) return false; - return queue_work(i915->unordered_wq, work); + return queue_work(display->wq.unordered, work); } static void @@ -977,8 +972,6 @@ void intel_hpd_cancel_work(struct intel_display *display) spin_lock_irq(&display->irq.lock); - drm_WARN_ON(display->drm, get_blocked_hpd_pin_mask(display)); - display->hotplug.long_hpd_pin_mask = 0; display->hotplug.short_hpd_pin_mask = 0; display->hotplug.event_bits = 0; @@ -1339,12 +1332,12 @@ static const struct file_operations i915_hpd_short_storm_ctl_fops = { void intel_hpd_debugfs_register(struct intel_display *display) { - struct drm_minor *minor = display->drm->primary; + struct dentry *debugfs_root = display->drm->debugfs_root; - debugfs_create_file("i915_hpd_storm_ctl", 0644, minor->debugfs_root, + debugfs_create_file("i915_hpd_storm_ctl", 0644, debugfs_root, display, &i915_hpd_storm_ctl_fops); - debugfs_create_file("i915_hpd_short_storm_ctl", 0644, minor->debugfs_root, + debugfs_create_file("i915_hpd_short_storm_ctl", 0644, debugfs_root, display, &i915_hpd_short_storm_ctl_fops); - debugfs_create_bool("i915_ignore_long_hpd", 0644, minor->debugfs_root, + debugfs_create_bool("i915_ignore_long_hpd", 0644, debugfs_root, &display->hotplug.ignore_long_hpd); } diff --git a/drivers/gpu/drm/i915/display/intel_hotplug_irq.c b/drivers/gpu/drm/i915/display/intel_hotplug_irq.c index 43aee70597bf..4f72f3fb9af5 100644 --- a/drivers/gpu/drm/i915/display/intel_hotplug_irq.c +++ b/drivers/gpu/drm/i915/display/intel_hotplug_irq.c @@ -1025,7 +1025,7 @@ static void mtp_tc_hpd_enable_detection(struct intel_encoder *encoder) { struct intel_display *display = to_intel_display(encoder); - intel_de_rmw(display, SHOTPLUG_CTL_DDI, + intel_de_rmw(display, SHOTPLUG_CTL_TC, mtp_tc_hotplug_mask(encoder->hpd_pin), mtp_tc_hotplug_enables(encoder)); } diff --git a/drivers/gpu/drm/i915/display/intel_link_bw.c b/drivers/gpu/drm/i915/display/intel_link_bw.c index 3caef7f9c7c4..f52dee0ea412 100644 --- a/drivers/gpu/drm/i915/display/intel_link_bw.c +++ b/drivers/gpu/drm/i915/display/intel_link_bw.c @@ -165,6 +165,34 @@ int intel_link_bw_reduce_bpp(struct intel_atomic_state *state, } /** + * intel_link_bw_compute_pipe_bpp - compute pipe bpp limited by max link bpp + * @crtc_state: the crtc state + * + * Compute the pipe bpp limited by the CRTC's maximum link bpp. 
Encoders can + * call this function during state computation in the simple case where the + * link bpp will always match the pipe bpp. This is the case for all non-DP + * encoders, while DP encoders will use a link bpp lower than pipe bpp in case + * of DSC compression. + * + * Returns %true in case of success, %false if pipe bpp would need to be + * reduced below its valid range. + */ +bool intel_link_bw_compute_pipe_bpp(struct intel_crtc_state *crtc_state) +{ + int pipe_bpp = min(crtc_state->pipe_bpp, + fxp_q4_to_int(crtc_state->max_link_bpp_x16)); + + pipe_bpp = rounddown(pipe_bpp, 2 * 3); + + if (pipe_bpp < 6 * 3) + return false; + + crtc_state->pipe_bpp = pipe_bpp; + + return true; +} + +/** * intel_link_bw_set_bpp_limit_for_pipe - set link bpp limit for a pipe to its minimum * @state: atomic state * @old_limits: link BW limits @@ -449,6 +477,7 @@ void intel_link_bw_connector_debugfs_add(struct intel_connector *connector) switch (connector->base.connector_type) { case DRM_MODE_CONNECTOR_DisplayPort: case DRM_MODE_CONNECTOR_eDP: + case DRM_MODE_CONNECTOR_HDMIA: break; case DRM_MODE_CONNECTOR_VGA: case DRM_MODE_CONNECTOR_SVIDEO: @@ -458,11 +487,6 @@ void intel_link_bw_connector_debugfs_add(struct intel_connector *connector) break; return; - case DRM_MODE_CONNECTOR_HDMIA: - if (HAS_FDI(display) && !HAS_DDI(display)) - break; - - return; default: return; } diff --git a/drivers/gpu/drm/i915/display/intel_link_bw.h b/drivers/gpu/drm/i915/display/intel_link_bw.h index b499042e62b1..95ab7c50c61d 100644 --- a/drivers/gpu/drm/i915/display/intel_link_bw.h +++ b/drivers/gpu/drm/i915/display/intel_link_bw.h @@ -27,6 +27,7 @@ int intel_link_bw_reduce_bpp(struct intel_atomic_state *state, struct intel_link_bw_limits *limits, u8 pipe_mask, const char *reason); +bool intel_link_bw_compute_pipe_bpp(struct intel_crtc_state *crtc_state); bool intel_link_bw_set_bpp_limit_for_pipe(struct intel_atomic_state *state, const struct intel_link_bw_limits *old_limits, struct intel_link_bw_limits *new_limits, diff --git a/drivers/gpu/drm/i915/display/intel_lpe_audio.c b/drivers/gpu/drm/i915/display/intel_lpe_audio.c index 666148a14522..42284e9928f2 100644 --- a/drivers/gpu/drm/i915/display/intel_lpe_audio.c +++ b/drivers/gpu/drm/i915/display/intel_lpe_audio.c @@ -68,9 +68,9 @@ #include <linux/platform_device.h> #include <linux/pm_runtime.h> +#include <drm/drm_print.h> #include <drm/intel/intel_lpe_audio.h> -#include "i915_drv.h" #include "i915_irq.h" #include "intel_audio_regs.h" #include "intel_de.h" @@ -170,14 +170,11 @@ static struct irq_chip lpe_audio_irqchip = { static int lpe_audio_irq_init(struct intel_display *display) { - struct drm_i915_private *dev_priv = to_i915(display->drm); int irq = display->audio.lpe.irq; - drm_WARN_ON(display->drm, !intel_irqs_enabled(dev_priv)); - irq_set_chip_and_handler_name(irq, - &lpe_audio_irqchip, - handle_simple_irq, - "hdmi_lpe_audio_irq_handler"); + irq_set_chip_and_handler_name(irq, &lpe_audio_irqchip, + handle_simple_irq, + "hdmi_lpe_audio_irq_handler"); return 0; } diff --git a/drivers/gpu/drm/i915/display/intel_lspcon.c b/drivers/gpu/drm/i915/display/intel_lspcon.c index abc4b562083d..d56026c4efdd 100644 --- a/drivers/gpu/drm/i915/display/intel_lspcon.c +++ b/drivers/gpu/drm/i915/display/intel_lspcon.c @@ -23,6 +23,8 @@ * */ +#include <linux/iopoll.h> + #include <drm/display/drm_dp_dual_mode_helper.h> #include <drm/display/drm_hdmi_helper.h> #include <drm/drm_atomic_helper.h> @@ -181,6 +183,8 @@ static enum drm_lspcon_mode lspcon_wait_mode(struct intel_lspcon 
*lspcon, struct intel_dp *intel_dp = lspcon_to_intel_dp(lspcon); struct intel_display *display = to_intel_display(intel_dp); enum drm_lspcon_mode current_mode; + int timeout_us; + int ret; current_mode = lspcon_get_current_mode(lspcon); if (current_mode == mode) @@ -189,9 +193,12 @@ static enum drm_lspcon_mode lspcon_wait_mode(struct intel_lspcon *lspcon, drm_dbg_kms(display->drm, "Waiting for LSPCON mode %s to settle\n", lspcon_mode_name(mode)); - wait_for((current_mode = lspcon_get_current_mode(lspcon)) == mode, - lspcon_get_mode_settle_timeout(lspcon)); - if (current_mode != mode) + timeout_us = lspcon_get_mode_settle_timeout(lspcon) * 1000; + + ret = poll_timeout_us(current_mode = lspcon_get_current_mode(lspcon), + current_mode == mode, + 5000, timeout_us, false); + if (ret) drm_err(display->drm, "LSPCON mode hasn't settled\n"); out: diff --git a/drivers/gpu/drm/i915/display/intel_lvds.c b/drivers/gpu/drm/i915/display/intel_lvds.c index 7e48a235c99f..48f4d8ed4f15 100644 --- a/drivers/gpu/drm/i915/display/intel_lvds.c +++ b/drivers/gpu/drm/i915/display/intel_lvds.c @@ -48,6 +48,7 @@ #include "intel_dpll.h" #include "intel_fdi.h" #include "intel_gmbus.h" +#include "intel_link_bw.h" #include "intel_lvds.h" #include "intel_lvds_regs.h" #include "intel_panel.h" @@ -433,7 +434,7 @@ static int intel_lvds_compute_config(struct intel_encoder *encoder, if (HAS_PCH_SPLIT(display)) { crtc_state->has_pch_encoder = true; - if (!intel_fdi_compute_pipe_bpp(crtc_state)) + if (!intel_link_bw_compute_pipe_bpp(crtc_state)) return -EINVAL; } diff --git a/drivers/gpu/drm/i915/display/intel_modeset_setup.c b/drivers/gpu/drm/i915/display/intel_modeset_setup.c index 05e1b309ba2c..8415f3d703ed 100644 --- a/drivers/gpu/drm/i915/display/intel_modeset_setup.c +++ b/drivers/gpu/drm/i915/display/intel_modeset_setup.c @@ -961,7 +961,7 @@ void intel_modeset_setup_hw_state(struct intel_display *display, drm_crtc_vblank_reset(&crtc->base); if (crtc_state->hw.active) { - intel_dmc_enable_pipe(display, crtc->pipe); + intel_dmc_enable_pipe(crtc_state); intel_crtc_vblank_on(crtc_state); } } diff --git a/drivers/gpu/drm/i915/display/intel_opregion.c b/drivers/gpu/drm/i915/display/intel_opregion.c index 5535cb799431..cbc220310813 100644 --- a/drivers/gpu/drm/i915/display/intel_opregion.c +++ b/drivers/gpu/drm/i915/display/intel_opregion.c @@ -28,11 +28,13 @@ #include <linux/acpi.h> #include <linux/debugfs.h> #include <linux/dmi.h> +#include <linux/iopoll.h> #include <acpi/video.h> #include <drm/drm_edid.h> +#include <drm/drm_file.h> +#include <drm/drm_print.h> -#include "i915_drv.h" #include "intel_acpi.h" #include "intel_backlight.h" #include "intel_display_core.h" @@ -355,10 +357,12 @@ static int swsci(struct intel_display *display, pci_write_config_word(pdev, SWSCI, swsci_val); /* Poll for the result. 
*/ -#define C (((scic = swsci->scic) & SWSCI_SCIC_INDICATOR) == 0) - if (wait_for(C, dslp)) { + ret = poll_timeout_us(scic = swsci->scic, + (scic & SWSCI_SCIC_INDICATOR) == 0, + 1000, dslp * 1000, false); + if (ret) { drm_dbg(display->drm, "SWSCI request timed out\n"); - return -ETIMEDOUT; + return ret; } scic = (scic & SWSCI_SCIC_EXIT_STATUS_MASK) >> @@ -665,11 +669,10 @@ bool intel_opregion_asle_present(struct intel_display *display) void intel_opregion_asle_intr(struct intel_display *display) { - struct drm_i915_private *i915 = to_i915(display->drm); struct intel_opregion *opregion = display->opregion; if (opregion && opregion->asle) - queue_work(i915->unordered_wq, &opregion->asle_work); + queue_work(display->wq.unordered, &opregion->asle_work); } #define ACPI_EV_DISPLAY_SWITCH (1<<0) @@ -1298,8 +1301,6 @@ DEFINE_SHOW_ATTRIBUTE(intel_opregion); void intel_opregion_debugfs_register(struct intel_display *display) { - struct drm_minor *minor = display->drm->primary; - - debugfs_create_file("i915_opregion", 0444, minor->debugfs_root, + debugfs_create_file("i915_opregion", 0444, display->drm->debugfs_root, display, &intel_opregion_fops); } diff --git a/drivers/gpu/drm/i915/display/intel_overlay.c b/drivers/gpu/drm/i915/display/intel_overlay.c index 159a5f998ea0..272f9e7af4d4 100644 --- a/drivers/gpu/drm/i915/display/intel_overlay.c +++ b/drivers/gpu/drm/i915/display/intel_overlay.c @@ -217,10 +217,9 @@ static void i830_overlay_clock_gating(struct intel_display *display, /* WA_OVERLAY_CLKGATE:alm */ if (enable) - intel_de_write(display, DSPCLK_GATE_D(display), 0); + intel_de_write(display, DSPCLK_GATE_D, 0); else - intel_de_write(display, DSPCLK_GATE_D(display), - OVRUNIT_CLOCK_GATE_DISABLE); + intel_de_write(display, DSPCLK_GATE_D, OVRUNIT_CLOCK_GATE_DISABLE); /* WA_DISABLE_L2CACHE_CLOCK_GATING:alm */ pci_bus_read_config_byte(pdev->bus, diff --git a/drivers/gpu/drm/i915/display/intel_panel.c b/drivers/gpu/drm/i915/display/intel_panel.c index f956919dc648..2a20aaaaac39 100644 --- a/drivers/gpu/drm/i915/display/intel_panel.c +++ b/drivers/gpu/drm/i915/display/intel_panel.c @@ -502,7 +502,7 @@ static void intel_panel_sync_state(struct intel_connector *connector) drm_modeset_unlock(&display->drm->mode_config.connection_mutex); } -const struct drm_panel_funcs dummy_panel_funcs = { +static const struct drm_panel_funcs dummy_panel_funcs = { }; int intel_panel_register(struct intel_connector *connector) @@ -515,7 +515,8 @@ int intel_panel_register(struct intel_connector *connector) if (ret) return ret; - if (connector->base.connector_type == DRM_MODE_CONNECTOR_DSI) { + if (connector->base.connector_type == DRM_MODE_CONNECTOR_DSI || + connector->base.connector_type == DRM_MODE_CONNECTOR_eDP) { struct device *dev = connector->base.kdev; struct drm_panel *base; diff --git a/drivers/gpu/drm/i915/display/intel_panic.c b/drivers/gpu/drm/i915/display/intel_panic.c new file mode 100644 index 000000000000..7311ce4e8b6c --- /dev/null +++ b/drivers/gpu/drm/i915/display/intel_panic.c @@ -0,0 +1,27 @@ +// SPDX-License-Identifier: MIT +/* Copyright © 2025 Intel Corporation */ + +#include <drm/drm_panic.h> + +#include "gem/i915_gem_object.h" +#include "intel_display_types.h" +#include "intel_fb.h" +#include "intel_panic.h" + +struct intel_panic *intel_panic_alloc(void) +{ + return i915_gem_object_alloc_panic(); +} + +int intel_panic_setup(struct intel_panic *panic, struct drm_scanout_buffer *sb) +{ + struct intel_framebuffer *fb = sb->private; + struct drm_gem_object *obj = intel_fb_bo(&fb->base); + + return 
i915_gem_object_panic_setup(panic, sb, obj, fb->panic_tiling); +} + +void intel_panic_finish(struct intel_panic *panic) +{ + return i915_gem_object_panic_finish(panic); +} diff --git a/drivers/gpu/drm/i915/display/intel_panic.h b/drivers/gpu/drm/i915/display/intel_panic.h new file mode 100644 index 000000000000..afb472e924aa --- /dev/null +++ b/drivers/gpu/drm/i915/display/intel_panic.h @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: MIT */ +/* Copyright © 2025 Intel Corporation */ + +#ifndef __INTEL_PANIC_H__ +#define __INTEL_PANIC_H__ + +struct drm_scanout_buffer; +struct intel_panic; + +struct intel_panic *intel_panic_alloc(void); +int intel_panic_setup(struct intel_panic *panic, struct drm_scanout_buffer *sb); +void intel_panic_finish(struct intel_panic *panic); + +#endif /* __INTEL_PANIC_H__ */ diff --git a/drivers/gpu/drm/i915/display/intel_pch.h b/drivers/gpu/drm/i915/display/intel_pch.h index cf4dab1b98bf..19cac7412d0a 100644 --- a/drivers/gpu/drm/i915/display/intel_pch.h +++ b/drivers/gpu/drm/i915/display/intel_pch.h @@ -6,8 +6,6 @@ #ifndef __INTEL_PCH__ #define __INTEL_PCH__ -#include "intel_display_conversion.h" - struct intel_display; /* @@ -36,7 +34,7 @@ enum intel_pch { PCH_LNL, }; -#define INTEL_PCH_TYPE(_display) (__to_intel_display(_display)->pch_type) +#define INTEL_PCH_TYPE(_display) ((_display)->pch_type) #define HAS_PCH_DG2(display) (INTEL_PCH_TYPE(display) == PCH_DG2) #define HAS_PCH_ADP(display) (INTEL_PCH_TYPE(display) == PCH_ADP) #define HAS_PCH_DG1(display) (INTEL_PCH_TYPE(display) == PCH_DG1) diff --git a/drivers/gpu/drm/i915/display/intel_pch_refclk.c b/drivers/gpu/drm/i915/display/intel_pch_refclk.c index d3c5255bf1a8..9ae53679a041 100644 --- a/drivers/gpu/drm/i915/display/intel_pch_refclk.c +++ b/drivers/gpu/drm/i915/display/intel_pch_refclk.c @@ -17,16 +17,22 @@ static void lpt_fdi_reset_mphy(struct intel_display *display) { + int ret; + intel_de_rmw(display, SOUTH_CHICKEN2, 0, FDI_MPHY_IOSFSB_RESET_CTL); - if (wait_for_us(intel_de_read(display, SOUTH_CHICKEN2) & - FDI_MPHY_IOSFSB_RESET_STATUS, 100)) + ret = intel_de_wait_custom(display, SOUTH_CHICKEN2, + FDI_MPHY_IOSFSB_RESET_STATUS, FDI_MPHY_IOSFSB_RESET_STATUS, + 100, 0, NULL); + if (ret) drm_err(display->drm, "FDI mPHY reset assert timeout\n"); intel_de_rmw(display, SOUTH_CHICKEN2, FDI_MPHY_IOSFSB_RESET_CTL, 0); - if (wait_for_us((intel_de_read(display, SOUTH_CHICKEN2) & - FDI_MPHY_IOSFSB_RESET_STATUS) == 0, 100)) + ret = intel_de_wait_custom(display, SOUTH_CHICKEN2, + FDI_MPHY_IOSFSB_RESET_STATUS, 0, + 100, 0, NULL); + if (ret) drm_err(display->drm, "FDI mPHY reset de-assert timeout\n"); } diff --git a/drivers/gpu/drm/i915/display/intel_pfit.c b/drivers/gpu/drm/i915/display/intel_pfit.c index 13541be4d6df..68539e7c2a24 100644 --- a/drivers/gpu/drm/i915/display/intel_pfit.c +++ b/drivers/gpu/drm/i915/display/intel_pfit.c @@ -14,6 +14,7 @@ #include "intel_lvds_regs.h" #include "intel_pfit.h" #include "intel_pfit_regs.h" +#include "skl_scaler.h" static int intel_pch_pfit_check_dst_window(const struct intel_crtc_state *crtc_state) { @@ -546,6 +547,16 @@ out: return intel_gmch_pfit_check_timings(crtc_state); } +enum drm_mode_status +intel_pfit_mode_valid(struct intel_display *display, + const struct drm_display_mode *mode, + enum intel_output_format output_format, + int num_joined_pipes) +{ + return skl_scaler_mode_valid(display, mode, output_format, + num_joined_pipes); +} + int intel_pfit_compute_config(struct intel_crtc_state *crtc_state, const struct drm_connector_state *conn_state) { diff --git 
a/drivers/gpu/drm/i915/display/intel_pfit.h b/drivers/gpu/drm/i915/display/intel_pfit.h index ef34f9b49d09..c1bb0d1f344e 100644 --- a/drivers/gpu/drm/i915/display/intel_pfit.h +++ b/drivers/gpu/drm/i915/display/intel_pfit.h @@ -6,8 +6,12 @@ #ifndef __INTEL_PFIT_H__ #define __INTEL_PFIT_H__ +enum drm_mode_status; +struct drm_display_mode; struct drm_connector_state; struct intel_crtc_state; +struct intel_display; +enum intel_output_format; int intel_pfit_compute_config(struct intel_crtc_state *crtc_state, const struct drm_connector_state *conn_state); @@ -17,5 +21,9 @@ void ilk_pfit_get_config(struct intel_crtc_state *crtc_state); void i9xx_pfit_enable(const struct intel_crtc_state *crtc_state); void i9xx_pfit_disable(const struct intel_crtc_state *old_crtc_state); void i9xx_pfit_get_config(struct intel_crtc_state *crtc_state); - +enum drm_mode_status +intel_pfit_mode_valid(struct intel_display *display, + const struct drm_display_mode *mode, + enum intel_output_format output_format, + int num_joined_pipes); #endif /* __INTEL_PFIT_H__ */ diff --git a/drivers/gpu/drm/i915/display/intel_atomic_plane.c b/drivers/gpu/drm/i915/display/intel_plane.c index 15ede7678636..2329f09d413d 100644 --- a/drivers/gpu/drm/i915/display/intel_atomic_plane.c +++ b/drivers/gpu/drm/i915/display/intel_plane.c @@ -33,19 +33,20 @@ #include <linux/dma-fence-chain.h> #include <linux/dma-resv.h> +#include <linux/iosys-map.h> #include <drm/drm_atomic_helper.h> #include <drm/drm_blend.h> +#include <drm/drm_cache.h> #include <drm/drm_damage_helper.h> #include <drm/drm_fourcc.h> #include <drm/drm_gem.h> #include <drm/drm_gem_atomic_helper.h> +#include <drm/drm_panic.h> #include "gem/i915_gem_object.h" #include "i915_scheduler_types.h" -#include "i915_vma.h" #include "i9xx_plane_regs.h" -#include "intel_atomic_plane.h" #include "intel_cdclk.h" #include "intel_cursor.h" #include "intel_display_rps.h" @@ -53,6 +54,10 @@ #include "intel_display_types.h" #include "intel_fb.h" #include "intel_fb_pin.h" +#include "intel_fbdev.h" +#include "intel_panic.h" +#include "intel_plane.h" +#include "intel_psr.h" #include "skl_scaler.h" #include "skl_universal_plane.h" #include "skl_watermark.h" @@ -333,7 +338,7 @@ int intel_plane_calc_min_cdclk(struct intel_atomic_state *state, * display blinking due to constant cdclk changes. 
*/ if (new_crtc_state->min_cdclk[plane->id] <= - cdclk_state->min_cdclk[crtc->pipe]) + intel_cdclk_min_cdclk(cdclk_state, crtc->pipe)) return 0; drm_dbg_kms(display->drm, @@ -341,7 +346,7 @@ int intel_plane_calc_min_cdclk(struct intel_atomic_state *state, plane->base.base.id, plane->base.name, new_crtc_state->min_cdclk[plane->id], crtc->base.base.id, crtc->base.name, - cdclk_state->min_cdclk[crtc->pipe]); + intel_cdclk_min_cdclk(cdclk_state, crtc->pipe)); *need_cdclk_calc = true; return 0; @@ -734,8 +739,8 @@ intel_crtc_get_plane(struct intel_crtc *crtc, enum plane_id plane_id) return NULL; } -int intel_plane_atomic_check(struct intel_atomic_state *state, - struct intel_plane *plane) +static int plane_atomic_check(struct intel_atomic_state *state, + struct intel_plane *plane) { struct intel_display *display = to_intel_display(state); struct intel_plane_state *new_plane_state = @@ -983,10 +988,10 @@ void intel_crtc_planes_update_arm(struct intel_dsb *dsb, i9xx_crtc_planes_update_arm(dsb, state, crtc); } -int intel_atomic_plane_check_clipping(struct intel_plane_state *plane_state, - struct intel_crtc_state *crtc_state, - int min_scale, int max_scale, - bool can_position) +int intel_plane_check_clipping(struct intel_plane_state *plane_state, + struct intel_crtc_state *crtc_state, + int min_scale, int max_scale, + bool can_position) { struct intel_display *display = to_intel_display(plane_state); struct intel_plane *plane = to_intel_plane(plane_state->uapi.plane); @@ -1085,7 +1090,8 @@ int intel_plane_check_src_coordinates(struct intel_plane_state *plane_state) /* Wa_16023981245 */ if ((DISPLAY_VERx100(display) == 2000 || - DISPLAY_VERx100(display) == 3000) && + DISPLAY_VERx100(display) == 3000 || + DISPLAY_VERx100(display) == 3002) && src_x % 2 != 0) hsub = 2; } else { @@ -1266,14 +1272,176 @@ intel_cleanup_plane_fb(struct drm_plane *plane, intel_plane_unpin_fb(old_plane_state); } +/* Handle Y-tiling, only if DPT is enabled (otherwise disabling tiling is easier) + * All DPT hardware have 128-bytes width tiling, so Y-tile dimension is 32x32 + * pixels for 32bits pixels. 
+ */ +#define YTILE_WIDTH 32 +#define YTILE_HEIGHT 32 +#define YTILE_SIZE (YTILE_WIDTH * YTILE_HEIGHT * 4) + +static unsigned int intel_ytile_get_offset(unsigned int width, unsigned int x, unsigned int y) +{ + u32 offset; + unsigned int swizzle; + unsigned int width_in_blocks = DIV_ROUND_UP(width, 32); + + /* Block offset */ + offset = ((y / YTILE_HEIGHT) * width_in_blocks + (x / YTILE_WIDTH)) * YTILE_SIZE; + + x = x % YTILE_WIDTH; + y = y % YTILE_HEIGHT; + + /* bit order inside a block is x4 x3 x2 y4 y3 y2 y1 y0 x1 x0 */ + swizzle = (x & 3) | ((y & 0x1f) << 2) | ((x & 0x1c) << 5); + offset += swizzle * 4; + return offset; +} + +static unsigned int intel_4tile_get_offset(unsigned int width, unsigned int x, unsigned int y) +{ + u32 offset; + unsigned int swizzle; + unsigned int width_in_blocks = DIV_ROUND_UP(width, 32); + + /* Block offset */ + offset = ((y / YTILE_HEIGHT) * width_in_blocks + (x / YTILE_WIDTH)) * YTILE_SIZE; + + x = x % YTILE_WIDTH; + y = y % YTILE_HEIGHT; + + /* bit order inside a block is y4 y3 x4 y2 x3 x2 y1 y0 x1 x0 */ + swizzle = (x & 3) | ((y & 3) << 2) | ((x & 0xc) << 2) | (y & 4) << 4 | + ((x & 0x10) << 3) | ((y & 0x18) << 5); + offset += swizzle * 4; + return offset; +} + +static void intel_panic_flush(struct drm_plane *plane) +{ + struct intel_plane_state *plane_state = to_intel_plane_state(plane->state); + struct intel_crtc_state *crtc_state = to_intel_crtc_state(plane->state->crtc->state); + struct intel_plane *iplane = to_intel_plane(plane); + struct intel_display *display = to_intel_display(iplane); + struct drm_framebuffer *fb = plane_state->hw.fb; + struct intel_framebuffer *intel_fb = to_intel_framebuffer(fb); + + intel_panic_finish(intel_fb->panic); + + if (crtc_state->enable_psr2_sel_fetch) { + /* Force a full update for psr2 */ + intel_psr2_panic_force_full_update(display, crtc_state); + } + + /* Flush the cache and don't disable tiling if it's the fbdev framebuffer.*/ + if (intel_fb == intel_fbdev_framebuffer(display->fbdev.fbdev)) { + struct iosys_map map; + + intel_fbdev_get_map(display->fbdev.fbdev, &map); + drm_clflush_virt_range(map.vaddr, fb->pitches[0] * fb->height); + return; + } + + if (fb->modifier && iplane->disable_tiling) + iplane->disable_tiling(iplane); +} + +static unsigned int (*intel_get_tiling_func(u64 fb_modifier))(unsigned int width, + unsigned int x, + unsigned int y) +{ + switch (fb_modifier) { + case I915_FORMAT_MOD_Y_TILED: + case I915_FORMAT_MOD_Y_TILED_CCS: + case I915_FORMAT_MOD_Y_TILED_GEN12_RC_CCS_CC: + case I915_FORMAT_MOD_Y_TILED_GEN12_RC_CCS: + case I915_FORMAT_MOD_Y_TILED_GEN12_MC_CCS: + return intel_ytile_get_offset; + case I915_FORMAT_MOD_4_TILED: + case I915_FORMAT_MOD_4_TILED_DG2_RC_CCS: + case I915_FORMAT_MOD_4_TILED_DG2_MC_CCS: + case I915_FORMAT_MOD_4_TILED_DG2_RC_CCS_CC: + case I915_FORMAT_MOD_4_TILED_MTL_RC_CCS: + case I915_FORMAT_MOD_4_TILED_MTL_RC_CCS_CC: + case I915_FORMAT_MOD_4_TILED_MTL_MC_CCS: + case I915_FORMAT_MOD_4_TILED_BMG_CCS: + case I915_FORMAT_MOD_4_TILED_LNL_CCS: + return intel_4tile_get_offset; + case I915_FORMAT_MOD_X_TILED: + case I915_FORMAT_MOD_Yf_TILED: + case I915_FORMAT_MOD_Yf_TILED_CCS: + default: + /* Not supported yet */ + return NULL; + } +} + +static int intel_get_scanout_buffer(struct drm_plane *plane, + struct drm_scanout_buffer *sb) +{ + struct intel_plane_state *plane_state; + struct drm_gem_object *obj; + struct drm_framebuffer *fb; + struct intel_framebuffer *intel_fb; + struct intel_display *display = to_intel_display(plane->dev); + + if (!plane->state || !plane->state->fb || 
!plane->state->visible) + return -ENODEV; + + plane_state = to_intel_plane_state(plane->state); + fb = plane_state->hw.fb; + intel_fb = to_intel_framebuffer(fb); + + obj = intel_fb_bo(fb); + if (!obj) + return -ENODEV; + + if (intel_fb == intel_fbdev_framebuffer(display->fbdev.fbdev)) { + intel_fbdev_get_map(display->fbdev.fbdev, &sb->map[0]); + } else { + int ret; + /* Can't disable tiling if DPT is in use */ + if (intel_fb_uses_dpt(fb)) { + if (fb->format->cpp[0] != 4) + return -EOPNOTSUPP; + intel_fb->panic_tiling = intel_get_tiling_func(fb->modifier); + if (!intel_fb->panic_tiling) + return -EOPNOTSUPP; + } + sb->private = intel_fb; + ret = intel_panic_setup(intel_fb->panic, sb); + if (ret) + return ret; + } + sb->width = fb->width; + sb->height = fb->height; + /* Use the generic linear format, because tiling, RC, CCS, CC + * will be disabled in disable_tiling() + */ + sb->format = drm_format_info(fb->format->format); + sb->pitch[0] = fb->pitches[0]; + + return 0; +} + static const struct drm_plane_helper_funcs intel_plane_helper_funcs = { .prepare_fb = intel_prepare_plane_fb, .cleanup_fb = intel_cleanup_plane_fb, }; +static const struct drm_plane_helper_funcs intel_primary_plane_helper_funcs = { + .prepare_fb = intel_prepare_plane_fb, + .cleanup_fb = intel_cleanup_plane_fb, + .get_scanout_buffer = intel_get_scanout_buffer, + .panic_flush = intel_panic_flush, +}; + void intel_plane_helper_add(struct intel_plane *plane) { - drm_plane_helper_add(&plane->base, &intel_plane_helper_funcs); + if (plane->base.type == DRM_PLANE_TYPE_PRIMARY) + drm_plane_helper_add(&plane->base, &intel_primary_plane_helper_funcs); + else + drm_plane_helper_add(&plane->base, &intel_plane_helper_funcs); } void intel_plane_init_cursor_vblank_work(struct intel_plane_state *old_plane_state, @@ -1433,8 +1601,8 @@ static int intel_crtc_add_planes_to_state(struct intel_atomic_state *state, return 0; } -int intel_atomic_add_affected_planes(struct intel_atomic_state *state, - struct intel_crtc *crtc) +int intel_plane_add_affected(struct intel_atomic_state *state, + struct intel_crtc *crtc) { const struct intel_crtc_state *old_crtc_state = intel_atomic_get_old_crtc_state(state, crtc); @@ -1528,7 +1696,7 @@ static int intel_add_affected_planes(struct intel_atomic_state *state) return 0; } -int intel_atomic_check_planes(struct intel_atomic_state *state) +int intel_plane_atomic_check(struct intel_atomic_state *state) { struct intel_display *display = to_intel_display(state); struct intel_crtc_state *old_crtc_state, *new_crtc_state; @@ -1542,7 +1710,7 @@ int intel_atomic_check_planes(struct intel_atomic_state *state) return ret; for_each_new_intel_plane_in_state(state, plane, plane_state, i) { - ret = intel_plane_atomic_check(state, plane); + ret = plane_atomic_check(state, plane); if (ret) { drm_dbg_atomic(display->drm, "[PLANE:%d:%s] atomic driver check failed\n", @@ -1580,8 +1748,3 @@ int intel_atomic_check_planes(struct intel_atomic_state *state) return 0; } - -u32 intel_plane_ggtt_offset(const struct intel_plane_state *plane_state) -{ - return i915_ggtt_offset(plane_state->ggtt_vma); -} diff --git a/drivers/gpu/drm/i915/display/intel_atomic_plane.h b/drivers/gpu/drm/i915/display/intel_plane.h index 317320c32285..8af41ccc0a69 100644 --- a/drivers/gpu/drm/i915/display/intel_atomic_plane.h +++ b/drivers/gpu/drm/i915/display/intel_plane.h @@ -3,8 +3,8 @@ * Copyright © 2019 Intel Corporation */ -#ifndef __INTEL_ATOMIC_PLANE_H__ -#define __INTEL_ATOMIC_PLANE_H__ +#ifndef __INTEL_PLANE_H__ +#define __INTEL_PLANE_H__ #include 
<linux/types.h> @@ -69,15 +69,13 @@ int intel_plane_atomic_check_with_state(const struct intel_crtc_state *old_crtc_ struct intel_crtc_state *crtc_state, const struct intel_plane_state *old_plane_state, struct intel_plane_state *intel_state); -int intel_plane_atomic_check(struct intel_atomic_state *state, - struct intel_plane *plane); int intel_plane_calc_min_cdclk(struct intel_atomic_state *state, struct intel_plane *plane, bool *need_cdclk_calc); -int intel_atomic_plane_check_clipping(struct intel_plane_state *plane_state, - struct intel_crtc_state *crtc_state, - int min_scale, int max_scale, - bool can_position); +int intel_plane_check_clipping(struct intel_plane_state *plane_state, + struct intel_crtc_state *crtc_state, + int min_scale, int max_scale, + bool can_position); int intel_plane_check_src_coordinates(struct intel_plane_state *plane_state); void intel_plane_set_invisible(struct intel_crtc_state *crtc_state, struct intel_plane_state *plane_state); @@ -85,13 +83,12 @@ void intel_plane_helper_add(struct intel_plane *plane); bool intel_plane_needs_physical(struct intel_plane *plane); void intel_plane_init_cursor_vblank_work(struct intel_plane_state *old_plane_state, struct intel_plane_state *new_plane_state); -int intel_atomic_add_affected_planes(struct intel_atomic_state *state, - struct intel_crtc *crtc); -int intel_atomic_check_planes(struct intel_atomic_state *state); +int intel_plane_add_affected(struct intel_atomic_state *state, + struct intel_crtc *crtc); +int intel_plane_atomic_check(struct intel_atomic_state *state); -u32 intel_plane_ggtt_offset(const struct intel_plane_state *plane_state); bool intel_plane_format_mod_supported_async(struct drm_plane *plane, u32 format, u64 modifier); -#endif /* __INTEL_ATOMIC_PLANE_H__ */ +#endif /* __INTEL_PLANE_H__ */ diff --git a/drivers/gpu/drm/i915/display/intel_plane_initial.c b/drivers/gpu/drm/i915/display/intel_plane_initial.c index 8800a657cd21..a9f36b1b50c1 100644 --- a/drivers/gpu/drm/i915/display/intel_plane_initial.c +++ b/drivers/gpu/drm/i915/display/intel_plane_initial.c @@ -6,13 +6,13 @@ #include "gem/i915_gem_lmem.h" #include "gem/i915_gem_region.h" #include "i915_drv.h" -#include "intel_atomic_plane.h" #include "intel_crtc.h" #include "intel_display.h" #include "intel_display_core.h" #include "intel_display_types.h" #include "intel_fb.h" #include "intel_frontbuffer.h" +#include "intel_plane.h" #include "intel_plane_initial.h" void intel_plane_initial_vblank_wait(struct intel_crtc *crtc) @@ -289,7 +289,8 @@ intel_alloc_initial_plane_obj(struct intel_crtc *crtc, mode_cmd.flags = DRM_MODE_FB_MODIFIERS; if (intel_framebuffer_init(to_intel_framebuffer(fb), - intel_bo_to_drm_bo(vma->obj), &mode_cmd)) { + intel_bo_to_drm_bo(vma->obj), + fb->format, &mode_cmd)) { drm_dbg_kms(display->drm, "intel fb init failed\n"); goto err_vma; } @@ -359,6 +360,8 @@ valid_fb: i915_vma_pin_fence(vma) == 0 && vma->fence) plane_state->flags |= PLANE_HAS_FENCE; + plane_state->surf = i915_ggtt_offset(plane_state->ggtt_vma); + plane_state->uapi.src_x = 0; plane_state->uapi.src_y = 0; plane_state->uapi.src_w = fb->width << 16; diff --git a/drivers/gpu/drm/i915/display/intel_pmdemand.c b/drivers/gpu/drm/i915/display/intel_pmdemand.c index 93d5ee36fff1..d806c15db7ce 100644 --- a/drivers/gpu/drm/i915/display/intel_pmdemand.c +++ b/drivers/gpu/drm/i915/display/intel_pmdemand.c @@ -294,40 +294,17 @@ intel_pmdemand_connector_needs_update(struct intel_atomic_state *state) static bool intel_pmdemand_needs_update(struct intel_atomic_state *state) { - struct 
intel_display *display = to_intel_display(state); - const struct intel_bw_state *new_bw_state, *old_bw_state; - const struct intel_cdclk_state *new_cdclk_state, *old_cdclk_state; const struct intel_crtc_state *new_crtc_state, *old_crtc_state; - const struct intel_dbuf_state *new_dbuf_state, *old_dbuf_state; struct intel_crtc *crtc; int i; - new_bw_state = intel_atomic_get_new_bw_state(state); - old_bw_state = intel_atomic_get_old_bw_state(state); - if (new_bw_state && new_bw_state->qgv_point_peakbw != - old_bw_state->qgv_point_peakbw) + if (intel_bw_pmdemand_needs_update(state)) return true; - new_dbuf_state = intel_atomic_get_new_dbuf_state(state); - old_dbuf_state = intel_atomic_get_old_dbuf_state(state); - if (new_dbuf_state && - new_dbuf_state->active_pipes != old_dbuf_state->active_pipes) + if (intel_dbuf_pmdemand_needs_update(state)) return true; - if (DISPLAY_VER(display) < 30) { - if (new_dbuf_state && - new_dbuf_state->enabled_slices != - old_dbuf_state->enabled_slices) - return true; - } - - new_cdclk_state = intel_atomic_get_new_cdclk_state(state); - old_cdclk_state = intel_atomic_get_old_cdclk_state(state); - if (new_cdclk_state && - (new_cdclk_state->actual.cdclk != - old_cdclk_state->actual.cdclk || - new_cdclk_state->actual.voltage_level != - old_cdclk_state->actual.voltage_level)) + if (intel_cdclk_pmdemand_needs_update(state)) return true; for_each_oldnew_intel_crtc_in_state(state, crtc, old_crtc_state, @@ -362,7 +339,7 @@ int intel_pmdemand_atomic_check(struct intel_atomic_state *state) /* firmware will calculate the qclk_gv_index, requirement is set to 0 */ new_pmdemand_state->params.qclk_gv_index = 0; - new_pmdemand_state->params.qclk_gv_bw = new_bw_state->qgv_point_peakbw; + new_pmdemand_state->params.qclk_gv_bw = intel_bw_qgv_point_peakbw(new_bw_state); new_dbuf_state = intel_atomic_get_dbuf_state(state); if (IS_ERR(new_dbuf_state)) @@ -370,12 +347,12 @@ int intel_pmdemand_atomic_check(struct intel_atomic_state *state) if (DISPLAY_VER(display) < 30) { new_pmdemand_state->params.active_dbufs = - min_t(u8, hweight8(new_dbuf_state->enabled_slices), 3); + min_t(u8, intel_dbuf_num_enabled_slices(new_dbuf_state), 3); new_pmdemand_state->params.active_pipes = - min_t(u8, hweight8(new_dbuf_state->active_pipes), 3); + min_t(u8, intel_dbuf_num_active_pipes(new_dbuf_state), 3); } else { new_pmdemand_state->params.active_pipes = - min_t(u8, hweight8(new_dbuf_state->active_pipes), INTEL_NUM_PIPES(display)); + min_t(u8, intel_dbuf_num_active_pipes(new_dbuf_state), INTEL_NUM_PIPES(display)); } new_cdclk_state = intel_atomic_get_cdclk_state(state); @@ -383,9 +360,9 @@ int intel_pmdemand_atomic_check(struct intel_atomic_state *state) return PTR_ERR(new_cdclk_state); new_pmdemand_state->params.voltage_index = - new_cdclk_state->actual.voltage_level; + intel_cdclk_actual_voltage_level(new_cdclk_state); new_pmdemand_state->params.cdclk_freq_mhz = - DIV_ROUND_UP(new_cdclk_state->actual.cdclk, 1000); + DIV_ROUND_UP(intel_cdclk_actual(new_cdclk_state), 1000); intel_pmdemand_update_max_ddiclk(display, state, new_pmdemand_state); diff --git a/drivers/gpu/drm/i915/display/intel_pps.c b/drivers/gpu/drm/i915/display/intel_pps.c index bff81fb5c316..327e0de86f1e 100644 --- a/drivers/gpu/drm/i915/display/intel_pps.c +++ b/drivers/gpu/drm/i915/display/intel_pps.c @@ -4,10 +4,13 @@ */ #include <linux/debugfs.h> +#include <linux/iopoll.h> + +#include <drm/drm_print.h> #include "g4x_dp.h" -#include "i915_drv.h" #include "i915_reg.h" +#include "i915_utils.h" #include "intel_de.h" #include 
"intel_display_power_well.h" #include "intel_display_regs.h" @@ -606,6 +609,8 @@ static void wait_panel_status(struct intel_dp *intel_dp, struct intel_display *display = to_intel_display(intel_dp); struct intel_digital_port *dig_port = dp_to_dig_port(intel_dp); i915_reg_t pp_stat_reg, pp_ctrl_reg; + int ret; + u32 val; lockdep_assert_held(&display->pps.mutex); @@ -622,13 +627,18 @@ static void wait_panel_status(struct intel_dp *intel_dp, intel_de_read(display, pp_stat_reg), intel_de_read(display, pp_ctrl_reg)); - if (intel_de_wait(display, pp_stat_reg, mask, value, 5000)) + ret = poll_timeout_us(val = intel_de_read(display, pp_stat_reg), + (val & mask) == value, + 10 * 1000, 5000 * 1000, true); + if (ret) { drm_err(display->drm, "[ENCODER:%d:%s] %s panel status timeout: PP_STATUS: 0x%08x PP_CONTROL: 0x%08x\n", dig_port->base.base.base.id, dig_port->base.base.name, pps_name(intel_dp), intel_de_read(display, pp_stat_reg), intel_de_read(display, pp_ctrl_reg)); + return; + } drm_dbg_kms(display->drm, "Wait complete\n"); } @@ -892,7 +902,6 @@ static void edp_panel_vdd_work(struct work_struct *__work) static void edp_panel_vdd_schedule_off(struct intel_dp *intel_dp) { struct intel_display *display = to_intel_display(intel_dp); - struct drm_i915_private *i915 = to_i915(display->drm); unsigned long delay; /* @@ -908,7 +917,7 @@ static void edp_panel_vdd_schedule_off(struct intel_dp *intel_dp) * operations. */ delay = msecs_to_jiffies(intel_dp->pps.panel_power_cycle_delay * 5); - queue_delayed_work(i915->unordered_wq, + queue_delayed_work(display->wq.unordered, &intel_dp->pps.panel_vdd_work, delay); } diff --git a/drivers/gpu/drm/i915/display/intel_psr.c b/drivers/gpu/drm/i915/display/intel_psr.c index 8bee2f592ae7..10eb93a34cf2 100644 --- a/drivers/gpu/drm/i915/display/intel_psr.c +++ b/drivers/gpu/drm/i915/display/intel_psr.c @@ -28,7 +28,6 @@ #include <drm/drm_debugfs.h> #include <drm/drm_vblank.h> -#include "i915_drv.h" #include "i915_reg.h" #include "intel_alpm.h" #include "intel_atomic.h" @@ -43,11 +42,13 @@ #include "intel_dmc.h" #include "intel_dp.h" #include "intel_dp_aux.h" +#include "intel_dsb.h" #include "intel_frontbuffer.h" #include "intel_hdmi.h" #include "intel_psr.h" #include "intel_psr_regs.h" #include "intel_snps_phy.h" +#include "intel_step.h" #include "intel_vblank.h" #include "intel_vrr.h" #include "skl_universal_plane.h" @@ -233,16 +234,12 @@ bool intel_psr_needs_aux_io_power(struct intel_encoder *encoder, static bool psr_global_enabled(struct intel_dp *intel_dp) { - struct intel_display *display = to_intel_display(intel_dp); struct intel_connector *connector = intel_dp->attached_connector; switch (intel_dp->psr.debug & I915_PSR_DEBUG_MODE_MASK) { case I915_PSR_DEBUG_DEFAULT: - if (display->params.enable_psr == -1) - return intel_dp_is_edp(intel_dp) ? - connector->panel.vbt.psr.enable : - true; - return display->params.enable_psr; + return intel_dp_is_edp(intel_dp) ? 
+ connector->panel.vbt.psr.enable : true; case I915_PSR_DEBUG_DISABLE: return false; default: @@ -250,39 +247,23 @@ static bool psr_global_enabled(struct intel_dp *intel_dp) } } -static bool psr2_global_enabled(struct intel_dp *intel_dp) +static bool sel_update_global_enabled(struct intel_dp *intel_dp) { - struct intel_display *display = to_intel_display(intel_dp); - switch (intel_dp->psr.debug & I915_PSR_DEBUG_MODE_MASK) { case I915_PSR_DEBUG_DISABLE: case I915_PSR_DEBUG_FORCE_PSR1: return false; default: - if (display->params.enable_psr == 1) - return false; return true; } } -static bool psr2_su_region_et_global_enabled(struct intel_dp *intel_dp) -{ - struct intel_display *display = to_intel_display(intel_dp); - - if (display->params.enable_psr != -1) - return false; - - return true; -} - static bool panel_replay_global_enabled(struct intel_dp *intel_dp) { struct intel_display *display = to_intel_display(intel_dp); - if ((display->params.enable_psr != -1) || - (intel_dp->psr.debug & I915_PSR_DEBUG_PANEL_REPLAY_DISABLE)) - return false; - return true; + return !(intel_dp->psr.debug & I915_PSR_DEBUG_PANEL_REPLAY_DISABLE) && + display->params.enable_panel_replay; } static u32 psr_irq_psr_error_bit_get(struct intel_dp *intel_dp) @@ -448,7 +429,6 @@ static void psr_event_print(struct intel_display *display, void intel_psr_irq_handler(struct intel_dp *intel_dp, u32 psr_iir) { struct intel_display *display = to_intel_display(intel_dp); - struct drm_i915_private *dev_priv = to_i915(display->drm); enum transcoder cpu_transcoder = intel_dp->psr.transcoder; ktime_t time_ns = ktime_get(); @@ -493,7 +473,7 @@ void intel_psr_irq_handler(struct intel_dp *intel_dp, u32 psr_iir) intel_de_rmw(display, psr_imr_reg(display, cpu_transcoder), 0, psr_irq_psr_error_bit_get(intel_dp)); - queue_work(dev_priv->unordered_wq, &intel_dp->psr.work); + queue_work(display->wq.unordered, &intel_dp->psr.work); } } @@ -515,12 +495,14 @@ static u8 intel_dp_get_su_capability(struct intel_dp *intel_dp) { u8 su_capability = 0; - if (intel_dp->psr.sink_panel_replay_su_support) - drm_dp_dpcd_readb(&intel_dp->aux, - DP_PANEL_REPLAY_CAP_CAPABILITY, - &su_capability); - else + if (intel_dp->psr.sink_panel_replay_su_support) { + if (drm_dp_dpcd_read_byte(&intel_dp->aux, + DP_PANEL_REPLAY_CAP_CAPABILITY, + &su_capability) < 0) + return 0; + } else { su_capability = intel_dp->psr_dpcd[1]; + } return su_capability; } @@ -601,6 +583,16 @@ exit: static void _panel_replay_init_dpcd(struct intel_dp *intel_dp) { struct intel_display *display = to_intel_display(intel_dp); + int ret; + + ret = drm_dp_dpcd_read_data(&intel_dp->aux, DP_PANEL_REPLAY_CAP_SUPPORT, + &intel_dp->pr_dpcd, sizeof(intel_dp->pr_dpcd)); + if (ret < 0) + return; + + if (!(intel_dp->pr_dpcd[INTEL_PR_DPCD_INDEX(DP_PANEL_REPLAY_CAP_SUPPORT)] & + DP_PANEL_REPLAY_SUPPORT)) + return; if (intel_dp_is_edp(intel_dp)) { if (!intel_alpm_aux_less_wake_supported(intel_dp)) { @@ -632,6 +624,15 @@ static void _panel_replay_init_dpcd(struct intel_dp *intel_dp) static void _psr_init_dpcd(struct intel_dp *intel_dp) { struct intel_display *display = to_intel_display(intel_dp); + int ret; + + ret = drm_dp_dpcd_read_data(&intel_dp->aux, DP_PSR_SUPPORT, intel_dp->psr_dpcd, + sizeof(intel_dp->psr_dpcd)); + if (ret < 0) + return; + + if (!intel_dp->psr_dpcd[0]) + return; drm_dbg_kms(display->drm, "eDP panel supports PSR version %x\n", intel_dp->psr_dpcd[0]); @@ -677,18 +678,9 @@ static void _psr_init_dpcd(struct intel_dp *intel_dp) void intel_psr_init_dpcd(struct intel_dp *intel_dp) { - 
drm_dp_dpcd_read(&intel_dp->aux, DP_PSR_SUPPORT, intel_dp->psr_dpcd, - sizeof(intel_dp->psr_dpcd)); - - drm_dp_dpcd_read(&intel_dp->aux, DP_PANEL_REPLAY_CAP_SUPPORT, - &intel_dp->pr_dpcd, sizeof(intel_dp->pr_dpcd)); + _psr_init_dpcd(intel_dp); - if (intel_dp->pr_dpcd[INTEL_PR_DPCD_INDEX(DP_PANEL_REPLAY_CAP_SUPPORT)] & - DP_PANEL_REPLAY_SUPPORT) - _panel_replay_init_dpcd(intel_dp); - - if (intel_dp->psr_dpcd[0]) - _psr_init_dpcd(intel_dp); + _panel_replay_init_dpcd(intel_dp); if (intel_dp->psr.sink_psr2_support || intel_dp->psr.sink_panel_replay_su_support) @@ -743,8 +735,7 @@ static bool psr2_su_region_et_valid(struct intel_dp *intel_dp, bool panel_replay return panel_replay ? intel_dp->pr_dpcd[INTEL_PR_DPCD_INDEX(DP_PANEL_REPLAY_CAP_SUPPORT)] & DP_PANEL_REPLAY_EARLY_TRANSPORT_SUPPORT : - intel_dp->psr_dpcd[0] == DP_PSR2_WITH_Y_COORD_ET_SUPPORTED && - psr2_su_region_et_global_enabled(intel_dp); + intel_dp->psr_dpcd[0] == DP_PSR2_WITH_Y_COORD_ET_SUPPORTED; } static void _panel_replay_enable_sink(struct intel_dp *intel_dp, @@ -937,7 +928,7 @@ static void hsw_activate_psr1(struct intel_dp *intel_dp) /* Wa_16025596647 */ if ((DISPLAY_VER(display) == 20 || IS_DISPLAY_VERx100_STEP(display, 3000, STEP_A0, STEP_B0)) && - is_dc5_dc6_blocked(intel_dp)) + is_dc5_dc6_blocked(intel_dp) && intel_dp->psr.pkg_c_latency_used) intel_dmc_start_pkgc_exit_at_start_of_undelayed_vblank(display, intel_dp->psr.pipe, true); @@ -1027,7 +1018,7 @@ static void hsw_activate_psr2(struct intel_dp *intel_dp) /* Wa_16025596647 */ if ((DISPLAY_VER(display) == 20 || IS_DISPLAY_VERx100_STEP(display, 3000, STEP_A0, STEP_B0)) && - is_dc5_dc6_blocked(intel_dp)) + is_dc5_dc6_blocked(intel_dp) && intel_dp->psr.pkg_c_latency_used) idle_frames = 0; else idle_frames = psr_compute_idle_frames(intel_dp); @@ -1424,7 +1415,7 @@ static bool intel_psr2_config_valid(struct intel_dp *intel_dp, int crtc_vdisplay = crtc_state->hw.adjusted_mode.crtc_vdisplay; int psr_max_h = 0, psr_max_v = 0, max_bpp = 0; - if (!intel_dp->psr.sink_psr2_support) + if (!intel_dp->psr.sink_psr2_support || display->params.enable_psr == 1) return false; /* JSL and EHL only supports eDP 1.3 */ @@ -1529,7 +1520,7 @@ static bool intel_sel_update_config_valid(struct intel_dp *intel_dp, goto unsupported; } - if (!psr2_global_enabled(intel_dp)) { + if (!sel_update_global_enabled(intel_dp)) { drm_dbg_kms(display->drm, "Selective update disabled by flag\n"); goto unsupported; @@ -1577,7 +1568,7 @@ static bool _psr_compute_config(struct intel_dp *intel_dp, const struct drm_display_mode *adjusted_mode = &crtc_state->hw.adjusted_mode; int entry_setup_frames; - if (!CAN_PSR(intel_dp)) + if (!CAN_PSR(intel_dp) || !display->params.enable_psr) return false; /* @@ -1809,6 +1800,8 @@ static void intel_psr_activate(struct intel_dp *intel_dp) drm_WARN_ON(display->drm, intel_dp->psr.active); + drm_WARN_ON(display->drm, !intel_dp->psr.enabled); + lockdep_assert_held(&intel_dp->psr.lock); /* psr1, psr2 and panel-replay are mutually exclusive.*/ @@ -2028,6 +2021,7 @@ static void intel_psr_enable_locked(struct intel_dp *intel_dp, intel_dp->psr.req_psr2_sdp_prior_scanline = crtc_state->req_psr2_sdp_prior_scanline; intel_dp->psr.active_non_psr_pipes = crtc_state->active_non_psr_pipes; + intel_dp->psr.pkg_c_latency_used = crtc_state->pkg_c_latency_used; if (!psr_interrupt_error_check(intel_dp)) return; @@ -2104,8 +2098,9 @@ static void intel_psr_exit(struct intel_dp *intel_dp) drm_WARN_ON(display->drm, !(val & EDP_PSR2_ENABLE)); } else { - if (DISPLAY_VER(display) == 20 || - 
IS_DISPLAY_VERx100_STEP(display, 3000, STEP_A0, STEP_B0)) + if ((DISPLAY_VER(display) == 20 || + IS_DISPLAY_VERx100_STEP(display, 3000, STEP_A0, STEP_B0)) && + intel_dp->psr.pkg_c_latency_used) intel_dmc_start_pkgc_exit_at_start_of_undelayed_vblank(display, intel_dp->psr.pipe, false); @@ -2208,6 +2203,7 @@ static void intel_psr_disable_locked(struct intel_dp *intel_dp) intel_dp->psr.su_region_et_enabled = false; intel_dp->psr.psr2_sel_fetch_cff_enabled = false; intel_dp->psr.active_non_psr_pipes = 0; + intel_dp->psr.pkg_c_latency_used = 0; } /** @@ -2889,6 +2885,26 @@ skip_sel_fetch_set_loop: return 0; } +void intel_psr2_panic_force_full_update(struct intel_display *display, + struct intel_crtc_state *crtc_state) +{ + struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); + enum transcoder cpu_transcoder = crtc_state->cpu_transcoder; + u32 val = man_trk_ctl_enable_bit_get(display); + + /* SF partial frame enable has to be set even on full update */ + val |= man_trk_ctl_partial_frame_bit_get(display); + val |= man_trk_ctl_continuos_full_frame(display); + + /* Directly write the register */ + intel_de_write_fw(display, PSR2_MAN_TRK_CTL(display, cpu_transcoder), val); + + if (!crtc_state->enable_psr2_su_region_et) + return; + + intel_de_write_fw(display, PIPE_SRCSZ_ERLY_TPT(crtc->pipe), 0); +} + void intel_psr_pre_plane_update(struct intel_atomic_state *state, struct intel_crtc *crtc) { @@ -2984,35 +3000,57 @@ void intel_psr_post_plane_update(struct intel_atomic_state *state, } } -static int _psr2_ready_for_pipe_update_locked(struct intel_dp *intel_dp) +/* + * From bspec: Panel Self Refresh (BDW+) + * Max. time for PSR to idle = Inverse of the refresh rate + 6 ms of + * exit training time + 1.5 ms of aux channel handshake. 50 ms is + * defensive enough to cover everything. + */ +#define PSR_IDLE_TIMEOUT_MS 50 + +static int +_psr2_ready_for_pipe_update_locked(const struct intel_crtc_state *new_crtc_state, + struct intel_dsb *dsb) { - struct intel_display *display = to_intel_display(intel_dp); - enum transcoder cpu_transcoder = intel_dp->psr.transcoder; + struct intel_display *display = to_intel_display(new_crtc_state); + enum transcoder cpu_transcoder = new_crtc_state->cpu_transcoder; /* * Any state lower than EDP_PSR2_STATUS_STATE_DEEP_SLEEP is enough. * As all higher states has bit 4 of PSR2 state set we can just wait for * EDP_PSR2_STATUS_STATE_DEEP_SLEEP to be cleared. */ + if (dsb) { + intel_dsb_poll(dsb, EDP_PSR2_STATUS(display, cpu_transcoder), + EDP_PSR2_STATUS_STATE_DEEP_SLEEP, 0, 200, + PSR_IDLE_TIMEOUT_MS * 1000 / 200); + return true; + } + return intel_de_wait_for_clear(display, EDP_PSR2_STATUS(display, cpu_transcoder), - EDP_PSR2_STATUS_STATE_DEEP_SLEEP, 50); + EDP_PSR2_STATUS_STATE_DEEP_SLEEP, + PSR_IDLE_TIMEOUT_MS); } -static int _psr1_ready_for_pipe_update_locked(struct intel_dp *intel_dp) +static int +_psr1_ready_for_pipe_update_locked(const struct intel_crtc_state *new_crtc_state, + struct intel_dsb *dsb) { - struct intel_display *display = to_intel_display(intel_dp); - enum transcoder cpu_transcoder = intel_dp->psr.transcoder; + struct intel_display *display = to_intel_display(new_crtc_state); + enum transcoder cpu_transcoder = new_crtc_state->cpu_transcoder; + + if (dsb) { + intel_dsb_poll(dsb, psr_status_reg(display, cpu_transcoder), + EDP_PSR_STATUS_STATE_MASK, 0, 200, + PSR_IDLE_TIMEOUT_MS * 1000 / 200); + return true; + } - /* - * From bspec: Panel Self Refresh (BDW+) - * Max. 
time for PSR to idle = Inverse of the refresh rate + 6 ms of - * exit training time + 1.5 ms of aux channel handshake. 50 ms is - * defensive enough to cover everything. - */ return intel_de_wait_for_clear(display, psr_status_reg(display, cpu_transcoder), - EDP_PSR_STATUS_STATE_MASK, 50); + EDP_PSR_STATUS_STATE_MASK, + PSR_IDLE_TIMEOUT_MS); } /** @@ -3041,9 +3079,11 @@ void intel_psr_wait_for_idle_locked(const struct intel_crtc_state *new_crtc_stat continue; if (intel_dp->psr.sel_update_enabled) - ret = _psr2_ready_for_pipe_update_locked(intel_dp); + ret = _psr2_ready_for_pipe_update_locked(new_crtc_state, + NULL); else - ret = _psr1_ready_for_pipe_update_locked(intel_dp); + ret = _psr1_ready_for_pipe_update_locked(new_crtc_state, + NULL); if (ret) drm_err(display->drm, @@ -3051,6 +3091,18 @@ void intel_psr_wait_for_idle_locked(const struct intel_crtc_state *new_crtc_stat } } +void intel_psr_wait_for_idle_dsb(struct intel_dsb *dsb, + const struct intel_crtc_state *new_crtc_state) +{ + if (!new_crtc_state->has_psr || new_crtc_state->has_panel_replay) + return; + + if (new_crtc_state->has_sel_update) + _psr2_ready_for_pipe_update_locked(new_crtc_state, dsb); + else + _psr1_ready_for_pipe_update_locked(new_crtc_state, dsb); +} + static bool __psr_wait_for_idle_locked(struct intel_dp *intel_dp) { struct intel_display *display = to_intel_display(intel_dp); @@ -3080,7 +3132,7 @@ static bool __psr_wait_for_idle_locked(struct intel_dp *intel_dp) /* After the unlocked wait, verify that PSR is still wanted! */ mutex_lock(&intel_dp->psr.lock); - return err == 0 && intel_dp->psr.enabled; + return err == 0 && intel_dp->psr.enabled && !intel_dp->psr.pause_counter; } static int intel_psr_fastset_force(struct intel_display *display) @@ -3209,8 +3261,13 @@ static void intel_psr_work(struct work_struct *work) if (!intel_dp->psr.enabled) goto unlock; - if (READ_ONCE(intel_dp->psr.irq_aux_error)) + if (READ_ONCE(intel_dp->psr.irq_aux_error)) { intel_psr_handle_irq(intel_dp); + goto unlock; + } + + if (intel_dp->psr.pause_counter) + goto unlock; /* * We have to make sure PSR is ready for re-enable @@ -3256,7 +3313,9 @@ static void intel_psr_configure_full_frame_update(struct intel_dp *intel_dp) static void _psr_invalidate_handle(struct intel_dp *intel_dp) { - if (intel_dp->psr.psr2_sel_fetch_enabled) { + struct intel_display *display = to_intel_display(intel_dp); + + if (DISPLAY_VER(display) < 20 && intel_dp->psr.psr2_sel_fetch_enabled) { if (!intel_dp->psr.psr2_sel_fetch_cff_enabled) { intel_dp->psr.psr2_sel_fetch_cff_enabled = true; intel_psr_configure_full_frame_update(intel_dp); @@ -3320,7 +3379,6 @@ tgl_dc3co_flush_locked(struct intel_dp *intel_dp, unsigned int frontbuffer_bits, enum fb_op_origin origin) { struct intel_display *display = to_intel_display(intel_dp); - struct drm_i915_private *i915 = to_i915(display->drm); if (!intel_dp->psr.dc3co_exitline || !intel_dp->psr.sel_update_enabled || !intel_dp->psr.active) @@ -3335,16 +3393,16 @@ tgl_dc3co_flush_locked(struct intel_dp *intel_dp, unsigned int frontbuffer_bits, return; tgl_psr2_enable_dc3co(intel_dp); - mod_delayed_work(i915->unordered_wq, &intel_dp->psr.dc3co_work, + mod_delayed_work(display->wq.unordered, &intel_dp->psr.dc3co_work, intel_dp->psr.dc3co_exit_delay); } static void _psr_flush_handle(struct intel_dp *intel_dp) { struct intel_display *display = to_intel_display(intel_dp); - struct drm_i915_private *dev_priv = to_i915(display->drm); - if (intel_dp->psr.psr2_sel_fetch_enabled) { + if (DISPLAY_VER(display) < 20 && 
intel_dp->psr.psr2_sel_fetch_enabled) { + /* Selective fetch prior LNL */ if (intel_dp->psr.psr2_sel_fetch_cff_enabled) { /* can we turn CFF off? */ if (intel_dp->psr.busy_frontbuffer_bits == 0) @@ -3361,13 +3419,22 @@ static void _psr_flush_handle(struct intel_dp *intel_dp) * existing SU configuration */ intel_psr_configure_full_frame_update(intel_dp); - } - intel_psr_force_update(intel_dp); + intel_psr_force_update(intel_dp); + } else if (!intel_dp->psr.psr2_sel_fetch_enabled) { + /* + * PSR1 on all platforms + * PSR2 HW tracking + * Panel Replay Full frame update + */ + intel_psr_force_update(intel_dp); + } else { + /* Selective update LNL onwards */ + intel_psr_exit(intel_dp); + } - if (!intel_dp->psr.psr2_sel_fetch_enabled && !intel_dp->psr.active && - !intel_dp->psr.busy_frontbuffer_bits) - queue_work(dev_priv->unordered_wq, &intel_dp->psr.work); + if (!intel_dp->psr.active && !intel_dp->psr.busy_frontbuffer_bits) + queue_work(display->wq.unordered, &intel_dp->psr.work); } /** @@ -3702,7 +3769,7 @@ static void intel_psr_apply_underrun_on_idle_wa_locked(struct intel_dp *intel_dp struct intel_display *display = to_intel_display(intel_dp); bool dc5_dc6_blocked; - if (!intel_dp->psr.active) + if (!intel_dp->psr.active || !intel_dp->psr.pkg_c_latency_used) return; dc5_dc6_blocked = is_dc5_dc6_blocked(intel_dp); @@ -3727,7 +3794,8 @@ static void psr_dc5_dc6_wa_work(struct work_struct *work) mutex_lock(&intel_dp->psr.lock); - if (intel_dp->psr.enabled && !intel_dp->psr.panel_replay_enabled) + if (intel_dp->psr.enabled && !intel_dp->psr.panel_replay_enabled && + !intel_dp->psr.pkg_c_latency_used) intel_psr_apply_underrun_on_idle_wa_locked(intel_dp); mutex_unlock(&intel_dp->psr.lock); @@ -3805,7 +3873,8 @@ void intel_psr_notify_pipe_change(struct intel_atomic_state *state, goto unlock; if ((enable && intel_dp->psr.active_non_psr_pipes) || - (!enable && !intel_dp->psr.active_non_psr_pipes)) { + (!enable && !intel_dp->psr.active_non_psr_pipes) || + !intel_dp->psr.pkg_c_latency_used) { intel_dp->psr.active_non_psr_pipes = active_non_psr_pipes; goto unlock; } @@ -3840,7 +3909,7 @@ void intel_psr_notify_vblank_enable_disable(struct intel_display *display, break; } - if (intel_dp->psr.enabled) + if (intel_dp->psr.enabled && intel_dp->psr.pkg_c_latency_used) intel_psr_apply_underrun_on_idle_wa_locked(intel_dp); mutex_unlock(&intel_dp->psr.lock); @@ -4136,12 +4205,12 @@ DEFINE_SIMPLE_ATTRIBUTE(i915_edp_psr_debug_fops, void intel_psr_debugfs_register(struct intel_display *display) { - struct drm_minor *minor = display->drm->primary; + struct dentry *debugfs_root = display->drm->debugfs_root; - debugfs_create_file("i915_edp_psr_debug", 0644, minor->debugfs_root, + debugfs_create_file("i915_edp_psr_debug", 0644, debugfs_root, display, &i915_edp_psr_debug_fops); - debugfs_create_file("i915_edp_psr_status", 0444, minor->debugfs_root, + debugfs_create_file("i915_edp_psr_status", 0444, debugfs_root, display, &i915_edp_psr_status_fops); } diff --git a/drivers/gpu/drm/i915/display/intel_psr.h b/drivers/gpu/drm/i915/display/intel_psr.h index 0cf53184f13f..077751aa599f 100644 --- a/drivers/gpu/drm/i915/display/intel_psr.h +++ b/drivers/gpu/drm/i915/display/intel_psr.h @@ -52,11 +52,15 @@ void intel_psr_get_config(struct intel_encoder *encoder, void intel_psr_irq_handler(struct intel_dp *intel_dp, u32 psr_iir); void intel_psr_short_pulse(struct intel_dp *intel_dp); void intel_psr_wait_for_idle_locked(const struct intel_crtc_state *new_crtc_state); +void intel_psr_wait_for_idle_dsb(struct intel_dsb *dsb, + const 
struct intel_crtc_state *new_crtc_state); bool intel_psr_enabled(struct intel_dp *intel_dp); int intel_psr2_sel_fetch_update(struct intel_atomic_state *state, struct intel_crtc *crtc); void intel_psr2_program_trans_man_trk_ctl(struct intel_dsb *dsb, const struct intel_crtc_state *crtc_state); +void intel_psr2_panic_force_full_update(struct intel_display *display, + struct intel_crtc_state *crtc_state); void intel_psr_pause(struct intel_dp *intel_dp); void intel_psr_resume(struct intel_dp *intel_dp); bool intel_psr_needs_vblank_notification(const struct intel_crtc_state *crtc_state); diff --git a/drivers/gpu/drm/i915/display/intel_quirks.c b/drivers/gpu/drm/i915/display/intel_quirks.c index a32fae510ed2..d2e16b79d6be 100644 --- a/drivers/gpu/drm/i915/display/intel_quirks.c +++ b/drivers/gpu/drm/i915/display/intel_quirks.c @@ -80,6 +80,12 @@ static void quirk_fw_sync_len(struct intel_dp *intel_dp) drm_info(display->drm, "Applying Fast Wake sync pulse count quirk\n"); } +static void quirk_edp_limit_rate_hbr2(struct intel_display *display) +{ + intel_set_quirk(display, QUIRK_EDP_LIMIT_RATE_HBR2); + drm_info(display->drm, "Applying eDP Limit rate to HBR2 quirk\n"); +} + struct intel_quirk { int device; int subsystem_vendor; @@ -231,6 +237,9 @@ static struct intel_quirk intel_quirks[] = { { 0x3184, 0x1019, 0xa94d, quirk_increase_ddi_disabled_time }, /* HP Notebook - 14-r206nv */ { 0x0f31, 0x103c, 0x220f, quirk_invert_brightness }, + + /* Dell XPS 13 7390 2-in-1 */ + { 0x8a12, 0x1028, 0x08b0, quirk_edp_limit_rate_hbr2 }, }; static const struct intel_dpcd_quirk intel_dpcd_quirks[] = { diff --git a/drivers/gpu/drm/i915/display/intel_quirks.h b/drivers/gpu/drm/i915/display/intel_quirks.h index cafdebda7535..06da0e286c67 100644 --- a/drivers/gpu/drm/i915/display/intel_quirks.h +++ b/drivers/gpu/drm/i915/display/intel_quirks.h @@ -20,6 +20,7 @@ enum intel_quirk_id { QUIRK_LVDS_SSC_DISABLE, QUIRK_NO_PPS_BACKLIGHT_POWER_HOOK, QUIRK_FW_SYNC_LEN, + QUIRK_EDP_LIMIT_RATE_HBR2, }; void intel_init_quirks(struct intel_display *display); diff --git a/drivers/gpu/drm/i915/display/intel_sdvo.c b/drivers/gpu/drm/i915/display/intel_sdvo.c index 87aff2754f69..6c032d81e7ee 100644 --- a/drivers/gpu/drm/i915/display/intel_sdvo.c +++ b/drivers/gpu/drm/i915/display/intel_sdvo.c @@ -47,11 +47,11 @@ #include "intel_display_driver.h" #include "intel_display_regs.h" #include "intel_display_types.h" -#include "intel_fdi.h" #include "intel_fifo_underrun.h" #include "intel_gmbus.h" #include "intel_hdmi.h" #include "intel_hotplug.h" +#include "intel_link_bw.h" #include "intel_panel.h" #include "intel_sdvo.h" #include "intel_sdvo_regs.h" @@ -1367,7 +1367,7 @@ static int intel_sdvo_compute_config(struct intel_encoder *encoder, if (HAS_PCH_SPLIT(display)) { pipe_config->has_pch_encoder = true; - if (!intel_fdi_compute_pipe_bpp(pipe_config)) + if (!intel_link_bw_compute_pipe_bpp(pipe_config)) return -EINVAL; } @@ -2052,8 +2052,10 @@ static void intel_sdvo_enable_hotplug(struct intel_encoder *encoder) { struct intel_sdvo *intel_sdvo = to_sdvo(encoder); - intel_sdvo_write_cmd(intel_sdvo, SDVO_CMD_SET_ACTIVE_HOT_PLUG, - &intel_sdvo->hotplug_active, 2); + if (!intel_sdvo_write_cmd(intel_sdvo, SDVO_CMD_SET_ACTIVE_HOT_PLUG, + &intel_sdvo->hotplug_active, 2)) + drm_warn(intel_sdvo->base.base.dev, + "Failed to enable hotplug on SDVO encoder\n"); } static enum intel_hotplug_state diff --git a/drivers/gpu/drm/i915/display/intel_snps_hdmi_pll.c b/drivers/gpu/drm/i915/display/intel_snps_hdmi_pll.c index 74bb3bedf30f..7fe6b4a18213 100644 --- 
a/drivers/gpu/drm/i915/display/intel_snps_hdmi_pll.c +++ b/drivers/gpu/drm/i915/display/intel_snps_hdmi_pll.c @@ -103,10 +103,10 @@ static void get_ana_cp_int_prop(u64 vco_clk, DIV_ROUND_DOWN_ULL(curve_1_interpolated, CURVE0_MULTIPLIER))); ana_cp_int_temp = - DIV_ROUND_CLOSEST_ULL(DIV_ROUND_DOWN_ULL(adjusted_vco_clk1, curve_2_scaled1), - CURVE2_MULTIPLIER); + DIV64_U64_ROUND_CLOSEST(DIV_ROUND_DOWN_ULL(adjusted_vco_clk1, curve_2_scaled1), + CURVE2_MULTIPLIER); - *ana_cp_int = max(1, min(ana_cp_int_temp, 127)); + *ana_cp_int = clamp(ana_cp_int_temp, 1, 127); curve_2_scaled_int = curve_2_scaled1 * (*ana_cp_int); @@ -125,7 +125,7 @@ static void get_ana_cp_int_prop(u64 vco_clk, curve_1_interpolated); *ana_cp_prop = DIV64_U64_ROUND_UP(adjusted_vco_clk2, curve_2_scaled2); - *ana_cp_prop = max(1, min(*ana_cp_prop, 127)); + *ana_cp_prop = clamp(*ana_cp_prop, 1, 127); } static void compute_hdmi_tmds_pll(u64 pixel_clock, u32 refclk, diff --git a/drivers/gpu/drm/i915/display/intel_sprite.c b/drivers/gpu/drm/i915/display/intel_sprite.c index fd92e6b89b43..75bbaa923204 100644 --- a/drivers/gpu/drm/i915/display/intel_sprite.c +++ b/drivers/gpu/drm/i915/display/intel_sprite.c @@ -41,11 +41,11 @@ #include "i915_utils.h" #include "i9xx_plane.h" -#include "intel_atomic_plane.h" #include "intel_de.h" #include "intel_display_types.h" #include "intel_fb.h" #include "intel_frontbuffer.h" +#include "intel_plane.h" #include "intel_sprite.h" #include "intel_sprite_regs.h" @@ -264,8 +264,7 @@ static u32 vlv_sprite_ctl_crtc(const struct intel_crtc_state *crtc_state) return sprctl; } -static u32 vlv_sprite_ctl(const struct intel_crtc_state *crtc_state, - const struct intel_plane_state *plane_state) +static u32 vlv_sprite_ctl(const struct intel_plane_state *plane_state) { const struct drm_framebuffer *fb = plane_state->hw.fb; unsigned int rotation = plane_state->hw.rotation; @@ -395,15 +394,12 @@ vlv_sprite_update_arm(struct intel_dsb *dsb, enum pipe pipe = plane->pipe; enum plane_id plane_id = plane->id; const struct drm_intel_sprite_colorkey *key = &plane_state->ckey; - u32 sprsurf_offset = plane_state->view.color_plane[0].offset; u32 x = plane_state->view.color_plane[0].x; u32 y = plane_state->view.color_plane[0].y; - u32 sprctl, linear_offset; + u32 sprctl; sprctl = plane_state->ctl | vlv_sprite_ctl_crtc(crtc_state); - linear_offset = intel_fb_xy_to_linear(x, y, plane_state, 0); - if (display->platform.cherryview && pipe == PIPE_B) chv_sprite_update_csc(plane_state); @@ -418,7 +414,8 @@ vlv_sprite_update_arm(struct intel_dsb *dsb, intel_de_write_fw(display, SPCONSTALPHA(pipe, plane_id), 0); - intel_de_write_fw(display, SPLINOFF(pipe, plane_id), linear_offset); + intel_de_write_fw(display, SPLINOFF(pipe, plane_id), + intel_fb_xy_to_linear(x, y, plane_state, 0)); intel_de_write_fw(display, SPTILEOFF(pipe, plane_id), SP_OFFSET_Y(y) | SP_OFFSET_X(x)); @@ -428,8 +425,7 @@ vlv_sprite_update_arm(struct intel_dsb *dsb, * the control register just before the surface register. 
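 * (Note: the surface address written below is now taken from
 * plane_state->surf, which is precomputed earlier, presumably via the
 * new plane->surf_offset() hook and i915_ggtt_offset(), instead of
 * being derived here from intel_plane_ggtt_offset() plus the color
 * plane offset.)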
*/ intel_de_write_fw(display, SPCNTR(pipe, plane_id), sprctl); - intel_de_write_fw(display, SPSURF(pipe, plane_id), - intel_plane_ggtt_offset(plane_state) + sprsurf_offset); + intel_de_write_fw(display, SPSURF(pipe, plane_id), plane_state->surf); vlv_sprite_update_clrc(plane_state); vlv_sprite_update_gamma(plane_state); @@ -663,8 +659,7 @@ static bool ivb_need_sprite_gamma(const struct intel_plane_state *plane_state) (display->platform.ivybridge || display->platform.haswell); } -static u32 ivb_sprite_ctl(const struct intel_crtc_state *crtc_state, - const struct intel_plane_state *plane_state) +static u32 ivb_sprite_ctl(const struct intel_plane_state *plane_state) { struct intel_display *display = to_intel_display(plane_state); const struct drm_framebuffer *fb = plane_state->hw.fb; @@ -830,15 +825,12 @@ ivb_sprite_update_arm(struct intel_dsb *dsb, struct intel_display *display = to_intel_display(plane); enum pipe pipe = plane->pipe; const struct drm_intel_sprite_colorkey *key = &plane_state->ckey; - u32 sprsurf_offset = plane_state->view.color_plane[0].offset; u32 x = plane_state->view.color_plane[0].x; u32 y = plane_state->view.color_plane[0].y; - u32 sprctl, linear_offset; + u32 sprctl; sprctl = plane_state->ctl | ivb_sprite_ctl_crtc(crtc_state); - linear_offset = intel_fb_xy_to_linear(x, y, plane_state, 0); - if (key->flags) { intel_de_write_fw(display, SPRKEYVAL(pipe), key->min_value); intel_de_write_fw(display, SPRKEYMSK(pipe), @@ -852,7 +844,8 @@ ivb_sprite_update_arm(struct intel_dsb *dsb, intel_de_write_fw(display, SPROFFSET(pipe), SPRITE_OFFSET_Y(y) | SPRITE_OFFSET_X(x)); } else { - intel_de_write_fw(display, SPRLINOFF(pipe), linear_offset); + intel_de_write_fw(display, SPRLINOFF(pipe), + intel_fb_xy_to_linear(x, y, plane_state, 0)); intel_de_write_fw(display, SPRTILEOFF(pipe), SPRITE_OFFSET_Y(y) | SPRITE_OFFSET_X(x)); } @@ -863,8 +856,7 @@ ivb_sprite_update_arm(struct intel_dsb *dsb, * the control register just before the surface register. 
*/ intel_de_write_fw(display, SPRCTL(pipe), sprctl); - intel_de_write_fw(display, SPRSURF(pipe), - intel_plane_ggtt_offset(plane_state) + sprsurf_offset); + intel_de_write_fw(display, SPRSURF(pipe), plane_state->surf); ivb_sprite_update_gamma(plane_state); } @@ -1016,8 +1008,7 @@ static u32 g4x_sprite_ctl_crtc(const struct intel_crtc_state *crtc_state) return dvscntr; } -static u32 g4x_sprite_ctl(const struct intel_crtc_state *crtc_state, - const struct intel_plane_state *plane_state) +static u32 g4x_sprite_ctl(const struct intel_plane_state *plane_state) { struct intel_display *display = to_intel_display(plane_state); const struct drm_framebuffer *fb = plane_state->hw.fb; @@ -1181,15 +1172,12 @@ g4x_sprite_update_arm(struct intel_dsb *dsb, struct intel_display *display = to_intel_display(plane); enum pipe pipe = plane->pipe; const struct drm_intel_sprite_colorkey *key = &plane_state->ckey; - u32 dvssurf_offset = plane_state->view.color_plane[0].offset; u32 x = plane_state->view.color_plane[0].x; u32 y = plane_state->view.color_plane[0].y; - u32 dvscntr, linear_offset; + u32 dvscntr; dvscntr = plane_state->ctl | g4x_sprite_ctl_crtc(crtc_state); - linear_offset = intel_fb_xy_to_linear(x, y, plane_state, 0); - if (key->flags) { intel_de_write_fw(display, DVSKEYVAL(pipe), key->min_value); intel_de_write_fw(display, DVSKEYMSK(pipe), @@ -1197,7 +1185,8 @@ g4x_sprite_update_arm(struct intel_dsb *dsb, intel_de_write_fw(display, DVSKEYMAX(pipe), key->max_value); } - intel_de_write_fw(display, DVSLINOFF(pipe), linear_offset); + intel_de_write_fw(display, DVSLINOFF(pipe), + intel_fb_xy_to_linear(x, y, plane_state, 0)); intel_de_write_fw(display, DVSTILEOFF(pipe), DVS_OFFSET_Y(y) | DVS_OFFSET_X(x)); @@ -1207,8 +1196,7 @@ g4x_sprite_update_arm(struct intel_dsb *dsb, * the control register just before the surface register. 
*/ intel_de_write_fw(display, DVSCNTR(pipe), dvscntr); - intel_de_write_fw(display, DVSSURF(pipe), - intel_plane_ggtt_offset(plane_state) + dvssurf_offset); + intel_de_write_fw(display, DVSSURF(pipe), plane_state->surf); if (display->platform.g4x) g4x_sprite_update_gamma(plane_state); @@ -1366,8 +1354,8 @@ g4x_sprite_check(struct intel_crtc_state *crtc_state, } } - ret = intel_atomic_plane_check_clipping(plane_state, crtc_state, - min_scale, max_scale, true); + ret = intel_plane_check_clipping(plane_state, crtc_state, + min_scale, max_scale, true); if (ret) return ret; @@ -1387,9 +1375,9 @@ g4x_sprite_check(struct intel_crtc_state *crtc_state, return ret; if (DISPLAY_VER(display) >= 7) - plane_state->ctl = ivb_sprite_ctl(crtc_state, plane_state); + plane_state->ctl = ivb_sprite_ctl(plane_state); else - plane_state->ctl = g4x_sprite_ctl(crtc_state, plane_state); + plane_state->ctl = g4x_sprite_ctl(plane_state); return 0; } @@ -1421,10 +1409,10 @@ vlv_sprite_check(struct intel_crtc_state *crtc_state, if (ret) return ret; - ret = intel_atomic_plane_check_clipping(plane_state, crtc_state, - DRM_PLANE_NO_SCALING, - DRM_PLANE_NO_SCALING, - true); + ret = intel_plane_check_clipping(plane_state, crtc_state, + DRM_PLANE_NO_SCALING, + DRM_PLANE_NO_SCALING, + true); if (ret) return ret; @@ -1439,7 +1427,7 @@ vlv_sprite_check(struct intel_crtc_state *crtc_state, if (ret) return ret; - plane_state->ctl = vlv_sprite_ctl(crtc_state, plane_state); + plane_state->ctl = vlv_sprite_ctl(plane_state); return 0; } @@ -1624,6 +1612,7 @@ intel_sprite_plane_create(struct intel_display *display, plane->capture_error = vlv_sprite_capture_error; plane->get_hw_state = vlv_sprite_get_hw_state; plane->check_plane = vlv_sprite_check; + plane->surf_offset = i965_plane_surf_offset; plane->max_stride = i965_plane_max_stride; plane->min_alignment = vlv_plane_min_alignment; plane->min_cdclk = vlv_plane_min_cdclk; @@ -1648,6 +1637,7 @@ intel_sprite_plane_create(struct intel_display *display, plane->capture_error = ivb_sprite_capture_error; plane->get_hw_state = ivb_sprite_get_hw_state; plane->check_plane = g4x_sprite_check; + plane->surf_offset = i965_plane_surf_offset; if (display->platform.broadwell || display->platform.haswell) { plane->max_stride = hsw_sprite_max_stride; @@ -1673,6 +1663,7 @@ intel_sprite_plane_create(struct intel_display *display, plane->capture_error = g4x_sprite_capture_error; plane->get_hw_state = g4x_sprite_get_hw_state; plane->check_plane = g4x_sprite_check; + plane->surf_offset = i965_plane_surf_offset; plane->max_stride = g4x_sprite_max_stride; plane->min_alignment = g4x_sprite_min_alignment; plane->min_cdclk = g4x_sprite_min_cdclk; diff --git a/drivers/gpu/drm/i915/display/intel_tc.c b/drivers/gpu/drm/i915/display/intel_tc.c index 3bc57579fe53..c4a5601c5107 100644 --- a/drivers/gpu/drm/i915/display/intel_tc.c +++ b/drivers/gpu/drm/i915/display/intel_tc.c @@ -3,6 +3,8 @@ * Copyright © 2019 Intel Corporation */ +#include <linux/iopoll.h> + #include <drm/drm_print.h> #include "i915_reg.h" @@ -23,10 +25,6 @@ #include "intel_modeset_lock.h" #include "intel_tc.h" -#define DP_PIN_ASSIGNMENT_C 0x3 -#define DP_PIN_ASSIGNMENT_D 0x4 -#define DP_PIN_ASSIGNMENT_E 0x5 - enum tc_port_mode { TC_PORT_DISCONNECTED, TC_PORT_TBT_ALT, @@ -65,7 +63,9 @@ struct intel_tc_port { enum tc_port_mode mode; enum tc_port_mode init_mode; enum phy_fia phy_fia; + enum intel_tc_pin_assignment pin_assignment; u8 phy_fia_idx; + u8 max_lane_count; }; static enum intel_display_power_domain @@ -251,6 +251,9 @@ 
tc_port_power_domain(struct intel_tc_port *tc) { enum tc_port tc_port = intel_encoder_to_tc(&tc->dig_port->base); + if (tc_port == TC_PORT_NONE) + return POWER_DOMAIN_INVALID; + return POWER_DOMAIN_PORT_DDI_LANES_TC1 + tc_port - TC_PORT_1; } @@ -263,13 +266,14 @@ assert_tc_port_power_enabled(struct intel_tc_port *tc) !intel_display_power_is_enabled(display, tc_port_power_domain(tc))); } -static u32 intel_tc_port_get_lane_mask(struct intel_digital_port *dig_port) +static u32 get_lane_mask(struct intel_tc_port *tc) { - struct intel_display *display = to_intel_display(dig_port); - struct intel_tc_port *tc = to_tc_port(dig_port); + struct intel_display *display = to_intel_display(tc->dig_port); + intel_wakeref_t wakeref; u32 lane_mask; - lane_mask = intel_de_read(display, PORT_TX_DFLEXDPSP(tc->phy_fia)); + with_intel_display_power(display, POWER_DOMAIN_DISPLAY_CORE, wakeref) + lane_mask = intel_de_read(display, PORT_TX_DFLEXDPSP(tc->phy_fia)); drm_WARN_ON(display->drm, lane_mask == 0xffffffff); assert_tc_cold_blocked(tc); @@ -278,75 +282,87 @@ static u32 intel_tc_port_get_lane_mask(struct intel_digital_port *dig_port) return lane_mask >> DP_LANE_ASSIGNMENT_SHIFT(tc->phy_fia_idx); } -u32 intel_tc_port_get_pin_assignment_mask(struct intel_digital_port *dig_port) +static char pin_assignment_name(enum intel_tc_pin_assignment pin_assignment) { - struct intel_display *display = to_intel_display(dig_port); - struct intel_tc_port *tc = to_tc_port(dig_port); - u32 pin_mask; - - pin_mask = intel_de_read(display, PORT_TX_DFLEXPA1(tc->phy_fia)); - - drm_WARN_ON(display->drm, pin_mask == 0xffffffff); - assert_tc_cold_blocked(tc); + if (pin_assignment == INTEL_TC_PIN_ASSIGNMENT_NONE) + return '-'; - return (pin_mask & DP_PIN_ASSIGNMENT_MASK(tc->phy_fia_idx)) >> - DP_PIN_ASSIGNMENT_SHIFT(tc->phy_fia_idx); + return 'A' + pin_assignment - INTEL_TC_PIN_ASSIGNMENT_A; } -static int lnl_tc_port_get_max_lane_count(struct intel_digital_port *dig_port) +static enum intel_tc_pin_assignment +get_pin_assignment(struct intel_tc_port *tc) { - struct intel_display *display = to_intel_display(dig_port); - enum tc_port tc_port = intel_encoder_to_tc(&dig_port->base); + struct intel_display *display = to_intel_display(tc->dig_port); + enum tc_port tc_port = intel_encoder_to_tc(&tc->dig_port->base); + enum intel_tc_pin_assignment pin_assignment; intel_wakeref_t wakeref; - u32 val, pin_assignment; + i915_reg_t reg; + u32 mask; + u32 val; + + if (tc->mode == TC_PORT_TBT_ALT) + return INTEL_TC_PIN_ASSIGNMENT_NONE; + + if (DISPLAY_VER(display) >= 20) { + reg = TCSS_DDI_STATUS(tc_port); + mask = TCSS_DDI_STATUS_PIN_ASSIGNMENT_MASK; + } else { + reg = PORT_TX_DFLEXPA1(tc->phy_fia); + mask = DP_PIN_ASSIGNMENT_MASK(tc->phy_fia_idx); + } with_intel_display_power(display, POWER_DOMAIN_DISPLAY_CORE, wakeref) - val = intel_de_read(display, TCSS_DDI_STATUS(tc_port)); + val = intel_de_read(display, reg); - pin_assignment = - REG_FIELD_GET(TCSS_DDI_STATUS_PIN_ASSIGNMENT_MASK, val); + drm_WARN_ON(display->drm, val == 0xffffffff); + assert_tc_cold_blocked(tc); + + pin_assignment = (val & mask) >> (ffs(mask) - 1); switch (pin_assignment) { + case INTEL_TC_PIN_ASSIGNMENT_A: + case INTEL_TC_PIN_ASSIGNMENT_B: + case INTEL_TC_PIN_ASSIGNMENT_F: + drm_WARN_ON(display->drm, DISPLAY_VER(display) > 11); + break; + case INTEL_TC_PIN_ASSIGNMENT_NONE: + case INTEL_TC_PIN_ASSIGNMENT_C: + case INTEL_TC_PIN_ASSIGNMENT_D: + case INTEL_TC_PIN_ASSIGNMENT_E: + break; default: MISSING_CASE(pin_assignment); - fallthrough; - case DP_PIN_ASSIGNMENT_D: - return 2; - 
case DP_PIN_ASSIGNMENT_C: - case DP_PIN_ASSIGNMENT_E: - return 4; } + + return pin_assignment; } -static int mtl_tc_port_get_max_lane_count(struct intel_digital_port *dig_port) +static int mtl_get_max_lane_count(struct intel_tc_port *tc) { - struct intel_display *display = to_intel_display(dig_port); - intel_wakeref_t wakeref; - u32 pin_mask; + enum intel_tc_pin_assignment pin_assignment; - with_intel_display_power(display, POWER_DOMAIN_DISPLAY_CORE, wakeref) - pin_mask = intel_tc_port_get_pin_assignment_mask(dig_port); + pin_assignment = get_pin_assignment(tc); - switch (pin_mask) { + switch (pin_assignment) { + case INTEL_TC_PIN_ASSIGNMENT_NONE: + return 0; default: - MISSING_CASE(pin_mask); + MISSING_CASE(pin_assignment); fallthrough; - case DP_PIN_ASSIGNMENT_D: + case INTEL_TC_PIN_ASSIGNMENT_D: return 2; - case DP_PIN_ASSIGNMENT_C: - case DP_PIN_ASSIGNMENT_E: + case INTEL_TC_PIN_ASSIGNMENT_C: + case INTEL_TC_PIN_ASSIGNMENT_E: return 4; } } -static int intel_tc_port_get_max_lane_count(struct intel_digital_port *dig_port) +static int icl_get_max_lane_count(struct intel_tc_port *tc) { - struct intel_display *display = to_intel_display(dig_port); - intel_wakeref_t wakeref; u32 lane_mask = 0; - with_intel_display_power(display, POWER_DOMAIN_DISPLAY_CORE, wakeref) - lane_mask = intel_tc_port_get_lane_mask(dig_port); + lane_mask = get_lane_mask(tc); switch (lane_mask) { default: @@ -365,23 +381,44 @@ static int intel_tc_port_get_max_lane_count(struct intel_digital_port *dig_port) } } +static int get_max_lane_count(struct intel_tc_port *tc) +{ + struct intel_display *display = to_intel_display(tc->dig_port); + + if (tc->mode != TC_PORT_DP_ALT) + return 4; + + if (DISPLAY_VER(display) >= 14) + return mtl_get_max_lane_count(tc); + + return icl_get_max_lane_count(tc); +} + +static void read_pin_configuration(struct intel_tc_port *tc) +{ + tc->pin_assignment = get_pin_assignment(tc); + tc->max_lane_count = get_max_lane_count(tc); +} + int intel_tc_port_max_lane_count(struct intel_digital_port *dig_port) { - struct intel_display *display = to_intel_display(dig_port); struct intel_tc_port *tc = to_tc_port(dig_port); - if (!intel_encoder_is_tc(&dig_port->base) || tc->mode != TC_PORT_DP_ALT) + if (!intel_encoder_is_tc(&dig_port->base)) return 4; - assert_tc_cold_blocked(tc); + return tc->max_lane_count; +} - if (DISPLAY_VER(display) >= 20) - return lnl_tc_port_get_max_lane_count(dig_port); +enum intel_tc_pin_assignment +intel_tc_port_get_pin_assignment(struct intel_digital_port *dig_port) +{ + struct intel_tc_port *tc = to_tc_port(dig_port); - if (DISPLAY_VER(display) >= 14) - return mtl_tc_port_get_max_lane_count(dig_port); + if (!intel_encoder_is_tc(&dig_port->base)) + return INTEL_TC_PIN_ASSIGNMENT_NONE; - return intel_tc_port_get_max_lane_count(dig_port); + return tc->pin_assignment; } void intel_tc_port_set_fia_lane_count(struct intel_digital_port *dig_port, @@ -596,9 +633,12 @@ static void icl_tc_phy_get_hw_state(struct intel_tc_port *tc) tc_cold_wref = __tc_cold_block(tc, &domain); tc->mode = tc_phy_get_current_mode(tc); - if (tc->mode != TC_PORT_DISCONNECTED) + if (tc->mode != TC_PORT_DISCONNECTED) { tc->lock_wakeref = tc_cold_block(tc); + read_pin_configuration(tc); + } + __tc_cold_unblock(tc, domain, tc_cold_wref); } @@ -656,8 +696,11 @@ static bool icl_tc_phy_connect(struct intel_tc_port *tc, tc->lock_wakeref = tc_cold_block(tc); - if (tc->mode == TC_PORT_TBT_ALT) + if (tc->mode == TC_PORT_TBT_ALT) { + read_pin_configuration(tc); + return true; + } if ((!tc_phy_is_ready(tc) || 
!icl_tc_phy_take_ownership(tc, true)) && @@ -668,6 +711,7 @@ static bool icl_tc_phy_connect(struct intel_tc_port *tc, goto out_unblock_tc_cold; } + read_pin_configuration(tc); if (!tc_phy_verify_legacy_or_dp_alt_mode(tc, required_lanes)) goto out_release_phy; @@ -858,9 +902,12 @@ static void adlp_tc_phy_get_hw_state(struct intel_tc_port *tc) port_wakeref = intel_display_power_get(display, port_power_domain); tc->mode = tc_phy_get_current_mode(tc); - if (tc->mode != TC_PORT_DISCONNECTED) + if (tc->mode != TC_PORT_DISCONNECTED) { tc->lock_wakeref = tc_cold_block(tc); + read_pin_configuration(tc); + } + intel_display_power_put(display, port_power_domain, port_wakeref); } @@ -873,6 +920,9 @@ static bool adlp_tc_phy_connect(struct intel_tc_port *tc, int required_lanes) if (tc->mode == TC_PORT_TBT_ALT) { tc->lock_wakeref = tc_cold_block(tc); + + read_pin_configuration(tc); + return true; } @@ -894,6 +944,8 @@ static bool adlp_tc_phy_connect(struct intel_tc_port *tc, int required_lanes) tc->lock_wakeref = tc_cold_block(tc); + read_pin_configuration(tc); + if (!tc_phy_verify_legacy_or_dp_alt_mode(tc, required_lanes)) goto out_unblock_tc_cold; @@ -1000,8 +1052,13 @@ static bool xelpdp_tc_phy_wait_for_tcss_power(struct intel_tc_port *tc, bool enabled) { struct intel_display *display = to_intel_display(tc->dig_port); + bool is_enabled; + int ret; - if (wait_for(xelpdp_tc_phy_tcss_power_is_enabled(tc) == enabled, 5)) { + ret = poll_timeout_us(is_enabled = xelpdp_tc_phy_tcss_power_is_enabled(tc), + is_enabled == enabled, + 200, 5000, false); + if (ret) { drm_dbg_kms(display->drm, "Port %s: timeout waiting for TCSS power to get %s\n", str_enabled_disabled(enabled), @@ -1124,9 +1181,18 @@ static void xelpdp_tc_phy_get_hw_state(struct intel_tc_port *tc) tc_cold_wref = __tc_cold_block(tc, &domain); tc->mode = tc_phy_get_current_mode(tc); - if (tc->mode != TC_PORT_DISCONNECTED) + if (tc->mode != TC_PORT_DISCONNECTED) { tc->lock_wakeref = tc_cold_block(tc); + read_pin_configuration(tc); + /* + * Set a valid lane count value for a DP-alt sink which got + * disconnected. The driver can only disable the output on this PHY. + */ + if (tc->max_lane_count == 0) + tc->max_lane_count = 4; + } + drm_WARN_ON(display->drm, (tc->mode == TC_PORT_DP_ALT || tc->mode == TC_PORT_LEGACY) && !xelpdp_tc_phy_tcss_power_is_enabled(tc)); @@ -1138,14 +1204,19 @@ static bool xelpdp_tc_phy_connect(struct intel_tc_port *tc, int required_lanes) { tc->lock_wakeref = tc_cold_block(tc); - if (tc->mode == TC_PORT_TBT_ALT) + if (tc->mode == TC_PORT_TBT_ALT) { + read_pin_configuration(tc); + return true; + } if (!xelpdp_tc_phy_enable_tcss_power(tc, true)) goto out_unblock_tccold; xelpdp_tc_phy_take_ownership(tc, true); + read_pin_configuration(tc); + if (!tc_phy_verify_legacy_or_dp_alt_mode(tc, required_lanes)) goto out_release_phy; @@ -1226,14 +1297,19 @@ static void tc_phy_get_hw_state(struct intel_tc_port *tc) tc->phy_ops->get_hw_state(tc); } -static bool tc_phy_is_ready_and_owned(struct intel_tc_port *tc, - bool phy_is_ready, bool phy_is_owned) +/* Is the PHY owned by display i.e. is it in legacy or DP-alt mode? 
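 * Prior to display version 20 this additionally requires the PHY to
 * report ready (warning if it is owned but not ready); from version 20
 * onwards only the ownership flag is checked.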
*/ +static bool tc_phy_owned_by_display(struct intel_tc_port *tc, + bool phy_is_ready, bool phy_is_owned) { struct intel_display *display = to_intel_display(tc->dig_port); - drm_WARN_ON(display->drm, phy_is_owned && !phy_is_ready); + if (DISPLAY_VER(display) < 20) { + drm_WARN_ON(display->drm, phy_is_owned && !phy_is_ready); - return phy_is_ready && phy_is_owned; + return phy_is_ready && phy_is_owned; + } else { + return phy_is_owned; + } } static bool tc_phy_is_connected(struct intel_tc_port *tc, @@ -1244,7 +1320,7 @@ static bool tc_phy_is_connected(struct intel_tc_port *tc, bool phy_is_owned = tc_phy_is_owned(tc); bool is_connected; - if (tc_phy_is_ready_and_owned(tc, phy_is_ready, phy_is_owned)) + if (tc_phy_owned_by_display(tc, phy_is_ready, phy_is_owned)) is_connected = port_pll_type == ICL_PORT_DPLL_MG_PHY; else is_connected = port_pll_type == ICL_PORT_DPLL_DEFAULT; @@ -1263,8 +1339,13 @@ static bool tc_phy_is_connected(struct intel_tc_port *tc, static bool tc_phy_wait_for_ready(struct intel_tc_port *tc) { struct intel_display *display = to_intel_display(tc->dig_port); + bool is_ready; + int ret; - if (wait_for(tc_phy_is_ready(tc), 500)) { + ret = poll_timeout_us(is_ready = tc_phy_is_ready(tc), + is_ready, + 1000, 500 * 1000, false); + if (ret) { drm_err(display->drm, "Port %s: timeout waiting for PHY ready\n", tc->port_name); @@ -1352,7 +1433,7 @@ tc_phy_get_current_mode(struct intel_tc_port *tc) phy_is_ready = tc_phy_is_ready(tc); phy_is_owned = tc_phy_is_owned(tc); - if (!tc_phy_is_ready_and_owned(tc, phy_is_ready, phy_is_owned)) { + if (!tc_phy_owned_by_display(tc, phy_is_ready, phy_is_owned)) { mode = get_tc_mode_in_phy_not_owned_state(tc, live_mode); } else { drm_WARN_ON(display->drm, live_mode == TC_PORT_TBT_ALT); @@ -1441,21 +1522,24 @@ static void intel_tc_port_reset_mode(struct intel_tc_port *tc, intel_display_power_flush_work(display); if (!intel_tc_cold_requires_aux_pw(dig_port)) { enum intel_display_power_domain aux_domain; - bool aux_powered; aux_domain = intel_aux_power_domain(dig_port); - aux_powered = intel_display_power_is_enabled(display, aux_domain); - drm_WARN_ON(display->drm, aux_powered); + if (intel_display_power_is_enabled(display, aux_domain)) + drm_dbg_kms(display->drm, "Port %s: AUX unexpectedly powered\n", + tc->port_name); } tc_phy_disconnect(tc); if (!force_disconnect) tc_phy_connect(tc, required_lanes); - drm_dbg_kms(display->drm, "Port %s: TC port mode reset (%s -> %s)\n", + drm_dbg_kms(display->drm, + "Port %s: TC port mode reset (%s -> %s) pin assignment: %c max lanes: %d\n", tc->port_name, tc_port_mode_name(old_tc_mode), - tc_port_mode_name(tc->mode)); + tc_port_mode_name(tc->mode), + pin_assignment_name(tc->pin_assignment), + tc->max_lane_count); } static bool intel_tc_port_needs_reset(struct intel_tc_port *tc) @@ -1610,9 +1694,11 @@ void intel_tc_port_sanitize_mode(struct intel_digital_port *dig_port, __intel_tc_port_put_link(tc); } - drm_dbg_kms(display->drm, "Port %s: sanitize mode (%s)\n", + drm_dbg_kms(display->drm, "Port %s: sanitize mode (%s) pin assignment: %c max lanes: %d\n", tc->port_name, - tc_port_mode_name(tc->mode)); + tc_port_mode_name(tc->mode), + pin_assignment_name(tc->pin_assignment), + tc->max_lane_count); mutex_unlock(&tc->lock); } diff --git a/drivers/gpu/drm/i915/display/intel_tc.h b/drivers/gpu/drm/i915/display/intel_tc.h index 26c4265368c1..fff8b96e4972 100644 --- a/drivers/gpu/drm/i915/display/intel_tc.h +++ b/drivers/gpu/drm/i915/display/intel_tc.h @@ -12,6 +12,75 @@ struct intel_crtc_state; struct intel_digital_port; 
struct intel_encoder; +/* + * The following enum values must stay fixed, as they match the corresponding + * pin assignment fields in the PORT_TX_DFLEXPA1 and TCSS_DDI_STATUS registers. + */ +enum intel_tc_pin_assignment { /* Lanes (a) Signal/ Cable Notes */ + /* DP USB Rate (b) type */ + INTEL_TC_PIN_ASSIGNMENT_NONE = 0, /* 4 - - - (c) */ + INTEL_TC_PIN_ASSIGNMENT_A, /* 2/4 0 GEN2 TC->TC (d,e) */ + INTEL_TC_PIN_ASSIGNMENT_B, /* 1/2 1 GEN2 TC->TC (d,f,g) */ + INTEL_TC_PIN_ASSIGNMENT_C, /* 4 0 DP2 TC->TC (h) */ + INTEL_TC_PIN_ASSIGNMENT_D, /* 2 1 DP2 TC->TC (h,g) */ + INTEL_TC_PIN_ASSIGNMENT_E, /* 4 0 DP2 TC->DP */ + INTEL_TC_PIN_ASSIGNMENT_F, /* 2 1 GEN1/DP1 TC->DP (d,g,i) */ + /* + * (a) - DP unidirectional lanes, each lane using 1 differential signal + * pair. + * - USB SuperSpeed bidirectional lane, using 2 differential (TX and + * RX) signal pairs. + * - USB 2.0 (HighSpeed) unidirectional lane, using 1 differential + * signal pair. Not indicated, this lane is always present on pin + * assignments A-D and never present on pin assignments E/F. + * (b) - GEN1: USB 3.1 GEN1 bit rate (5 Gbps) and signaling. This + * is used for transferring only a USB stream. + * - GEN2: USB 3.1 GEN2 bit rate (10 Gbps) and signaling. This + * allows transferring an HBR3 (8.1 Gbps) DP stream. + * - DP1: Display Port signaling defined by the DP v1.3 Standard, + * with a maximum bit rate of HBR3. + * - DP2: Display Port signaling defined by the DP v2.1 Standard, + * with a maximum bit rate defined by the DP Alt Mode + * v2.1a Standard depending on the cable type as follows: + * - Passive (Full-Featured) USB 3.2 GEN1 + * TC->TC cables (CC3G1-X) : UHBR10 + * - Passive (Full-Featured) USB 3.2/4 GEN2 and + * Thunderbolt Alt Mode GEN2 + * TC->TC cables (CC3G2-X) all : UHBR10 + * DP54 logo : UHBR13.5 + * - Passive (Full-Featured) USB4 GEN3+ and + * Thunderbolt Alt Mode GEN3+ + * TC->TC cables (CC4G3-X) all : UHBR13.5 + * DP80 logo : UHBR20 + * - Active Re-Timed or + * Active Linear Re-driven (LRD) + * USB3.2 GEN1/2 and USB4 GEN2+ + * TC->TC cables all : HBR3 + * with DP_BR CTS : UHBR10 + * DP54 logo : UHBR13.5 + * DP80 logo : UHBR20 + * - Passive/Active Re-Timed or + * Active Linear Re-driven (LRD) + * TC->DP cables with DP_BR CTS/DP8K logo : HBR3 + * with DP_BR CTS : UHBR10 + * DP54 logo : UHBR13.5 + * DP80 logo : UHBR20 + * (c) Used in TBT-alt/legacy modes and on LNL+ after the sink + * disconnected in DP-alt mode. + * (d) Only defined by the DP Alt Standard v1.0a, deprecated by v1.0b, + * only supported on ICL. + * (e) GEN2 passive 1 m cable: 4 DP lanes, GEN2 active cable: 2 DP lanes. + * (f) GEN2 passive 1 m cable: 2 DP lanes, GEN2 active cable: 1 DP lane. + * (g) These pin assignments are also referred to as (USB/DP) + * multifunction or Multifunction Display Port (MFD) modes. + * (h) Also used where one end of the cable is a captive connector, + * attached to a DP->HDMI/DVI/VGA converter. + * (i) The DP end of the cable is a captive connector attached to a + * (DP/USB) multifunction dock as defined by the DockPort v1.0a + * specification. 
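 * For DP-alt mode the maximum DP lane count is derived from this field
 * (see mtl_get_max_lane_count() in intel_tc.c): assignment D yields 2
 * lanes, C and E yield 4, and NONE yields 0, i.e. the DP-alt sink was
 * disconnected (note (c)).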
+ */ +}; + bool intel_tc_port_in_tbt_alt_mode(struct intel_digital_port *dig_port); bool intel_tc_port_in_dp_alt_mode(struct intel_digital_port *dig_port); bool intel_tc_port_in_legacy_mode(struct intel_digital_port *dig_port); @@ -19,7 +88,8 @@ bool intel_tc_port_handles_hpd_glitches(struct intel_digital_port *dig_port); bool intel_tc_port_connected(struct intel_encoder *encoder); -u32 intel_tc_port_get_pin_assignment_mask(struct intel_digital_port *dig_port); +enum intel_tc_pin_assignment +intel_tc_port_get_pin_assignment(struct intel_digital_port *dig_port); int intel_tc_port_max_lane_count(struct intel_digital_port *dig_port); void intel_tc_port_set_fia_lane_count(struct intel_digital_port *dig_port, int required_lanes); diff --git a/drivers/gpu/drm/i915/display/intel_vblank.c b/drivers/gpu/drm/i915/display/intel_vblank.c index 70ba7aa26bf4..c15234c1d96e 100644 --- a/drivers/gpu/drm/i915/display/intel_vblank.c +++ b/drivers/gpu/drm/i915/display/intel_vblank.c @@ -3,9 +3,12 @@ * Copyright © 2022-2023 Intel Corporation */ +#include <linux/iopoll.h> + #include <drm/drm_vblank.h> #include "i915_drv.h" +#include "i915_utils.h" #include "intel_color.h" #include "intel_crtc.h" #include "intel_de.h" @@ -492,9 +495,14 @@ static void wait_for_pipe_scanline_moving(struct intel_crtc *crtc, bool state) { struct intel_display *display = to_intel_display(crtc); enum pipe pipe = crtc->pipe; + bool is_moving; + int ret; /* Wait for the display line to settle/start moving */ - if (wait_for(pipe_scanline_is_moving(display, pipe) == state, 100)) + ret = poll_timeout_us(is_moving = pipe_scanline_is_moving(display, pipe), + is_moving == state, + 500, 100 * 1000, false); + if (ret) drm_err(display->drm, "pipe %c scanline %s wait timed out\n", pipe_name(pipe), str_on_off(state)); @@ -724,9 +732,9 @@ int intel_vblank_evade(struct intel_vblank_evade_ctx *evade) break; if (!timeout) { - drm_err(display->drm, - "Potential atomic update failure on pipe %c\n", - pipe_name(crtc->pipe)); + drm_dbg_kms(display->drm, + "Potential atomic update failure on pipe %c\n", + pipe_name(crtc->pipe)); break; } diff --git a/drivers/gpu/drm/i915/display/intel_vbt_defs.h b/drivers/gpu/drm/i915/display/intel_vbt_defs.h index 92c04811aa28..70e31520c560 100644 --- a/drivers/gpu/drm/i915/display/intel_vbt_defs.h +++ b/drivers/gpu/drm/i915/display/intel_vbt_defs.h @@ -37,7 +37,7 @@ #ifndef _INTEL_VBT_DEFS_H_ #define _INTEL_VBT_DEFS_H_ -#include "intel_bios.h" +#include "intel_dsi_vbt_defs.h" /* EDID derived structures */ struct bdb_edid_pnp_id { @@ -437,6 +437,22 @@ enum vbt_gmbus_ddi { #define BDB_230_VBT_DP_MAX_LINK_RATE_UHBR13P5 6 #define BDB_230_VBT_DP_MAX_LINK_RATE_UHBR20 7 +/* EDP link rate 263+ */ +#define BDB_263_VBT_EDP_LINK_RATE_1_62 BIT_U32(0) +#define BDB_263_VBT_EDP_LINK_RATE_2_16 BIT_U32(1) +#define BDB_263_VBT_EDP_LINK_RATE_2_43 BIT_U32(2) +#define BDB_263_VBT_EDP_LINK_RATE_2_7 BIT_U32(3) +#define BDB_263_VBT_EDP_LINK_RATE_3_24 BIT_U32(4) +#define BDB_263_VBT_EDP_LINK_RATE_4_32 BIT_U32(5) +#define BDB_263_VBT_EDP_LINK_RATE_5_4 BIT_U32(6) +#define BDB_263_VBT_EDP_LINK_RATE_6_75 BIT_U32(7) +#define BDB_263_VBT_EDP_LINK_RATE_8_1 BIT_U32(8) +#define BDB_263_VBT_EDP_LINK_RATE_10 BIT_U32(9) +#define BDB_263_VBT_EDP_LINK_RATE_13_5 BIT_U32(10) +#define BDB_263_VBT_EDP_LINK_RATE_20 BIT_U32(11) +#define BDB_263_VBT_EDP_NUM_RATES 12 +#define BDB_263_VBT_EDP_RATES_MASK GENMASK(BDB_263_VBT_EDP_NUM_RATES - 1, 0) + /* * The child device config, aka the display device data structure, provides a * description of a port and its 
configuration on the platform. @@ -547,6 +563,8 @@ struct child_device_config { u8 dp_max_link_rate:3; /* 216+ */ u8 dp_max_link_rate_reserved:5; /* 216+ */ u8 efp_index; /* 256+ */ + u32 edp_data_rate_override:12; /* 263+ */ + u32 edp_data_rate_override_reserved:20; /* 263+ */ } __packed; struct bdb_general_definitions { diff --git a/drivers/gpu/drm/i915/display/intel_vrr_regs.h b/drivers/gpu/drm/i915/display/intel_vrr_regs.h index 09cdd50d6187..ba9b9215dc11 100644 --- a/drivers/gpu/drm/i915/display/intel_vrr_regs.h +++ b/drivers/gpu/drm/i915/display/intel_vrr_regs.h @@ -8,126 +8,119 @@ #include "intel_display_reg_defs.h" -/* VRR registers */ #define _TRANS_VRR_CTL_A 0x60420 #define _TRANS_VRR_CTL_B 0x61420 #define _TRANS_VRR_CTL_C 0x62420 #define _TRANS_VRR_CTL_D 0x63420 -#define TRANS_VRR_CTL(dev_priv, trans) _MMIO_TRANS2(dev_priv, trans, _TRANS_VRR_CTL_A) -#define VRR_CTL_VRR_ENABLE REG_BIT(31) -#define VRR_CTL_IGN_MAX_SHIFT REG_BIT(30) -#define VRR_CTL_FLIP_LINE_EN REG_BIT(29) -#define VRR_CTL_PIPELINE_FULL_MASK REG_GENMASK(10, 3) -#define VRR_CTL_PIPELINE_FULL(x) REG_FIELD_PREP(VRR_CTL_PIPELINE_FULL_MASK, (x)) -#define VRR_CTL_PIPELINE_FULL_OVERRIDE REG_BIT(0) -#define XELPD_VRR_CTL_VRR_GUARDBAND_MASK REG_GENMASK(15, 0) -#define XELPD_VRR_CTL_VRR_GUARDBAND(x) REG_FIELD_PREP(XELPD_VRR_CTL_VRR_GUARDBAND_MASK, (x)) +#define TRANS_VRR_CTL(display, trans) _MMIO_TRANS2((display), (trans), _TRANS_VRR_CTL_A) +#define VRR_CTL_VRR_ENABLE REG_BIT(31) +#define VRR_CTL_IGN_MAX_SHIFT REG_BIT(30) +#define VRR_CTL_FLIP_LINE_EN REG_BIT(29) +#define VRR_CTL_CMRR_ENABLE REG_BIT(27) +#define VRR_CTL_PIPELINE_FULL_MASK REG_GENMASK(10, 3) +#define VRR_CTL_PIPELINE_FULL(x) REG_FIELD_PREP(VRR_CTL_PIPELINE_FULL_MASK, (x)) +#define VRR_CTL_PIPELINE_FULL_OVERRIDE REG_BIT(0) +#define XELPD_VRR_CTL_VRR_GUARDBAND_MASK REG_GENMASK(15, 0) +#define XELPD_VRR_CTL_VRR_GUARDBAND(x) REG_FIELD_PREP(XELPD_VRR_CTL_VRR_GUARDBAND_MASK, (x)) #define _TRANS_VRR_VMAX_A 0x60424 #define _TRANS_VRR_VMAX_B 0x61424 #define _TRANS_VRR_VMAX_C 0x62424 #define _TRANS_VRR_VMAX_D 0x63424 -#define TRANS_VRR_VMAX(dev_priv, trans) _MMIO_TRANS2(dev_priv, trans, _TRANS_VRR_VMAX_A) -#define VRR_VMAX_MASK REG_GENMASK(19, 0) +#define TRANS_VRR_VMAX(display, trans) _MMIO_TRANS2((display), (trans), _TRANS_VRR_VMAX_A) +#define VRR_VMAX_MASK REG_GENMASK(19, 0) #define _TRANS_VRR_VMIN_A 0x60434 #define _TRANS_VRR_VMIN_B 0x61434 #define _TRANS_VRR_VMIN_C 0x62434 #define _TRANS_VRR_VMIN_D 0x63434 -#define TRANS_VRR_VMIN(dev_priv, trans) _MMIO_TRANS2(dev_priv, trans, _TRANS_VRR_VMIN_A) -#define VRR_VMIN_MASK REG_GENMASK(15, 0) +#define TRANS_VRR_VMIN(display, trans) _MMIO_TRANS2((display), (trans), _TRANS_VRR_VMIN_A) +#define VRR_VMIN_MASK REG_GENMASK(15, 0) #define _TRANS_VRR_VMAXSHIFT_A 0x60428 #define _TRANS_VRR_VMAXSHIFT_B 0x61428 #define _TRANS_VRR_VMAXSHIFT_C 0x62428 #define _TRANS_VRR_VMAXSHIFT_D 0x63428 -#define TRANS_VRR_VMAXSHIFT(dev_priv, trans) _MMIO_TRANS2(dev_priv, trans, \ - _TRANS_VRR_VMAXSHIFT_A) -#define VRR_VMAXSHIFT_DEC_MASK REG_GENMASK(29, 16) -#define VRR_VMAXSHIFT_DEC REG_BIT(16) -#define VRR_VMAXSHIFT_INC_MASK REG_GENMASK(12, 0) +#define TRANS_VRR_VMAXSHIFT(display, trans) _MMIO_TRANS2((display), (trans), _TRANS_VRR_VMAXSHIFT_A) +#define VRR_VMAXSHIFT_DEC_MASK REG_GENMASK(29, 16) +#define VRR_VMAXSHIFT_DEC REG_BIT(16) +#define VRR_VMAXSHIFT_INC_MASK REG_GENMASK(12, 0) #define _TRANS_VRR_STATUS_A 0x6042c #define _TRANS_VRR_STATUS_B 0x6142c #define _TRANS_VRR_STATUS_C 0x6242c #define _TRANS_VRR_STATUS_D 0x6342c -#define 
TRANS_VRR_STATUS(dev_priv, trans) _MMIO_TRANS2(dev_priv, trans, _TRANS_VRR_STATUS_A) -#define VRR_STATUS_VMAX_REACHED REG_BIT(31) -#define VRR_STATUS_NOFLIP_TILL_BNDR REG_BIT(30) -#define VRR_STATUS_FLIP_BEF_BNDR REG_BIT(29) -#define VRR_STATUS_NO_FLIP_FRAME REG_BIT(28) -#define VRR_STATUS_VRR_EN_LIVE REG_BIT(27) -#define VRR_STATUS_FLIPS_SERVICED REG_BIT(26) -#define VRR_STATUS_VBLANK_MASK REG_GENMASK(22, 20) -#define STATUS_FSM_IDLE REG_FIELD_PREP(VRR_STATUS_VBLANK_MASK, 0) -#define STATUS_FSM_WAIT_TILL_FDB REG_FIELD_PREP(VRR_STATUS_VBLANK_MASK, 1) -#define STATUS_FSM_WAIT_TILL_FS REG_FIELD_PREP(VRR_STATUS_VBLANK_MASK, 2) -#define STATUS_FSM_WAIT_TILL_FLIP REG_FIELD_PREP(VRR_STATUS_VBLANK_MASK, 3) -#define STATUS_FSM_PIPELINE_FILL REG_FIELD_PREP(VRR_STATUS_VBLANK_MASK, 4) -#define STATUS_FSM_ACTIVE REG_FIELD_PREP(VRR_STATUS_VBLANK_MASK, 5) -#define STATUS_FSM_LEGACY_VBLANK REG_FIELD_PREP(VRR_STATUS_VBLANK_MASK, 6) +#define TRANS_VRR_STATUS(display, trans) _MMIO_TRANS2((display), (trans), _TRANS_VRR_STATUS_A) +#define VRR_STATUS_VMAX_REACHED REG_BIT(31) +#define VRR_STATUS_NOFLIP_TILL_BNDR REG_BIT(30) +#define VRR_STATUS_FLIP_BEF_BNDR REG_BIT(29) +#define VRR_STATUS_NO_FLIP_FRAME REG_BIT(28) +#define VRR_STATUS_VRR_EN_LIVE REG_BIT(27) +#define VRR_STATUS_FLIPS_SERVICED REG_BIT(26) +#define VRR_STATUS_VBLANK_MASK REG_GENMASK(22, 20) +#define STATUS_FSM_IDLE REG_FIELD_PREP(VRR_STATUS_VBLANK_MASK, 0) +#define STATUS_FSM_WAIT_TILL_FDB REG_FIELD_PREP(VRR_STATUS_VBLANK_MASK, 1) +#define STATUS_FSM_WAIT_TILL_FS REG_FIELD_PREP(VRR_STATUS_VBLANK_MASK, 2) +#define STATUS_FSM_WAIT_TILL_FLIP REG_FIELD_PREP(VRR_STATUS_VBLANK_MASK, 3) +#define STATUS_FSM_PIPELINE_FILL REG_FIELD_PREP(VRR_STATUS_VBLANK_MASK, 4) +#define STATUS_FSM_ACTIVE REG_FIELD_PREP(VRR_STATUS_VBLANK_MASK, 5) +#define STATUS_FSM_LEGACY_VBLANK REG_FIELD_PREP(VRR_STATUS_VBLANK_MASK, 6) #define _TRANS_VRR_VTOTAL_PREV_A 0x60480 #define _TRANS_VRR_VTOTAL_PREV_B 0x61480 #define _TRANS_VRR_VTOTAL_PREV_C 0x62480 #define _TRANS_VRR_VTOTAL_PREV_D 0x63480 -#define TRANS_VRR_VTOTAL_PREV(dev_priv, trans) _MMIO_TRANS2(dev_priv, trans, \ - _TRANS_VRR_VTOTAL_PREV_A) -#define VRR_VTOTAL_FLIP_BEFR_BNDR REG_BIT(31) -#define VRR_VTOTAL_FLIP_AFTER_BNDR REG_BIT(30) -#define VRR_VTOTAL_FLIP_AFTER_DBLBUF REG_BIT(29) -#define VRR_VTOTAL_PREV_FRAME_MASK REG_GENMASK(19, 0) +#define TRANS_VRR_VTOTAL_PREV(display, trans) _MMIO_TRANS2((display), (trans), _TRANS_VRR_VTOTAL_PREV_A) +#define VRR_VTOTAL_FLIP_BEFR_BNDR REG_BIT(31) +#define VRR_VTOTAL_FLIP_AFTER_BNDR REG_BIT(30) +#define VRR_VTOTAL_FLIP_AFTER_DBLBUF REG_BIT(29) +#define VRR_VTOTAL_PREV_FRAME_MASK REG_GENMASK(19, 0) #define _TRANS_VRR_FLIPLINE_A 0x60438 #define _TRANS_VRR_FLIPLINE_B 0x61438 #define _TRANS_VRR_FLIPLINE_C 0x62438 #define _TRANS_VRR_FLIPLINE_D 0x63438 -#define TRANS_VRR_FLIPLINE(dev_priv, trans) _MMIO_TRANS2(dev_priv, trans, \ - _TRANS_VRR_FLIPLINE_A) -#define VRR_FLIPLINE_MASK REG_GENMASK(19, 0) +#define TRANS_VRR_FLIPLINE(display, trans) _MMIO_TRANS2((display), (trans), _TRANS_VRR_FLIPLINE_A) +#define VRR_FLIPLINE_MASK REG_GENMASK(19, 0) #define _TRANS_VRR_STATUS2_A 0x6043c #define _TRANS_VRR_STATUS2_B 0x6143c #define _TRANS_VRR_STATUS2_C 0x6243c #define _TRANS_VRR_STATUS2_D 0x6343c -#define TRANS_VRR_STATUS2(dev_priv, trans) _MMIO_TRANS2(dev_priv, trans, _TRANS_VRR_STATUS2_A) -#define VRR_STATUS2_VERT_LN_CNT_MASK REG_GENMASK(19, 0) +#define TRANS_VRR_STATUS2(display, trans) _MMIO_TRANS2((display), (trans), _TRANS_VRR_STATUS2_A) +#define VRR_STATUS2_VERT_LN_CNT_MASK REG_GENMASK(19, 0) 
#define _TRANS_PUSH_A 0x60a70 #define _TRANS_PUSH_B 0x61a70 #define _TRANS_PUSH_C 0x62a70 #define _TRANS_PUSH_D 0x63a70 -#define TRANS_PUSH(dev_priv, trans) _MMIO_TRANS2(dev_priv, trans, _TRANS_PUSH_A) -#define TRANS_PUSH_EN REG_BIT(31) -#define TRANS_PUSH_SEND REG_BIT(30) +#define TRANS_PUSH(display, trans) _MMIO_TRANS2((display), (trans), _TRANS_PUSH_A) +#define TRANS_PUSH_EN REG_BIT(31) +#define TRANS_PUSH_SEND REG_BIT(30) #define _TRANS_VRR_VSYNC_A 0x60078 -#define TRANS_VRR_VSYNC(dev_priv, trans) _MMIO_TRANS2(dev_priv, trans, _TRANS_VRR_VSYNC_A) -#define VRR_VSYNC_END_MASK REG_GENMASK(28, 16) -#define VRR_VSYNC_END(vsync_end) REG_FIELD_PREP(VRR_VSYNC_END_MASK, (vsync_end)) -#define VRR_VSYNC_START_MASK REG_GENMASK(12, 0) -#define VRR_VSYNC_START(vsync_start) REG_FIELD_PREP(VRR_VSYNC_START_MASK, (vsync_start)) +#define TRANS_VRR_VSYNC(display, trans) _MMIO_TRANS2((display), (trans), _TRANS_VRR_VSYNC_A) +#define VRR_VSYNC_END_MASK REG_GENMASK(28, 16) +#define VRR_VSYNC_END(vsync_end) REG_FIELD_PREP(VRR_VSYNC_END_MASK, (vsync_end)) +#define VRR_VSYNC_START_MASK REG_GENMASK(12, 0) +#define VRR_VSYNC_START(vsync_start) REG_FIELD_PREP(VRR_VSYNC_START_MASK, (vsync_start)) /* Common register for HDMI EMP and DP AS SDP */ #define _EMP_AS_SDP_TL_A 0x60204 -#define EMP_AS_SDP_DB_TL_MASK REG_GENMASK(12, 0) -#define EMP_AS_SDP_TL(dev_priv, trans) _MMIO_TRANS2(dev_priv, trans, _EMP_AS_SDP_TL_A) -#define EMP_AS_SDP_DB_TL(db_transmit_line) REG_FIELD_PREP(EMP_AS_SDP_DB_TL_MASK, (db_transmit_line)) - -/*CMRR Registers*/ +#define EMP_AS_SDP_TL(display, trans) _MMIO_TRANS2((display), (trans), _EMP_AS_SDP_TL_A) +#define EMP_AS_SDP_DB_TL_MASK REG_GENMASK(12, 0) +#define EMP_AS_SDP_DB_TL(db_transmit_line) REG_FIELD_PREP(EMP_AS_SDP_DB_TL_MASK, (db_transmit_line)) #define _TRANS_CMRR_M_LO_A 0x604F0 -#define TRANS_CMRR_M_LO(dev_priv, trans) _MMIO_TRANS2(dev_priv, trans, _TRANS_CMRR_M_LO_A) +#define TRANS_CMRR_M_LO(display, trans) _MMIO_TRANS2((display), (trans), _TRANS_CMRR_M_LO_A) #define _TRANS_CMRR_M_HI_A 0x604F4 -#define TRANS_CMRR_M_HI(dev_priv, trans) _MMIO_TRANS2(dev_priv, trans, _TRANS_CMRR_M_HI_A) +#define TRANS_CMRR_M_HI(display, trans) _MMIO_TRANS2((display), (trans), _TRANS_CMRR_M_HI_A) #define _TRANS_CMRR_N_LO_A 0x604F8 -#define TRANS_CMRR_N_LO(dev_priv, trans) _MMIO_TRANS2(dev_priv, trans, _TRANS_CMRR_N_LO_A) +#define TRANS_CMRR_N_LO(display, trans) _MMIO_TRANS2((display), (trans), _TRANS_CMRR_N_LO_A) #define _TRANS_CMRR_N_HI_A 0x604FC -#define TRANS_CMRR_N_HI(dev_priv, trans) _MMIO_TRANS2(dev_priv, trans, _TRANS_CMRR_N_HI_A) - -#define VRR_CTL_CMRR_ENABLE REG_BIT(27) +#define TRANS_CMRR_N_HI(display, trans) _MMIO_TRANS2((display), (trans), _TRANS_CMRR_N_HI_A) #endif /* __INTEL_VRR_REGS__ */ diff --git a/drivers/gpu/drm/i915/display/intel_wm.c b/drivers/gpu/drm/i915/display/intel_wm.c index bba82e888db2..f887a664fe22 100644 --- a/drivers/gpu/drm/i915/display/intel_wm.c +++ b/drivers/gpu/drm/i915/display/intel_wm.c @@ -5,7 +5,6 @@ #include <linux/debugfs.h> -#include <drm/drm_file.h> #include <drm/drm_print.h> #include "i9xx_wm.h" @@ -390,15 +389,15 @@ static const struct file_operations i915_cur_wm_latency_fops = { void intel_wm_debugfs_register(struct intel_display *display) { - struct drm_minor *minor = display->drm->primary; + struct dentry *debugfs_root = display->drm->debugfs_root; - debugfs_create_file("i915_pri_wm_latency", 0644, minor->debugfs_root, + debugfs_create_file("i915_pri_wm_latency", 0644, debugfs_root, display, &i915_pri_wm_latency_fops); - 
debugfs_create_file("i915_spr_wm_latency", 0644, minor->debugfs_root, + debugfs_create_file("i915_spr_wm_latency", 0644, debugfs_root, display, &i915_spr_wm_latency_fops); - debugfs_create_file("i915_cur_wm_latency", 0644, minor->debugfs_root, + debugfs_create_file("i915_cur_wm_latency", 0644, debugfs_root, display, &i915_cur_wm_latency_fops); skl_watermark_debugfs_register(display); diff --git a/drivers/gpu/drm/i915/display/skl_scaler.c b/drivers/gpu/drm/i915/display/skl_scaler.c index d77798499c57..c6cccf170ff1 100644 --- a/drivers/gpu/drm/i915/display/skl_scaler.c +++ b/drivers/gpu/drm/i915/display/skl_scaler.c @@ -10,6 +10,7 @@ #include "intel_display_regs.h" #include "intel_display_trace.h" #include "intel_display_types.h" +#include "intel_display_wa.h" #include "intel_fb.h" #include "skl_scaler.h" #include "skl_universal_plane.h" @@ -91,11 +92,9 @@ static void skl_scaler_min_src_size(const struct drm_format_info *format, } } -static void skl_scaler_max_src_size(struct intel_crtc *crtc, +static void skl_scaler_max_src_size(struct intel_display *display, int *max_w, int *max_h) { - struct intel_display *display = to_intel_display(crtc); - if (DISPLAY_VER(display) >= 14) { *max_w = 4096; *max_h = 8192; @@ -134,6 +133,23 @@ static void skl_scaler_max_dst_size(struct intel_crtc *crtc, } } +enum drm_mode_status +skl_scaler_mode_valid(struct intel_display *display, + const struct drm_display_mode *mode, + enum intel_output_format output_format, + int num_joined_pipes) +{ + int max_h, max_w; + + if (num_joined_pipes < 2 && output_format == INTEL_OUTPUT_FORMAT_YCBCR420) { + skl_scaler_max_src_size(display, &max_w, &max_h); + if (mode->hdisplay > max_h) + return MODE_NO_420; + } + + return MODE_OK; +} + static int skl_update_scaler(struct intel_crtc_state *crtc_state, bool force_detach, unsigned int scaler_user, int *scaler_id, @@ -201,7 +217,7 @@ skl_update_scaler(struct intel_crtc_state *crtc_state, bool force_detach, } skl_scaler_min_src_size(format, modifier, &min_src_w, &min_src_h); - skl_scaler_max_src_size(crtc, &max_src_w, &max_src_h); + skl_scaler_max_src_size(display, &max_src_w, &max_src_h); skl_scaler_min_dst_size(&min_dst_w, &min_dst_h); skl_scaler_max_dst_size(crtc, &max_dst_w, &max_dst_h); @@ -747,6 +763,9 @@ void skl_pfit_enable(const struct intel_crtc_state *crtc_state) crtc_state->scaler_state.scaler_id < 0)) return; + if (intel_display_wa(display, 14011503117)) + adl_scaler_ecc_mask(crtc_state); + drm_rect_init(&src, 0, 0, drm_rect_width(&crtc_state->pipe_src) << 16, drm_rect_height(&crtc_state->pipe_src) << 16); @@ -923,3 +942,29 @@ void skl_scaler_get_config(struct intel_crtc_state *crtc_state) else scaler_state->scaler_users &= ~(1 << SKL_CRTC_INDEX); } + +void adl_scaler_ecc_mask(const struct intel_crtc_state *crtc_state) +{ + struct intel_display *display = to_intel_display(crtc_state); + + if (!crtc_state->pch_pfit.enabled) + return; + + intel_de_write(display, XELPD_DISPLAY_ERR_FATAL_MASK, ~0); +} + +void adl_scaler_ecc_unmask(const struct intel_crtc_state *crtc_state) +{ + struct intel_display *display = to_intel_display(crtc_state); + struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); + const struct intel_crtc_scaler_state *scaler_state = + &crtc_state->scaler_state; + + if (scaler_state->scaler_id < 0) + return; + + intel_de_write_fw(display, + SKL_PS_ECC_STAT(crtc->pipe, scaler_state->scaler_id), + 1); + intel_de_write(display, XELPD_DISPLAY_ERR_FATAL_MASK, 0); +} diff --git a/drivers/gpu/drm/i915/display/skl_scaler.h 
b/drivers/gpu/drm/i915/display/skl_scaler.h index 355ea15260ca..12a19016c5f6 100644 --- a/drivers/gpu/drm/i915/display/skl_scaler.h +++ b/drivers/gpu/drm/i915/display/skl_scaler.h @@ -5,10 +5,14 @@ #ifndef INTEL_SCALER_H #define INTEL_SCALER_H +enum drm_mode_status; +struct drm_display_mode; struct intel_atomic_state; struct intel_crtc; struct intel_crtc_state; +struct intel_display; struct intel_dsb; +enum intel_output_format; struct intel_plane; struct intel_plane_state; @@ -32,4 +36,13 @@ void skl_scaler_disable(const struct intel_crtc_state *old_crtc_state); void skl_scaler_get_config(struct intel_crtc_state *crtc_state); +enum drm_mode_status +skl_scaler_mode_valid(struct intel_display *display, + const struct drm_display_mode *mode, + enum intel_output_format output_format, + int num_joined_pipes); + +void adl_scaler_ecc_mask(const struct intel_crtc_state *crtc_state); + +void adl_scaler_ecc_unmask(const struct intel_crtc_state *crtc_state); #endif diff --git a/drivers/gpu/drm/i915/display/skl_universal_plane.c b/drivers/gpu/drm/i915/display/skl_universal_plane.c index 2aa64482d44b..e13fb781e7b2 100644 --- a/drivers/gpu/drm/i915/display/skl_universal_plane.c +++ b/drivers/gpu/drm/i915/display/skl_universal_plane.c @@ -9,9 +9,8 @@ #include <drm/drm_fourcc.h> #include "pxp/intel_pxp.h" - #include "i915_drv.h" -#include "intel_atomic_plane.h" +#include "i915_utils.h" #include "intel_bo.h" #include "intel_de.h" #include "intel_display_irq.h" @@ -21,6 +20,8 @@ #include "intel_fb.h" #include "intel_fbc.h" #include "intel_frontbuffer.h" +#include "intel_panic.h" +#include "intel_plane.h" #include "intel_psr.h" #include "intel_psr_regs.h" #include "skl_scaler.h" @@ -1167,8 +1168,7 @@ static u32 skl_plane_ctl_crtc(const struct intel_crtc_state *crtc_state) return plane_ctl; } -static u32 skl_plane_ctl(const struct intel_crtc_state *crtc_state, - const struct intel_plane_state *plane_state) +static u32 skl_plane_ctl(const struct intel_plane_state *plane_state) { struct intel_display *display = to_intel_display(plane_state); const struct drm_framebuffer *fb = plane_state->hw.fb; @@ -1226,8 +1226,7 @@ static u32 glk_plane_color_ctl_crtc(const struct intel_crtc_state *crtc_state) return plane_color_ctl; } -static u32 glk_plane_color_ctl(const struct intel_crtc_state *crtc_state, - const struct intel_plane_state *plane_state) +static u32 glk_plane_color_ctl(const struct intel_plane_state *plane_state) { struct intel_display *display = to_intel_display(plane_state); const struct drm_framebuffer *fb = plane_state->hw.fb; @@ -1272,12 +1271,6 @@ static u32 skl_surf_address(const struct intel_plane_state *plane_state, u32 offset = plane_state->view.color_plane[color_plane].offset; if (intel_fb_uses_dpt(fb)) { - /* - * The DPT object contains only one vma, so the VMA's offset - * within the DPT is always 0. 
- */ - drm_WARN_ON(display->drm, plane_state->dpt_vma && - intel_dpt_offset(plane_state->dpt_vma)); drm_WARN_ON(display->drm, offset & 0x1fffff); return offset >> 9; } else { @@ -1286,13 +1279,20 @@ static u32 skl_surf_address(const struct intel_plane_state *plane_state, } } -static u32 skl_plane_surf(const struct intel_plane_state *plane_state, - int color_plane) +static int icl_plane_color_plane(const struct intel_plane_state *plane_state) { + if (plane_state->planar_linked_plane && !plane_state->is_y_plane) + return 1; + else + return 0; +} + +static u32 skl_plane_surf_offset(const struct intel_plane_state *plane_state) +{ + int color_plane = icl_plane_color_plane(plane_state); u32 plane_surf; - plane_surf = intel_plane_ggtt_offset(plane_state) + - skl_surf_address(plane_state, color_plane); + plane_surf = skl_surf_address(plane_state, color_plane); if (plane_state->decrypt) plane_surf |= PLANE_SURF_DECRYPT; @@ -1374,14 +1374,6 @@ static void icl_plane_csc_load_black(struct intel_dsb *dsb, intel_de_write_dsb(display, dsb, PLANE_CSC_POSTOFF(pipe, plane_id, 2), 0); } -static int icl_plane_color_plane(const struct intel_plane_state *plane_state) -{ - if (plane_state->planar_linked_plane && !plane_state->is_y_plane) - return 1; - else - return 0; -} - static void skl_plane_update_noarm(struct intel_dsb *dsb, struct intel_plane *plane, @@ -1477,7 +1469,7 @@ skl_plane_update_arm(struct intel_dsb *dsb, intel_de_write_dsb(display, dsb, PLANE_CTL(pipe, plane_id), plane_ctl); intel_de_write_dsb(display, dsb, PLANE_SURF(pipe, plane_id), - skl_plane_surf(plane_state, 0)); + plane_state->surf); } static void icl_plane_update_sel_fetch_noarm(struct intel_dsb *dsb, @@ -1633,7 +1625,6 @@ icl_plane_update_arm(struct intel_dsb *dsb, struct intel_display *display = to_intel_display(plane); enum plane_id plane_id = plane->id; enum pipe pipe = plane->pipe; - int color_plane = icl_plane_color_plane(plane_state); u32 plane_ctl; plane_ctl = plane_state->ctl | @@ -1659,7 +1650,7 @@ icl_plane_update_arm(struct intel_dsb *dsb, intel_de_write_dsb(display, dsb, PLANE_CTL(pipe, plane_id), plane_ctl); intel_de_write_dsb(display, dsb, PLANE_SURF(pipe, plane_id), - skl_plane_surf(plane_state, color_plane)); + plane_state->surf); } static void skl_plane_capture_error(struct intel_crtc *crtc, @@ -1683,10 +1674,10 @@ skl_plane_async_flip(struct intel_dsb *dsb, struct intel_display *display = to_intel_display(plane); enum plane_id plane_id = plane->id; enum pipe pipe = plane->pipe; - u32 plane_ctl = plane_state->ctl, plane_surf; + u32 plane_ctl = plane_state->ctl; + u32 plane_surf = plane_state->surf; plane_ctl |= skl_plane_ctl_crtc(crtc_state); - plane_surf = skl_plane_surf(plane_state, 0); if (async_flip) { if (DISPLAY_VER(display) >= 30) @@ -2328,8 +2319,8 @@ static int skl_plane_check(struct intel_crtc_state *crtc_state, max_scale = skl_plane_max_scale(display, fb); } - ret = intel_atomic_plane_check_clipping(plane_state, crtc_state, - min_scale, max_scale, true); + ret = intel_plane_check_clipping(plane_state, crtc_state, + min_scale, max_scale, true); if (ret) return ret; @@ -2364,11 +2355,10 @@ static int skl_plane_check(struct intel_crtc_state *crtc_state, plane_state->damage = DRM_RECT_INIT(0, 0, 0, 0); } - plane_state->ctl = skl_plane_ctl(crtc_state, plane_state); + plane_state->ctl = skl_plane_ctl(plane_state); if (DISPLAY_VER(display) >= 10) - plane_state->color_ctl = glk_plane_color_ctl(crtc_state, - plane_state); + plane_state->color_ctl = glk_plane_color_ctl(plane_state); if 
(intel_format_info_is_yuv_semiplanar(fb->format, fb->modifier) && icl_is_hdr_plane(display, plane->id)) @@ -2792,6 +2782,32 @@ static u8 tgl_plane_caps(struct intel_display *display, return caps; } +static void skl_disable_tiling(struct intel_plane *plane) +{ + struct intel_plane_state *state = to_intel_plane_state(plane->base.state); + struct intel_display *display = to_intel_display(plane); + const struct drm_framebuffer *fb = state->hw.fb; + u32 plane_ctl; + + plane_ctl = intel_de_read(display, PLANE_CTL(plane->pipe, plane->id)); + + if (intel_fb_uses_dpt(fb)) { + /* if DPT is enabled, keep tiling, but disable compression */ + plane_ctl &= ~PLANE_CTL_RENDER_DECOMPRESSION_ENABLE; + } else { + /* if DPT is not supported, disable tiling, and update stride */ + u32 stride = state->view.color_plane[0].scanout_stride / 64; + + plane_ctl &= ~PLANE_CTL_TILED_MASK; + intel_de_write_fw(display, PLANE_STRIDE(plane->pipe, plane->id), + PLANE_STRIDE_(stride)); + } + intel_de_write_fw(display, PLANE_CTL(plane->pipe, plane->id), plane_ctl); + + intel_de_write_fw(display, PLANE_SURF(plane->pipe, plane->id), + state->surf); +} + struct intel_plane * skl_universal_plane_create(struct intel_display *display, enum pipe pipe, enum plane_id plane_id) @@ -2838,6 +2854,9 @@ skl_universal_plane_create(struct intel_display *display, plane->max_height = skl_plane_max_height; plane->min_cdclk = skl_plane_min_cdclk; } + plane->disable_tiling = skl_disable_tiling; + + plane->surf_offset = skl_plane_surf_offset; if (DISPLAY_VER(display) >= 13) plane->max_stride = adl_plane_max_stride; @@ -3010,7 +3029,7 @@ skl_get_initial_plane_config(struct intel_crtc *crtc, return; } - intel_fb = kzalloc(sizeof(*intel_fb), GFP_KERNEL); + intel_fb = intel_framebuffer_alloc(); if (!intel_fb) { drm_dbg_kms(display->drm, "failed to alloc fb\n"); return; @@ -3165,21 +3184,18 @@ bool skl_fixup_initial_plane_config(struct intel_crtc *crtc, to_intel_plane_state(plane->base.state); enum plane_id plane_id = plane->id; enum pipe pipe = crtc->pipe; - u32 base; if (!plane_state->uapi.visible) return false; - base = intel_plane_ggtt_offset(plane_state); - /* * We may have moved the surface to a different * part of ggtt, make the plane aware of that. */ - if (plane_config->base == base) + if (plane_config->base == plane_state->surf) return false; - intel_de_write(display, PLANE_SURF(pipe, plane_id), base); + intel_de_write(display, PLANE_SURF(pipe, plane_id), plane_state->surf); return true; } diff --git a/drivers/gpu/drm/i915/display/skl_watermark.c b/drivers/gpu/drm/i915/display/skl_watermark.c index 2c2371574d6f..d74cbb43ae6f 100644 --- a/drivers/gpu/drm/i915/display/skl_watermark.c +++ b/drivers/gpu/drm/i915/display/skl_watermark.c @@ -6,14 +6,13 @@ #include <linux/debugfs.h> #include <drm/drm_blend.h> +#include <drm/drm_print.h> #include "soc/intel_dram.h" - -#include "i915_drv.h" #include "i915_reg.h" +#include "i915_utils.h" #include "i9xx_wm.h" #include "intel_atomic.h" -#include "intel_atomic_plane.h" #include "intel_bw.h" #include "intel_cdclk.h" #include "intel_crtc.h" @@ -26,17 +25,33 @@ #include "intel_display_types.h" #include "intel_fb.h" #include "intel_fixed.h" +#include "intel_flipq.h" #include "intel_pcode.h" +#include "intel_plane.h" #include "intel_wm.h" #include "skl_universal_plane_regs.h" #include "skl_watermark.h" #include "skl_watermark_regs.h" -/*It is expected that DSB can do posted writes to every register in - * the pipe and planes within 100us. 
For flip queue use case, the - * recommended DSB execution time is 100us + one SAGV block time. - */ -#define DSB_EXE_TIME 100 +struct intel_dbuf_state { + struct intel_global_state base; + + struct skl_ddb_entry ddb[I915_MAX_PIPES]; + unsigned int weight[I915_MAX_PIPES]; + u8 slices[I915_MAX_PIPES]; + u8 enabled_slices; + u8 active_pipes; + u8 mdclk_cdclk_ratio; + bool joined_mbus; +}; + +#define to_intel_dbuf_state(global_state) \ + container_of_const((global_state), struct intel_dbuf_state, base) + +#define intel_atomic_get_old_dbuf_state(state) \ + to_intel_dbuf_state(intel_atomic_get_old_global_obj_state(state, &to_intel_display(state)->dbuf.obj)) +#define intel_atomic_get_new_dbuf_state(state) \ + to_intel_dbuf_state(intel_atomic_get_new_global_obj_state(state, &to_intel_display(state)->dbuf.obj)) static void skl_sagv_disable(struct intel_display *display); @@ -87,8 +102,6 @@ intel_has_sagv(struct intel_display *display) static u32 intel_sagv_block_time(struct intel_display *display) { - struct drm_i915_private *i915 = to_i915(display->drm); - if (DISPLAY_VER(display) >= 14) { u32 val; @@ -99,9 +112,9 @@ intel_sagv_block_time(struct intel_display *display) u32 val = 0; int ret; - ret = snb_pcode_read(&i915->uncore, - GEN12_PCODE_READ_SAGV_BLOCK_TIME_US, - &val, NULL); + ret = intel_pcode_read(display->drm, + GEN12_PCODE_READ_SAGV_BLOCK_TIME_US, + &val, NULL); if (ret) { drm_dbg_kms(display->drm, "Couldn't read SAGV block time!\n"); return 0; @@ -159,7 +172,6 @@ static void intel_sagv_init(struct intel_display *display) */ static void skl_sagv_enable(struct intel_display *display) { - struct drm_i915_private *i915 = to_i915(display->drm); int ret; if (!intel_has_sagv(display)) @@ -169,8 +181,8 @@ static void skl_sagv_enable(struct intel_display *display) return; drm_dbg_kms(display->drm, "Enabling SAGV\n"); - ret = snb_pcode_write(&i915->uncore, GEN9_PCODE_SAGV_CONTROL, - GEN9_SAGV_ENABLE); + ret = intel_pcode_write(display->drm, GEN9_PCODE_SAGV_CONTROL, + GEN9_SAGV_ENABLE); /* We don't need to wait for SAGV when enabling */ @@ -192,7 +204,6 @@ static void skl_sagv_enable(struct intel_display *display) static void skl_sagv_disable(struct intel_display *display) { - struct drm_i915_private *i915 = to_i915(display->drm); int ret; if (!intel_has_sagv(display)) @@ -203,10 +214,9 @@ static void skl_sagv_disable(struct intel_display *display) drm_dbg_kms(display->drm, "Disabling SAGV\n"); /* bspec says to keep retrying for at least 1 ms */ - ret = skl_pcode_request(&i915->uncore, GEN9_PCODE_SAGV_CONTROL, - GEN9_SAGV_DISABLE, - GEN9_SAGV_IS_DISABLED, GEN9_SAGV_IS_DISABLED, - 1); + ret = intel_pcode_request(display->drm, GEN9_PCODE_SAGV_CONTROL, + GEN9_SAGV_DISABLE, + GEN9_SAGV_IS_DISABLED, GEN9_SAGV_IS_DISABLED, 1); /* * Some skl systems, pre-release machines in particular, * don't actually have SAGV. 
@@ -232,7 +242,7 @@ static void skl_sagv_pre_plane_update(struct intel_atomic_state *state) if (!new_bw_state) return; - if (!intel_can_enable_sagv(display, new_bw_state)) + if (!intel_bw_can_enable_sagv(display, new_bw_state)) skl_sagv_disable(display); } @@ -245,74 +255,10 @@ static void skl_sagv_post_plane_update(struct intel_atomic_state *state) if (!new_bw_state) return; - if (intel_can_enable_sagv(display, new_bw_state)) + if (intel_bw_can_enable_sagv(display, new_bw_state)) skl_sagv_enable(display); } -static void icl_sagv_pre_plane_update(struct intel_atomic_state *state) -{ - struct intel_display *display = to_intel_display(state); - const struct intel_bw_state *old_bw_state = - intel_atomic_get_old_bw_state(state); - const struct intel_bw_state *new_bw_state = - intel_atomic_get_new_bw_state(state); - u16 old_mask, new_mask; - - if (!new_bw_state) - return; - - old_mask = old_bw_state->qgv_points_mask; - new_mask = old_bw_state->qgv_points_mask | new_bw_state->qgv_points_mask; - - if (old_mask == new_mask) - return; - - WARN_ON(!new_bw_state->base.changed); - - drm_dbg_kms(display->drm, "Restricting QGV points: 0x%x -> 0x%x\n", - old_mask, new_mask); - - /* - * Restrict required qgv points before updating the configuration. - * According to BSpec we can't mask and unmask qgv points at the same - * time. Also masking should be done before updating the configuration - * and unmasking afterwards. - */ - icl_pcode_restrict_qgv_points(display, new_mask); -} - -static void icl_sagv_post_plane_update(struct intel_atomic_state *state) -{ - struct intel_display *display = to_intel_display(state); - const struct intel_bw_state *old_bw_state = - intel_atomic_get_old_bw_state(state); - const struct intel_bw_state *new_bw_state = - intel_atomic_get_new_bw_state(state); - u16 old_mask, new_mask; - - if (!new_bw_state) - return; - - old_mask = old_bw_state->qgv_points_mask | new_bw_state->qgv_points_mask; - new_mask = new_bw_state->qgv_points_mask; - - if (old_mask == new_mask) - return; - - WARN_ON(!new_bw_state->base.changed); - - drm_dbg_kms(display->drm, "Relaxing QGV points: 0x%x -> 0x%x\n", - old_mask, new_mask); - - /* - * Allow required qgv points after updating the configuration. - * According to BSpec we can't mask and unmask qgv points at the same - * time. Also masking should be done before updating the configuration - * and unmasking afterwards. 
- */ - icl_pcode_restrict_qgv_points(display, new_mask); -} - void intel_sagv_pre_plane_update(struct intel_atomic_state *state) { struct intel_display *display = to_intel_display(state); @@ -446,16 +392,6 @@ bool intel_crtc_can_enable_sagv(const struct intel_crtc_state *crtc_state) return skl_crtc_can_enable_sagv(crtc_state); } -bool intel_can_enable_sagv(struct intel_display *display, - const struct intel_bw_state *bw_state) -{ - if (DISPLAY_VER(display) < 11 && - bw_state->active_pipes && !is_power_of_2(bw_state->active_pipes)) - return false; - - return bw_state->pipe_sagv_reject == 0; -} - static u16 skl_ddb_entry_init(struct skl_ddb_entry *entry, u16 start, u16 end) { @@ -1452,7 +1388,7 @@ skl_allocate_plane_ddb(struct skl_plane_ddb_iter *iter, { u16 size, extra = 0; - if (data_rate) { + if (data_rate && iter->data_rate) { extra = min_t(u16, iter->size, DIV64_U64_ROUND_UP(iter->size * data_rate, iter->data_rate)); @@ -2236,7 +2172,7 @@ cdclk_prefill_adjustment(const struct intel_crtc_state *crtc_state) } return min(1, DIV_ROUND_UP(crtc_state->pixel_rate, - 2 * cdclk_state->logical.cdclk)); + 2 * intel_cdclk_logical(cdclk_state))); } static int @@ -2336,6 +2272,11 @@ static int skl_max_wm0_lines(const struct intel_crtc_state *crtc_state) return wm0_lines; } +/* + * TODO: In case we use PKG_C_LATENCY to allow C-states when the delayed vblank + * size is too small for the package C exit latency we need to notify PSR about + * the scenario to apply Wa_16025596647. + */ static int skl_max_wm_level_for_vblank(struct intel_crtc_state *crtc_state, int wm0_lines) { @@ -2680,6 +2621,97 @@ static char enast(bool enable) return enable ? '*' : ' '; } +static noinline_for_stack void +skl_print_plane_changes(struct intel_display *display, + struct intel_plane *plane, + const struct skl_plane_wm *old_wm, + const struct skl_plane_wm *new_wm) +{ + drm_dbg_kms(display->drm, + "[PLANE:%d:%s] level %cwm0,%cwm1,%cwm2,%cwm3,%cwm4,%cwm5,%cwm6,%cwm7,%ctwm,%cswm,%cstwm" + " -> %cwm0,%cwm1,%cwm2,%cwm3,%cwm4,%cwm5,%cwm6,%cwm7,%ctwm,%cswm,%cstwm\n", + plane->base.base.id, plane->base.name, + enast(old_wm->wm[0].enable), enast(old_wm->wm[1].enable), + enast(old_wm->wm[2].enable), enast(old_wm->wm[3].enable), + enast(old_wm->wm[4].enable), enast(old_wm->wm[5].enable), + enast(old_wm->wm[6].enable), enast(old_wm->wm[7].enable), + enast(old_wm->trans_wm.enable), + enast(old_wm->sagv.wm0.enable), + enast(old_wm->sagv.trans_wm.enable), + enast(new_wm->wm[0].enable), enast(new_wm->wm[1].enable), + enast(new_wm->wm[2].enable), enast(new_wm->wm[3].enable), + enast(new_wm->wm[4].enable), enast(new_wm->wm[5].enable), + enast(new_wm->wm[6].enable), enast(new_wm->wm[7].enable), + enast(new_wm->trans_wm.enable), + enast(new_wm->sagv.wm0.enable), + enast(new_wm->sagv.trans_wm.enable)); + + drm_dbg_kms(display->drm, + "[PLANE:%d:%s] lines %c%3d,%c%3d,%c%3d,%c%3d,%c%3d,%c%3d,%c%3d,%c%3d,%c%3d,%c%3d,%c%4d" + " -> %c%3d,%c%3d,%c%3d,%c%3d,%c%3d,%c%3d,%c%3d,%c%3d,%c%3d,%c%3d,%c%4d\n", + plane->base.base.id, plane->base.name, + enast(old_wm->wm[0].ignore_lines), old_wm->wm[0].lines, + enast(old_wm->wm[1].ignore_lines), old_wm->wm[1].lines, + enast(old_wm->wm[2].ignore_lines), old_wm->wm[2].lines, + enast(old_wm->wm[3].ignore_lines), old_wm->wm[3].lines, + enast(old_wm->wm[4].ignore_lines), old_wm->wm[4].lines, + enast(old_wm->wm[5].ignore_lines), old_wm->wm[5].lines, + enast(old_wm->wm[6].ignore_lines), old_wm->wm[6].lines, + enast(old_wm->wm[7].ignore_lines), old_wm->wm[7].lines, + enast(old_wm->trans_wm.ignore_lines), 
old_wm->trans_wm.lines, + enast(old_wm->sagv.wm0.ignore_lines), old_wm->sagv.wm0.lines, + enast(old_wm->sagv.trans_wm.ignore_lines), old_wm->sagv.trans_wm.lines, + enast(new_wm->wm[0].ignore_lines), new_wm->wm[0].lines, + enast(new_wm->wm[1].ignore_lines), new_wm->wm[1].lines, + enast(new_wm->wm[2].ignore_lines), new_wm->wm[2].lines, + enast(new_wm->wm[3].ignore_lines), new_wm->wm[3].lines, + enast(new_wm->wm[4].ignore_lines), new_wm->wm[4].lines, + enast(new_wm->wm[5].ignore_lines), new_wm->wm[5].lines, + enast(new_wm->wm[6].ignore_lines), new_wm->wm[6].lines, + enast(new_wm->wm[7].ignore_lines), new_wm->wm[7].lines, + enast(new_wm->trans_wm.ignore_lines), new_wm->trans_wm.lines, + enast(new_wm->sagv.wm0.ignore_lines), new_wm->sagv.wm0.lines, + enast(new_wm->sagv.trans_wm.ignore_lines), new_wm->sagv.trans_wm.lines); + + drm_dbg_kms(display->drm, + "[PLANE:%d:%s] blocks %4d,%4d,%4d,%4d,%4d,%4d,%4d,%4d,%4d,%4d,%5d" + " -> %4d,%4d,%4d,%4d,%4d,%4d,%4d,%4d,%4d,%4d,%5d\n", + plane->base.base.id, plane->base.name, + old_wm->wm[0].blocks, old_wm->wm[1].blocks, + old_wm->wm[2].blocks, old_wm->wm[3].blocks, + old_wm->wm[4].blocks, old_wm->wm[5].blocks, + old_wm->wm[6].blocks, old_wm->wm[7].blocks, + old_wm->trans_wm.blocks, + old_wm->sagv.wm0.blocks, + old_wm->sagv.trans_wm.blocks, + new_wm->wm[0].blocks, new_wm->wm[1].blocks, + new_wm->wm[2].blocks, new_wm->wm[3].blocks, + new_wm->wm[4].blocks, new_wm->wm[5].blocks, + new_wm->wm[6].blocks, new_wm->wm[7].blocks, + new_wm->trans_wm.blocks, + new_wm->sagv.wm0.blocks, + new_wm->sagv.trans_wm.blocks); + + drm_dbg_kms(display->drm, + "[PLANE:%d:%s] min_ddb %4d,%4d,%4d,%4d,%4d,%4d,%4d,%4d,%4d,%4d,%5d" + " -> %4d,%4d,%4d,%4d,%4d,%4d,%4d,%4d,%4d,%4d,%5d\n", + plane->base.base.id, plane->base.name, + old_wm->wm[0].min_ddb_alloc, old_wm->wm[1].min_ddb_alloc, + old_wm->wm[2].min_ddb_alloc, old_wm->wm[3].min_ddb_alloc, + old_wm->wm[4].min_ddb_alloc, old_wm->wm[5].min_ddb_alloc, + old_wm->wm[6].min_ddb_alloc, old_wm->wm[7].min_ddb_alloc, + old_wm->trans_wm.min_ddb_alloc, + old_wm->sagv.wm0.min_ddb_alloc, + old_wm->sagv.trans_wm.min_ddb_alloc, + new_wm->wm[0].min_ddb_alloc, new_wm->wm[1].min_ddb_alloc, + new_wm->wm[2].min_ddb_alloc, new_wm->wm[3].min_ddb_alloc, + new_wm->wm[4].min_ddb_alloc, new_wm->wm[5].min_ddb_alloc, + new_wm->wm[6].min_ddb_alloc, new_wm->wm[7].min_ddb_alloc, + new_wm->trans_wm.min_ddb_alloc, + new_wm->sagv.wm0.min_ddb_alloc, + new_wm->sagv.trans_wm.min_ddb_alloc); +} + static void skl_print_wm_changes(struct intel_atomic_state *state) { @@ -2709,7 +2741,6 @@ skl_print_wm_changes(struct intel_atomic_state *state) if (skl_ddb_entry_equal(old, new)) continue; - drm_dbg_kms(display->drm, "[PLANE:%d:%s] ddb (%4d - %4d) -> (%4d - %4d), size %4d -> %4d\n", plane->base.base.id, plane->base.name, @@ -2727,89 +2758,7 @@ skl_print_wm_changes(struct intel_atomic_state *state) if (skl_plane_wm_equals(display, old_wm, new_wm)) continue; - drm_dbg_kms(display->drm, - "[PLANE:%d:%s] level %cwm0,%cwm1,%cwm2,%cwm3,%cwm4,%cwm5,%cwm6,%cwm7,%ctwm,%cswm,%cstwm" - " -> %cwm0,%cwm1,%cwm2,%cwm3,%cwm4,%cwm5,%cwm6,%cwm7,%ctwm,%cswm,%cstwm\n", - plane->base.base.id, plane->base.name, - enast(old_wm->wm[0].enable), enast(old_wm->wm[1].enable), - enast(old_wm->wm[2].enable), enast(old_wm->wm[3].enable), - enast(old_wm->wm[4].enable), enast(old_wm->wm[5].enable), - enast(old_wm->wm[6].enable), enast(old_wm->wm[7].enable), - enast(old_wm->trans_wm.enable), - enast(old_wm->sagv.wm0.enable), - enast(old_wm->sagv.trans_wm.enable), - enast(new_wm->wm[0].enable), 
enast(new_wm->wm[1].enable), - enast(new_wm->wm[2].enable), enast(new_wm->wm[3].enable), - enast(new_wm->wm[4].enable), enast(new_wm->wm[5].enable), - enast(new_wm->wm[6].enable), enast(new_wm->wm[7].enable), - enast(new_wm->trans_wm.enable), - enast(new_wm->sagv.wm0.enable), - enast(new_wm->sagv.trans_wm.enable)); - - drm_dbg_kms(display->drm, - "[PLANE:%d:%s] lines %c%3d,%c%3d,%c%3d,%c%3d,%c%3d,%c%3d,%c%3d,%c%3d,%c%3d,%c%3d,%c%4d" - " -> %c%3d,%c%3d,%c%3d,%c%3d,%c%3d,%c%3d,%c%3d,%c%3d,%c%3d,%c%3d,%c%4d\n", - plane->base.base.id, plane->base.name, - enast(old_wm->wm[0].ignore_lines), old_wm->wm[0].lines, - enast(old_wm->wm[1].ignore_lines), old_wm->wm[1].lines, - enast(old_wm->wm[2].ignore_lines), old_wm->wm[2].lines, - enast(old_wm->wm[3].ignore_lines), old_wm->wm[3].lines, - enast(old_wm->wm[4].ignore_lines), old_wm->wm[4].lines, - enast(old_wm->wm[5].ignore_lines), old_wm->wm[5].lines, - enast(old_wm->wm[6].ignore_lines), old_wm->wm[6].lines, - enast(old_wm->wm[7].ignore_lines), old_wm->wm[7].lines, - enast(old_wm->trans_wm.ignore_lines), old_wm->trans_wm.lines, - enast(old_wm->sagv.wm0.ignore_lines), old_wm->sagv.wm0.lines, - enast(old_wm->sagv.trans_wm.ignore_lines), old_wm->sagv.trans_wm.lines, - enast(new_wm->wm[0].ignore_lines), new_wm->wm[0].lines, - enast(new_wm->wm[1].ignore_lines), new_wm->wm[1].lines, - enast(new_wm->wm[2].ignore_lines), new_wm->wm[2].lines, - enast(new_wm->wm[3].ignore_lines), new_wm->wm[3].lines, - enast(new_wm->wm[4].ignore_lines), new_wm->wm[4].lines, - enast(new_wm->wm[5].ignore_lines), new_wm->wm[5].lines, - enast(new_wm->wm[6].ignore_lines), new_wm->wm[6].lines, - enast(new_wm->wm[7].ignore_lines), new_wm->wm[7].lines, - enast(new_wm->trans_wm.ignore_lines), new_wm->trans_wm.lines, - enast(new_wm->sagv.wm0.ignore_lines), new_wm->sagv.wm0.lines, - enast(new_wm->sagv.trans_wm.ignore_lines), new_wm->sagv.trans_wm.lines); - - drm_dbg_kms(display->drm, - "[PLANE:%d:%s] blocks %4d,%4d,%4d,%4d,%4d,%4d,%4d,%4d,%4d,%4d,%5d" - " -> %4d,%4d,%4d,%4d,%4d,%4d,%4d,%4d,%4d,%4d,%5d\n", - plane->base.base.id, plane->base.name, - old_wm->wm[0].blocks, old_wm->wm[1].blocks, - old_wm->wm[2].blocks, old_wm->wm[3].blocks, - old_wm->wm[4].blocks, old_wm->wm[5].blocks, - old_wm->wm[6].blocks, old_wm->wm[7].blocks, - old_wm->trans_wm.blocks, - old_wm->sagv.wm0.blocks, - old_wm->sagv.trans_wm.blocks, - new_wm->wm[0].blocks, new_wm->wm[1].blocks, - new_wm->wm[2].blocks, new_wm->wm[3].blocks, - new_wm->wm[4].blocks, new_wm->wm[5].blocks, - new_wm->wm[6].blocks, new_wm->wm[7].blocks, - new_wm->trans_wm.blocks, - new_wm->sagv.wm0.blocks, - new_wm->sagv.trans_wm.blocks); - - drm_dbg_kms(display->drm, - "[PLANE:%d:%s] min_ddb %4d,%4d,%4d,%4d,%4d,%4d,%4d,%4d,%4d,%4d,%5d" - " -> %4d,%4d,%4d,%4d,%4d,%4d,%4d,%4d,%4d,%4d,%5d\n", - plane->base.base.id, plane->base.name, - old_wm->wm[0].min_ddb_alloc, old_wm->wm[1].min_ddb_alloc, - old_wm->wm[2].min_ddb_alloc, old_wm->wm[3].min_ddb_alloc, - old_wm->wm[4].min_ddb_alloc, old_wm->wm[5].min_ddb_alloc, - old_wm->wm[6].min_ddb_alloc, old_wm->wm[7].min_ddb_alloc, - old_wm->trans_wm.min_ddb_alloc, - old_wm->sagv.wm0.min_ddb_alloc, - old_wm->sagv.trans_wm.min_ddb_alloc, - new_wm->wm[0].min_ddb_alloc, new_wm->wm[1].min_ddb_alloc, - new_wm->wm[2].min_ddb_alloc, new_wm->wm[3].min_ddb_alloc, - new_wm->wm[4].min_ddb_alloc, new_wm->wm[5].min_ddb_alloc, - new_wm->wm[6].min_ddb_alloc, new_wm->wm[7].min_ddb_alloc, - new_wm->trans_wm.min_ddb_alloc, - new_wm->sagv.wm0.min_ddb_alloc, - new_wm->sagv.trans_wm.min_ddb_alloc); + skl_print_plane_changes(display, 
plane, old_wm, new_wm); } } } @@ -2913,67 +2862,79 @@ static int skl_wm_add_affected_planes(struct intel_atomic_state *state, return 0; } -/* - * If Fixed Refresh Rate or For VRR case Vmin = Vmax = Flipline: - * Program DEEP PKG_C_LATENCY Pkg C with highest valid latency from - * watermark level1 and up and above. If watermark level 1 is - * invalid program it with all 1's. - * Program PKG_C_LATENCY Added Wake Time = DSB execution time - * If Variable Refresh Rate where Vmin != Vmax != Flipline: - * Program DEEP PKG_C_LATENCY Pkg C with all 1's. - * Program PKG_C_LATENCY Added Wake Time = 0 - */ +static int pkgc_max_linetime(struct intel_atomic_state *state) +{ + struct intel_display *display = to_intel_display(state); + const struct intel_crtc_state *crtc_state; + struct intel_crtc *crtc; + int i, max_linetime; + + /* + * Apparenty the hardware uses WM_LINETIME internally for + * this stuff, compute everything based on that. + */ + for_each_new_intel_crtc_in_state(state, crtc, crtc_state, i) { + display->pkgc.disable[crtc->pipe] = crtc_state->vrr.enable; + display->pkgc.linetime[crtc->pipe] = DIV_ROUND_UP(crtc_state->linetime, 8); + } + + max_linetime = 0; + for_each_intel_crtc(display->drm, crtc) { + if (display->pkgc.disable[crtc->pipe]) + return 0; + + max_linetime = max(display->pkgc.linetime[crtc->pipe], max_linetime); + } + + return max_linetime; +} + void intel_program_dpkgc_latency(struct intel_atomic_state *state) { struct intel_display *display = to_intel_display(state); - struct intel_crtc *crtc; - struct intel_crtc_state *new_crtc_state; - u32 latency = LNL_PKG_C_LATENCY_MASK; - u32 added_wake_time = 0; - u32 max_linetime = 0; - u32 clear, val; - bool fixed_refresh_rate = false; - int i; + int max_linetime, latency, added_wake_time = 0; if (DISPLAY_VER(display) < 20) return; - for_each_new_intel_crtc_in_state(state, crtc, new_crtc_state, i) { - if (!new_crtc_state->vrr.enable || - (new_crtc_state->vrr.vmin == new_crtc_state->vrr.vmax && - new_crtc_state->vrr.vmin == new_crtc_state->vrr.flipline)) - fixed_refresh_rate = true; + mutex_lock(&display->wm.wm_mutex); - max_linetime = max(new_crtc_state->linetime, max_linetime); - } + latency = skl_watermark_max_latency(display, 1); - if (fixed_refresh_rate) { - added_wake_time = DSB_EXE_TIME + - display->sagv.block_time_us; + /* FIXME runtime changes to enable_flipq are racy */ + if (display->params.enable_flipq) + added_wake_time = intel_flipq_exec_time_us(display); - latency = skl_watermark_max_latency(display, 1); + /* + * Wa_22020432604 + * "PKG_C_LATENCY Added Wake Time field is not working" + */ + if (latency && IS_DISPLAY_VER(display, 20, 30)) { + latency += added_wake_time; + added_wake_time = 0; + } - /* Wa_22020432604 */ - if ((DISPLAY_VER(display) == 20 || DISPLAY_VER(display) == 30) && !latency) { - latency += added_wake_time; - added_wake_time = 0; - } + max_linetime = pkgc_max_linetime(state); - /* Wa_22020299601 */ - if ((latency && max_linetime) && - (DISPLAY_VER(display) == 20 || DISPLAY_VER(display) == 30)) { - latency = max_linetime * DIV_ROUND_UP(latency, max_linetime); - } else if (!latency) { - latency = LNL_PKG_C_LATENCY_MASK; - } + if (max_linetime == 0 || latency == 0) { + latency = REG_FIELD_GET(LNL_PKG_C_LATENCY_MASK, + LNL_PKG_C_LATENCY_MASK); + added_wake_time = 0; + } else { + /* + * Wa_22020299601 + * "Increase the latency programmed in PKG_C_LATENCY Pkg C Latency to be a + * multiple of the pipeline time from WM_LINETIME" + */ + latency = roundup(latency, max_linetime); } - clear = 
LNL_ADDED_WAKE_TIME_MASK | LNL_PKG_C_LATENCY_MASK; - val = REG_FIELD_PREP(LNL_PKG_C_LATENCY_MASK, latency) | - REG_FIELD_PREP(LNL_ADDED_WAKE_TIME_MASK, added_wake_time); + intel_de_write(display, LNL_PKG_C_LATENCY, + REG_FIELD_PREP(LNL_ADDED_WAKE_TIME_MASK, added_wake_time) | + REG_FIELD_PREP(LNL_PKG_C_LATENCY_MASK, latency)); - intel_de_rmw(display, LNL_PKG_C_LATENCY, clear, val); + mutex_unlock(&display->wm.wm_mutex); } static int @@ -3011,7 +2972,7 @@ skl_compute_wm(struct intel_atomic_state *state) * drm_atomic_check_only() gets upset if we pull more crtcs * into the state, so we have to calculate this based on the * individual intel_crtc_can_enable_sagv() rather than - * the overall intel_can_enable_sagv(). Otherwise the + * the overall intel_bw_can_enable_sagv(). Otherwise the * crtcs not included in the commit would not switch to the * SAGV watermarks when we are about to enable SAGV, and that * would lead to underruns. This does mean extra power draw @@ -3248,12 +3209,12 @@ adjust_wm_latency(struct intel_display *display, } /* - * WA Level-0 adjustment for 16GB DIMMs: SKL+ + * WA Level-0 adjustment for 16Gb DIMMs: SKL+ * If we could not get dimm info enable this WA to prevent from - * any underrun. If not able to get Dimm info assume 16GB dimm + * any underrun. If not able to get DIMM info assume 16Gb DIMM * to avoid any underrun. */ - if (!display->platform.dg2 && dram_info->wm_lv_0_adjust_needed) + if (!display->platform.dg2 && dram_info->has_16gb_dimms) wm[0] += 1; } @@ -3279,7 +3240,6 @@ static void mtl_read_wm_latency(struct intel_display *display, u16 wm[]) static void skl_read_wm_latency(struct intel_display *display, u16 wm[]) { - struct drm_i915_private *i915 = to_i915(display->drm); int num_levels = display->wm.num_levels; int read_latency = DISPLAY_VER(display) >= 12 ? 3 : 2; int mult = display->platform.dg2 ? 
2 : 1; @@ -3288,7 +3248,7 @@ static void skl_read_wm_latency(struct intel_display *display, u16 wm[]) /* read the first set of memory latencies[0:3] */ val = 0; /* data0 to be programmed to 0 for first set */ - ret = snb_pcode_read(&i915->uncore, GEN9_PCODE_READ_MEM_LATENCY, &val, NULL); + ret = intel_pcode_read(display->drm, GEN9_PCODE_READ_MEM_LATENCY, &val, NULL); if (ret) { drm_err(display->drm, "SKL Mailbox read error = %d\n", ret); return; @@ -3301,7 +3261,7 @@ static void skl_read_wm_latency(struct intel_display *display, u16 wm[]) /* read the second set of memory latencies[4:7] */ val = 1; /* data0 to be programmed to 1 for second set */ - ret = snb_pcode_read(&i915->uncore, GEN9_PCODE_READ_MEM_LATENCY, &val, NULL); + ret = intel_pcode_read(display->drm, GEN9_PCODE_READ_MEM_LATENCY, &val, NULL); if (ret) { drm_err(display->drm, "SKL Mailbox read error = %d\n", ret); return; @@ -3693,6 +3653,38 @@ void intel_dbuf_post_plane_update(struct intel_atomic_state *state) gen9_dbuf_slices_update(display, new_slices); } +int intel_dbuf_num_enabled_slices(const struct intel_dbuf_state *dbuf_state) +{ + return hweight8(dbuf_state->enabled_slices); +} + +int intel_dbuf_num_active_pipes(const struct intel_dbuf_state *dbuf_state) +{ + return hweight8(dbuf_state->active_pipes); +} + +bool intel_dbuf_pmdemand_needs_update(struct intel_atomic_state *state) +{ + struct intel_display *display = to_intel_display(state); + const struct intel_dbuf_state *new_dbuf_state, *old_dbuf_state; + + new_dbuf_state = intel_atomic_get_new_dbuf_state(state); + old_dbuf_state = intel_atomic_get_old_dbuf_state(state); + + if (new_dbuf_state && + new_dbuf_state->active_pipes != old_dbuf_state->active_pipes) + return true; + + if (DISPLAY_VER(display) < 30) { + if (new_dbuf_state && + new_dbuf_state->enabled_slices != + old_dbuf_state->enabled_slices) + return true; + } + + return false; +} + static void skl_mbus_sanitize(struct intel_display *display) { struct intel_dbuf_state *dbuf_state = @@ -4045,14 +4037,14 @@ DEFINE_SHOW_ATTRIBUTE(intel_sagv_status); void skl_watermark_debugfs_register(struct intel_display *display) { - struct drm_minor *minor = display->drm->primary; + struct dentry *debugfs_root = display->drm->debugfs_root; if (HAS_IPC(display)) - debugfs_create_file("i915_ipc_status", 0644, minor->debugfs_root, + debugfs_create_file("i915_ipc_status", 0644, debugfs_root, display, &skl_watermark_ipc_status_fops); if (HAS_SAGV(display)) - debugfs_create_file("i915_sagv_status", 0444, minor->debugfs_root, + debugfs_create_file("i915_sagv_status", 0444, debugfs_root, display, &intel_sagv_status_fops); } diff --git a/drivers/gpu/drm/i915/display/skl_watermark.h b/drivers/gpu/drm/i915/display/skl_watermark.h index 95b0b599d5c3..62790816f030 100644 --- a/drivers/gpu/drm/i915/display/skl_watermark.h +++ b/drivers/gpu/drm/i915/display/skl_watermark.h @@ -8,17 +8,15 @@ #include <linux/types.h> -#include "intel_display_limits.h" -#include "intel_global_state.h" -#include "intel_wm_types.h" - +enum plane_id; struct intel_atomic_state; -struct intel_bw_state; struct intel_crtc; struct intel_crtc_state; +struct intel_dbuf_state; struct intel_display; struct intel_plane; struct intel_plane_state; +struct skl_ddb_entry; struct skl_pipe_wm; struct skl_wm_level; @@ -27,8 +25,6 @@ u8 intel_enabled_dbuf_slices_mask(struct intel_display *display); void intel_sagv_pre_plane_update(struct intel_atomic_state *state); void intel_sagv_post_plane_update(struct intel_atomic_state *state); bool intel_crtc_can_enable_sagv(const struct 
intel_crtc_state *crtc_state); -bool intel_can_enable_sagv(struct intel_display *display, - const struct intel_bw_state *bw_state); bool intel_has_sagv(struct intel_display *display); u32 skl_ddb_dbuf_slice_mask(struct intel_display *display, @@ -63,28 +59,11 @@ unsigned int skl_plane_relative_data_rate(const struct intel_crtc_state *crtc_st struct intel_plane *plane, int width, int height, int cpp); -struct intel_dbuf_state { - struct intel_global_state base; - - struct skl_ddb_entry ddb[I915_MAX_PIPES]; - unsigned int weight[I915_MAX_PIPES]; - u8 slices[I915_MAX_PIPES]; - u8 enabled_slices; - u8 active_pipes; - u8 mdclk_cdclk_ratio; - bool joined_mbus; -}; - struct intel_dbuf_state * intel_atomic_get_dbuf_state(struct intel_atomic_state *state); -#define to_intel_dbuf_state(global_state) \ - container_of_const((global_state), struct intel_dbuf_state, base) - -#define intel_atomic_get_old_dbuf_state(state) \ - to_intel_dbuf_state(intel_atomic_get_old_global_obj_state(state, &to_intel_display(state)->dbuf.obj)) -#define intel_atomic_get_new_dbuf_state(state) \ - to_intel_dbuf_state(intel_atomic_get_new_global_obj_state(state, &to_intel_display(state)->dbuf.obj)) +int intel_dbuf_num_enabled_slices(const struct intel_dbuf_state *dbuf_state); +int intel_dbuf_num_active_pipes(const struct intel_dbuf_state *dbuf_state); int intel_dbuf_init(struct intel_display *display); int intel_dbuf_state_set_mdclk_cdclk_ratio(struct intel_atomic_state *state, @@ -98,5 +77,7 @@ void intel_dbuf_mbus_pre_ddb_update(struct intel_atomic_state *state); void intel_dbuf_mbus_post_ddb_update(struct intel_atomic_state *state); void intel_program_dpkgc_latency(struct intel_atomic_state *state); +bool intel_dbuf_pmdemand_needs_update(struct intel_atomic_state *state); + #endif /* __SKL_WATERMARK_H__ */ diff --git a/drivers/gpu/drm/i915/display/vlv_dsi.c b/drivers/gpu/drm/i915/display/vlv_dsi.c index 3433deb635ef..c9a53fde79c4 100644 --- a/drivers/gpu/drm/i915/display/vlv_dsi.c +++ b/drivers/gpu/drm/i915/display/vlv_dsi.c @@ -761,7 +761,7 @@ static void intel_dsi_pre_enable(struct intel_atomic_state *state, if (display->platform.valleyview || display->platform.cherryview) { /* Disable DPOunit clock gating, can stall pipe */ - intel_de_rmw(display, DSPCLK_GATE_D(display), + intel_de_rmw(display, VLV_DSPCLK_GATE_D, 0, DPOUNIT_CLOCK_GATE_DISABLE); } @@ -918,7 +918,7 @@ static void intel_dsi_post_disable(struct intel_atomic_state *state, } else { vlv_dsi_pll_disable(encoder); - intel_de_rmw(display, DSPCLK_GATE_D(display), + intel_de_rmw(display, VLV_DSPCLK_GATE_D, DPOUNIT_CLOCK_GATE_DISABLE, 0); } @@ -1591,8 +1591,8 @@ static void vlv_dsi_add_properties(struct intel_connector *connector) static void vlv_dphy_param_init(struct intel_dsi *intel_dsi) { + struct intel_display *display = to_intel_display(&intel_dsi->base); struct intel_connector *connector = intel_dsi->attached_connector; - struct intel_display *display = to_intel_display(connector); struct mipi_config *mipi_config = connector->panel.vbt.dsi.config; u32 tlpx_ns, extra_byte_count, tlpx_ui; u32 ui_num, ui_den; diff --git a/drivers/gpu/drm/i915/display/vlv_dsi_pll.c b/drivers/gpu/drm/i915/display/vlv_dsi_pll.c index d42b61e6f076..f078b9cda96c 100644 --- a/drivers/gpu/drm/i915/display/vlv_dsi_pll.c +++ b/drivers/gpu/drm/i915/display/vlv_dsi_pll.c @@ -25,12 +25,12 @@ * Yogesh Mohan Marimuthu <yogesh.mohan.marimuthu@intel.com> */ +#include <linux/iopoll.h> #include <linux/kernel.h> #include <linux/string_helpers.h> #include <drm/drm_print.h> -#include "i915_utils.h" 
#include "intel_de.h" #include "intel_display_types.h" #include "intel_dsi.h" @@ -142,11 +142,9 @@ static int vlv_dsi_pclk(struct intel_encoder *encoder, pll_div &= DSI_PLL_M1_DIV_MASK; pll_div = pll_div >> DSI_PLL_M1_DIV_SHIFT; - while (pll_ctl) { - pll_ctl = pll_ctl >> 1; - p++; - } - p--; + p = fls(pll_ctl); + if (p) + p--; if (!p) { drm_err(display->drm, "wrong P1 divisor\n"); @@ -216,6 +214,8 @@ void vlv_dsi_pll_enable(struct intel_encoder *encoder, const struct intel_crtc_state *config) { struct intel_display *display = to_intel_display(encoder); + u32 val; + int ret; drm_dbg_kms(display->drm, "\n"); @@ -233,9 +233,10 @@ void vlv_dsi_pll_enable(struct intel_encoder *encoder, vlv_cck_write(display->drm, CCK_REG_DSI_PLL_CONTROL, config->dsi_pll.ctrl); - if (wait_for(vlv_cck_read(display->drm, CCK_REG_DSI_PLL_CONTROL) & - DSI_PLL_LOCK, 20)) { - + ret = poll_timeout_us(val = vlv_cck_read(display->drm, CCK_REG_DSI_PLL_CONTROL), + val & DSI_PLL_LOCK, + 500, 20 * 1000, false); + if (ret) { vlv_cck_put(display->drm); drm_err(display->drm, "DSI PLL lock failed\n"); return; @@ -262,6 +263,11 @@ void vlv_dsi_pll_disable(struct intel_encoder *encoder) vlv_cck_put(display->drm); } +static bool has_dsic_clock(struct intel_display *display) +{ + return display->platform.broxton; +} + bool bxt_dsi_pll_is_enabled(struct intel_display *display) { bool enabled; @@ -284,7 +290,7 @@ bool bxt_dsi_pll_is_enabled(struct intel_display *display) * causes a system hang. */ val = intel_de_read(display, BXT_DSI_PLL_CTL); - if (display->platform.geminilake) { + if (!has_dsic_clock(display)) { if (!(val & BXT_DSIA_16X_MASK)) { drm_dbg_kms(display->drm, "Invalid PLL divider (%08x)\n", val); @@ -358,6 +364,8 @@ u32 bxt_dsi_get_pclk(struct intel_encoder *encoder, u32 pclk; config->dsi_pll.ctrl = intel_de_read(display, BXT_DSI_PLL_CTL); + if (!has_dsic_clock(display)) + config->dsi_pll.ctrl &= ~BXT_DSIC_16X_MASK; pclk = bxt_dsi_pclk(encoder, config); @@ -514,7 +522,9 @@ int bxt_dsi_pll_compute(struct intel_encoder *encoder, * Spec says both have to be programmed, even if one is not getting * used. 
Configure MIPI_CLOCK_CTL dividers in modeset */ - config->dsi_pll.ctrl = dsi_ratio | BXT_DSIA_16X_BY2 | BXT_DSIC_16X_BY2; + config->dsi_pll.ctrl = dsi_ratio | BXT_DSIA_16X_BY2; + if (has_dsic_clock(display)) + config->dsi_pll.ctrl |= BXT_DSIC_16X_BY2; /* As per recommendation from hardware team, * Prog PVD ratio =1 if dsi ratio <= 50 diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c index 15835952352e..ed6599694835 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c @@ -2158,18 +2158,12 @@ static int set_context_image(struct i915_gem_context *ctx, goto out_ce; } - state = kmalloc(ce->engine->context_size, GFP_KERNEL); - if (!state) { - ret = -ENOMEM; + state = memdup_user(u64_to_user_ptr(user.image), ce->engine->context_size); + if (IS_ERR(state)) { + ret = PTR_ERR(state); goto out_ce; } - if (copy_from_user(state, u64_to_user_ptr(user.image), - ce->engine->context_size)) { - ret = -EFAULT; - goto out_state; - } - shmem_state = shmem_create_from_data(ce->engine->name, state, ce->engine->context_size); if (IS_ERR(shmem_state)) { diff --git a/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c b/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c index 05e440643aa2..f4f1c979d1b9 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c @@ -105,7 +105,7 @@ static int i915_gem_dmabuf_mmap(struct dma_buf *dma_buf, struct vm_area_struct * if (!obj->base.filp) return -ENODEV; - ret = call_mmap(obj->base.filp, vma); + ret = vfs_mmap(obj->base.filp, vma); if (ret) return ret; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c index ca7e9216934a..39c7c32e1e74 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c @@ -182,7 +182,7 @@ enum { * the object. Simple! ... The relocation entries are stored in user memory * and so to access them we have to copy them into a local buffer. That copy * has to avoid taking any pagefaults as they may lead back to a GEM object - * requiring the struct_mutex (i.e. recursive deadlock). So once again we split + * requiring the vm->mutex (i.e. recursive deadlock). So once again we split * the relocation into multiple passes. First we try to do everything within an * atomic context (avoid the pagefaults) which requires that we never wait. If * we detect that we may wait, or if we need to fault, then we have to fallback @@ -1382,8 +1382,9 @@ static void clflush_write32(u32 *addr, u32 value, unsigned int flushes) */ if (flushes & CLFLUSH_AFTER) drm_clflush_virt_range(addr, sizeof(*addr)); - } else + } else { *addr = value; + } } static u64 @@ -1567,36 +1568,36 @@ static int eb_relocate_vma(struct i915_execbuffer *eb, struct eb_vma *ev) do { u64 offset = eb_relocate_entry(eb, ev, r); - if (likely(offset == 0)) { - } else if ((s64)offset < 0) { + if (likely(offset == 0)) + continue; + + if ((s64)offset < 0) { remain = (int)offset; goto out; - } else { - /* - * Note that reporting an error now - * leaves everything in an inconsistent - * state as we have *already* changed - * the relocation value inside the - * object. As we have not changed the - * reloc.presumed_offset or will not - * change the execobject.offset, on the - * call we may not rewrite the value - * inside the object, leaving it - * dangling and causing a GPU hang. 
Unless - * userspace dynamically rebuilds the - * relocations on each execbuf rather than - * presume a static tree. - * - * We did previously check if the relocations - * were writable (access_ok), an error now - * would be a strange race with mprotect, - * having already demonstrated that we - * can read from this userspace address. - */ - offset = gen8_canonical_addr(offset & ~UPDATE); - __put_user(offset, - &urelocs[r - stack].presumed_offset); } + /* + * Note that reporting an error now + * leaves everything in an inconsistent + * state as we have *already* changed + * the relocation value inside the + * object. As we have not changed the + * reloc.presumed_offset or will not + * change the execobject.offset, on the + * call we may not rewrite the value + * inside the object, leaving it + * dangling and causing a GPU hang. Unless + * userspace dynamically rebuilds the + * relocations on each execbuf rather than + * presume a static tree. + * + * We did previously check if the relocations + * were writable (access_ok), an error now + * would be a strange race with mprotect, + * having already demonstrated that we + * can read from this userspace address. + */ + offset = gen8_canonical_addr(offset & ~UPDATE); + __put_user(offset, &urelocs[r - stack].presumed_offset); } while (r++, --count); urelocs += ARRAY_SIZE(stack); } while (remain); diff --git a/drivers/gpu/drm/i915/gem/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/i915_gem_mman.c index f6d37dff320d..75f5b0e871ef 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_mman.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_mman.c @@ -5,7 +5,6 @@ #include <linux/anon_inodes.h> #include <linux/mman.h> -#include <linux/pfn_t.h> #include <linux/sizes.h> #include <drm/drm_cache.h> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c b/drivers/gpu/drm/i915/gem/i915_gem_object.c index 1f38e367c60b..478011e5ecb3 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c @@ -459,8 +459,8 @@ static void i915_gem_free_object(struct drm_gem_object *gem_obj) atomic_inc(&i915->mm.free_count); /* - * Since we require blocking on struct_mutex to unbind the freed - * object from the GPU before releasing resources back to the + * Since we require blocking on drm_i915_gem_object->vma.lock to unbind + * the freed object from the GPU before releasing resources back to the * system, we can not do that directly from the RCU callback (which may * be a softirq context), but must instead then defer that work onto a * kthread. 
We use the RCU callback rather than move the freed object diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.h b/drivers/gpu/drm/i915/gem/i915_gem_object.h index c34f41605b46..148034ef504d 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.h @@ -17,6 +17,8 @@ #include "i915_vma_types.h" enum intel_region_id; +struct drm_scanout_buffer; +struct intel_panic; #define obj_to_i915(obj__) to_i915((obj__)->base.dev) @@ -691,6 +693,11 @@ i915_gem_object_unpin_pages(struct drm_i915_gem_object *obj) int __i915_gem_object_put_pages(struct drm_i915_gem_object *obj); int i915_gem_object_truncate(struct drm_i915_gem_object *obj); +struct intel_panic *i915_gem_object_alloc_panic(void); +int i915_gem_object_panic_setup(struct intel_panic *panic, struct drm_scanout_buffer *sb, + struct drm_gem_object *_obj, bool panic_tiling); +void i915_gem_object_panic_finish(struct intel_panic *panic); + /** * i915_gem_object_pin_map - return a contiguous mapping of the entire object * @obj: the object to map into kernel address space diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_frontbuffer.h b/drivers/gpu/drm/i915/gem/i915_gem_object_frontbuffer.h index b6dc3d1b9bb1..b682969e3a29 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object_frontbuffer.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_object_frontbuffer.h @@ -89,12 +89,10 @@ i915_gem_object_set_frontbuffer(struct drm_i915_gem_object *obj, if (!front) { RCU_INIT_POINTER(obj->frontbuffer, NULL); - drm_gem_object_put(intel_bo_to_drm_bo(obj)); } else if (rcu_access_pointer(obj->frontbuffer)) { cur = rcu_dereference_protected(obj->frontbuffer, true); kref_get(&cur->ref); } else { - drm_gem_object_get(intel_bo_to_drm_bo(obj)); rcu_assign_pointer(obj->frontbuffer, front); } diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pages.c b/drivers/gpu/drm/i915/gem/i915_gem_pages.c index 7f83f8bdc8fb..3f09cbce05bb 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_pages.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_pages.c @@ -4,8 +4,11 @@ */ #include <drm/drm_cache.h> +#include <drm/drm_panic.h> #include <linux/vmalloc.h> +#include "display/intel_fb.h" +#include "display/intel_display_types.h" #include "gt/intel_gt.h" #include "gt/intel_tlb.h" @@ -354,6 +357,131 @@ static void *i915_gem_object_map_pfn(struct drm_i915_gem_object *obj, return vaddr ?: ERR_PTR(-ENOMEM); } +struct intel_panic { + struct page **pages; + int page; + void *vaddr; +}; + +static void i915_panic_kunmap(struct intel_panic *panic) +{ + if (panic->vaddr) { + drm_clflush_virt_range(panic->vaddr, PAGE_SIZE); + kunmap_local(panic->vaddr); + panic->vaddr = NULL; + } +} + +static struct page **i915_gem_object_panic_pages(struct drm_i915_gem_object *obj) +{ + unsigned long n_pages = obj->base.size >> PAGE_SHIFT, i; + struct page *page; + struct page **pages; + struct sgt_iter iter; + + /* For a 3840x2160 32 bits Framebuffer, this should require ~64K */ + pages = kmalloc_array(n_pages, sizeof(*pages), GFP_ATOMIC); + if (!pages) + return NULL; + + i = 0; + for_each_sgt_page(page, iter, obj->mm.pages) + pages[i++] = page; + return pages; +} + +static void i915_gem_object_panic_map_set_pixel(struct drm_scanout_buffer *sb, unsigned int x, + unsigned int y, u32 color) +{ + struct intel_framebuffer *fb = (struct intel_framebuffer *)sb->private; + unsigned int offset = fb->panic_tiling(sb->width, x, y); + + iosys_map_wr(&sb->map[0], offset, u32, color); +} + +/* + * The scanout buffer pages are not mapped, so for each pixel, + * use kmap_local_page_try_from_panic() to map 
the page, and write the pixel. + * Try to keep the map from the previous pixel, to avoid too much map/unmap. + */ +static void i915_gem_object_panic_page_set_pixel(struct drm_scanout_buffer *sb, unsigned int x, + unsigned int y, u32 color) +{ + unsigned int new_page; + unsigned int offset; + struct intel_framebuffer *fb = (struct intel_framebuffer *)sb->private; + struct intel_panic *panic = fb->panic; + + if (fb->panic_tiling) + offset = fb->panic_tiling(sb->width, x, y); + else + offset = y * sb->pitch[0] + x * sb->format->cpp[0]; + + new_page = offset >> PAGE_SHIFT; + offset = offset % PAGE_SIZE; + if (new_page != panic->page) { + i915_panic_kunmap(panic); + panic->page = new_page; + panic->vaddr = + kmap_local_page_try_from_panic(panic->pages[panic->page]); + } + if (panic->vaddr) { + u32 *pix = panic->vaddr + offset; + *pix = color; + } +} + +struct intel_panic *i915_gem_object_alloc_panic(void) +{ + struct intel_panic *panic; + + panic = kzalloc(sizeof(*panic), GFP_KERNEL); + + return panic; +} + +/* + * Setup the gem framebuffer for drm_panic access. + * Use current vaddr if it exists, or setup a list of pages. + * pfn is not supported yet. + */ +int i915_gem_object_panic_setup(struct intel_panic *panic, struct drm_scanout_buffer *sb, + struct drm_gem_object *_obj, bool panic_tiling) +{ + enum i915_map_type has_type; + struct drm_i915_gem_object *obj = to_intel_bo(_obj); + void *ptr; + + ptr = page_unpack_bits(obj->mm.mapping, &has_type); + if (ptr) { + if (i915_gem_object_has_iomem(obj)) + iosys_map_set_vaddr_iomem(&sb->map[0], (void __iomem *)ptr); + else + iosys_map_set_vaddr(&sb->map[0], ptr); + + if (panic_tiling) + sb->set_pixel = i915_gem_object_panic_map_set_pixel; + return 0; + } + if (i915_gem_object_has_struct_page(obj)) { + panic->pages = i915_gem_object_panic_pages(obj); + if (!panic->pages) + return -ENOMEM; + panic->page = -1; + sb->set_pixel = i915_gem_object_panic_page_set_pixel; + return 0; + } + return -EOPNOTSUPP; +} + +void i915_gem_object_panic_finish(struct intel_panic *panic) +{ + i915_panic_kunmap(panic); + panic->page = -1; + kfree(panic->pages); + panic->pages = NULL; +} + /* get, pin, and map the pages of the object into kernel space */ void *i915_gem_object_pin_map(struct drm_i915_gem_object *obj, enum i915_map_type type) @@ -637,7 +765,7 @@ __i915_gem_object_get_page(struct drm_i915_gem_object *obj, pgoff_t n) GEM_BUG_ON(!i915_gem_object_has_struct_page(obj)); sg = i915_gem_object_get_sg(obj, n, &offset); - return nth_page(sg_page(sg), offset); + return sg_page(sg) + offset; } /* Like i915_gem_object_get_page(), but mark the returned page dirty */ diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c index 19a3eb82dc6a..b9dae15c1d16 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c @@ -6,6 +6,7 @@ #include <linux/pagevec.h> #include <linux/shmem_fs.h> #include <linux/swap.h> +#include <linux/uio.h> #include <drm/drm_cache.h> @@ -302,7 +303,6 @@ void __shmem_writeback(size_t size, struct address_space *mapping) .nr_to_write = SWAP_CLUSTER_MAX, .range_start = 0, .range_end = LLONG_MAX, - .for_reclaim = 1, }; struct folio *folio = NULL; int error = 0; @@ -317,7 +317,7 @@ void __shmem_writeback(size_t size, struct address_space *mapping) if (folio_mapped(folio)) folio_redirty_for_writepage(&wbc, folio); else - error = shmem_writeout(folio, &wbc); + error = shmem_writeout(folio, NULL, NULL); } } @@ -400,12 +400,12 @@ static int shmem_pwrite(struct 
drm_i915_gem_object *obj, const struct drm_i915_gem_pwrite *arg) { - struct address_space *mapping = obj->base.filp->f_mapping; - const struct address_space_operations *aops = mapping->a_ops; char __user *user_data = u64_to_user_ptr(arg->data_ptr); - u64 remain; - loff_t pos; - unsigned int pg; + struct file *file = obj->base.filp; + struct kiocb kiocb; + struct iov_iter iter; + ssize_t written; + u64 size = arg->size; /* Caller already validated user args */ GEM_BUG_ON(!access_ok(user_data, arg->size)); @@ -428,63 +428,24 @@ shmem_pwrite(struct drm_i915_gem_object *obj, if (obj->mm.madv != I915_MADV_WILLNEED) return -EFAULT; - /* - * Before the pages are instantiated the object is treated as being - * in the CPU domain. The pages will be clflushed as required before - * use, and we can freely write into the pages directly. If userspace - * races pwrite with any other operation; corruption will ensue - - * that is userspace's prerogative! - */ + if (size > MAX_RW_COUNT) + return -EFBIG; - remain = arg->size; - pos = arg->offset; - pg = offset_in_page(pos); + if (!file->f_op->write_iter) + return -EINVAL; - do { - unsigned int len, unwritten; - struct folio *folio; - void *data, *vaddr; - int err; - char __maybe_unused c; - - len = PAGE_SIZE - pg; - if (len > remain) - len = remain; - - /* Prefault the user page to reduce potential recursion */ - err = __get_user(c, user_data); - if (err) - return err; - - err = __get_user(c, user_data + len - 1); - if (err) - return err; - - err = aops->write_begin(obj->base.filp, mapping, pos, len, - &folio, &data); - if (err < 0) - return err; - - vaddr = kmap_local_folio(folio, offset_in_folio(folio, pos)); - pagefault_disable(); - unwritten = __copy_from_user_inatomic(vaddr, user_data, len); - pagefault_enable(); - kunmap_local(vaddr); - - err = aops->write_end(obj->base.filp, mapping, pos, len, - len - unwritten, folio, data); - if (err < 0) - return err; - - /* We don't handle -EFAULT, leave it to the caller to check */ - if (unwritten) - return -ENODEV; - - remain -= len; - user_data += len; - pos += len; - pg = 0; - } while (remain); + init_sync_kiocb(&kiocb, file); + kiocb.ki_pos = arg->offset; + iov_iter_ubuf(&iter, ITER_SOURCE, (void __user *)user_data, size); + + written = file->f_op->write_iter(&kiocb, &iter); + BUG_ON(written == -EIOCBQUEUED); + + if (written != size) + return -EIO; + + if (written < 0) + return written; return 0; } @@ -553,6 +514,13 @@ static int __create_shmem(struct drm_i915_private *i915, if (IS_ERR(filp)) return PTR_ERR(filp); + /* + * Prevent -EFBIG by allowing large writes beyond MAX_NON_LFS on shmem + * objects by setting O_LARGEFILE. 
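The replacement pwrite path above routes the user copy through the shmem file's own ->write_iter instead of open-coding write_begin()/write_end(). A minimal sketch of that synchronous-kiocb pattern, detached from i915 (the helper name is made up for illustration):

	/* Sketch: synchronous write of a user buffer via a file's ->write_iter. */
	static ssize_t write_user_buf(struct file *file, const void __user *buf,
				      size_t len, loff_t pos)
	{
		struct kiocb kiocb;
		struct iov_iter iter;

		if (!file->f_op->write_iter)
			return -EINVAL;

		init_sync_kiocb(&kiocb, file);		/* synchronous, no ki_complete */
		kiocb.ki_pos = pos;
		iov_iter_ubuf(&iter, ITER_SOURCE, (void __user *)buf, len);

		/* A sync kiocb must complete inline, never return -EIOCBQUEUED. */
		return file->f_op->write_iter(&kiocb, &iter);
	}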
+ */ + if (force_o_largefile()) + filp->f_flags |= O_LARGEFILE; + obj->filp = filp; return 0; } @@ -637,9 +605,8 @@ i915_gem_object_create_shmem_from_data(struct drm_i915_private *i915, { struct drm_i915_gem_object *obj; struct file *file; - const struct address_space_operations *aops; - loff_t pos; - int err; + loff_t pos = 0; + ssize_t err; GEM_WARN_ON(IS_DGFX(i915)); obj = i915_gem_object_create_shmem(i915, round_up(size, PAGE_SIZE)); @@ -649,29 +616,15 @@ i915_gem_object_create_shmem_from_data(struct drm_i915_private *i915, GEM_BUG_ON(obj->write_domain != I915_GEM_DOMAIN_CPU); file = obj->base.filp; - aops = file->f_mapping->a_ops; - pos = 0; - do { - unsigned int len = min_t(typeof(size), size, PAGE_SIZE); - struct folio *folio; - void *fsdata; + err = kernel_write(file, data, size, &pos); - err = aops->write_begin(file, file->f_mapping, pos, len, - &folio, &fsdata); - if (err < 0) - goto fail; + if (err < 0) + goto fail; - memcpy_to_folio(folio, offset_in_folio(folio, pos), data, len); - - err = aops->write_end(file, file->f_mapping, pos, len, len, - folio, fsdata); - if (err < 0) - goto fail; - - size -= len; - data += len; - pos += len; - } while (size); + if (err != size) { + err = -EIO; + goto fail; + } return obj; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c b/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c index b81e67504bbe..7a3e74a6676e 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c @@ -170,7 +170,7 @@ i915_gem_shrink(struct i915_gem_ww_ctx *ww, * Also note that although these lists do not hold a reference to * the object we can safely grab one here: The final object * unreferencing and the bound_list are both protected by the - * dev->struct_mutex and so we won't ever be able to observe an + * i915->mm.obj_lock and so we won't ever be able to observe an * object on the bound_list with a reference count equals 0. */ for (phase = phases; phase->list; phase++) { @@ -185,7 +185,7 @@ i915_gem_shrink(struct i915_gem_ww_ctx *ww, /* * We serialize our access to unreferenced objects through - * the use of the struct_mutex. While the objects are not + * the use of the obj_lock. While the objects are not * yet freed (due to RCU then a workqueue) we still want * to be able to shrink their pages, so they remain on * the unbound/bound list until actually freed. diff --git a/drivers/gpu/drm/i915/gem/i915_gem_wait.c b/drivers/gpu/drm/i915/gem/i915_gem_wait.c index 7127e90c1a8f..54829801d3f7 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_wait.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_wait.c @@ -106,11 +106,6 @@ static void fence_set_priority(struct dma_fence *fence, rcu_read_unlock(); } -static inline bool __dma_fence_is_chain(const struct dma_fence *fence) -{ - return fence->ops == &dma_fence_chain_ops; -} - void i915_gem_fence_wait_priority(struct dma_fence *fence, const struct i915_sched_attr *attr) { @@ -126,7 +121,7 @@ void i915_gem_fence_wait_priority(struct dma_fence *fence, for (i = 0; i < array->num_fences; i++) fence_set_priority(array->fences[i], attr); - } else if (__dma_fence_is_chain(fence)) { + } else if (dma_fence_is_chain(fence)) { struct dma_fence *iter; /* The chain is ordered; if we boost the last, we boost all */ @@ -222,10 +217,10 @@ static unsigned long to_wait_timeout(s64 timeout_ns) * * The wait ioctl with a timeout of 0 reimplements the busy ioctl. With any * non-zero timeout parameter the wait ioctl will wait for the given number of - * nanoseconds on an object becoming unbusy. 
Since the wait itself does so - * without holding struct_mutex the object may become re-busied before this - * function completes. A similar but shorter * race condition exists in the busy - * ioctl + * nanoseconds on an object becoming unbusy. Since the wait occurs without + * holding a global or exclusive lock the object may become re-busied before + * this function completes. A similar but shorter * race condition exists + * in the busy ioctl */ int i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file) diff --git a/drivers/gpu/drm/i915/gem/i915_gemfs.c b/drivers/gpu/drm/i915/gem/i915_gemfs.c index 65d84a93c525..8f13ec4ff0d0 100644 --- a/drivers/gpu/drm/i915/gem/i915_gemfs.c +++ b/drivers/gpu/drm/i915/gem/i915_gemfs.c @@ -5,6 +5,7 @@ #include <linux/fs.h> #include <linux/mount.h> +#include <linux/fs_context.h> #include "i915_drv.h" #include "i915_gemfs.h" @@ -12,9 +13,10 @@ void i915_gemfs_init(struct drm_i915_private *i915) { - char huge_opt[] = "huge=within_size"; /* r/w */ struct file_system_type *type; + struct fs_context *fc; struct vfsmount *gemfs; + int ret; /* * By creating our own shmemfs mountpoint, we can pass in @@ -38,8 +40,16 @@ void i915_gemfs_init(struct drm_i915_private *i915) if (!type) goto err; - gemfs = vfs_kern_mount(type, SB_KERNMOUNT, type->name, huge_opt); - if (IS_ERR(gemfs)) + fc = fs_context_for_mount(type, SB_KERNMOUNT); + if (IS_ERR(fc)) + goto err; + ret = vfs_parse_fs_string(fc, "source", "tmpfs"); + if (!ret) + ret = vfs_parse_fs_string(fc, "huge", "within_size"); + if (!ret) + gemfs = fc_mount_longterm(fc); + put_fs_context(fc); + if (ret) goto err; i915->mm.gemfs = gemfs; diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c index 86d9d2fcb6a6..539c620364e3 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c @@ -5,7 +5,7 @@ #include "i915_selftest.h" -#include "display/intel_display_core.h" +#include "display/intel_display_device.h" #include "gt/intel_context.h" #include "gt/intel_engine_regs.h" #include "gt/intel_engine_user.h" @@ -110,6 +110,7 @@ struct tiled_blits { static bool fastblit_supports_x_tiling(const struct drm_i915_private *i915) { + struct intel_display *display = i915->display; int gen = GRAPHICS_VER(i915); /* XY_FAST_COPY_BLT does not exist on pre-gen9 platforms */ @@ -121,7 +122,7 @@ static bool fastblit_supports_x_tiling(const struct drm_i915_private *i915) if (GRAPHICS_VER_FULL(i915) < IP_VER(12, 55)) return false; - return HAS_DISPLAY(i915); + return intel_display_device_present(display); } static bool fast_blit_ok(const struct blit_buffer *buf) diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c index 9c3f17e51885..78734c404a6d 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c @@ -1096,32 +1096,20 @@ static int ___igt_mmap_migrate(struct drm_i915_private *i915, unsigned long addr, bool unfaultable) { - struct vm_area_struct *area; - int err = 0, i; + int i; pr_info("igt_mmap(%s, %d) @ %lx\n", obj->mm.region->name, I915_MMAP_TYPE_FIXED, addr); - mmap_read_lock(current->mm); - area = vma_lookup(current->mm, addr); - mmap_read_unlock(current->mm); - if (!area) { - pr_err("%s: Did not create a vm_area_struct for the mmap\n", - obj->mm.region->name); - err = -EINVAL; - goto out_unmap; - } - for (i = 0; i < obj->base.size / 
sizeof(u32); i++) { u32 __user *ux = u64_to_user_ptr((u64)(addr + i * sizeof(*ux))); u32 x; if (get_user(x, ux)) { - err = -EFAULT; if (!unfaultable) { pr_err("%s: Unable to read from mmap, offset:%zd\n", obj->mm.region->name, i * sizeof(x)); - goto out_unmap; + return -EFAULT; } continue; @@ -1130,37 +1118,29 @@ static int ___igt_mmap_migrate(struct drm_i915_private *i915, if (unfaultable) { pr_err("%s: Faulted unmappable memory\n", obj->mm.region->name); - err = -EINVAL; - goto out_unmap; + return -EINVAL; } if (x != expand32(POISON_INUSE)) { pr_err("%s: Read incorrect value from mmap, offset:%zd, found:%x, expected:%x\n", obj->mm.region->name, i * sizeof(x), x, expand32(POISON_INUSE)); - err = -EINVAL; - goto out_unmap; + return -EINVAL; } x = expand32(POISON_FREE); if (put_user(x, ux)) { pr_err("%s: Unable to write to mmap, offset:%zd\n", obj->mm.region->name, i * sizeof(x)); - err = -EFAULT; - goto out_unmap; + return -EFAULT; } } - if (unfaultable) { - if (err == -EFAULT) - err = 0; - } else { - obj->flags &= ~I915_BO_ALLOC_GPU_ONLY; - err = wc_check(obj); - } -out_unmap: - vm_munmap(addr, obj->base.size); - return err; + if (unfaultable) + return 0; + + obj->flags &= ~I915_BO_ALLOC_GPU_ONLY; + return wc_check(obj); } #define IGT_MMAP_MIGRATE_TOPDOWN (1 << 0) @@ -1176,6 +1156,7 @@ static int __igt_mmap_migrate(struct intel_memory_region **placements, struct drm_i915_private *i915 = placements[0]->i915; struct drm_i915_gem_object *obj; struct i915_request *rq = NULL; + struct vm_area_struct *area; unsigned long addr; LIST_HEAD(objects); u64 offset; @@ -1207,20 +1188,30 @@ static int __igt_mmap_migrate(struct intel_memory_region **placements, goto out_put; } + mmap_read_lock(current->mm); + area = vma_lookup(current->mm, addr); + mmap_read_unlock(current->mm); + if (!area) { + pr_err("%s: Did not create a vm_area_struct for the mmap\n", + obj->mm.region->name); + err = -EINVAL; + goto out_addr; + } + if (flags & IGT_MMAP_MIGRATE_FILL) { err = igt_fill_mappable(placements[0], &objects); if (err) - goto out_put; + goto out_addr; } err = i915_gem_object_lock(obj, NULL); if (err) - goto out_put; + goto out_addr; err = i915_gem_object_pin_pages(obj); if (err) { i915_gem_object_unlock(obj); - goto out_put; + goto out_addr; } err = intel_context_migrate_clear(to_gt(i915)->migrate.context, NULL, @@ -1228,7 +1219,7 @@ static int __igt_mmap_migrate(struct intel_memory_region **placements, i915_gem_object_is_lmem(obj), expand32(POISON_INUSE), &rq); i915_gem_object_unpin_pages(obj); - if (rq) { + if (rq && !err) { err = dma_resv_reserve_fences(obj->base.resv, 1); if (!err) dma_resv_add_fence(obj->base.resv, &rq->fence, @@ -1237,7 +1228,7 @@ static int __igt_mmap_migrate(struct intel_memory_region **placements, } i915_gem_object_unlock(obj); if (err) - goto out_put; + goto out_addr; if (flags & IGT_MMAP_MIGRATE_EVICTABLE) igt_make_evictable(&objects); @@ -1245,16 +1236,16 @@ static int __igt_mmap_migrate(struct intel_memory_region **placements, if (flags & IGT_MMAP_MIGRATE_FAIL_GPU) { err = i915_gem_object_lock(obj, NULL); if (err) - goto out_put; + goto out_addr; /* - * Ensure we only simulate the gpu failuire when faulting the + * Ensure we only simulate the gpu failure when faulting the * pages. 
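Note that the VMA sanity check now happens once in __igt_mmap_migrate() right after the mmap is created. The generic shape of that check, as a standalone sketch (no i915 types involved), is:

	/* Sketch: confirm 'addr' is covered by a VMA of the current task. */
	struct vm_area_struct *vma;
	bool mapped;

	mmap_read_lock(current->mm);		/* vma_lookup() requires mmap_lock */
	vma = vma_lookup(current->mm, addr);
	mapped = vma != NULL;			/* don't use 'vma' after unlock */
	mmap_read_unlock(current->mm);

	if (!mapped)
		return -EINVAL;			/* mmap did not create a mapping */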
*/ err = i915_gem_object_wait_moving_fence(obj, true); i915_gem_object_unlock(obj); if (err) - goto out_put; + goto out_addr; i915_ttm_migrate_set_failure_modes(true, false); } @@ -1298,6 +1289,9 @@ static int __igt_mmap_migrate(struct intel_memory_region **placements, } } +out_addr: + vm_munmap(addr, obj->base.size); + out_put: i915_gem_object_put(obj); igt_close_objects(i915, &objects); diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h b/drivers/gpu/drm/i915/gt/intel_context_types.h index 98c7f6052069..10070ee4d74c 100644 --- a/drivers/gpu/drm/i915/gt/intel_context_types.h +++ b/drivers/gpu/drm/i915/gt/intel_context_types.h @@ -14,7 +14,6 @@ #include "i915_active_types.h" #include "i915_sw_fence.h" -#include "i915_utils.h" #include "intel_engine_types.h" #include "intel_sseu.h" #include "intel_wakeref.h" diff --git a/drivers/gpu/drm/i915/gt/intel_engine.h b/drivers/gpu/drm/i915/gt/intel_engine.h index 325da0414d94..f6a98cf1e5a5 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine.h +++ b/drivers/gpu/drm/i915/gt/intel_engine.h @@ -79,6 +79,29 @@ struct lock_class_key; #define ENGINE_WRITE(...) __ENGINE_WRITE_OP(write, __VA_ARGS__) #define ENGINE_WRITE_FW(...) __ENGINE_WRITE_OP(write_fw, __VA_ARGS__) +#define __HAS_ENGINE(engine_mask, id) ((engine_mask) & BIT(id)) +#define HAS_ENGINE(gt, id) __HAS_ENGINE((gt)->info.engine_mask, id) + +#define __ENGINE_INSTANCES_MASK(mask, first, count) ({ \ + unsigned int first__ = (first); \ + unsigned int count__ = (count); \ + ((mask) & GENMASK(first__ + count__ - 1, first__)) >> first__; \ +}) + +#define ENGINE_INSTANCES_MASK(gt, first, count) \ + __ENGINE_INSTANCES_MASK((gt)->info.engine_mask, first, count) + +#define RCS_MASK(gt) \ + ENGINE_INSTANCES_MASK(gt, RCS0, I915_MAX_RCS) +#define BCS_MASK(gt) \ + ENGINE_INSTANCES_MASK(gt, BCS0, I915_MAX_BCS) +#define VDBOX_MASK(gt) \ + ENGINE_INSTANCES_MASK(gt, VCS0, I915_MAX_VCS) +#define VEBOX_MASK(gt) \ + ENGINE_INSTANCES_MASK(gt, VECS0, I915_MAX_VECS) +#define CCS_MASK(gt) \ + ENGINE_INSTANCES_MASK(gt, CCS0, I915_MAX_CCS) + #define GEN6_RING_FAULT_REG_READ(engine__) \ intel_uncore_read((engine__)->uncore, RING_FAULT_REG(engine__)) @@ -355,4 +378,12 @@ u64 intel_clamp_preempt_timeout_ms(struct intel_engine_cs *engine, u64 value); u64 intel_clamp_stop_timeout_ms(struct intel_engine_cs *engine, u64 value); u64 intel_clamp_timeslice_duration_ms(struct intel_engine_cs *engine, u64 value); +#define rb_to_uabi_engine(rb) \ + rb_entry_safe(rb, struct intel_engine_cs, uabi_node) + +#define for_each_uabi_engine(engine__, i915__) \ + for ((engine__) = rb_to_uabi_engine(rb_first(&(i915__)->uabi_engines));\ + (engine__); \ + (engine__) = rb_to_uabi_engine(rb_next(&(engine__)->uabi_node))) + #endif /* _INTEL_RINGBUFFER_H_ */ diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c index 03baa7fa0a27..7f389cb0bde4 100644 --- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c +++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c @@ -106,14 +106,18 @@ * preemption, but just sampling the new tail pointer). 
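With HAS_ENGINE(), the engine-mask macros and the uabi engine iterator now living in intel_engine.h, a typical caller only needs that header; an illustrative (not driver-specific) use of the iterator:

	/* Sketch: walk every userspace-visible engine of the device. */
	struct intel_engine_cs *engine;

	for_each_uabi_engine(engine, i915)
		pr_info("engine %s: class %u instance %u\n",
			engine->name, engine->uabi_class, engine->uabi_instance);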
* */ + #include <linux/interrupt.h> #include <linux/string_helpers.h> +#include "gen8_engine_cs.h" #include "i915_drv.h" +#include "i915_list_util.h" #include "i915_reg.h" +#include "i915_timer_util.h" #include "i915_trace.h" #include "i915_vgpu.h" -#include "gen8_engine_cs.h" +#include "i915_wait_util.h" #include "intel_breadcrumbs.h" #include "intel_context.h" #include "intel_engine_heartbeat.h" diff --git a/drivers/gpu/drm/i915/gt/intel_gsc.c b/drivers/gpu/drm/i915/gt/intel_gsc.c index 1e925c75fb08..c43febc862dc 100644 --- a/drivers/gpu/drm/i915/gt/intel_gsc.c +++ b/drivers/gpu/drm/i915/gt/intel_gsc.c @@ -284,7 +284,7 @@ static void gsc_irq_handler(struct intel_gt *gt, unsigned int intf_id) if (gt->gsc.intf[intf_id].irq < 0) return; - ret = generic_handle_irq(gt->gsc.intf[intf_id].irq); + ret = generic_handle_irq_safe(gt->gsc.intf[intf_id].irq); if (ret) gt_err_ratelimited(gt, "error handling GSC irq: %d\n", ret); } diff --git a/drivers/gpu/drm/i915/gt/intel_gt_buffer_pool.c b/drivers/gpu/drm/i915/gt/intel_gt_buffer_pool.c index 86b5a9ba323d..c7befc5c20d0 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_buffer_pool.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_buffer_pool.c @@ -7,6 +7,7 @@ #include "gem/i915_gem_object.h" #include "i915_drv.h" +#include "i915_list_util.h" #include "intel_engine_pm.h" #include "intel_gt_buffer_pool.h" diff --git a/drivers/gpu/drm/i915/gt/intel_gt_clock_utils.c b/drivers/gpu/drm/i915/gt/intel_gt_clock_utils.c index 6c499692d61e..88b147fa5cb1 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_clock_utils.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_clock_utils.c @@ -148,7 +148,7 @@ static u32 gen4_read_clock_frequency(struct intel_uncore *uncore) * * Testing on actual hardware has shown there is no /16. */ - return DIV_ROUND_CLOSEST(i9xx_fsb_freq(uncore->i915), 4) * 1000; + return DIV_ROUND_CLOSEST(intel_fsb_freq(uncore->i915), 4) * 1000; } static u32 read_clock_frequency(struct intel_uncore *uncore) diff --git a/drivers/gpu/drm/i915/gt/intel_gt_debugfs.c b/drivers/gpu/drm/i915/gt/intel_gt_debugfs.c index 4dc23b8d3aa2..dcd40b30a96b 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_debugfs.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_debugfs.c @@ -82,14 +82,15 @@ static void gt_debugfs_register(struct intel_gt *gt, struct dentry *root) void intel_gt_debugfs_register(struct intel_gt *gt) { + struct dentry *debugfs_root = gt->i915->drm.debugfs_root; struct dentry *root; char gtname[4]; - if (!gt->i915->drm.primary->debugfs_root) + if (!debugfs_root) return; snprintf(gtname, sizeof(gtname), "gt%u", gt->info.id); - root = debugfs_create_dir(gtname, gt->i915->drm.primary->debugfs_root); + root = debugfs_create_dir(gtname, debugfs_root); if (IS_ERR(root)) return; diff --git a/drivers/gpu/drm/i915/gt/intel_gt_mcr.c b/drivers/gpu/drm/i915/gt/intel_gt_mcr.c index a60822e2b5d4..c3afa321fe30 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_mcr.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_mcr.c @@ -4,6 +4,7 @@ */ #include "i915_drv.h" +#include "i915_wait_util.h" #include "intel_gt.h" #include "intel_gt_mcr.h" #include "intel_gt_print.h" diff --git a/drivers/gpu/drm/i915/gt/intel_rc6.c b/drivers/gpu/drm/i915/gt/intel_rc6.c index 9ca42589da4d..bf38cc5fe872 100644 --- a/drivers/gpu/drm/i915/gt/intel_rc6.c +++ b/drivers/gpu/drm/i915/gt/intel_rc6.c @@ -341,7 +341,7 @@ static int vlv_rc6_init(struct intel_rc6 *rc6) return PTR_ERR(pctx); } - GEM_BUG_ON(range_overflows_end_t(u64, + GEM_BUG_ON(range_end_overflows_t(u64, i915->dsm.stolen.start, pctx->stolen->start, U32_MAX)); diff --git 
a/drivers/gpu/drm/i915/gt/intel_reset.c b/drivers/gpu/drm/i915/gt/intel_reset.c index 4a1675dea1c7..41b5036dc538 100644 --- a/drivers/gpu/drm/i915/gt/intel_reset.c +++ b/drivers/gpu/drm/i915/gt/intel_reset.c @@ -9,18 +9,17 @@ #include "display/intel_display_reset.h" #include "display/intel_overlay.h" - #include "gem/i915_gem_context.h" - #include "gt/intel_gt_regs.h" - #include "gt/uc/intel_gsc_fw.h" +#include "uc/intel_guc.h" #include "i915_drv.h" #include "i915_file_private.h" #include "i915_gpu_error.h" #include "i915_irq.h" #include "i915_reg.h" +#include "i915_wait_util.h" #include "intel_breadcrumbs.h" #include "intel_engine_pm.h" #include "intel_engine_regs.h" @@ -32,8 +31,6 @@ #include "intel_pci_config.h" #include "intel_reset.h" -#include "uc/intel_guc.h" - #define RESET_MAX_RETRIES 3 static void client_mark_guilty(struct i915_gem_context *ctx, bool banned) diff --git a/drivers/gpu/drm/i915/gt/intel_reset_types.h b/drivers/gpu/drm/i915/gt/intel_reset_types.h index 4f5fd393af6f..ee4eb574a219 100644 --- a/drivers/gpu/drm/i915/gt/intel_reset_types.h +++ b/drivers/gpu/drm/i915/gt/intel_reset_types.h @@ -20,7 +20,7 @@ struct intel_reset { * FENCE registers). * * #I915_RESET_ENGINE[num_engines] - Since the driver doesn't need to - * acquire the struct_mutex to reset an engine, we need an explicit + * acquire a global lock to reset an engine, we need an explicit * flag to prevent two concurrent reset attempts in the same engine. * As the number of engines continues to grow, allocate the flags from * the most significant bits. diff --git a/drivers/gpu/drm/i915/gt/intel_ring_submission.c b/drivers/gpu/drm/i915/gt/intel_ring_submission.c index a876a34455f1..8314a4b0505e 100644 --- a/drivers/gpu/drm/i915/gt/intel_ring_submission.c +++ b/drivers/gpu/drm/i915/gt/intel_ring_submission.c @@ -15,18 +15,19 @@ #include "i915_irq.h" #include "i915_mitigations.h" #include "i915_reg.h" +#include "i915_wait_util.h" #include "intel_breadcrumbs.h" #include "intel_context.h" +#include "intel_engine_heartbeat.h" +#include "intel_engine_pm.h" #include "intel_engine_regs.h" #include "intel_gt.h" #include "intel_gt_irq.h" +#include "intel_gt_print.h" #include "intel_gt_regs.h" #include "intel_reset.h" #include "intel_ring.h" #include "shmem_utils.h" -#include "intel_engine_heartbeat.h" -#include "intel_engine_pm.h" -#include "intel_gt_print.h" /* Rough estimate of the typical request size, performing a flush, * set-context and then emitting the batch. 
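The intel_reset_types.h comment above refers to the per-engine reset flags; such a flag is typically claimed and released with a plain atomic bit operation. A sketch, assuming the existing I915_RESET_ENGINE bit base and gt->reset.flags, with a hypothetical __reset_this_engine() helper:

	/* Sketch: serialise resets of a single engine via an atomic flag bit. */
	if (test_and_set_bit(I915_RESET_ENGINE + engine->id, &gt->reset.flags))
		return -EBUSY;		/* a reset of this engine is already in flight */

	ret = __reset_this_engine(engine);	/* hypothetical helper */

	clear_bit(I915_RESET_ENGINE + engine->id, &gt->reset.flags);
	return ret;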
@@ -610,7 +611,6 @@ static int ring_context_alloc(struct intel_context *ce) /* One ringbuffer to rule them all */ GEM_BUG_ON(!engine->legacy.ring); ce->ring = engine->legacy.ring; - ce->timeline = intel_timeline_get(engine->legacy.timeline); GEM_BUG_ON(ce->state); if (engine->context_size) { @@ -623,6 +623,8 @@ static int ring_context_alloc(struct intel_context *ce) ce->state = vma; } + ce->timeline = intel_timeline_get(engine->legacy.timeline); + return 0; } diff --git a/drivers/gpu/drm/i915/gt/intel_rps.c b/drivers/gpu/drm/i915/gt/intel_rps.c index 0b35fdd461d4..4da94098bd3e 100644 --- a/drivers/gpu/drm/i915/gt/intel_rps.c +++ b/drivers/gpu/drm/i915/gt/intel_rps.c @@ -9,9 +9,12 @@ #include "display/intel_display.h" #include "display/intel_display_rps.h" +#include "soc/intel_dram.h" + #include "i915_drv.h" #include "i915_irq.h" #include "i915_reg.h" +#include "i915_wait_util.h" #include "intel_breadcrumbs.h" #include "intel_gt.h" #include "intel_gt_clock_utils.h" @@ -276,20 +279,24 @@ static void gen5_rps_init(struct intel_rps *rps) { struct drm_i915_private *i915 = rps_to_i915(rps); struct intel_uncore *uncore = rps_to_uncore(rps); + unsigned int fsb_freq, mem_freq; u8 fmax, fmin, fstart; u32 rgvmodectl; int c_m, i; - if (i915->fsb_freq <= 3200000) + fsb_freq = intel_fsb_freq(i915); + mem_freq = intel_mem_freq(i915); + + if (fsb_freq <= 3200000) c_m = 0; - else if (i915->fsb_freq <= 4800000) + else if (fsb_freq <= 4800000) c_m = 1; else c_m = 2; for (i = 0; i < ARRAY_SIZE(cparams); i++) { if (cparams[i].i == c_m && - cparams[i].t == DIV_ROUND_CLOSEST(i915->mem_freq, 1000)) { + cparams[i].t == DIV_ROUND_CLOSEST(mem_freq, 1000)) { rps->ips.m = cparams[i].m; rps->ips.c = cparams[i].c; break; diff --git a/drivers/gpu/drm/i915/gt/intel_timeline.h b/drivers/gpu/drm/i915/gt/intel_timeline.h index 57308c4d664a..85b43f9b9d95 100644 --- a/drivers/gpu/drm/i915/gt/intel_timeline.h +++ b/drivers/gpu/drm/i915/gt/intel_timeline.h @@ -9,6 +9,7 @@ #include <linux/lockdep.h> #include "i915_active.h" +#include "i915_list_util.h" #include "i915_syncmap.h" #include "intel_timeline_types.h" diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c index b37e400f74e5..7d486dfa2fc1 100644 --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c @@ -337,12 +337,26 @@ static void gen6_ctx_workarounds_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) { wa_masked_en(wal, INSTPM, INSTPM_FORCE_ORDERING); + + /* WaDisable_RenderCache_OperationalFlush:snb */ + wa_masked_dis(wal, CACHE_MODE_0, RC_OP_FLUSH_ENABLE); } static void gen7_ctx_workarounds_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) { wa_masked_en(wal, INSTPM, INSTPM_FORCE_ORDERING); + /* WaDisable_RenderCache_OperationalFlush:ivb,vlv,hsw */ + wa_masked_dis(wal, CACHE_MODE_0_GEN7, RC_OP_FLUSH_ENABLE); + + /* + * BSpec says this must be set, even though + * WaDisable4x2SubspanOptimization:ivb,hsw + * WaDisable4x2SubspanOptimization isn't listed for VLV. 
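The wa_masked_en()/wa_masked_dis() calls above program "masked" registers, where the upper 16 bits of the written value select which of the lower 16 bits actually change. A sketch of the idea, using illustrative macro names modelled on i915's _MASKED_FIELD()/_MASKED_BIT_ENABLE() helpers:

	/* Sketch: a masked-register write; the high half is the write-enable mask. */
	#define MASKED_FIELD(mask, value)	(((mask) << 16) | (value))
	#define MASKED_BIT_ENABLE(bit)		MASKED_FIELD((bit), (bit))
	#define MASKED_BIT_DISABLE(bit)		MASKED_FIELD((bit), 0)

	/* Only INSTPM_FORCE_ORDERING is touched; all other INSTPM bits are preserved. */
	intel_uncore_write(uncore, INSTPM, MASKED_BIT_ENABLE(INSTPM_FORCE_ORDERING));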
+ */ + wa_masked_en(wal, + CACHE_MODE_1, + PIXEL_SUBSPAN_COLLECT_OPT_DISABLE); } static void gen8_ctx_workarounds_init(struct intel_engine_cs *engine, @@ -634,6 +648,8 @@ static void cfl_ctx_workarounds_init(struct intel_engine_cs *engine, static void icl_ctx_workarounds_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) { + struct drm_i915_private *i915 = engine->i915; + /* Wa_1406697149 (WaDisableBankHangMode:icl) */ wa_write(wal, GEN8_L3CNTLREG, GEN8_ERRDETBCTRL); @@ -669,6 +685,15 @@ static void icl_ctx_workarounds_init(struct intel_engine_cs *engine, /* Wa_1406306137:icl,ehl */ wa_mcr_masked_en(wal, GEN9_ROW_CHICKEN4, GEN11_DIS_PICK_2ND_EU); + + if (IS_JASPERLAKE(i915) || IS_ELKHARTLAKE(i915)) { + /* + * Disable Repacking for Compression (masked R/W access) + * before rendering compressed surfaces for display. + */ + wa_masked_en(wal, CACHE_MODE_0_GEN7, + DISABLE_REPACKING_FOR_COMPRESSION); + } } /* @@ -2306,15 +2331,6 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) GEN8_RC_SEMA_IDLE_MSG_DISABLE); } - if (IS_JASPERLAKE(i915) || IS_ELKHARTLAKE(i915)) { - /* - * "Disable Repacking for Compression (masked R/W access) - * before rendering compressed surfaces for display." - */ - wa_masked_en(wal, CACHE_MODE_0_GEN7, - DISABLE_REPACKING_FOR_COMPRESSION); - } - if (GRAPHICS_VER(i915) == 11) { /* This is not an Wa. Enable for better image quality */ wa_masked_en(wal, @@ -2565,18 +2581,6 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) RING_MODE_GEN7(RENDER_RING_BASE), GFX_TLB_INVALIDATE_EXPLICIT | GFX_REPLAY_MODE); - /* WaDisable_RenderCache_OperationalFlush:ivb,vlv,hsw */ - wa_masked_dis(wal, CACHE_MODE_0_GEN7, RC_OP_FLUSH_ENABLE); - - /* - * BSpec says this must be set, even though - * WaDisable4x2SubspanOptimization:ivb,hsw - * WaDisable4x2SubspanOptimization isn't listed for VLV. - */ - wa_masked_en(wal, - CACHE_MODE_1, - PIXEL_SUBSPAN_COLLECT_OPT_DISABLE); - /* * BSpec recommends 8x4 when MSAA is used, * however in practice 16x4 seems fastest. 
@@ -2643,9 +2647,6 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) GEN6_WIZ_HASHING_MASK, GEN6_WIZ_HASHING_16x4); - /* WaDisable_RenderCache_OperationalFlush:snb */ - wa_masked_dis(wal, CACHE_MODE_0, RC_OP_FLUSH_ENABLE); - /* * From the Sandybridge PRM, volume 1 part 3, page 24: * "If this bit is set, STCunit will have LRA as replacement diff --git a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c index f057c16410e7..4f252f704975 100644 --- a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c +++ b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c @@ -904,7 +904,7 @@ static void active_engine(struct kthread_work *work) arg->result = PTR_ERR(ce[count]); pr_err("[%s] Create context #%ld failed: %d!\n", engine->name, count, arg->result); - while (--count) + while (count--) intel_context_put(ce[count]); return; } diff --git a/drivers/gpu/drm/i915/gt/selftest_tlb.c b/drivers/gpu/drm/i915/gt/selftest_tlb.c index 69ed946a39e5..a5184f09d1de 100644 --- a/drivers/gpu/drm/i915/gt/selftest_tlb.c +++ b/drivers/gpu/drm/i915/gt/selftest_tlb.c @@ -3,17 +3,17 @@ * Copyright © 2022 Intel Corporation */ -#include "i915_selftest.h" - #include "gem/i915_gem_internal.h" #include "gem/i915_gem_lmem.h" #include "gem/i915_gem_region.h" #include "gen8_engine_cs.h" #include "i915_gem_ww.h" +#include "i915_selftest.h" +#include "i915_wait_util.h" +#include "intel_context.h" #include "intel_engine_regs.h" #include "intel_gpu_commands.h" -#include "intel_context.h" #include "intel_gt.h" #include "intel_ring.h" diff --git a/drivers/gpu/drm/i915/gt/sysfs_engines.c b/drivers/gpu/drm/i915/gt/sysfs_engines.c index aab2759067d2..4a81bc396b21 100644 --- a/drivers/gpu/drm/i915/gt/sysfs_engines.c +++ b/drivers/gpu/drm/i915/gt/sysfs_engines.c @@ -7,6 +7,7 @@ #include <linux/sysfs.h> #include "i915_drv.h" +#include "i915_timer_util.h" #include "intel_engine.h" #include "intel_engine_heartbeat.h" #include "sysfs_engines.h" diff --git a/drivers/gpu/drm/i915/gt/uc/intel_gsc_proxy.c b/drivers/gpu/drm/i915/gt/uc/intel_gsc_proxy.c index d8edd7c054c8..e7444ebc373e 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_gsc_proxy.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_gsc_proxy.c @@ -10,11 +10,13 @@ #include "gt/intel_gt.h" #include "gt/intel_gt_print.h" + +#include "i915_drv.h" +#include "i915_reg.h" +#include "i915_wait_util.h" #include "intel_gsc_proxy.h" #include "intel_gsc_uc.h" #include "intel_gsc_uc_heci_cmd_submit.h" -#include "i915_drv.h" -#include "i915_reg.h" /* * GSC proxy: diff --git a/drivers/gpu/drm/i915/gt/uc/intel_gsc_uc_heci_cmd_submit.c b/drivers/gpu/drm/i915/gt/uc/intel_gsc_uc_heci_cmd_submit.c index 2fde5c360cff..9bd29be7656f 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_gsc_uc_heci_cmd_submit.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_gsc_uc_heci_cmd_submit.c @@ -8,6 +8,8 @@ #include "gt/intel_gpu_commands.h" #include "gt/intel_gt.h" #include "gt/intel_ring.h" + +#include "i915_wait_util.h" #include "intel_gsc_uc_heci_cmd_submit.h" struct gsc_heci_pkt { diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc.c index 9df80c325fc1..52ec4421a211 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.c @@ -8,15 +8,17 @@ #include "gt/intel_gt_irq.h" #include "gt/intel_gt_pm_irq.h" #include "gt/intel_gt_regs.h" + +#include "i915_drv.h" +#include "i915_irq.h" +#include "i915_reg.h" +#include "i915_wait_util.h" #include "intel_guc.h" #include "intel_guc_ads.h" #include "intel_guc_capture.h" 
#include "intel_guc_print.h" #include "intel_guc_slpc.h" #include "intel_guc_submission.h" -#include "i915_drv.h" -#include "i915_irq.h" -#include "i915_reg.h" /** * DOC: GuC @@ -313,8 +315,13 @@ static u32 guc_ctl_wa_flags(struct intel_guc *guc) * * The same WA bit is used for both and 22011391025 is applicable to * all DG2. + * + * Platforms post DG2 prevent this issue in hardware by stalling + * submissions. With this flag GuC will schedule as to avoid such + * stalls. */ - if (IS_DG2(gt->i915)) + if (IS_DG2(gt->i915) || + (CCS_MASK(gt) && GRAPHICS_VER_FULL(gt->i915) >= IP_VER(12, 70))) flags |= GUC_WA_DUAL_QUEUE; /* Wa_22011802037: graphics version 11/12 */ diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c index 0d5197c0824a..2c651ec024ef 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c @@ -5,15 +5,16 @@ #include <linux/circ_buf.h> #include <linux/ktime.h> -#include <linux/time64.h> #include <linux/string_helpers.h> +#include <linux/time64.h> #include <linux/timekeeping.h> #include "i915_drv.h" +#include "i915_wait_util.h" #include "intel_guc_ct.h" #include "intel_guc_print.h" -#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GUC) +#if IS_ENABLED(CONFIG_DRM_I915_DEBUG) enum { CT_DEAD_ALIVE = 0, CT_DEAD_SETUP, @@ -144,7 +145,7 @@ void intel_guc_ct_init_early(struct intel_guc_ct *ct) spin_lock_init(&ct->requests.lock); INIT_LIST_HEAD(&ct->requests.pending); INIT_LIST_HEAD(&ct->requests.incoming); -#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GUC) +#if IS_ENABLED(CONFIG_DRM_I915_DEBUG) INIT_WORK(&ct->dead_ct_worker, ct_dead_ct_worker_func); #endif INIT_WORK(&ct->requests.worker, ct_incoming_request_worker_func); @@ -373,7 +374,7 @@ int intel_guc_ct_enable(struct intel_guc_ct *ct) ct->enabled = true; ct->stall_time = KTIME_MAX; -#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GUC) +#if IS_ENABLED(CONFIG_DRM_I915_DEBUG) ct->dead_ct_reported = false; ct->dead_ct_reason = CT_DEAD_ALIVE; #endif @@ -1324,9 +1325,16 @@ static int ct_receive(struct intel_guc_ct *ct) static void ct_try_receive_message(struct intel_guc_ct *ct) { + struct intel_guc *guc = ct_to_guc(ct); int ret; - if (GEM_WARN_ON(!ct->enabled)) + if (!ct->enabled) { + GEM_WARN_ON(!guc_to_gt(guc)->uc.reset_in_progress); + return; + } + + /* When interrupt disabled, message handling is not expected */ + if (!guc->interrupts.enabled) return; ret = ct_receive(ct); @@ -1377,7 +1385,7 @@ void intel_guc_ct_print_info(struct intel_guc_ct *ct, ct->ctbs.recv.desc->tail); } -#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GUC) +#if IS_ENABLED(CONFIG_DRM_I915_DEBUG) static void ct_dead_ct_worker_func(struct work_struct *w) { struct intel_guc_ct *ct = container_of(w, struct intel_guc_ct, dead_ct_worker); @@ -1386,6 +1394,9 @@ static void ct_dead_ct_worker_func(struct work_struct *w) if (ct->dead_ct_reported) return; + if (i915_error_injected()) + return; + ct->dead_ct_reported = true; guc_info(guc, "CTB is dead - reason=0x%X\n", ct->dead_ct_reason); diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.h index 2c4bb9a941be..e9a6ec4e6d38 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.h @@ -97,7 +97,7 @@ struct intel_guc_ct { /** @stall_time: time of first time a CTB submission is stalled */ ktime_t stall_time; -#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GUC) +#if IS_ENABLED(CONFIG_DRM_I915_DEBUG) int dead_ct_reason; bool dead_ct_reported; struct work_struct dead_ct_worker; diff --git 
a/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c index e7ccfa520df3..b1bda1b84f0a 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c @@ -13,9 +13,11 @@ #include "gt/intel_gt_mcr.h" #include "gt/intel_gt_regs.h" #include "gt/intel_rps.h" + +#include "i915_drv.h" +#include "i915_wait_util.h" #include "intel_guc_fw.h" #include "intel_guc_print.h" -#include "i915_drv.h" static void guc_prepare_xfer(struct intel_gt *gt) { @@ -46,6 +48,14 @@ static void guc_prepare_xfer(struct intel_gt *gt) /* allows for 5us (in 10ns units) before GT can go to RC6 */ intel_uncore_write(uncore, GUC_ARAT_C6DIS, 0x1FF); } + + /* + * Starting from IP 12.50 we need to enable the mirroring of GuC + * internal state to debug registers. This is always enabled on previous + * IPs. + */ + if (GRAPHICS_VER_FULL(uncore->i915) >= IP_VER(12, 50)) + intel_uncore_rmw(uncore, GUC_SHIM_CONTROL2, 0, GUC_ENABLE_DEBUG_REG); } static int guc_xfer_rsa_mmio(struct intel_uc_fw *guc_fw, diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c index e8a04e476c57..cdff48920ee6 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c @@ -6,6 +6,8 @@ #include <linux/debugfs.h> #include <linux/string_helpers.h> +#include <drm/drm_managed.h> + #include "gt/intel_gt.h" #include "i915_drv.h" #include "i915_irq.h" @@ -220,8 +222,7 @@ static int guc_action_control_log(struct intel_guc *guc, bool enable, */ static int subbuf_start_callback(struct rchan_buf *buf, void *subbuf, - void *prev_subbuf, - size_t prev_padding) + void *prev_subbuf) { /* * Use no-overwrite mode by default, where relay will stop accepting @@ -512,7 +513,11 @@ static void guc_log_relay_unmap(struct intel_guc_log *log) void intel_guc_log_init_early(struct intel_guc_log *log) { - mutex_init(&log->relay.lock); + struct intel_guc *guc = log_to_guc(log); + struct drm_i915_private *i915 = guc_to_i915(guc); + + drmm_mutex_init(&i915->drm, &log->relay.lock); + drmm_mutex_init(&i915->drm, &log->guc_lock); INIT_WORK(&log->relay.flush_work, copy_debug_logs_work); log->relay.started = false; } @@ -678,7 +683,7 @@ int intel_guc_log_set_level(struct intel_guc_log *log, u32 level) if (level < GUC_LOG_LEVEL_DISABLED || level > GUC_LOG_LEVEL_MAX) return -EINVAL; - mutex_lock(&i915->drm.struct_mutex); + mutex_lock(&log->guc_lock); if (log->level == level) goto out_unlock; @@ -696,7 +701,7 @@ int intel_guc_log_set_level(struct intel_guc_log *log, u32 level) log->level = level; out_unlock: - mutex_unlock(&i915->drm.struct_mutex); + mutex_unlock(&log->guc_lock); return ret; } diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_log.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_log.h index 02127703be80..13cb93ad0710 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_log.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_log.h @@ -42,6 +42,14 @@ enum { struct intel_guc_log { u32 level; + /* + * Protects concurrent access and modification of intel_guc_log->level. + * + * This lock replaces the legacy struct_mutex usage in + * intel_guc_log system. 
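intel_guc_log_init_early() above switches both log locks to drmm_mutex_init(), so they are torn down automatically with the drm_device. The managed-mutex pattern in isolation (structure and function names here are made up):

	#include <drm/drm_managed.h>

	struct my_log {
		struct mutex lock;	/* protects 'level' */
		u32 level;
	};

	static int my_log_init(struct drm_device *drm, struct my_log *log)
	{
		/* Managed init: no explicit mutex_destroy() needed on teardown. */
		return drmm_mutex_init(drm, &log->lock);
	}

	static void my_log_set_level(struct my_log *log, u32 level)
	{
		mutex_lock(&log->lock);
		log->level = level;
		mutex_unlock(&log->lock);
	}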
+ */ + struct mutex guc_lock; + /* Allocation settings */ struct { s32 bytes; /* Size in bytes */ diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_reg.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_reg.h index 3fd798837502..f73dab527547 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_reg.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_reg.h @@ -96,6 +96,7 @@ #define GUC_GEN10_SHIM_WC_ENABLE (1<<21) #define GUC_SHIM_CONTROL2 _MMIO(0xc068) +#define GUC_ENABLE_DEBUG_REG (1<<11) #define GUC_IS_PRIVILEGED (1<<29) #define GSC_LOADS_HUC (1<<30) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c index d5ee6e5e1443..fa9af08f9708 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c @@ -3,17 +3,20 @@ * Copyright © 2021 Intel Corporation */ -#include <drm/drm_cache.h> #include <linux/string_helpers.h> +#include <drm/drm_cache.h> + +#include "gt/intel_gt.h" +#include "gt/intel_gt_regs.h" +#include "gt/intel_rps.h" + #include "i915_drv.h" #include "i915_reg.h" -#include "intel_guc_slpc.h" +#include "i915_wait_util.h" #include "intel_guc_print.h" +#include "intel_guc_slpc.h" #include "intel_mchbar_regs.h" -#include "gt/intel_gt.h" -#include "gt/intel_gt_regs.h" -#include "gt/intel_rps.h" /** * DOC: SLPC - Dynamic Frequency management diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c index 127316d2c8aa..68f2b8d363ac 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c @@ -25,16 +25,16 @@ #include "gt/intel_mocs.h" #include "gt/intel_ring.h" +#include "i915_drv.h" +#include "i915_irq.h" +#include "i915_reg.h" +#include "i915_trace.h" +#include "i915_wait_util.h" #include "intel_guc_ads.h" #include "intel_guc_capture.h" #include "intel_guc_print.h" #include "intel_guc_submission.h" -#include "i915_drv.h" -#include "i915_reg.h" -#include "i915_irq.h" -#include "i915_trace.h" - /** * DOC: GuC-based command submission * diff --git a/drivers/gpu/drm/i915/gvt/cmd_parser.c b/drivers/gpu/drm/i915/gvt/cmd_parser.c index a91e23c22ea1..d432fdd69833 100644 --- a/drivers/gpu/drm/i915/gvt/cmd_parser.c +++ b/drivers/gpu/drm/i915/gvt/cmd_parser.c @@ -1921,7 +1921,7 @@ static int perform_bb_shadow(struct parser_exec_state *s) if (!bb) return -ENOMEM; - bb->ppgtt = (s->buf_addr_type == GTT_BUFFER) ? 
false : true; + bb->ppgtt = s->buf_addr_type != GTT_BUFFER; /* * The start_offset stores the batch buffer's start gma's diff --git a/drivers/gpu/drm/i915/gvt/debugfs.c b/drivers/gpu/drm/i915/gvt/debugfs.c index 673534f061ef..415422b5943c 100644 --- a/drivers/gpu/drm/i915/gvt/debugfs.c +++ b/drivers/gpu/drm/i915/gvt/debugfs.c @@ -194,9 +194,9 @@ void intel_gvt_debugfs_add_vgpu(struct intel_vgpu *vgpu) void intel_gvt_debugfs_remove_vgpu(struct intel_vgpu *vgpu) { struct intel_gvt *gvt = vgpu->gvt; - struct drm_minor *minor = gvt->gt->i915->drm.primary; + struct dentry *debugfs_root = gvt->gt->i915->drm.debugfs_root; - if (minor->debugfs_root && gvt->debugfs_root) { + if (debugfs_root && gvt->debugfs_root) { debugfs_remove_recursive(vgpu->debugfs); vgpu->debugfs = NULL; } @@ -208,9 +208,9 @@ void intel_gvt_debugfs_remove_vgpu(struct intel_vgpu *vgpu) */ void intel_gvt_debugfs_init(struct intel_gvt *gvt) { - struct drm_minor *minor = gvt->gt->i915->drm.primary; + struct dentry *debugfs_root = gvt->gt->i915->drm.debugfs_root; - gvt->debugfs_root = debugfs_create_dir("gvt", minor->debugfs_root); + gvt->debugfs_root = debugfs_create_dir("gvt", debugfs_root); debugfs_create_ulong("num_tracked_mmio", 0444, gvt->debugfs_root, &gvt->mmio.num_tracked_mmio); @@ -222,9 +222,9 @@ void intel_gvt_debugfs_init(struct intel_gvt *gvt) */ void intel_gvt_debugfs_clean(struct intel_gvt *gvt) { - struct drm_minor *minor = gvt->gt->i915->drm.primary; + struct dentry *debugfs_root = gvt->gt->i915->drm.debugfs_root; - if (minor->debugfs_root) { + if (debugfs_root) { debugfs_remove_recursive(gvt->debugfs_root); gvt->debugfs_root = NULL; } diff --git a/drivers/gpu/drm/i915/gvt/mmio_context.c b/drivers/gpu/drm/i915/gvt/mmio_context.c index 2f7208843367..0b810baad20a 100644 --- a/drivers/gpu/drm/i915/gvt/mmio_context.c +++ b/drivers/gpu/drm/i915/gvt/mmio_context.c @@ -33,14 +33,16 @@ * */ -#include "i915_drv.h" -#include "i915_reg.h" #include "gt/intel_context.h" #include "gt/intel_engine_regs.h" #include "gt/intel_gpu_commands.h" #include "gt/intel_gt_regs.h" #include "gt/intel_ring.h" + #include "gvt.h" +#include "i915_drv.h" +#include "i915_reg.h" +#include "i915_wait_util.h" #include "trace.h" #define GEN9_MOCS_SIZE 64 diff --git a/drivers/gpu/drm/i915/i915_active.c b/drivers/gpu/drm/i915/i915_active.c index 0dbc4e289300..6b0c1162505a 100644 --- a/drivers/gpu/drm/i915/i915_active.c +++ b/drivers/gpu/drm/i915/i915_active.c @@ -257,10 +257,9 @@ static struct active_node *__active_lookup(struct i915_active *ref, u64 idx) * claimed the cache and we know that is does not match our * idx. If, and only if, the timeline is currently zero is it * worth competing to claim it atomically for ourselves (for - * only the winner of that race will cmpxchg return the old - * value of 0). + * only the winner of that race will cmpxchg succeed). 
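The __active_lookup() hunk above is the usual cmpxchg-to-try_cmpxchg conversion: both forms claim the slot only if it still reads 0, as this standalone sketch shows (reusing the names from the hunk purely for illustration):

	/* Sketch: claim a zero-initialised 64-bit slot for 'idx'. */
	u64 expected = 0;
	bool won;

	/* Old style: we won the race iff the previous value really was 0. */
	won = cmpxchg64(&it->timeline, 0, idx) == 0;

	/* New style: returns true on success; on failure 'expected' is
	 * updated with the value actually observed in it->timeline. */
	won = try_cmpxchg64(&it->timeline, &expected, idx);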
*/ - if (!cached && !cmpxchg64(&it->timeline, 0, idx)) + if (!cached && try_cmpxchg64(&it->timeline, &cached, idx)) return it; } diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 967c0501e91e..c2e38d4bcd01 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -26,11 +26,11 @@ * */ +#include <linux/debugfs.h> #include <linux/sched/mm.h> #include <linux/sort.h> #include <linux/string_helpers.h> -#include <linux/debugfs.h> #include <drm/drm_debugfs.h> #include "gem/i915_gem_context.h" @@ -54,6 +54,7 @@ #include "i915_irq.h" #include "i915_reg.h" #include "i915_scheduler.h" +#include "i915_wait_util.h" #include "intel_mchbar_regs.h" static inline struct drm_i915_private *node_to_i915(struct drm_info_node *node) @@ -720,26 +721,24 @@ static const struct i915_debugfs_files { {"i915_gem_drop_caches", &i915_drop_caches_fops}, }; -void i915_debugfs_register(struct drm_i915_private *dev_priv) +void i915_debugfs_register(struct drm_i915_private *i915) { - struct drm_minor *minor = dev_priv->drm.primary; + struct dentry *debugfs_root = i915->drm.debugfs_root; int i; - i915_debugfs_params(dev_priv); + i915_debugfs_params(i915); - debugfs_create_file("i915_forcewake_user", S_IRUSR, minor->debugfs_root, - to_i915(minor->dev), &i915_forcewake_fops); + debugfs_create_file("i915_forcewake_user", S_IRUSR, debugfs_root, + i915, &i915_forcewake_fops); for (i = 0; i < ARRAY_SIZE(i915_debugfs_files); i++) { - debugfs_create_file(i915_debugfs_files[i].name, - S_IRUGO | S_IWUSR, - minor->debugfs_root, - to_i915(minor->dev), + debugfs_create_file(i915_debugfs_files[i].name, S_IRUGO | S_IWUSR, + debugfs_root, i915, i915_debugfs_files[i].fops); } drm_debugfs_create_files(i915_debugfs_list, ARRAY_SIZE(i915_debugfs_list), - minor->debugfs_root, minor); + debugfs_root, i915->drm.primary); - i915_gpu_error_debugfs_register(dev_priv); + i915_gpu_error_debugfs_register(i915); } diff --git a/drivers/gpu/drm/i915/i915_debugfs_params.c b/drivers/gpu/drm/i915/i915_debugfs_params.c index 33d2dcb0de65..89ab5eb14779 100644 --- a/drivers/gpu/drm/i915/i915_debugfs_params.c +++ b/drivers/gpu/drm/i915/i915_debugfs_params.c @@ -248,11 +248,11 @@ i915_debugfs_create_charp(const char *name, umode_t mode, /* add a subdirectory with files for each i915 param */ struct dentry *i915_debugfs_params(struct drm_i915_private *i915) { - struct drm_minor *minor = i915->drm.primary; + struct dentry *debugfs_root = i915->drm.debugfs_root; struct i915_params *params = &i915->params; struct dentry *dir; - dir = debugfs_create_dir("i915_params", minor->debugfs_root); + dir = debugfs_create_dir("i915_params", debugfs_root); if (IS_ERR(dir)) return dir; diff --git a/drivers/gpu/drm/i915/i915_driver.c b/drivers/gpu/drm/i915/i915_driver.c index c6263c6d3384..a28c3710c4d5 100644 --- a/drivers/gpu/drm/i915/i915_driver.c +++ b/drivers/gpu/drm/i915/i915_driver.c @@ -51,13 +51,15 @@ #include "display/intel_bw.h" #include "display/intel_cdclk.h" #include "display/intel_crtc.h" -#include "display/intel_display_core.h" +#include "display/intel_display_device.h" #include "display/intel_display_driver.h" +#include "display/intel_display_power.h" #include "display/intel_dmc.h" #include "display/intel_dp.h" #include "display/intel_dpt.h" #include "display/intel_encoder.h" #include "display/intel_fbdev.h" +#include "display/intel_gmbus.h" #include "display/intel_hotplug.h" #include "display/intel_opregion.h" #include "display/intel_overlay.h" @@ -977,7 +979,7 @@ void 
i915_driver_shutdown(struct drm_i915_private *i915) intel_power_domains_disable(display); drm_client_dev_suspend(&i915->drm, false); - if (HAS_DISPLAY(i915)) { + if (intel_display_device_present(display)) { drm_kms_helper_poll_disable(&i915->drm); intel_display_driver_disable_user_access(display); @@ -989,7 +991,7 @@ void i915_driver_shutdown(struct drm_i915_private *i915) intel_irq_suspend(i915); intel_hpd_cancel_work(display); - if (HAS_DISPLAY(i915)) + if (intel_display_device_present(display)) intel_display_driver_suspend_access(display); intel_encoder_suspend_all(display); @@ -1060,7 +1062,7 @@ static int i915_drm_suspend(struct drm_device *dev) * properly. */ intel_power_domains_disable(display); drm_client_dev_suspend(dev, false); - if (HAS_DISPLAY(dev_priv)) { + if (intel_display_device_present(display)) { drm_kms_helper_poll_disable(dev); intel_display_driver_disable_user_access(display); } @@ -1072,7 +1074,7 @@ static int i915_drm_suspend(struct drm_device *dev) intel_irq_suspend(dev_priv); intel_hpd_cancel_work(display); - if (HAS_DISPLAY(dev_priv)) + if (intel_display_device_present(display)) intel_display_driver_suspend_access(display); intel_encoder_suspend_all(display); @@ -1219,7 +1221,7 @@ static int i915_drm_resume(struct drm_device *dev) */ intel_irq_resume(dev_priv); - if (HAS_DISPLAY(dev_priv)) + if (intel_display_device_present(display)) drm_mode_config_reset(dev); i915_gem_resume(dev_priv); @@ -1228,14 +1230,14 @@ static int i915_drm_resume(struct drm_device *dev) intel_clock_gating_init(dev_priv); - if (HAS_DISPLAY(dev_priv)) + if (intel_display_device_present(display)) intel_display_driver_resume_access(display); intel_hpd_init(display); intel_display_driver_resume(display); - if (HAS_DISPLAY(dev_priv)) { + if (intel_display_device_present(display)) { intel_display_driver_enable_user_access(display); drm_kms_helper_poll_enable(dev); } diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 5e4c49f0d5d4..6a768aad8edd 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -66,8 +66,6 @@ struct intel_display; struct intel_pxp; struct vlv_s0ix_state; -#define GEM_QUIRK_PIN_SWIZZLED_PAGES BIT(0) - /* Data Stolen Memory (DSM) aka "i915 stolen memory" */ struct i915_dsm { /* @@ -116,8 +114,7 @@ struct i915_gem_mm { struct intel_memory_region *stolen_region; /** Memory allocator for GTT stolen memory */ struct drm_mm stolen; - /** Protects the usage of the GTT stolen memory allocator. This is - * always the inner lock when overlapping with struct_mutex. 
*/ + /** Protects the usage of the GTT stolen memory allocator */ struct mutex stolen_lock; /* Protects bound_list/unbound_list and #drm_i915_gem_object.mm.link */ @@ -224,6 +221,9 @@ struct drm_i915_private { bool irqs_enabled; + /* LPT/WPT IOSF sideband protection */ + struct mutex sbi_lock; + /* VLV/CHV IOSF sideband */ struct { struct mutex lock; /* protect sideband access */ @@ -239,8 +239,6 @@ struct drm_i915_private { bool preserve_bios_swizzle; - unsigned int fsb_freq, mem_freq, is_ddr3; - unsigned int hpll_freq; unsigned int czclk_freq; @@ -354,14 +352,6 @@ static inline struct intel_gt *to_gt(const struct drm_i915_private *i915) return i915->gt[0]; } -#define rb_to_uabi_engine(rb) \ - rb_entry_safe(rb, struct intel_engine_cs, uabi_node) - -#define for_each_uabi_engine(engine__, i915__) \ - for ((engine__) = rb_to_uabi_engine(rb_first(&(i915__)->uabi_engines));\ - (engine__); \ - (engine__) = rb_to_uabi_engine(rb_next(&(engine__)->uabi_node))) - #define INTEL_INFO(i915) ((i915)->__info) #define RUNTIME_INFO(i915) (&(i915)->__runtime) #define DRIVER_CAPS(i915) (&(i915)->caps) @@ -570,29 +560,6 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915, #define IS_GEN9_LP(i915) (IS_BROXTON(i915) || IS_GEMINILAKE(i915)) #define IS_GEN9_BC(i915) (GRAPHICS_VER(i915) == 9 && !IS_GEN9_LP(i915)) -#define __HAS_ENGINE(engine_mask, id) ((engine_mask) & BIT(id)) -#define HAS_ENGINE(gt, id) __HAS_ENGINE((gt)->info.engine_mask, id) - -#define __ENGINE_INSTANCES_MASK(mask, first, count) ({ \ - unsigned int first__ = (first); \ - unsigned int count__ = (count); \ - ((mask) & GENMASK(first__ + count__ - 1, first__)) >> first__; \ -}) - -#define ENGINE_INSTANCES_MASK(gt, first, count) \ - __ENGINE_INSTANCES_MASK((gt)->info.engine_mask, first, count) - -#define RCS_MASK(gt) \ - ENGINE_INSTANCES_MASK(gt, RCS0, I915_MAX_RCS) -#define BCS_MASK(gt) \ - ENGINE_INSTANCES_MASK(gt, BCS0, I915_MAX_BCS) -#define VDBOX_MASK(gt) \ - ENGINE_INSTANCES_MASK(gt, VCS0, I915_MAX_VCS) -#define VEBOX_MASK(gt) \ - ENGINE_INSTANCES_MASK(gt, VECS0, I915_MAX_VECS) -#define CCS_MASK(gt) \ - ENGINE_INSTANCES_MASK(gt, CCS0, I915_MAX_CCS) - #define HAS_MEDIA_RATIO_MODE(i915) (INTEL_INFO(i915)->has_media_ratio_mode) /* diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 8c8d43451f35..e14a0c3db999 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -847,8 +847,7 @@ void i915_gem_runtime_suspend(struct drm_i915_private *i915) /* * Only called during RPM suspend. All users of the userfault_list * must be holding an RPM wakeref to ensure that this can not - * run concurrently with themselves (and use the struct_mutex for - * protection between themselves). + * run concurrently with themselves. 
*/ list_for_each_entry_safe(obj, on, diff --git a/drivers/gpu/drm/i915/i915_gem.h b/drivers/gpu/drm/i915/i915_gem.h index 82e9d289398c..20b3cb29cfff 100644 --- a/drivers/gpu/drm/i915/i915_gem.h +++ b/drivers/gpu/drm/i915/i915_gem.h @@ -134,4 +134,6 @@ int i915_gem_open(struct drm_i915_private *i915, struct drm_file *file); #define I915_GEM_IDLE_TIMEOUT (HZ / 5) +#define GEM_QUIRK_PIN_SWIZZLED_PAGES BIT(0) + #endif /* __I915_GEM_H__ */ diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index f434b6825fc2..7582ef34bf3f 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.c +++ b/drivers/gpu/drm/i915/i915_gpu_error.c @@ -685,6 +685,74 @@ static void err_print_guc_ctb(struct drm_i915_error_state_buf *m, ctb->head, ctb->tail, ctb->desc_offset, ctb->cmds_offset, ctb->size); } +/* This list includes registers that are useful in debugging GuC hangs. */ +const struct { + u32 start; + u32 count; +} guc_hw_reg_state[] = { + { 0xc0b0, 2 }, + { 0xc000, 65 }, + { 0xc140, 1 }, + { 0xc180, 16 }, + { 0xc1dc, 10 }, + { 0xc300, 79 }, + { 0xc4b4, 47 }, + { 0xc574, 1 }, + { 0xc57c, 1 }, + { 0xc584, 11 }, + { 0xc5c0, 8 }, + { 0xc5e4, 1 }, + { 0xc5ec, 103 }, + { 0xc7c0, 1 }, + { 0xc0b0, 2 } +}; + +static u32 print_range_line(struct drm_i915_error_state_buf *m, u32 start, u32 *dump, u32 count) +{ + if (count >= 8) { + err_printf(m, "[0x%04x] 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x\n", + start, dump[0], dump[1], dump[2], dump[3], + dump[4], dump[5], dump[6], dump[7]); + return 8; + } else if (count >= 4) { + err_printf(m, "[0x%04x] 0x%08x 0x%08x 0x%08x 0x%08x\n", + start, dump[0], dump[1], dump[2], dump[3]); + return 4; + } else if (count >= 2) { + err_printf(m, "[0x%04x] 0x%08x 0x%08x\n", start, dump[0], dump[1]); + return 2; + } + + err_printf(m, "[0x%04x] 0x%08x\n", start, dump[0]); + return 1; +} + +static void err_print_guc_hw_state(struct drm_i915_error_state_buf *m, u32 *hw_state) +{ + u32 total = 0; + int i; + + if (!hw_state) + return; + + err_printf(m, "GuC Register State:\n"); + + for (i = 0; i < ARRAY_SIZE(guc_hw_reg_state); i++) { + u32 entry = 0; + + while (entry < guc_hw_reg_state[i].count) { + u32 start = guc_hw_reg_state[i].start + entry * sizeof(u32); + u32 count = guc_hw_reg_state[i].count - entry; + u32 *values = hw_state + total + entry; + + entry += print_range_line(m, start, values, count); + } + + GEM_BUG_ON(entry != guc_hw_reg_state[i].count); + total += entry; + } +} + static void err_print_uc(struct drm_i915_error_state_buf *m, const struct intel_uc_coredump *error_uc) { @@ -693,6 +761,7 @@ static void err_print_uc(struct drm_i915_error_state_buf *m, intel_uc_fw_dump(&error_uc->guc_fw, &p); intel_uc_fw_dump(&error_uc->huc_fw, &p); err_printf(m, "GuC timestamp: 0x%08x\n", error_uc->guc.timestamp); + err_print_guc_hw_state(m, error_uc->guc.hw_state); intel_gpu_error_print_vma(m, NULL, error_uc->guc.vma_log); err_printf(m, "GuC CTB fence: %d\n", error_uc->guc.last_fence); err_print_guc_ctb(m, "Send", error_uc->guc.ctb + 0); @@ -1025,6 +1094,7 @@ static void cleanup_uc(struct intel_uc_coredump *uc) kfree(uc->huc_fw.file_wanted.path); i915_vma_coredump_free(uc->guc.vma_log); i915_vma_coredump_free(uc->guc.vma_ctb); + kfree(uc->guc.hw_state); kfree(uc); } @@ -1721,6 +1791,37 @@ static void gt_record_guc_ctb(struct intel_ctb_coredump *saved, saved->cmds_offset = ((void *)ctb->cmds) - blob_ptr; } +static u32 read_guc_state_reg(struct intel_uncore *uncore, int range, int count) +{ + GEM_BUG_ON(range >= ARRAY_SIZE(guc_hw_reg_state)); + 
GEM_BUG_ON(count >= guc_hw_reg_state[range].count); + + return intel_uncore_read(uncore, + _MMIO(guc_hw_reg_state[range].start + count * sizeof(u32))); +} + +static void gt_record_guc_hw_state(struct intel_uncore *uncore, + struct intel_uc_coredump *error_uc) +{ + u32 *hw_state; + u32 count = 0; + int i, j; + + for (i = 0; i < ARRAY_SIZE(guc_hw_reg_state); i++) + count += guc_hw_reg_state[i].count; + + hw_state = kcalloc(count, sizeof(u32), ALLOW_FAIL); + if (!hw_state) + return; + + count = 0; + for (i = 0; i < ARRAY_SIZE(guc_hw_reg_state); i++) + for (j = 0; j < guc_hw_reg_state[i].count; j++) + hw_state[count++] = read_guc_state_reg(uncore, i, j); + + error_uc->guc.hw_state = hw_state; +} + static struct intel_uc_coredump * gt_record_uc(struct intel_gt_coredump *gt, struct i915_vma_compress *compress) @@ -1755,6 +1856,7 @@ gt_record_uc(struct intel_gt_coredump *gt, uc->guc.ct.ctbs.send.desc, (struct intel_guc *)&uc->guc); gt_record_guc_ctb(error_uc->guc.ctb + 1, &uc->guc.ct.ctbs.recv, uc->guc.ct.ctbs.send.desc, (struct intel_guc *)&uc->guc); + gt_record_guc_hw_state(gt->_gt->uncore, error_uc); return error_uc; } @@ -2445,11 +2547,11 @@ static const struct file_operations i915_error_state_fops = { void i915_gpu_error_debugfs_register(struct drm_i915_private *i915) { - struct drm_minor *minor = i915->drm.primary; + struct dentry *debugfs_root = i915->drm.debugfs_root; - debugfs_create_file("i915_error_state", 0644, minor->debugfs_root, i915, + debugfs_create_file("i915_error_state", 0644, debugfs_root, i915, &i915_error_state_fops); - debugfs_create_file("i915_gpu_info", 0644, minor->debugfs_root, i915, + debugfs_create_file("i915_gpu_info", 0644, debugfs_root, i915, &i915_gpu_info_fops); } @@ -2506,8 +2608,8 @@ static const struct bin_attribute error_state_attr = { .attr.name = "error", .attr.mode = S_IRUSR | S_IWUSR, .size = 0, - .read_new = error_state_read, - .write_new = error_state_write, + .read = error_state_read, + .write = error_state_write, }; void i915_gpu_error_sysfs_setup(struct drm_i915_private *i915) diff --git a/drivers/gpu/drm/i915/i915_gpu_error.h b/drivers/gpu/drm/i915/i915_gpu_error.h index 182324979278..91b3df621a49 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.h +++ b/drivers/gpu/drm/i915/i915_gpu_error.h @@ -177,6 +177,7 @@ struct intel_gt_coredump { struct intel_ctb_coredump ctb[2]; struct i915_vma_coredump *vma_ctb; struct i915_vma_coredump *vma_log; + u32 *hw_state; u32 timestamp; u16 last_fence; bool is_guc_capture; diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index 191ed8bb1d9c..8d5da222a187 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -163,11 +163,6 @@ static void ivb_parity_work(struct work_struct *work) u32 misccpctl; u8 slice = 0; - /* We must turn off DOP level clock gating to access the L3 registers. - * In order to prevent a get/put style interface, acquire struct mutex - * any time we access those registers. 
- */ - mutex_lock(&dev_priv->drm.struct_mutex); /* If we've screwed up tracking, just let the interrupt fire again */ if (drm_WARN_ON(&dev_priv->drm, !dev_priv->l3_parity.which_slice)) @@ -225,7 +220,6 @@ out: gen5_gt_enable_irq(gt, GT_PARITY_ERROR(dev_priv)); spin_unlock_irq(gt->irq_lock); - mutex_unlock(&dev_priv->drm.struct_mutex); } static irqreturn_t valleyview_irq_handler(int irq, void *arg) @@ -439,7 +433,7 @@ static irqreturn_t ilk_irq_handler(int irq, void *arg) * able to process them after we restore SDEIER (as soon as we restore * it, we'll get an interrupt if SDEIIR still has something to process * due to its back queue). */ - if (!HAS_PCH_NOP(i915)) { + if (!HAS_PCH_NOP(display)) { sde_ier = raw_reg_read(regs, SDEIER); raw_reg_write(regs, SDEIER, 0); } @@ -459,7 +453,7 @@ static irqreturn_t ilk_irq_handler(int irq, void *arg) de_iir = raw_reg_read(regs, DEIIR); if (de_iir) { raw_reg_write(regs, DEIIR, de_iir); - if (DISPLAY_VER(i915) >= 7) + if (DISPLAY_VER(display) >= 7) ivb_display_irq_handler(display, de_iir); else ilk_display_irq_handler(display, de_iir); @@ -834,6 +828,7 @@ static void cherryview_irq_postinstall(struct drm_i915_private *dev_priv) static u32 i9xx_error_mask(struct drm_i915_private *i915) { + struct intel_display *display = i915->display; /* * On gen2/3 FBC generates (seemingly spurious) * display INVALID_GTT/INVALID_GTT_PTE table errors. @@ -846,7 +841,7 @@ static u32 i9xx_error_mask(struct drm_i915_private *i915) * Unfortunately we can't mask off individual PGTBL_ER bits, * so we just have to mask off all page table errors via EMR. */ - if (HAS_FBC(i915)) + if (HAS_FBC(display)) return I915_ERROR_MEMORY_REFRESH; else return I915_ERROR_PAGE_TABLE | @@ -924,12 +919,12 @@ static void i915_irq_postinstall(struct drm_i915_private *dev_priv) I915_MASTER_ERROR_INTERRUPT | I915_USER_INTERRUPT; - if (DISPLAY_VER(dev_priv) >= 3) { + if (DISPLAY_VER(display) >= 3) { dev_priv->irq_mask &= ~I915_ASLE_INTERRUPT; enable_mask |= I915_ASLE_INTERRUPT; } - if (HAS_HOTPLUG(dev_priv)) { + if (HAS_HOTPLUG(display)) { dev_priv->irq_mask &= ~I915_DISPLAY_PORT_INTERRUPT; enable_mask |= I915_DISPLAY_PORT_INTERRUPT; } @@ -963,7 +958,7 @@ static irqreturn_t i915_irq_handler(int irq, void *arg) ret = IRQ_HANDLED; - if (HAS_HOTPLUG(dev_priv) && + if (HAS_HOTPLUG(display) && iir & I915_DISPLAY_PORT_INTERRUPT) hotplug_status = i9xx_hpd_irq_ack(display); diff --git a/drivers/gpu/drm/i915/i915_list_util.h b/drivers/gpu/drm/i915/i915_list_util.h new file mode 100644 index 000000000000..4e515dc8a3e0 --- /dev/null +++ b/drivers/gpu/drm/i915/i915_list_util.h @@ -0,0 +1,23 @@ +/* SPDX-License-Identifier: MIT */ +/* Copyright © 2025 Intel Corporation */ + +#ifndef __I915_LIST_UTIL_H__ +#define __I915_LIST_UTIL_H__ + +#include <linux/list.h> +#include <asm/rwonce.h> + +static inline void __list_del_many(struct list_head *head, + struct list_head *first) +{ + first->prev = head; + WRITE_ONCE(head->next, first); +} + +static inline int list_is_last_rcu(const struct list_head *list, + const struct list_head *head) +{ + return READ_ONCE(list->next) == head; +} + +#endif /* __I915_LIST_UTIL_H__ */ diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c index 21006c7f615c..b2e311f4791a 100644 --- a/drivers/gpu/drm/i915/i915_pci.c +++ b/drivers/gpu/drm/i915/i915_pci.c @@ -663,7 +663,6 @@ static const struct intel_device_info dg1_info = { DGFX_FEATURES, .__runtime.graphics.ip.rel = 10, PLATFORM(INTEL_DG1), - .require_force_probe = 1, .platform_engine_mask = BIT(RCS0) | BIT(BCS0) | 
BIT(VECS0) | BIT(VCS0) | BIT(VCS2), diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c index e5a188ce3185..5bc696bfbb0f 100644 --- a/drivers/gpu/drm/i915/i915_pmu.c +++ b/drivers/gpu/drm/i915/i915_pmu.c @@ -108,11 +108,11 @@ static unsigned int config_bit(const u64 config) return other_bit(config); } -static u32 config_mask(const u64 config) +static __always_inline u32 config_mask(const u64 config) { unsigned int bit = config_bit(config); - if (__builtin_constant_p(config)) + if (__builtin_constant_p(bit)) BUILD_BUG_ON(bit > BITS_PER_TYPE(typeof_member(struct i915_pmu, enable)) - 1); @@ -121,7 +121,7 @@ static u32 config_mask(const u64 config) BITS_PER_TYPE(typeof_member(struct i915_pmu, enable)) - 1); - return BIT(config_bit(config)); + return BIT(bit); } static bool is_engine_event(struct perf_event *event) diff --git a/drivers/gpu/drm/i915/i915_ptr_util.h b/drivers/gpu/drm/i915/i915_ptr_util.h new file mode 100644 index 000000000000..9f8931d7d99b --- /dev/null +++ b/drivers/gpu/drm/i915/i915_ptr_util.h @@ -0,0 +1,66 @@ +/* SPDX-License-Identifier: MIT */ +/* Copyright © 2025 Intel Corporation */ + +#ifndef __I915_PTR_UTIL_H__ +#define __I915_PTR_UTIL_H__ + +#include <linux/types.h> + +#define ptr_mask_bits(ptr, n) ({ \ + unsigned long __v = (unsigned long)(ptr); \ + (typeof(ptr))(__v & -BIT(n)); \ +}) + +#define ptr_unmask_bits(ptr, n) ((unsigned long)(ptr) & (BIT(n) - 1)) + +#define ptr_unpack_bits(ptr, bits, n) ({ \ + unsigned long __v = (unsigned long)(ptr); \ + *(bits) = __v & (BIT(n) - 1); \ + (typeof(ptr))(__v & -BIT(n)); \ +}) + +#define ptr_pack_bits(ptr, bits, n) ({ \ + unsigned long __bits = (bits); \ + GEM_BUG_ON(__bits & -BIT(n)); \ + ((typeof(ptr))((unsigned long)(ptr) | __bits)); \ +}) + +#define ptr_dec(ptr) ({ \ + unsigned long __v = (unsigned long)(ptr); \ + (typeof(ptr))(__v - 1); \ +}) + +#define ptr_inc(ptr) ({ \ + unsigned long __v = (unsigned long)(ptr); \ + (typeof(ptr))(__v + 1); \ +}) + +#define page_mask_bits(ptr) ptr_mask_bits(ptr, PAGE_SHIFT) +#define page_unmask_bits(ptr) ptr_unmask_bits(ptr, PAGE_SHIFT) +#define page_pack_bits(ptr, bits) ptr_pack_bits(ptr, bits, PAGE_SHIFT) +#define page_unpack_bits(ptr, bits) ptr_unpack_bits(ptr, bits, PAGE_SHIFT) + +static __always_inline ptrdiff_t ptrdiff(const void *a, const void *b) +{ + return a - b; +} + +#define u64_to_ptr(T, x) ({ \ + typecheck(u64, x); \ + (T *)(uintptr_t)(x); \ +}) + +/* + * container_of_user: Extract the superclass from a pointer to a member. + * + * Exactly like container_of() with the exception that it plays nicely + * with sparse for __user @ptr. 
+ */ +#define container_of_user(ptr, type, member) ({ \ + void __user *__mptr = (void __user *)(ptr); \ + BUILD_BUG_ON_MSG(!__same_type(*(ptr), typeof_member(type, member)) && \ + !__same_type(*(ptr), void), \ + "pointer type mismatch in container_of()"); \ + ((type __user *)(__mptr - offsetof(type, member))); }) + +#endif /* __I915_PTR_UTIL_H__ */ diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 52a902532e6f..354ef75ef6a5 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -385,7 +385,6 @@ #define VLV_PCBR _MMIO(VLV_DISPLAY_BASE + 0x2120) #define VLV_PCBR_ADDR_SHIFT 12 -#define DISPLAY_PLANE_FLIP_PENDING(plane) (1 << (11 - (plane))) /* A and B only */ #define EIR _MMIO(0x20b0) #define EMR _MMIO(0x20b4) #define ESR _MMIO(0x20b8) @@ -413,9 +412,9 @@ #define FW_BLC _MMIO(0x20d8) #define FW_BLC2 _MMIO(0x20dc) #define FW_BLC_SELF _MMIO(0x20e0) /* 915+ only */ -#define FW_BLC_SELF_EN_MASK (1 << 31) -#define FW_BLC_SELF_FIFO_MASK (1 << 16) /* 945 only */ -#define FW_BLC_SELF_EN (1 << 15) /* 945 only */ +#define FW_BLC_SELF_EN_MASK REG_BIT(31) +#define FW_BLC_SELF_FIFO_MASK REG_BIT(16) /* 945 only */ +#define FW_BLC_SELF_EN REG_BIT(15) /* 945 only */ #define MM_BURST_LENGTH 0x00700000 #define MM_FIFO_WATERMARK 0x0001F000 #define LM_BURST_LENGTH 0x00000700 @@ -614,7 +613,8 @@ #define DSTATE_GFX_CLOCK_GATING (1 << 1) #define DSTATE_DOT_CLOCK_GATING (1 << 0) -#define DSPCLK_GATE_D(__i915) _MMIO(DISPLAY_MMIO_BASE(__i915) + 0x6200) +#define DSPCLK_GATE_D _MMIO(0x6200) +#define VLV_DSPCLK_GATE_D _MMIO(VLV_DISPLAY_BASE + 0x6200) # define DPUNIT_B_CLOCK_GATE_DISABLE (1 << 30) /* 965 */ # define VSUNIT_CLOCK_GATE_DISABLE (1 << 29) /* 965 */ # define VRHUNIT_CLOCK_GATE_DISABLE (1 << 28) /* 965 */ @@ -763,8 +763,7 @@ */ #define GEN9_CLKGATE_DIS_0 _MMIO(0x46530) #define DARBF_GATING_DIS REG_BIT(27) -#define MTL_PIPEDMC_GATING_DIS_A REG_BIT(15) -#define MTL_PIPEDMC_GATING_DIS_B REG_BIT(14) +#define MTL_PIPEDMC_GATING_DIS(pipe) REG_BIT(15 - (pipe)) #define PWM2_GATING_DIS REG_BIT(14) #define PWM1_GATING_DIS REG_BIT(13) @@ -1205,16 +1204,6 @@ */ #define GEN7_SO_WRITE_OFFSET(n) _MMIO(0x5280 + (n) * 4) -/* SKL Fuse Status */ -enum skl_power_gate { - SKL_PG0, - SKL_PG1, - SKL_PG2, - ICL_PG3, - ICL_PG4, -}; - - #define GEN9_TIMESTAMP_OVERRIDE _MMIO(0x44074) #define GEN9_TIMESTAMP_OVERRIDE_US_COUNTER_DIVIDER_SHIFT 0 #define GEN9_TIMESTAMP_OVERRIDE_US_COUNTER_DIVIDER_MASK 0x3ff diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h index 5f7e8138ec14..b09135301f39 100644 --- a/drivers/gpu/drm/i915/i915_request.h +++ b/drivers/gpu/drm/i915/i915_request.h @@ -31,19 +31,20 @@ #include <linux/llist.h> #include <linux/lockdep.h> +#include <uapi/drm/i915_drm.h> + #include "gem/i915_gem_context_types.h" #include "gt/intel_context_types.h" #include "gt/intel_engine_types.h" #include "gt/intel_timeline_types.h" #include "i915_gem.h" +#include "i915_ptr_util.h" #include "i915_scheduler.h" #include "i915_selftest.h" #include "i915_sw_fence.h" #include "i915_vma_resource.h" -#include <uapi/drm/i915_drm.h> - struct drm_file; struct drm_i915_gem_object; struct drm_printer; diff --git a/drivers/gpu/drm/i915/i915_switcheroo.c b/drivers/gpu/drm/i915/i915_switcheroo.c index 4c02a04be681..d5b6d8ab31a2 100644 --- a/drivers/gpu/drm/i915/i915_switcheroo.c +++ b/drivers/gpu/drm/i915/i915_switcheroo.c @@ -5,7 +5,7 @@ #include <linux/vga_switcheroo.h> -#include "display/intel_display_core.h" +#include "display/intel_display_device.h" 
#include "i915_driver.h" #include "i915_drv.h" @@ -15,13 +15,14 @@ static void i915_switcheroo_set_state(struct pci_dev *pdev, enum vga_switcheroo_state state) { struct drm_i915_private *i915 = pdev_to_i915(pdev); + struct intel_display *display = i915 ? i915->display : NULL; pm_message_t pmm = { .event = PM_EVENT_SUSPEND }; if (!i915) { dev_err(&pdev->dev, "DRM not initialized, aborting switch.\n"); return; } - if (!HAS_DISPLAY(i915)) { + if (!intel_display_device_present(display)) { dev_err(&pdev->dev, "Device state not initialized, aborting switch.\n"); return; } @@ -44,13 +45,15 @@ static void i915_switcheroo_set_state(struct pci_dev *pdev, static bool i915_switcheroo_can_switch(struct pci_dev *pdev) { struct drm_i915_private *i915 = pdev_to_i915(pdev); + struct intel_display *display = i915 ? i915->display : NULL; /* * FIXME: open_count is protected by drm_global_mutex but that would lead to * locking inversion with the driver load path. And the access here is * completely racy anyway. So don't bother with locking for now. */ - return i915 && HAS_DISPLAY(i915) && atomic_read(&i915->drm.open_count) == 0; + return i915 && intel_display_device_present(display) && + atomic_read(&i915->drm.open_count) == 0; } static const struct vga_switcheroo_client_ops i915_switcheroo_ops = { diff --git a/drivers/gpu/drm/i915/i915_sysfs.c b/drivers/gpu/drm/i915/i915_sysfs.c index f936e8f1f129..622c66666935 100644 --- a/drivers/gpu/drm/i915/i915_sysfs.c +++ b/drivers/gpu/drm/i915/i915_sysfs.c @@ -140,8 +140,8 @@ i915_l3_write(struct file *filp, struct kobject *kobj, static const struct bin_attribute dpf_attrs = { .attr = {.name = "l3_parity", .mode = (S_IRUSR | S_IWUSR)}, .size = GEN7_L3LOG_SIZE, - .read_new = i915_l3_read, - .write_new = i915_l3_write, + .read = i915_l3_read, + .write = i915_l3_write, .mmap = NULL, .private = (void *)0 }; @@ -149,8 +149,8 @@ static const struct bin_attribute dpf_attrs = { static const struct bin_attribute dpf_attrs_1 = { .attr = {.name = "l3_parity_slice_1", .mode = (S_IRUSR | S_IWUSR)}, .size = GEN7_L3LOG_SIZE, - .read_new = i915_l3_read, - .write_new = i915_l3_write, + .read = i915_l3_read, + .write = i915_l3_write, .mmap = NULL, .private = (void *)1 }; diff --git a/drivers/gpu/drm/i915/i915_timer_util.c b/drivers/gpu/drm/i915/i915_timer_util.c new file mode 100644 index 000000000000..ee4cfd8b3c07 --- /dev/null +++ b/drivers/gpu/drm/i915/i915_timer_util.c @@ -0,0 +1,36 @@ +// SPDX-License-Identifier: MIT +/* Copyright © 2025 Intel Corporation */ + +#include <linux/jiffies.h> + +#include "i915_timer_util.h" + +void cancel_timer(struct timer_list *t) +{ + if (!timer_active(t)) + return; + + timer_delete(t); + WRITE_ONCE(t->expires, 0); +} + +void set_timer_ms(struct timer_list *t, unsigned long timeout) +{ + if (!timeout) { + cancel_timer(t); + return; + } + + timeout = msecs_to_jiffies(timeout); + + /* + * Paranoia to make sure the compiler computes the timeout before + * loading 'jiffies' as jiffies is volatile and may be updated in + * the background by a timer tick. All to reduce the complexity + * of the addition and reduce the risk of losing a jiffy. + */ + barrier(); + + /* Keep t->expires = 0 reserved to indicate a canceled timer. 
*/ + mod_timer(t, jiffies + timeout ?: 1); +} diff --git a/drivers/gpu/drm/i915/i915_timer_util.h b/drivers/gpu/drm/i915/i915_timer_util.h new file mode 100644 index 000000000000..f35ad730820c --- /dev/null +++ b/drivers/gpu/drm/i915/i915_timer_util.h @@ -0,0 +1,23 @@ +/* SPDX-License-Identifier: MIT */ +/* Copyright © 2025 Intel Corporation */ + +#ifndef __I915_TIMER_UTIL_H__ +#define __I915_TIMER_UTIL_H__ + +#include <linux/timer.h> +#include <asm/rwonce.h> + +void cancel_timer(struct timer_list *t); +void set_timer_ms(struct timer_list *t, unsigned long timeout); + +static inline bool timer_active(const struct timer_list *t) +{ + return READ_ONCE(t->expires); +} + +static inline bool timer_expired(const struct timer_list *t) +{ + return timer_active(t) && !timer_pending(t); +} + +#endif /* __I915_TIMER_UTIL_H__ */ diff --git a/drivers/gpu/drm/i915/i915_utils.c b/drivers/gpu/drm/i915/i915_utils.c index b60c28fbd207..49f7ed413132 100644 --- a/drivers/gpu/drm/i915/i915_utils.c +++ b/drivers/gpu/drm/i915/i915_utils.c @@ -47,36 +47,6 @@ bool i915_error_injected(void) #endif -void cancel_timer(struct timer_list *t) -{ - if (!timer_active(t)) - return; - - timer_delete(t); - WRITE_ONCE(t->expires, 0); -} - -void set_timer_ms(struct timer_list *t, unsigned long timeout) -{ - if (!timeout) { - cancel_timer(t); - return; - } - - timeout = msecs_to_jiffies(timeout); - - /* - * Paranoia to make sure the compiler computes the timeout before - * loading 'jiffies' as jiffies is volatile and may be updated in - * the background by a timer tick. All to reduce the complexity - * of the addition and reduce the risk of losing a jiffy. - */ - barrier(); - - /* Keep t->expires = 0 reserved to indicate a canceled timer. */ - mod_timer(t, jiffies + timeout ?: 1); -} - bool i915_vtd_active(struct drm_i915_private *i915) { if (device_iommu_mapped(i915->drm.dev)) diff --git a/drivers/gpu/drm/i915/i915_utils.h b/drivers/gpu/drm/i915/i915_utils.h index f7fb40cfdb70..a0c892e4c40d 100644 --- a/drivers/gpu/drm/i915/i915_utils.h +++ b/drivers/gpu/drm/i915/i915_utils.h @@ -25,7 +25,6 @@ #ifndef __I915_UTILS_H #define __I915_UTILS_H -#include <linux/list.h> #include <linux/overflow.h> #include <linux/sched.h> #include <linux/string_helpers.h> @@ -38,7 +37,6 @@ #endif struct drm_i915_private; -struct timer_list; #define MISSING_CASE(x) WARN(1, "Missing case (%s == %ld)\n", \ __stringify(x), (long)(x)) @@ -67,88 +65,12 @@ bool i915_error_injected(void); drm_err(&(i915)->drm, fmt, ##__VA_ARGS__); \ }) -#define range_overflows(start, size, max) ({ \ - typeof(start) start__ = (start); \ - typeof(size) size__ = (size); \ - typeof(max) max__ = (max); \ - (void)(&start__ == &size__); \ - (void)(&start__ == &max__); \ - start__ >= max__ || size__ > max__ - start__; \ -}) - -#define range_overflows_t(type, start, size, max) \ - range_overflows((type)(start), (type)(size), (type)(max)) - -#define range_overflows_end(start, size, max) ({ \ - typeof(start) start__ = (start); \ - typeof(size) size__ = (size); \ - typeof(max) max__ = (max); \ - (void)(&start__ == &size__); \ - (void)(&start__ == &max__); \ - start__ > max__ || size__ > max__ - start__; \ -}) - -#define range_overflows_end_t(type, start, size, max) \ - range_overflows_end((type)(start), (type)(size), (type)(max)) - -#define ptr_mask_bits(ptr, n) ({ \ - unsigned long __v = (unsigned long)(ptr); \ - (typeof(ptr))(__v & -BIT(n)); \ -}) - -#define ptr_unmask_bits(ptr, n) ((unsigned long)(ptr) & (BIT(n) - 1)) - -#define ptr_unpack_bits(ptr, bits, n) ({ \ - unsigned long __v 
= (unsigned long)(ptr); \ - *(bits) = __v & (BIT(n) - 1); \ - (typeof(ptr))(__v & -BIT(n)); \ -}) - -#define ptr_pack_bits(ptr, bits, n) ({ \ - unsigned long __bits = (bits); \ - GEM_BUG_ON(__bits & -BIT(n)); \ - ((typeof(ptr))((unsigned long)(ptr) | __bits)); \ -}) - -#define ptr_dec(ptr) ({ \ - unsigned long __v = (unsigned long)(ptr); \ - (typeof(ptr))(__v - 1); \ -}) - -#define ptr_inc(ptr) ({ \ - unsigned long __v = (unsigned long)(ptr); \ - (typeof(ptr))(__v + 1); \ -}) - -#define page_mask_bits(ptr) ptr_mask_bits(ptr, PAGE_SHIFT) -#define page_unmask_bits(ptr) ptr_unmask_bits(ptr, PAGE_SHIFT) -#define page_pack_bits(ptr, bits) ptr_pack_bits(ptr, bits, PAGE_SHIFT) -#define page_unpack_bits(ptr, bits) ptr_unpack_bits(ptr, bits, PAGE_SHIFT) - #define fetch_and_zero(ptr) ({ \ typeof(*ptr) __T = *(ptr); \ *(ptr) = (typeof(*ptr))0; \ __T; \ }) -static __always_inline ptrdiff_t ptrdiff(const void *a, const void *b) -{ - return a - b; -} - -/* - * container_of_user: Extract the superclass from a pointer to a member. - * - * Exactly like container_of() with the exception that it plays nicely - * with sparse for __user @ptr. - */ -#define container_of_user(ptr, type, member) ({ \ - void __user *__mptr = (void __user *)(ptr); \ - BUILD_BUG_ON_MSG(!__same_type(*(ptr), typeof_member(type, member)) && \ - !__same_type(*(ptr), void), \ - "pointer type mismatch in container_of()"); \ - ((type __user *)(__mptr - offsetof(type, member))); }) - /* * check_user_mbz: Check that a user value exists and is zero * @@ -167,11 +89,6 @@ static __always_inline ptrdiff_t ptrdiff(const void *a, const void *b) get_user(mbz__, (U)) ? -EFAULT : mbz__ ? -EINVAL : 0; \ }) -#define u64_to_ptr(T, x) ({ \ - typecheck(u64, x); \ - (T *)(uintptr_t)(x); \ -}) - #define __mask_next_bit(mask) ({ \ int __idx = ffs(mask) - 1; \ mask &= ~BIT(__idx); \ @@ -183,19 +100,6 @@ static inline bool is_power_of_2_u64(u64 n) return (n != 0 && ((n & (n - 1)) == 0)); } -static inline void __list_del_many(struct list_head *head, - struct list_head *first) -{ - first->prev = head; - WRITE_ONCE(head->next, first); -} - -static inline int list_is_last_rcu(const struct list_head *list, - const struct list_head *head) -{ - return READ_ONCE(list->next) == head; -} - static inline unsigned long msecs_to_jiffies_timeout(const unsigned int m) { unsigned long j = msecs_to_jiffies(m); @@ -230,107 +134,6 @@ wait_remaining_ms_from_jiffies(unsigned long timestamp_jiffies, int to_wait_ms) } } -/* - * __wait_for - magic wait macro - * - * Macro to help avoid open coding check/wait/timeout patterns. Note that it's - * important that we check the condition again after having timed out, since the - * timeout could be due to preemption or similar and we've never had a chance to - * check the condition before the timeout. 
- */ -#define __wait_for(OP, COND, US, Wmin, Wmax) ({ \ - const ktime_t end__ = ktime_add_ns(ktime_get_raw(), 1000ll * (US)); \ - long wait__ = (Wmin); /* recommended min for usleep is 10 us */ \ - int ret__; \ - might_sleep(); \ - for (;;) { \ - const bool expired__ = ktime_after(ktime_get_raw(), end__); \ - OP; \ - /* Guarantee COND check prior to timeout */ \ - barrier(); \ - if (COND) { \ - ret__ = 0; \ - break; \ - } \ - if (expired__) { \ - ret__ = -ETIMEDOUT; \ - break; \ - } \ - usleep_range(wait__, wait__ * 2); \ - if (wait__ < (Wmax)) \ - wait__ <<= 1; \ - } \ - ret__; \ -}) - -#define _wait_for(COND, US, Wmin, Wmax) __wait_for(, (COND), (US), (Wmin), \ - (Wmax)) -#define wait_for(COND, MS) _wait_for((COND), (MS) * 1000, 10, 1000) - -/* If CONFIG_PREEMPT_COUNT is disabled, in_atomic() always reports false. */ -#if IS_ENABLED(CONFIG_DRM_I915_DEBUG) && IS_ENABLED(CONFIG_PREEMPT_COUNT) -# define _WAIT_FOR_ATOMIC_CHECK(ATOMIC) WARN_ON_ONCE((ATOMIC) && !in_atomic()) -#else -# define _WAIT_FOR_ATOMIC_CHECK(ATOMIC) do { } while (0) -#endif - -#define _wait_for_atomic(COND, US, ATOMIC) \ -({ \ - int cpu, ret, timeout = (US) * 1000; \ - u64 base; \ - _WAIT_FOR_ATOMIC_CHECK(ATOMIC); \ - if (!(ATOMIC)) { \ - preempt_disable(); \ - cpu = smp_processor_id(); \ - } \ - base = local_clock(); \ - for (;;) { \ - u64 now = local_clock(); \ - if (!(ATOMIC)) \ - preempt_enable(); \ - /* Guarantee COND check prior to timeout */ \ - barrier(); \ - if (COND) { \ - ret = 0; \ - break; \ - } \ - if (now - base >= timeout) { \ - ret = -ETIMEDOUT; \ - break; \ - } \ - cpu_relax(); \ - if (!(ATOMIC)) { \ - preempt_disable(); \ - if (unlikely(cpu != smp_processor_id())) { \ - timeout -= now - base; \ - cpu = smp_processor_id(); \ - base = local_clock(); \ - } \ - } \ - } \ - ret; \ -}) - -#define wait_for_us(COND, US) \ -({ \ - int ret__; \ - BUILD_BUG_ON(!__builtin_constant_p(US)); \ - if ((US) > 10) \ - ret__ = _wait_for((COND), (US), 10, 10); \ - else \ - ret__ = _wait_for_atomic((COND), (US), 0); \ - ret__; \ -}) - -#define wait_for_atomic_us(COND, US) \ -({ \ - BUILD_BUG_ON(!__builtin_constant_p(US)); \ - BUILD_BUG_ON((US) > 50000); \ - _wait_for_atomic((COND), (US), 1); \ -}) - -#define wait_for_atomic(COND, MS) wait_for_atomic_us((COND), (MS) * 1000) - #define KHz(x) (1000 * (x)) #define MHz(x) KHz(1000 * (x)) @@ -346,19 +149,6 @@ static inline void __add_taint_for_CI(unsigned int taint) add_taint(taint, LOCKDEP_STILL_OK); } -void cancel_timer(struct timer_list *t); -void set_timer_ms(struct timer_list *t, unsigned long timeout); - -static inline bool timer_active(const struct timer_list *t) -{ - return READ_ONCE(t->expires); -} - -static inline bool timer_expired(const struct timer_list *t) -{ - return timer_active(t) && !timer_pending(t); -} - static inline bool i915_run_as_guest(void) { #if IS_ENABLED(CONFIG_X86) diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c index 632e316f8b05..25e97031d76e 100644 --- a/drivers/gpu/drm/i915/i915_vma.c +++ b/drivers/gpu/drm/i915/i915_vma.c @@ -1607,6 +1607,26 @@ err_rpm: return err; } +int i915_vma_pin(struct i915_vma *vma, u64 size, u64 alignment, u64 flags) +{ + struct i915_gem_ww_ctx ww; + int err; + + i915_gem_ww_ctx_init(&ww, true); +retry: + err = i915_gem_object_lock(vma->obj, &ww); + if (!err) + err = i915_vma_pin_ww(vma, &ww, size, alignment, flags); + if (err == -EDEADLK) { + err = i915_gem_ww_ctx_backoff(&ww); + if (!err) + goto retry; + } + i915_gem_ww_ctx_fini(&ww); + + return err; +} + static void 
flush_idle_contexts(struct intel_gt *gt) { struct intel_engine_cs *engine; diff --git a/drivers/gpu/drm/i915/i915_vma.h b/drivers/gpu/drm/i915/i915_vma.h index 6a6be8048aa8..8054047840aa 100644 --- a/drivers/gpu/drm/i915/i915_vma.h +++ b/drivers/gpu/drm/i915/i915_vma.h @@ -30,12 +30,12 @@ #include <drm/drm_mm.h> -#include "gt/intel_ggtt_fencing.h" #include "gem/i915_gem_object.h" - -#include "i915_gem_gtt.h" +#include "gt/intel_ggtt_fencing.h" #include "i915_active.h" +#include "i915_gem_gtt.h" +#include "i915_ptr_util.h" #include "i915_request.h" #include "i915_vma_resource.h" #include "i915_vma_types.h" @@ -289,26 +289,8 @@ int __must_check i915_vma_pin_ww(struct i915_vma *vma, struct i915_gem_ww_ctx *ww, u64 size, u64 alignment, u64 flags); -static inline int __must_check -i915_vma_pin(struct i915_vma *vma, u64 size, u64 alignment, u64 flags) -{ - struct i915_gem_ww_ctx ww; - int err; - - i915_gem_ww_ctx_init(&ww, true); -retry: - err = i915_gem_object_lock(vma->obj, &ww); - if (!err) - err = i915_vma_pin_ww(vma, &ww, size, alignment, flags); - if (err == -EDEADLK) { - err = i915_gem_ww_ctx_backoff(&ww); - if (!err) - goto retry; - } - i915_gem_ww_ctx_fini(&ww); - - return err; -} +int __must_check +i915_vma_pin(struct i915_vma *vma, u64 size, u64 alignment, u64 flags); int i915_ggtt_pin(struct i915_vma *vma, struct i915_gem_ww_ctx *ww, u32 align, unsigned int flags); @@ -353,6 +335,11 @@ static inline bool i915_node_color_differs(const struct drm_mm_node *node, return drm_mm_node_allocated(node) && node->color != color; } +static inline void __iomem *i915_vma_get_iomap(struct i915_vma *vma) +{ + return READ_ONCE(vma->iomap); +} + /** * i915_vma_pin_iomap - calls ioremap_wc to map the GGTT VMA via the aperture * @vma: VMA to iomap diff --git a/drivers/gpu/drm/i915/i915_wait_util.h b/drivers/gpu/drm/i915/i915_wait_util.h new file mode 100644 index 000000000000..7376898e3bf8 --- /dev/null +++ b/drivers/gpu/drm/i915/i915_wait_util.h @@ -0,0 +1,119 @@ +/* SPDX-License-Identifier: MIT */ +/* Copyright © 2025 Intel Corporation */ + +#ifndef __I915_WAIT_UTIL_H__ +#define __I915_WAIT_UTIL_H__ + +#include <linux/compiler.h> +#include <linux/delay.h> +#include <linux/ktime.h> +#include <linux/sched/clock.h> +#include <linux/smp.h> + +/* + * __wait_for - magic wait macro + * + * Macro to help avoid open coding check/wait/timeout patterns. Note that it's + * important that we check the condition again after having timed out, since the + * timeout could be due to preemption or similar and we've never had a chance to + * check the condition before the timeout. + */ +#define __wait_for(OP, COND, US, Wmin, Wmax) ({ \ + const ktime_t end__ = ktime_add_ns(ktime_get_raw(), 1000ll * (US)); \ + long wait__ = (Wmin); /* recommended min for usleep is 10 us */ \ + int ret__; \ + might_sleep(); \ + for (;;) { \ + const bool expired__ = ktime_after(ktime_get_raw(), end__); \ + OP; \ + /* Guarantee COND check prior to timeout */ \ + barrier(); \ + if (COND) { \ + ret__ = 0; \ + break; \ + } \ + if (expired__) { \ + ret__ = -ETIMEDOUT; \ + break; \ + } \ + usleep_range(wait__, wait__ * 2); \ + if (wait__ < (Wmax)) \ + wait__ <<= 1; \ + } \ + ret__; \ +}) + +#define _wait_for(COND, US, Wmin, Wmax) __wait_for(, (COND), (US), (Wmin), \ + (Wmax)) +#define wait_for(COND, MS) _wait_for((COND), (MS) * 1000, 10, 1000) + +/* + * If CONFIG_PREEMPT_COUNT is disabled, in_atomic() always reports false. 
+ * On PREEMPT_RT the context isn't becoming atomic because it is used in an + * interrupt handler or because a spinlock_t is acquired. This leads to + * warnings which don't occur otherwise and therefore the check is disabled. + */ +#if IS_ENABLED(CONFIG_DRM_I915_DEBUG) && IS_ENABLED(CONFIG_PREEMPT_COUNT) && !defined(CONFIG_PREEMPT_RT) +# define _WAIT_FOR_ATOMIC_CHECK(ATOMIC) WARN_ON_ONCE((ATOMIC) && !in_atomic()) +#else +# define _WAIT_FOR_ATOMIC_CHECK(ATOMIC) do { } while (0) +#endif + +#define _wait_for_atomic(COND, US, ATOMIC) \ +({ \ + int cpu, ret, timeout = (US) * 1000; \ + u64 base; \ + _WAIT_FOR_ATOMIC_CHECK(ATOMIC); \ + if (!(ATOMIC)) { \ + preempt_disable(); \ + cpu = smp_processor_id(); \ + } \ + base = local_clock(); \ + for (;;) { \ + u64 now = local_clock(); \ + if (!(ATOMIC)) \ + preempt_enable(); \ + /* Guarantee COND check prior to timeout */ \ + barrier(); \ + if (COND) { \ + ret = 0; \ + break; \ + } \ + if (now - base >= timeout) { \ + ret = -ETIMEDOUT; \ + break; \ + } \ + cpu_relax(); \ + if (!(ATOMIC)) { \ + preempt_disable(); \ + if (unlikely(cpu != smp_processor_id())) { \ + timeout -= now - base; \ + cpu = smp_processor_id(); \ + base = local_clock(); \ + } \ + } \ + } \ + ret; \ +}) + +#define wait_for_us(COND, US) \ +({ \ + int ret__; \ + BUILD_BUG_ON(!__builtin_constant_p(US)); \ + if ((US) > 10) \ + ret__ = _wait_for((COND), (US), 10, 10); \ + else \ + ret__ = _wait_for_atomic((COND), (US), 0); \ + ret__; \ +}) + +#define wait_for_atomic_us(COND, US) \ +({ \ + BUILD_BUG_ON(!__builtin_constant_p(US)); \ + BUILD_BUG_ON((US) > 50000); \ + _wait_for_atomic((COND), (US), 1); \ +}) + +#define wait_for_atomic(COND, MS) wait_for_atomic_us((COND), (MS) * 1000) + +#endif /* __I915_WAIT_UTIL_H__ */ diff --git a/drivers/gpu/drm/i915/intel_clock_gating.c b/drivers/gpu/drm/i915/intel_clock_gating.c index f86a3629ae9e..467740969431 100644 --- a/drivers/gpu/drm/i915/intel_clock_gating.c +++ b/drivers/gpu/drm/i915/intel_clock_gating.c @@ -132,16 +132,17 @@ static void ibx_init_clock_gating(struct drm_i915_private *i915) static void g4x_disable_trickle_feed(struct drm_i915_private *dev_priv) { + struct intel_display *display = dev_priv->display; enum pipe pipe; - for_each_pipe(dev_priv, pipe) { - intel_uncore_rmw(&dev_priv->uncore, DSPCNTR(dev_priv, pipe), + for_each_pipe(display, pipe) { + intel_uncore_rmw(&dev_priv->uncore, DSPCNTR(display, pipe), 0, DISP_TRICKLE_FEED_DISABLE); - intel_uncore_rmw(&dev_priv->uncore, DSPSURF(dev_priv, pipe), + intel_uncore_rmw(&dev_priv->uncore, DSPSURF(display, pipe), 0, 0); intel_uncore_posting_read(&dev_priv->uncore, - DSPSURF(dev_priv, pipe)); + DSPSURF(display, pipe)); } } @@ -218,7 +219,7 @@ static void cpt_init_clock_gating(struct drm_i915_private *i915) /* The below fixes the weird display corruption, a few pixels shifted * downward, on (only) LVDS of some HP laptops with IVY. 
*/ - for_each_pipe(i915, pipe) { + for_each_pipe(display, pipe) { val = intel_uncore_read(&i915->uncore, TRANS_CHICKEN2(pipe)); val |= TRANS_CHICKEN2_TIMING_OVERRIDE; val &= ~TRANS_CHICKEN2_FDI_POLARITY_REVERSED; @@ -229,7 +230,7 @@ static void cpt_init_clock_gating(struct drm_i915_private *i915) intel_uncore_write(&i915->uncore, TRANS_CHICKEN2(pipe), val); } /* WADP0ClockGatingDisable */ - for_each_pipe(i915, pipe) { + for_each_pipe(display, pipe) { intel_uncore_write(&i915->uncore, TRANS_CHICKEN1(pipe), TRANS_CHICKEN1_DP0UNIT_GC_DISABLE); } @@ -307,11 +308,13 @@ static void gen6_init_clock_gating(struct drm_i915_private *i915) static void lpt_init_clock_gating(struct drm_i915_private *i915) { + struct intel_display *display = i915->display; + /* * TODO: this bit should only be enabled when really needed, then * disabled when not needed anymore in order to save power. */ - if (HAS_PCH_LPT_LP(i915)) + if (HAS_PCH_LPT_LP(display)) intel_uncore_rmw(&i915->uncore, SOUTH_DSPCLK_GATE_D, 0, PCH_LP_PARTITION_LEVEL_DISABLE); @@ -355,7 +358,9 @@ static void dg2_init_clock_gating(struct drm_i915_private *i915) static void cnp_init_clock_gating(struct drm_i915_private *i915) { - if (!HAS_PCH_CNP(i915)) + struct intel_display *display = i915->display; + + if (!HAS_PCH_CNP(display)) return; /* Display WA #1181 WaSouthDisplayDisablePWMCGEGating: cnp */ @@ -421,6 +426,7 @@ static void skl_init_clock_gating(struct drm_i915_private *i915) static void bdw_init_clock_gating(struct drm_i915_private *i915) { + struct intel_display *display = i915->display; enum pipe pipe; /* WaFbcAsynchFlipDisableFbcQueue:hsw,bdw */ @@ -432,7 +438,7 @@ static void bdw_init_clock_gating(struct drm_i915_private *i915) /* WaPsrDPAMaskVBlankInSRD:bdw */ intel_uncore_rmw(&i915->uncore, CHICKEN_PAR1_1, 0, HSW_MASK_VBL_TO_PIPE_IN_SRD); - for_each_pipe(i915, pipe) { + for_each_pipe(display, pipe) { /* WaPsrDPRSUnmaskVBlankInSRD:bdw */ intel_uncore_rmw(&i915->uncore, CHICKEN_PIPESL_1(pipe), 0, BDW_UNMASK_VBL_TO_REGS_IN_SRD); @@ -468,6 +474,7 @@ static void bdw_init_clock_gating(struct drm_i915_private *i915) static void hsw_init_clock_gating(struct drm_i915_private *i915) { + struct intel_display *display = i915->display; enum pipe pipe; /* WaFbcAsynchFlipDisableFbcQueue:hsw,bdw */ @@ -476,7 +483,7 @@ static void hsw_init_clock_gating(struct drm_i915_private *i915) /* WaPsrDPAMaskVBlankInSRD:hsw */ intel_uncore_rmw(&i915->uncore, CHICKEN_PAR1_1, 0, HSW_MASK_VBL_TO_PIPE_IN_SRD); - for_each_pipe(i915, pipe) { + for_each_pipe(display, pipe) { /* WaPsrDPRSUnmaskVBlankInSRD:hsw */ intel_uncore_rmw(&i915->uncore, CHICKEN_PIPESL_1(pipe), 0, HSW_UNMASK_VBL_TO_REGS_IN_SRD); @@ -494,6 +501,8 @@ static void hsw_init_clock_gating(struct drm_i915_private *i915) static void ivb_init_clock_gating(struct drm_i915_private *i915) { + struct intel_display *display = i915->display; + intel_uncore_write(&i915->uncore, ILK_DSPCLK_GATE_D, ILK_VRHUNIT_CLOCK_GATE_DISABLE); /* WaFbcAsynchFlipDisableFbcQueue:ivb */ @@ -531,7 +540,7 @@ static void ivb_init_clock_gating(struct drm_i915_private *i915) intel_uncore_rmw(&i915->uncore, GEN6_MBCUNIT_SNPCR, GEN6_MBC_SNPCR_MASK, GEN6_MBC_SNPCR_MED); - if (!HAS_PCH_NOP(i915)) + if (!HAS_PCH_NOP(display)) cpt_init_clock_gating(i915); gen6_check_mch_setup(i915); @@ -611,7 +620,7 @@ static void g4x_init_clock_gating(struct drm_i915_private *i915) OVCUNIT_CLOCK_GATE_DISABLE; if (IS_GM45(i915)) dspclk_gate |= DSSUNIT_CLOCK_GATE_DISABLE; - intel_uncore_write(&i915->uncore, DSPCLK_GATE_D(i915), dspclk_gate); + 
intel_uncore_write(&i915->uncore, DSPCLK_GATE_D, dspclk_gate); g4x_disable_trickle_feed(i915); } @@ -622,7 +631,7 @@ static void i965gm_init_clock_gating(struct drm_i915_private *i915) intel_uncore_write(uncore, RENCLK_GATE_D1, I965_RCC_CLOCK_GATE_DISABLE); intel_uncore_write(uncore, RENCLK_GATE_D2, 0); - intel_uncore_write(uncore, DSPCLK_GATE_D(i915), 0); + intel_uncore_write(uncore, DSPCLK_GATE_D, 0); intel_uncore_write(uncore, RAMCLK_GATE_D, 0); intel_uncore_write16(uncore, DEUC, 0); intel_uncore_write(uncore, diff --git a/drivers/gpu/drm/i915/intel_gvt_mmio_table.c b/drivers/gpu/drm/i915/intel_gvt_mmio_table.c index 87ac4446d306..ca57a3dd3148 100644 --- a/drivers/gpu/drm/i915/intel_gvt_mmio_table.c +++ b/drivers/gpu/drm/i915/intel_gvt_mmio_table.c @@ -62,6 +62,7 @@ static int iterate_generic_mmio(struct intel_gvt_mmio_table_iter *iter) { struct drm_i915_private *dev_priv = iter->i915; + struct intel_display *display = dev_priv->display; MMIO_RING_D(RING_IMR); MMIO_D(SDEIMR); @@ -133,38 +134,38 @@ static int iterate_generic_mmio(struct intel_gvt_mmio_table_iter *iter) MMIO_D(_MMIO(0x650b4)); MMIO_D(_MMIO(0xc4040)); MMIO_D(DERRMR); - MMIO_D(PIPEDSL(dev_priv, PIPE_A)); - MMIO_D(PIPEDSL(dev_priv, PIPE_B)); - MMIO_D(PIPEDSL(dev_priv, PIPE_C)); - MMIO_D(PIPEDSL(dev_priv, _PIPE_EDP)); - MMIO_D(TRANSCONF(dev_priv, TRANSCODER_A)); - MMIO_D(TRANSCONF(dev_priv, TRANSCODER_B)); - MMIO_D(TRANSCONF(dev_priv, TRANSCODER_C)); - MMIO_D(TRANSCONF(dev_priv, TRANSCODER_EDP)); - MMIO_D(PIPESTAT(dev_priv, PIPE_A)); - MMIO_D(PIPESTAT(dev_priv, PIPE_B)); - MMIO_D(PIPESTAT(dev_priv, PIPE_C)); - MMIO_D(PIPESTAT(dev_priv, _PIPE_EDP)); - MMIO_D(PIPE_FLIPCOUNT_G4X(dev_priv, PIPE_A)); - MMIO_D(PIPE_FLIPCOUNT_G4X(dev_priv, PIPE_B)); - MMIO_D(PIPE_FLIPCOUNT_G4X(dev_priv, PIPE_C)); - MMIO_D(PIPE_FLIPCOUNT_G4X(dev_priv, _PIPE_EDP)); - MMIO_D(PIPE_FRMCOUNT_G4X(dev_priv, PIPE_A)); - MMIO_D(PIPE_FRMCOUNT_G4X(dev_priv, PIPE_B)); - MMIO_D(PIPE_FRMCOUNT_G4X(dev_priv, PIPE_C)); - MMIO_D(PIPE_FRMCOUNT_G4X(dev_priv, _PIPE_EDP)); - MMIO_D(CURCNTR(dev_priv, PIPE_A)); - MMIO_D(CURCNTR(dev_priv, PIPE_B)); - MMIO_D(CURCNTR(dev_priv, PIPE_C)); - MMIO_D(CURPOS(dev_priv, PIPE_A)); - MMIO_D(CURPOS(dev_priv, PIPE_B)); - MMIO_D(CURPOS(dev_priv, PIPE_C)); - MMIO_D(CURBASE(dev_priv, PIPE_A)); - MMIO_D(CURBASE(dev_priv, PIPE_B)); - MMIO_D(CURBASE(dev_priv, PIPE_C)); - MMIO_D(CUR_FBC_CTL(dev_priv, PIPE_A)); - MMIO_D(CUR_FBC_CTL(dev_priv, PIPE_B)); - MMIO_D(CUR_FBC_CTL(dev_priv, PIPE_C)); + MMIO_D(PIPEDSL(display, PIPE_A)); + MMIO_D(PIPEDSL(display, PIPE_B)); + MMIO_D(PIPEDSL(display, PIPE_C)); + MMIO_D(PIPEDSL(display, _PIPE_EDP)); + MMIO_D(TRANSCONF(display, TRANSCODER_A)); + MMIO_D(TRANSCONF(display, TRANSCODER_B)); + MMIO_D(TRANSCONF(display, TRANSCODER_C)); + MMIO_D(TRANSCONF(display, TRANSCODER_EDP)); + MMIO_D(PIPESTAT(display, PIPE_A)); + MMIO_D(PIPESTAT(display, PIPE_B)); + MMIO_D(PIPESTAT(display, PIPE_C)); + MMIO_D(PIPESTAT(display, _PIPE_EDP)); + MMIO_D(PIPE_FLIPCOUNT_G4X(display, PIPE_A)); + MMIO_D(PIPE_FLIPCOUNT_G4X(display, PIPE_B)); + MMIO_D(PIPE_FLIPCOUNT_G4X(display, PIPE_C)); + MMIO_D(PIPE_FLIPCOUNT_G4X(display, _PIPE_EDP)); + MMIO_D(PIPE_FRMCOUNT_G4X(display, PIPE_A)); + MMIO_D(PIPE_FRMCOUNT_G4X(display, PIPE_B)); + MMIO_D(PIPE_FRMCOUNT_G4X(display, PIPE_C)); + MMIO_D(PIPE_FRMCOUNT_G4X(display, _PIPE_EDP)); + MMIO_D(CURCNTR(display, PIPE_A)); + MMIO_D(CURCNTR(display, PIPE_B)); + MMIO_D(CURCNTR(display, PIPE_C)); + MMIO_D(CURPOS(display, PIPE_A)); + MMIO_D(CURPOS(display, PIPE_B)); + MMIO_D(CURPOS(display, PIPE_C)); + 
MMIO_D(CURBASE(display, PIPE_A)); + MMIO_D(CURBASE(display, PIPE_B)); + MMIO_D(CURBASE(display, PIPE_C)); + MMIO_D(CUR_FBC_CTL(display, PIPE_A)); + MMIO_D(CUR_FBC_CTL(display, PIPE_B)); + MMIO_D(CUR_FBC_CTL(display, PIPE_C)); MMIO_D(_MMIO(0x700ac)); MMIO_D(_MMIO(0x710ac)); MMIO_D(_MMIO(0x720ac)); @@ -172,32 +173,32 @@ static int iterate_generic_mmio(struct intel_gvt_mmio_table_iter *iter) MMIO_D(_MMIO(0x70094)); MMIO_D(_MMIO(0x70098)); MMIO_D(_MMIO(0x7009c)); - MMIO_D(DSPCNTR(dev_priv, PIPE_A)); - MMIO_D(DSPADDR(dev_priv, PIPE_A)); - MMIO_D(DSPSTRIDE(dev_priv, PIPE_A)); - MMIO_D(DSPPOS(dev_priv, PIPE_A)); - MMIO_D(DSPSIZE(dev_priv, PIPE_A)); - MMIO_D(DSPSURF(dev_priv, PIPE_A)); - MMIO_D(DSPOFFSET(dev_priv, PIPE_A)); - MMIO_D(DSPSURFLIVE(dev_priv, PIPE_A)); + MMIO_D(DSPCNTR(display, PIPE_A)); + MMIO_D(DSPADDR(display, PIPE_A)); + MMIO_D(DSPSTRIDE(display, PIPE_A)); + MMIO_D(DSPPOS(display, PIPE_A)); + MMIO_D(DSPSIZE(display, PIPE_A)); + MMIO_D(DSPSURF(display, PIPE_A)); + MMIO_D(DSPOFFSET(display, PIPE_A)); + MMIO_D(DSPSURFLIVE(display, PIPE_A)); MMIO_D(REG_50080(PIPE_A, PLANE_PRIMARY)); - MMIO_D(DSPCNTR(dev_priv, PIPE_B)); - MMIO_D(DSPADDR(dev_priv, PIPE_B)); - MMIO_D(DSPSTRIDE(dev_priv, PIPE_B)); - MMIO_D(DSPPOS(dev_priv, PIPE_B)); - MMIO_D(DSPSIZE(dev_priv, PIPE_B)); - MMIO_D(DSPSURF(dev_priv, PIPE_B)); - MMIO_D(DSPOFFSET(dev_priv, PIPE_B)); - MMIO_D(DSPSURFLIVE(dev_priv, PIPE_B)); + MMIO_D(DSPCNTR(display, PIPE_B)); + MMIO_D(DSPADDR(display, PIPE_B)); + MMIO_D(DSPSTRIDE(display, PIPE_B)); + MMIO_D(DSPPOS(display, PIPE_B)); + MMIO_D(DSPSIZE(display, PIPE_B)); + MMIO_D(DSPSURF(display, PIPE_B)); + MMIO_D(DSPOFFSET(display, PIPE_B)); + MMIO_D(DSPSURFLIVE(display, PIPE_B)); MMIO_D(REG_50080(PIPE_B, PLANE_PRIMARY)); - MMIO_D(DSPCNTR(dev_priv, PIPE_C)); - MMIO_D(DSPADDR(dev_priv, PIPE_C)); - MMIO_D(DSPSTRIDE(dev_priv, PIPE_C)); - MMIO_D(DSPPOS(dev_priv, PIPE_C)); - MMIO_D(DSPSIZE(dev_priv, PIPE_C)); - MMIO_D(DSPSURF(dev_priv, PIPE_C)); - MMIO_D(DSPOFFSET(dev_priv, PIPE_C)); - MMIO_D(DSPSURFLIVE(dev_priv, PIPE_C)); + MMIO_D(DSPCNTR(display, PIPE_C)); + MMIO_D(DSPADDR(display, PIPE_C)); + MMIO_D(DSPSTRIDE(display, PIPE_C)); + MMIO_D(DSPPOS(display, PIPE_C)); + MMIO_D(DSPSIZE(display, PIPE_C)); + MMIO_D(DSPSURF(display, PIPE_C)); + MMIO_D(DSPOFFSET(display, PIPE_C)); + MMIO_D(DSPSURFLIVE(display, PIPE_C)); MMIO_D(REG_50080(PIPE_C, PLANE_PRIMARY)); MMIO_D(SPRCTL(PIPE_A)); MMIO_D(SPRLINOFF(PIPE_A)); @@ -238,73 +239,73 @@ static int iterate_generic_mmio(struct intel_gvt_mmio_table_iter *iter) MMIO_D(SPRSCALE(PIPE_C)); MMIO_D(SPRSURFLIVE(PIPE_C)); MMIO_D(REG_50080(PIPE_C, PLANE_SPRITE0)); - MMIO_D(TRANS_HTOTAL(dev_priv, TRANSCODER_A)); - MMIO_D(TRANS_HBLANK(dev_priv, TRANSCODER_A)); - MMIO_D(TRANS_HSYNC(dev_priv, TRANSCODER_A)); - MMIO_D(TRANS_VTOTAL(dev_priv, TRANSCODER_A)); - MMIO_D(TRANS_VBLANK(dev_priv, TRANSCODER_A)); - MMIO_D(TRANS_VSYNC(dev_priv, TRANSCODER_A)); - MMIO_D(BCLRPAT(dev_priv, TRANSCODER_A)); - MMIO_D(TRANS_VSYNCSHIFT(dev_priv, TRANSCODER_A)); - MMIO_D(PIPESRC(dev_priv, TRANSCODER_A)); - MMIO_D(TRANS_HTOTAL(dev_priv, TRANSCODER_B)); - MMIO_D(TRANS_HBLANK(dev_priv, TRANSCODER_B)); - MMIO_D(TRANS_HSYNC(dev_priv, TRANSCODER_B)); - MMIO_D(TRANS_VTOTAL(dev_priv, TRANSCODER_B)); - MMIO_D(TRANS_VBLANK(dev_priv, TRANSCODER_B)); - MMIO_D(TRANS_VSYNC(dev_priv, TRANSCODER_B)); - MMIO_D(BCLRPAT(dev_priv, TRANSCODER_B)); - MMIO_D(TRANS_VSYNCSHIFT(dev_priv, TRANSCODER_B)); - MMIO_D(PIPESRC(dev_priv, TRANSCODER_B)); - MMIO_D(TRANS_HTOTAL(dev_priv, TRANSCODER_C)); - MMIO_D(TRANS_HBLANK(dev_priv, 
TRANSCODER_C)); - MMIO_D(TRANS_HSYNC(dev_priv, TRANSCODER_C)); - MMIO_D(TRANS_VTOTAL(dev_priv, TRANSCODER_C)); - MMIO_D(TRANS_VBLANK(dev_priv, TRANSCODER_C)); - MMIO_D(TRANS_VSYNC(dev_priv, TRANSCODER_C)); - MMIO_D(BCLRPAT(dev_priv, TRANSCODER_C)); - MMIO_D(TRANS_VSYNCSHIFT(dev_priv, TRANSCODER_C)); - MMIO_D(PIPESRC(dev_priv, TRANSCODER_C)); - MMIO_D(TRANS_HTOTAL(dev_priv, TRANSCODER_EDP)); - MMIO_D(TRANS_HBLANK(dev_priv, TRANSCODER_EDP)); - MMIO_D(TRANS_HSYNC(dev_priv, TRANSCODER_EDP)); - MMIO_D(TRANS_VTOTAL(dev_priv, TRANSCODER_EDP)); - MMIO_D(TRANS_VBLANK(dev_priv, TRANSCODER_EDP)); - MMIO_D(TRANS_VSYNC(dev_priv, TRANSCODER_EDP)); - MMIO_D(BCLRPAT(dev_priv, TRANSCODER_EDP)); - MMIO_D(TRANS_VSYNCSHIFT(dev_priv, TRANSCODER_EDP)); - MMIO_D(PIPE_DATA_M1(dev_priv, TRANSCODER_A)); - MMIO_D(PIPE_DATA_N1(dev_priv, TRANSCODER_A)); - MMIO_D(PIPE_DATA_M2(dev_priv, TRANSCODER_A)); - MMIO_D(PIPE_DATA_N2(dev_priv, TRANSCODER_A)); - MMIO_D(PIPE_LINK_M1(dev_priv, TRANSCODER_A)); - MMIO_D(PIPE_LINK_N1(dev_priv, TRANSCODER_A)); - MMIO_D(PIPE_LINK_M2(dev_priv, TRANSCODER_A)); - MMIO_D(PIPE_LINK_N2(dev_priv, TRANSCODER_A)); - MMIO_D(PIPE_DATA_M1(dev_priv, TRANSCODER_B)); - MMIO_D(PIPE_DATA_N1(dev_priv, TRANSCODER_B)); - MMIO_D(PIPE_DATA_M2(dev_priv, TRANSCODER_B)); - MMIO_D(PIPE_DATA_N2(dev_priv, TRANSCODER_B)); - MMIO_D(PIPE_LINK_M1(dev_priv, TRANSCODER_B)); - MMIO_D(PIPE_LINK_N1(dev_priv, TRANSCODER_B)); - MMIO_D(PIPE_LINK_M2(dev_priv, TRANSCODER_B)); - MMIO_D(PIPE_LINK_N2(dev_priv, TRANSCODER_B)); - MMIO_D(PIPE_DATA_M1(dev_priv, TRANSCODER_C)); - MMIO_D(PIPE_DATA_N1(dev_priv, TRANSCODER_C)); - MMIO_D(PIPE_DATA_M2(dev_priv, TRANSCODER_C)); - MMIO_D(PIPE_DATA_N2(dev_priv, TRANSCODER_C)); - MMIO_D(PIPE_LINK_M1(dev_priv, TRANSCODER_C)); - MMIO_D(PIPE_LINK_N1(dev_priv, TRANSCODER_C)); - MMIO_D(PIPE_LINK_M2(dev_priv, TRANSCODER_C)); - MMIO_D(PIPE_LINK_N2(dev_priv, TRANSCODER_C)); - MMIO_D(PIPE_DATA_M1(dev_priv, TRANSCODER_EDP)); - MMIO_D(PIPE_DATA_N1(dev_priv, TRANSCODER_EDP)); - MMIO_D(PIPE_DATA_M2(dev_priv, TRANSCODER_EDP)); - MMIO_D(PIPE_DATA_N2(dev_priv, TRANSCODER_EDP)); - MMIO_D(PIPE_LINK_M1(dev_priv, TRANSCODER_EDP)); - MMIO_D(PIPE_LINK_N1(dev_priv, TRANSCODER_EDP)); - MMIO_D(PIPE_LINK_M2(dev_priv, TRANSCODER_EDP)); - MMIO_D(PIPE_LINK_N2(dev_priv, TRANSCODER_EDP)); + MMIO_D(TRANS_HTOTAL(display, TRANSCODER_A)); + MMIO_D(TRANS_HBLANK(display, TRANSCODER_A)); + MMIO_D(TRANS_HSYNC(display, TRANSCODER_A)); + MMIO_D(TRANS_VTOTAL(display, TRANSCODER_A)); + MMIO_D(TRANS_VBLANK(display, TRANSCODER_A)); + MMIO_D(TRANS_VSYNC(display, TRANSCODER_A)); + MMIO_D(BCLRPAT(display, TRANSCODER_A)); + MMIO_D(TRANS_VSYNCSHIFT(display, TRANSCODER_A)); + MMIO_D(PIPESRC(display, TRANSCODER_A)); + MMIO_D(TRANS_HTOTAL(display, TRANSCODER_B)); + MMIO_D(TRANS_HBLANK(display, TRANSCODER_B)); + MMIO_D(TRANS_HSYNC(display, TRANSCODER_B)); + MMIO_D(TRANS_VTOTAL(display, TRANSCODER_B)); + MMIO_D(TRANS_VBLANK(display, TRANSCODER_B)); + MMIO_D(TRANS_VSYNC(display, TRANSCODER_B)); + MMIO_D(BCLRPAT(display, TRANSCODER_B)); + MMIO_D(TRANS_VSYNCSHIFT(display, TRANSCODER_B)); + MMIO_D(PIPESRC(display, TRANSCODER_B)); + MMIO_D(TRANS_HTOTAL(display, TRANSCODER_C)); + MMIO_D(TRANS_HBLANK(display, TRANSCODER_C)); + MMIO_D(TRANS_HSYNC(display, TRANSCODER_C)); + MMIO_D(TRANS_VTOTAL(display, TRANSCODER_C)); + MMIO_D(TRANS_VBLANK(display, TRANSCODER_C)); + MMIO_D(TRANS_VSYNC(display, TRANSCODER_C)); + MMIO_D(BCLRPAT(display, TRANSCODER_C)); + MMIO_D(TRANS_VSYNCSHIFT(display, TRANSCODER_C)); + MMIO_D(PIPESRC(display, TRANSCODER_C)); + 
MMIO_D(TRANS_HTOTAL(display, TRANSCODER_EDP)); + MMIO_D(TRANS_HBLANK(display, TRANSCODER_EDP)); + MMIO_D(TRANS_HSYNC(display, TRANSCODER_EDP)); + MMIO_D(TRANS_VTOTAL(display, TRANSCODER_EDP)); + MMIO_D(TRANS_VBLANK(display, TRANSCODER_EDP)); + MMIO_D(TRANS_VSYNC(display, TRANSCODER_EDP)); + MMIO_D(BCLRPAT(display, TRANSCODER_EDP)); + MMIO_D(TRANS_VSYNCSHIFT(display, TRANSCODER_EDP)); + MMIO_D(PIPE_DATA_M1(display, TRANSCODER_A)); + MMIO_D(PIPE_DATA_N1(display, TRANSCODER_A)); + MMIO_D(PIPE_DATA_M2(display, TRANSCODER_A)); + MMIO_D(PIPE_DATA_N2(display, TRANSCODER_A)); + MMIO_D(PIPE_LINK_M1(display, TRANSCODER_A)); + MMIO_D(PIPE_LINK_N1(display, TRANSCODER_A)); + MMIO_D(PIPE_LINK_M2(display, TRANSCODER_A)); + MMIO_D(PIPE_LINK_N2(display, TRANSCODER_A)); + MMIO_D(PIPE_DATA_M1(display, TRANSCODER_B)); + MMIO_D(PIPE_DATA_N1(display, TRANSCODER_B)); + MMIO_D(PIPE_DATA_M2(display, TRANSCODER_B)); + MMIO_D(PIPE_DATA_N2(display, TRANSCODER_B)); + MMIO_D(PIPE_LINK_M1(display, TRANSCODER_B)); + MMIO_D(PIPE_LINK_N1(display, TRANSCODER_B)); + MMIO_D(PIPE_LINK_M2(display, TRANSCODER_B)); + MMIO_D(PIPE_LINK_N2(display, TRANSCODER_B)); + MMIO_D(PIPE_DATA_M1(display, TRANSCODER_C)); + MMIO_D(PIPE_DATA_N1(display, TRANSCODER_C)); + MMIO_D(PIPE_DATA_M2(display, TRANSCODER_C)); + MMIO_D(PIPE_DATA_N2(display, TRANSCODER_C)); + MMIO_D(PIPE_LINK_M1(display, TRANSCODER_C)); + MMIO_D(PIPE_LINK_N1(display, TRANSCODER_C)); + MMIO_D(PIPE_LINK_M2(display, TRANSCODER_C)); + MMIO_D(PIPE_LINK_N2(display, TRANSCODER_C)); + MMIO_D(PIPE_DATA_M1(display, TRANSCODER_EDP)); + MMIO_D(PIPE_DATA_N1(display, TRANSCODER_EDP)); + MMIO_D(PIPE_DATA_M2(display, TRANSCODER_EDP)); + MMIO_D(PIPE_DATA_N2(display, TRANSCODER_EDP)); + MMIO_D(PIPE_LINK_M1(display, TRANSCODER_EDP)); + MMIO_D(PIPE_LINK_N1(display, TRANSCODER_EDP)); + MMIO_D(PIPE_LINK_M2(display, TRANSCODER_EDP)); + MMIO_D(PIPE_LINK_N2(display, TRANSCODER_EDP)); MMIO_D(PF_CTL(PIPE_A)); MMIO_D(PF_WIN_SZ(PIPE_A)); MMIO_D(PF_WIN_POS(PIPE_A)); @@ -513,12 +514,12 @@ static int iterate_generic_mmio(struct intel_gvt_mmio_table_iter *iter) MMIO_D(GAMMA_MODE(PIPE_A)); MMIO_D(GAMMA_MODE(PIPE_B)); MMIO_D(GAMMA_MODE(PIPE_C)); - MMIO_D(TRANS_MULT(dev_priv, TRANSCODER_A)); - MMIO_D(TRANS_MULT(dev_priv, TRANSCODER_B)); - MMIO_D(TRANS_MULT(dev_priv, TRANSCODER_C)); - MMIO_D(HSW_TVIDEO_DIP_CTL(dev_priv, TRANSCODER_A)); - MMIO_D(HSW_TVIDEO_DIP_CTL(dev_priv, TRANSCODER_B)); - MMIO_D(HSW_TVIDEO_DIP_CTL(dev_priv, TRANSCODER_C)); + MMIO_D(TRANS_MULT(display, TRANSCODER_A)); + MMIO_D(TRANS_MULT(display, TRANSCODER_B)); + MMIO_D(TRANS_MULT(display, TRANSCODER_C)); + MMIO_D(HSW_TVIDEO_DIP_CTL(display, TRANSCODER_A)); + MMIO_D(HSW_TVIDEO_DIP_CTL(display, TRANSCODER_B)); + MMIO_D(HSW_TVIDEO_DIP_CTL(display, TRANSCODER_C)); MMIO_D(SFUSE_STRAP); MMIO_D(SBI_ADDR); MMIO_D(SBI_DATA); @@ -1111,6 +1112,7 @@ static int iterate_skl_plus_mmio(struct intel_gvt_mmio_table_iter *iter) static int iterate_bxt_mmio(struct intel_gvt_mmio_table_iter *iter) { struct drm_i915_private *dev_priv = iter->i915; + struct intel_display *display = dev_priv->display; MMIO_F(_MMIO(0x80000), 0x3000); MMIO_D(GEN7_SAMPLER_INSTDONE); @@ -1242,9 +1244,9 @@ static int iterate_bxt_mmio(struct intel_gvt_mmio_table_iter *iter) MMIO_D(BXT_DSI_PLL_ENABLE); MMIO_D(GEN9_CLKGATE_DIS_0); MMIO_D(GEN9_CLKGATE_DIS_4); - MMIO_D(HSW_TVIDEO_DIP_GCP(dev_priv, TRANSCODER_A)); - MMIO_D(HSW_TVIDEO_DIP_GCP(dev_priv, TRANSCODER_B)); - MMIO_D(HSW_TVIDEO_DIP_GCP(dev_priv, TRANSCODER_C)); + MMIO_D(HSW_TVIDEO_DIP_GCP(display, TRANSCODER_A)); + 
MMIO_D(HSW_TVIDEO_DIP_GCP(display, TRANSCODER_B)); + MMIO_D(HSW_TVIDEO_DIP_GCP(display, TRANSCODER_C)); MMIO_D(RC6_CTX_BASE); MMIO_D(GEN8_PUSHBUS_CONTROL); MMIO_D(GEN8_PUSHBUS_ENABLE); diff --git a/drivers/gpu/drm/i915/intel_pcode.c b/drivers/gpu/drm/i915/intel_pcode.c index 3db2ba439bb5..55ffedad2490 100644 --- a/drivers/gpu/drm/i915/intel_pcode.c +++ b/drivers/gpu/drm/i915/intel_pcode.c @@ -5,6 +5,7 @@ #include "i915_drv.h" #include "i915_reg.h" +#include "i915_wait_util.h" #include "intel_pcode.h" static int gen6_check_mailbox_status(u32 mbox) @@ -110,13 +111,12 @@ int snb_pcode_read(struct intel_uncore *uncore, u32 mbox, u32 *val, u32 *val1) } int snb_pcode_write_timeout(struct intel_uncore *uncore, u32 mbox, u32 val, - int fast_timeout_us, int slow_timeout_ms) + int timeout_ms) { int err; mutex_lock(&uncore->i915->sb_lock); - err = __snb_pcode_rw(uncore, mbox, &val, NULL, - fast_timeout_us, slow_timeout_ms, false); + err = __snb_pcode_rw(uncore, mbox, &val, NULL, 250, timeout_ms, false); mutex_unlock(&uncore->i915->sb_lock); if (err) { @@ -273,3 +273,27 @@ int snb_pcode_write_p(struct intel_uncore *uncore, u32 mbcmd, u32 p1, u32 p2, u3 return err; } + +/* Helpers with drm device */ +int intel_pcode_read(struct drm_device *drm, u32 mbox, u32 *val, u32 *val1) +{ + struct drm_i915_private *i915 = to_i915(drm); + + return snb_pcode_read(&i915->uncore, mbox, val, val1); +} + +int intel_pcode_write_timeout(struct drm_device *drm, u32 mbox, u32 val, int timeout_ms) +{ + struct drm_i915_private *i915 = to_i915(drm); + + return snb_pcode_write_timeout(&i915->uncore, mbox, val, timeout_ms); +} + +int intel_pcode_request(struct drm_device *drm, u32 mbox, u32 request, + u32 reply_mask, u32 reply, int timeout_base_ms) +{ + struct drm_i915_private *i915 = to_i915(drm); + + return skl_pcode_request(&i915->uncore, mbox, request, reply_mask, reply, + timeout_base_ms); +} diff --git a/drivers/gpu/drm/i915/intel_pcode.h b/drivers/gpu/drm/i915/intel_pcode.h index 8d2198e29422..c91a821a88d4 100644 --- a/drivers/gpu/drm/i915/intel_pcode.h +++ b/drivers/gpu/drm/i915/intel_pcode.h @@ -8,13 +8,13 @@ #include <linux/types.h> +struct drm_device; struct intel_uncore; int snb_pcode_read(struct intel_uncore *uncore, u32 mbox, u32 *val, u32 *val1); -int snb_pcode_write_timeout(struct intel_uncore *uncore, u32 mbox, u32 val, - int fast_timeout_us, int slow_timeout_ms); +int snb_pcode_write_timeout(struct intel_uncore *uncore, u32 mbox, u32 val, int timeout_ms); #define snb_pcode_write(uncore, mbox, val) \ - snb_pcode_write_timeout(uncore, mbox, val, 500, 0) + snb_pcode_write_timeout((uncore), (mbox), (val), 1) int skl_pcode_request(struct intel_uncore *uncore, u32 mbox, u32 request, u32 reply_mask, u32 reply, int timeout_base_ms); @@ -27,4 +27,13 @@ int intel_pcode_init(struct intel_uncore *uncore); int snb_pcode_read_p(struct intel_uncore *uncore, u32 mbcmd, u32 p1, u32 p2, u32 *val); int snb_pcode_write_p(struct intel_uncore *uncore, u32 mbcmd, u32 p1, u32 p2, u32 val); +/* Helpers with drm device */ +int intel_pcode_read(struct drm_device *drm, u32 mbox, u32 *val, u32 *val1); +int intel_pcode_write_timeout(struct drm_device *drm, u32 mbox, u32 val, int timeout_ms); +#define intel_pcode_write(drm, mbox, val) \ + intel_pcode_write_timeout((drm), (mbox), (val), 1) + +int intel_pcode_request(struct drm_device *drm, u32 mbox, u32 request, + u32 reply_mask, u32 reply, int timeout_base_ms); + #endif /* _INTEL_PCODE_H */ diff --git a/drivers/gpu/drm/i915/intel_runtime_pm.c b/drivers/gpu/drm/i915/intel_runtime_pm.c index 
8d9f4c410546..7ce3e6de0c19 100644 --- a/drivers/gpu/drm/i915/intel_runtime_pm.c +++ b/drivers/gpu/drm/i915/intel_runtime_pm.c @@ -59,7 +59,9 @@ static struct drm_i915_private *rpm_to_i915(struct intel_runtime_pm *rpm) static void init_intel_runtime_pm_wakeref(struct intel_runtime_pm *rpm) { - ref_tracker_dir_init(&rpm->debug, INTEL_REFTRACK_DEAD_COUNT, dev_name(rpm->kdev)); + if (!rpm->debug.class) + ref_tracker_dir_init(&rpm->debug, INTEL_REFTRACK_DEAD_COUNT, + "intel_runtime_pm"); } static intel_wakeref_t diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c index c8e29fd72290..8cb59f8d1f4c 100644 --- a/drivers/gpu/drm/i915/intel_uncore.c +++ b/drivers/gpu/drm/i915/intel_uncore.c @@ -21,19 +21,20 @@ * IN THE SOFTWARE. */ -#include <drm/drm_managed.h> #include <linux/pm_runtime.h> -#include "display/intel_display_core.h" +#include <drm/drm_managed.h> -#include "gt/intel_gt.h" +#include "display/intel_display_core.h" #include "gt/intel_engine_regs.h" +#include "gt/intel_gt.h" #include "gt/intel_gt_regs.h" #include "i915_drv.h" #include "i915_iosf_mbi.h" #include "i915_reg.h" #include "i915_vgpu.h" +#include "i915_wait_util.h" #include "intel_uncore_trace.h" #define FORCEWAKE_ACK_TIMEOUT_MS 50 @@ -2502,6 +2503,7 @@ static int sanity_check_mmio_access(struct intel_uncore *uncore) int intel_uncore_init_mmio(struct intel_uncore *uncore) { struct drm_i915_private *i915 = uncore->i915; + struct intel_display *display = i915->display; int ret; ret = sanity_check_mmio_access(uncore); @@ -2536,7 +2538,7 @@ int intel_uncore_init_mmio(struct intel_uncore *uncore) GEM_BUG_ON(intel_uncore_has_forcewake(uncore) != !!uncore->funcs.read_fw_domains); GEM_BUG_ON(intel_uncore_has_forcewake(uncore) != !!uncore->funcs.write_fw_domains); - if (HAS_FPGA_DBG_UNCLAIMED(i915)) + if (HAS_FPGA_DBG_UNCLAIMED(display)) uncore->flags |= UNCORE_HAS_FPGA_DBG_UNCLAIMED; if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915)) diff --git a/drivers/gpu/drm/i915/intel_wakeref.c b/drivers/gpu/drm/i915/intel_wakeref.c index 51561b190b93..7fa194de5d35 100644 --- a/drivers/gpu/drm/i915/intel_wakeref.c +++ b/drivers/gpu/drm/i915/intel_wakeref.c @@ -114,7 +114,8 @@ void __intel_wakeref_init(struct intel_wakeref *wf, "wakeref.work", &key->work, 0); #if IS_ENABLED(CONFIG_DRM_I915_DEBUG_WAKEREF) - ref_tracker_dir_init(&wf->debug, INTEL_REFTRACK_DEAD_COUNT, name); + if (!wf->debug.class) + ref_tracker_dir_init(&wf->debug, INTEL_REFTRACK_DEAD_COUNT, "intel_wakeref"); #endif } diff --git a/drivers/gpu/drm/i915/pxp/intel_pxp.c b/drivers/gpu/drm/i915/pxp/intel_pxp.c index f8da693ad3ce..27d545c4e6a5 100644 --- a/drivers/gpu/drm/i915/pxp/intel_pxp.c +++ b/drivers/gpu/drm/i915/pxp/intel_pxp.c @@ -2,15 +2,15 @@ /* * Copyright(c) 2020 Intel Corporation. 
*/ + #include <linux/workqueue.h> #include "gem/i915_gem_context.h" - #include "gt/intel_context.h" #include "gt/intel_gt.h" #include "i915_drv.h" - +#include "i915_wait_util.h" #include "intel_pxp.h" #include "intel_pxp_gsccs.h" #include "intel_pxp_irq.h" diff --git a/drivers/gpu/drm/i915/pxp/intel_pxp_debugfs.c b/drivers/gpu/drm/i915/pxp/intel_pxp_debugfs.c index e07c5b380789..545f79eb0cc5 100644 --- a/drivers/gpu/drm/i915/pxp/intel_pxp_debugfs.c +++ b/drivers/gpu/drm/i915/pxp/intel_pxp_debugfs.c @@ -69,17 +69,17 @@ DEFINE_SIMPLE_ATTRIBUTE(pxp_terminate_fops, pxp_terminate_get, pxp_terminate_set void intel_pxp_debugfs_register(struct intel_pxp *pxp) { - struct drm_minor *minor; + struct dentry *debugfs_root; struct dentry *pxproot; if (!intel_pxp_is_supported(pxp)) return; - minor = pxp->ctrl_gt->i915->drm.primary; - if (!minor->debugfs_root) + debugfs_root = pxp->ctrl_gt->i915->drm.debugfs_root; + if (!debugfs_root) return; - pxproot = debugfs_create_dir("pxp", minor->debugfs_root); + pxproot = debugfs_create_dir("pxp", debugfs_root); if (IS_ERR(pxproot)) return; diff --git a/drivers/gpu/drm/i915/selftests/i915_request.c b/drivers/gpu/drm/i915/selftests/i915_request.c index 88870844b5bd..48cd617247d1 100644 --- a/drivers/gpu/drm/i915/selftests/i915_request.c +++ b/drivers/gpu/drm/i915/selftests/i915_request.c @@ -22,14 +22,13 @@ * */ -#include <linux/prime_numbers.h> #include <linux/pm_qos.h> +#include <linux/prime_numbers.h> #include <linux/sort.h> #include "gem/i915_gem_internal.h" #include "gem/i915_gem_pm.h" #include "gem/selftests/mock_context.h" - #include "gt/intel_engine_heartbeat.h" #include "gt/intel_engine_pm.h" #include "gt/intel_engine_user.h" @@ -40,11 +39,11 @@ #include "i915_random.h" #include "i915_selftest.h" +#include "i915_wait_util.h" #include "igt_flush_test.h" #include "igt_live_test.h" #include "igt_spinner.h" #include "lib_sw_fence.h" - #include "mock_drm.h" #include "mock_gem_device.h" @@ -73,8 +72,8 @@ static int igt_add_request(void *arg) /* Basic preliminary test to create a request and let it loose! 
*/ request = mock_request(rcs0(i915)->kernel_context, HZ / 10); - if (!request) - return -ENOMEM; + if (IS_ERR(request)) + return PTR_ERR(request); i915_request_add(request); @@ -91,8 +90,8 @@ static int igt_wait_request(void *arg) /* Submit a request, then wait upon it */ request = mock_request(rcs0(i915)->kernel_context, T); - if (!request) - return -ENOMEM; + if (IS_ERR(request)) + return PTR_ERR(request); i915_request_get(request); @@ -160,8 +159,8 @@ static int igt_fence_wait(void *arg) /* Submit a request, treat it as a fence and wait upon it */ request = mock_request(rcs0(i915)->kernel_context, T); - if (!request) - return -ENOMEM; + if (IS_ERR(request)) + return PTR_ERR(request); if (dma_fence_wait_timeout(&request->fence, false, T) != -ETIME) { pr_err("fence wait success before submit (expected timeout)!\n"); @@ -219,8 +218,8 @@ static int igt_request_rewind(void *arg) GEM_BUG_ON(IS_ERR(ce)); request = mock_request(ce, 2 * HZ); intel_context_put(ce); - if (!request) { - err = -ENOMEM; + if (IS_ERR(request)) { + err = PTR_ERR(request); goto err_context_0; } @@ -237,8 +236,8 @@ static int igt_request_rewind(void *arg) GEM_BUG_ON(IS_ERR(ce)); vip = mock_request(ce, 0); intel_context_put(ce); - if (!vip) { - err = -ENOMEM; + if (IS_ERR(vip)) { + err = PTR_ERR(vip); goto err_context_1; } diff --git a/drivers/gpu/drm/i915/selftests/i915_selftest.c b/drivers/gpu/drm/i915/selftests/i915_selftest.c index 889281819c5b..9c276c9d0a75 100644 --- a/drivers/gpu/drm/i915/selftests/i915_selftest.c +++ b/drivers/gpu/drm/i915/selftests/i915_selftest.c @@ -31,7 +31,7 @@ #include "i915_driver.h" #include "i915_drv.h" #include "i915_selftest.h" - +#include "i915_wait_util.h" #include "igt_flush_test.h" struct i915_selftest i915_selftest __read_mostly = { diff --git a/drivers/gpu/drm/i915/selftests/igt_spinner.c b/drivers/gpu/drm/i915/selftests/igt_spinner.c index 8c3e1f20e5a1..820364171ebe 100644 --- a/drivers/gpu/drm/i915/selftests/igt_spinner.c +++ b/drivers/gpu/drm/i915/selftests/igt_spinner.c @@ -3,12 +3,13 @@ * * Copyright © 2018 Intel Corporation */ -#include "gt/intel_gpu_commands.h" -#include "gt/intel_gt.h" #include "gem/i915_gem_internal.h" #include "gem/selftests/igt_gem_utils.h" +#include "gt/intel_gpu_commands.h" +#include "gt/intel_gt.h" +#include "i915_wait_util.h" #include "igt_spinner.h" int igt_spinner_init(struct igt_spinner *spin, struct intel_gt *gt) diff --git a/drivers/gpu/drm/i915/selftests/intel_memory_region.c b/drivers/gpu/drm/i915/selftests/intel_memory_region.c index f08f6674911e..7b856b5090f9 100644 --- a/drivers/gpu/drm/i915/selftests/intel_memory_region.c +++ b/drivers/gpu/drm/i915/selftests/intel_memory_region.c @@ -413,15 +413,8 @@ static int igt_mock_splintered_region(void *arg) close_objects(mem, &objects); - /* - * While we should be able allocate everything without any flag - * restrictions, if we consider I915_BO_ALLOC_CONTIGUOUS then we are - * actually limited to the largest power-of-two for the region size i.e - * max_order, due to the inner workings of the buddy allocator. So make - * sure that does indeed hold true. 
- */ - - obj = igt_object_create(mem, &objects, size, I915_BO_ALLOC_CONTIGUOUS); + obj = igt_object_create(mem, &objects, roundup_pow_of_two(size), + I915_BO_ALLOC_CONTIGUOUS); if (!IS_ERR(obj)) { pr_err("%s too large contiguous allocation was not rejected\n", __func__); @@ -429,8 +422,7 @@ static int igt_mock_splintered_region(void *arg) goto out_close; } - obj = igt_object_create(mem, &objects, rounddown_pow_of_two(size), - I915_BO_ALLOC_CONTIGUOUS); + obj = igt_object_create(mem, &objects, size, I915_BO_ALLOC_CONTIGUOUS); if (IS_ERR(obj)) { pr_err("%s largest possible contiguous allocation failed\n", __func__); diff --git a/drivers/gpu/drm/i915/selftests/intel_uncore.c b/drivers/gpu/drm/i915/selftests/intel_uncore.c index 41eaa9b7f67d..58bcbdcef563 100644 --- a/drivers/gpu/drm/i915/selftests/intel_uncore.c +++ b/drivers/gpu/drm/i915/selftests/intel_uncore.c @@ -277,13 +277,15 @@ static int live_forcewake_domains(void *arg) #define FW_RANGE 0x40000 struct intel_gt *gt = arg; struct intel_uncore *uncore = gt->uncore; + struct drm_i915_private *i915 = gt->i915; + struct intel_display *display = i915->display; unsigned long *valid; u32 offset; int err; - if (!HAS_FPGA_DBG_UNCLAIMED(gt->i915) && - !IS_VALLEYVIEW(gt->i915) && - !IS_CHERRYVIEW(gt->i915)) + if (!HAS_FPGA_DBG_UNCLAIMED(display) && + !IS_VALLEYVIEW(i915) && + !IS_CHERRYVIEW(i915)) return 0; /* diff --git a/drivers/gpu/drm/i915/selftests/mock_request.c b/drivers/gpu/drm/i915/selftests/mock_request.c index 09f747228dff..1b0cf073e964 100644 --- a/drivers/gpu/drm/i915/selftests/mock_request.c +++ b/drivers/gpu/drm/i915/selftests/mock_request.c @@ -35,7 +35,7 @@ mock_request(struct intel_context *ce, unsigned long delay) /* NB the i915->requests slab cache is enlarged to fit mock_request */ request = intel_context_create_request(ce); if (IS_ERR(request)) - return NULL; + return request; request->mock.delay = delay; return request; diff --git a/drivers/gpu/drm/i915/soc/intel_dram.c b/drivers/gpu/drm/i915/soc/intel_dram.c index 59032c939d0f..edffaed8f9a7 100644 --- a/drivers/gpu/drm/i915/soc/intel_dram.c +++ b/drivers/gpu/drm/i915/soc/intel_dram.c @@ -11,9 +11,11 @@ #include "i915_drv.h" #include "i915_reg.h" +#include "i915_utils.h" #include "intel_dram.h" #include "intel_mchbar_regs.h" #include "intel_pcode.h" +#include "intel_uncore.h" #include "vlv_iosf_sb.h" struct dram_dimm_info { @@ -29,10 +31,11 @@ struct dram_channel_info { #define DRAM_TYPE_STR(type) [INTEL_DRAM_ ## type] = #type -static const char *intel_dram_type_str(enum intel_dram_type type) +const char *intel_dram_type_str(enum intel_dram_type type) { static const char * const str[] = { DRAM_TYPE_STR(UNKNOWN), + DRAM_TYPE_STR(DDR2), DRAM_TYPE_STR(DDR3), DRAM_TYPE_STR(DDR4), DRAM_TYPE_STR(LPDDR3), @@ -53,9 +56,10 @@ static const char *intel_dram_type_str(enum intel_dram_type type) #undef DRAM_TYPE_STR -static bool pnv_is_ddr3(struct drm_i915_private *i915) +static enum intel_dram_type pnv_dram_type(struct drm_i915_private *i915) { - return intel_uncore_read(&i915->uncore, CSHRDDR3CTL) & CSHRDDR3CTL_DDR3; + return intel_uncore_read(&i915->uncore, CSHRDDR3CTL) & CSHRDDR3CTL_DDR3 ? 
+ INTEL_DRAM_DDR3 : INTEL_DRAM_DDR2; } static unsigned int pnv_mem_freq(struct drm_i915_private *dev_priv) @@ -134,25 +138,21 @@ static unsigned int vlv_mem_freq(struct drm_i915_private *i915) return 0; } -static void detect_mem_freq(struct drm_i915_private *i915) +unsigned int intel_mem_freq(struct drm_i915_private *i915) { if (IS_PINEVIEW(i915)) - i915->mem_freq = pnv_mem_freq(i915); + return pnv_mem_freq(i915); else if (GRAPHICS_VER(i915) == 5) - i915->mem_freq = ilk_mem_freq(i915); + return ilk_mem_freq(i915); else if (IS_CHERRYVIEW(i915)) - i915->mem_freq = chv_mem_freq(i915); + return chv_mem_freq(i915); else if (IS_VALLEYVIEW(i915)) - i915->mem_freq = vlv_mem_freq(i915); - - if (IS_PINEVIEW(i915)) - i915->is_ddr3 = pnv_is_ddr3(i915); - - if (i915->mem_freq) - drm_dbg(&i915->drm, "DDR speed: %d kHz\n", i915->mem_freq); + return vlv_mem_freq(i915); + else + return 0; } -unsigned int i9xx_fsb_freq(struct drm_i915_private *i915) +static unsigned int i9xx_fsb_freq(struct drm_i915_private *i915) { u32 fsb; @@ -234,15 +234,30 @@ static unsigned int ilk_fsb_freq(struct drm_i915_private *dev_priv) } } -static void detect_fsb_freq(struct drm_i915_private *i915) +unsigned int intel_fsb_freq(struct drm_i915_private *i915) { if (GRAPHICS_VER(i915) == 5) - i915->fsb_freq = ilk_fsb_freq(i915); + return ilk_fsb_freq(i915); else if (GRAPHICS_VER(i915) == 3 || GRAPHICS_VER(i915) == 4) - i915->fsb_freq = i9xx_fsb_freq(i915); + return i9xx_fsb_freq(i915); + else + return 0; +} - if (i915->fsb_freq) - drm_dbg(&i915->drm, "FSB frequency: %d kHz\n", i915->fsb_freq); +static int i915_get_dram_info(struct drm_i915_private *i915, struct dram_info *dram_info) +{ + dram_info->fsb_freq = intel_fsb_freq(i915); + if (dram_info->fsb_freq) + drm_dbg(&i915->drm, "FSB frequency: %d kHz\n", dram_info->fsb_freq); + + dram_info->mem_freq = intel_mem_freq(i915); + if (dram_info->mem_freq) + drm_dbg(&i915->drm, "DDR speed: %d kHz\n", dram_info->mem_freq); + + if (IS_PINEVIEW(i915)) + dram_info->type = pnv_dram_type(i915); + + return 0; } static int intel_dimm_num_devices(const struct dram_dimm_info *dimm) @@ -391,6 +406,9 @@ skl_dram_get_channels_info(struct drm_i915_private *i915, struct dram_info *dram u32 val; int ret; + /* Assume 16Gb DIMMs are present until proven otherwise */ + dram_info->has_16gb_dimms = true; + val = intel_uncore_read(&i915->uncore, SKL_MAD_DIMM_CH0_0_0_0_MCHBAR_MCMAIN); ret = skl_dram_get_channel_info(i915, &ch0, 0, val); @@ -413,13 +431,16 @@ skl_dram_get_channels_info(struct drm_i915_private *i915, struct dram_info *dram return -EINVAL; } - dram_info->wm_lv_0_adjust_needed = ch0.is_16gb_dimm || ch1.is_16gb_dimm; + dram_info->has_16gb_dimms = ch0.is_16gb_dimm || ch1.is_16gb_dimm; dram_info->symmetric_memory = intel_is_dram_symmetric(&ch0, &ch1); drm_dbg_kms(&i915->drm, "Memory configuration is symmetric? 
%s\n", str_yes_no(dram_info->symmetric_memory)); + drm_dbg_kms(&i915->drm, "16Gb DIMMs: %s\n", + str_yes_no(dram_info->has_16gb_dimms)); + return 0; } @@ -590,8 +611,8 @@ static int icl_pcode_read_mem_global_info(struct drm_i915_private *dev_priv, u32 val = 0; int ret; - ret = snb_pcode_read(&dev_priv->uncore, ICL_PCODE_MEM_SUBSYSYSTEM_INFO | - ICL_PCODE_MEM_SS_READ_GLOBAL_INFO, &val, NULL); + ret = intel_pcode_read(&dev_priv->drm, ICL_PCODE_MEM_SUBSYSYSTEM_INFO | + ICL_PCODE_MEM_SS_READ_GLOBAL_INFO, &val, NULL); if (ret) return ret; @@ -648,8 +669,9 @@ static int icl_pcode_read_mem_global_info(struct drm_i915_private *dev_priv, static int gen11_get_dram_info(struct drm_i915_private *i915, struct dram_info *dram_info) { - int ret = skl_get_dram_info(i915, dram_info); + int ret; + ret = skl_dram_get_channels_info(i915, dram_info); if (ret) return ret; @@ -658,8 +680,6 @@ static int gen11_get_dram_info(struct drm_i915_private *i915, struct dram_info * static int gen12_get_dram_info(struct drm_i915_private *i915, struct dram_info *dram_info) { - dram_info->wm_lv_0_adjust_needed = false; - return icl_pcode_read_mem_global_info(i915, dram_info); } @@ -708,13 +728,11 @@ static int xelpdp_get_dram_info(struct drm_i915_private *i915, struct dram_info int intel_dram_detect(struct drm_i915_private *i915) { + struct intel_display *display = i915->display; struct dram_info *dram_info; int ret; - detect_fsb_freq(i915); - detect_mem_freq(i915); - - if (GRAPHICS_VER(i915) < 9 || IS_DG2(i915) || !HAS_DISPLAY(i915)) + if (IS_DG2(i915) || !intel_display_device_present(display)) return 0; dram_info = drmm_kzalloc(&i915->drm, sizeof(*dram_info), GFP_KERNEL); @@ -723,13 +741,7 @@ int intel_dram_detect(struct drm_i915_private *i915) i915->dram_info = dram_info; - /* - * Assume level 0 watermark latency adjustment is needed until proven - * otherwise, this w/a is not needed by bxt/glk. - */ - dram_info->wm_lv_0_adjust_needed = !IS_BROXTON(i915) && !IS_GEMINILAKE(i915); - - if (DISPLAY_VER(i915) >= 14) + if (DISPLAY_VER(display) >= 14) ret = xelpdp_get_dram_info(i915, dram_info); else if (GRAPHICS_VER(i915) >= 12) ret = gen12_get_dram_info(i915, dram_info); @@ -737,23 +749,23 @@ int intel_dram_detect(struct drm_i915_private *i915) ret = gen11_get_dram_info(i915, dram_info); else if (IS_BROXTON(i915) || IS_GEMINILAKE(i915)) ret = bxt_get_dram_info(i915, dram_info); - else + else if (GRAPHICS_VER(i915) >= 9) ret = skl_get_dram_info(i915, dram_info); + else + ret = i915_get_dram_info(i915, dram_info); drm_dbg_kms(&i915->drm, "DRAM type: %s\n", intel_dram_type_str(dram_info->type)); + drm_dbg_kms(&i915->drm, "DRAM channels: %u\n", dram_info->num_channels); + + drm_dbg_kms(&i915->drm, "Num QGV points %u\n", dram_info->num_qgv_points); + drm_dbg_kms(&i915->drm, "Num PSF GV points %u\n", dram_info->num_psf_gv_points); + /* TODO: Do we want to abort probe on dram detection failures? 
*/ if (ret) return 0; - drm_dbg_kms(&i915->drm, "Num qgv points %u\n", dram_info->num_qgv_points); - - drm_dbg_kms(&i915->drm, "DRAM channels: %u\n", dram_info->num_channels); - - drm_dbg_kms(&i915->drm, "Watermark level 0 adjustment needed: %s\n", - str_yes_no(dram_info->wm_lv_0_adjust_needed)); - return 0; } diff --git a/drivers/gpu/drm/i915/soc/intel_dram.h b/drivers/gpu/drm/i915/soc/intel_dram.h index 2a696e03aad4..03a973f1c941 100644 --- a/drivers/gpu/drm/i915/soc/intel_dram.h +++ b/drivers/gpu/drm/i915/soc/intel_dram.h @@ -12,11 +12,9 @@ struct drm_i915_private; struct drm_device; struct dram_info { - bool wm_lv_0_adjust_needed; - u8 num_channels; - bool symmetric_memory; enum intel_dram_type { INTEL_DRAM_UNKNOWN, + INTEL_DRAM_DDR2, INTEL_DRAM_DDR3, INTEL_DRAM_DDR4, INTEL_DRAM_LPDDR3, @@ -27,13 +25,20 @@ struct dram_info { INTEL_DRAM_GDDR_ECC, __INTEL_DRAM_TYPE_MAX, } type; + unsigned int fsb_freq; + unsigned int mem_freq; + u8 num_channels; u8 num_qgv_points; u8 num_psf_gv_points; + bool symmetric_memory; + bool has_16gb_dimms; }; void intel_dram_edram_detect(struct drm_i915_private *i915); int intel_dram_detect(struct drm_i915_private *i915); -unsigned int i9xx_fsb_freq(struct drm_i915_private *i915); +unsigned int intel_fsb_freq(struct drm_i915_private *i915); +unsigned int intel_mem_freq(struct drm_i915_private *i915); const struct dram_info *intel_dram_info(struct drm_device *drm); +const char *intel_dram_type_str(enum intel_dram_type type); #endif /* __INTEL_DRAM_H__ */ diff --git a/drivers/gpu/drm/i915/soc/intel_gmch.c b/drivers/gpu/drm/i915/soc/intel_gmch.c index 5346b8dda79a..f210c9655b53 100644 --- a/drivers/gpu/drm/i915/soc/intel_gmch.c +++ b/drivers/gpu/drm/i915/soc/intel_gmch.c @@ -148,7 +148,8 @@ void intel_gmch_bar_teardown(struct drm_i915_private *i915) int intel_gmch_vga_set_state(struct drm_i915_private *i915, bool enable_decode) { - unsigned int reg = DISPLAY_VER(i915) >= 6 ? SNB_GMCH_CTRL : INTEL_GMCH_CTRL; + struct intel_display *display = i915->display; + unsigned int reg = DISPLAY_VER(display) >= 6 ? 
SNB_GMCH_CTRL : INTEL_GMCH_CTRL; u16 gmch_ctrl; if (pci_read_config_word(i915->gmch.pdev, reg, &gmch_ctrl)) { diff --git a/drivers/gpu/drm/i915/vlv_suspend.c b/drivers/gpu/drm/i915/vlv_suspend.c index fc9f311ea1db..221e4c0b2c58 100644 --- a/drivers/gpu/drm/i915/vlv_suspend.c +++ b/drivers/gpu/drm/i915/vlv_suspend.c @@ -8,16 +8,17 @@ #include <drm/drm_print.h> +#include "gt/intel_gt_regs.h" + #include "i915_drv.h" #include "i915_reg.h" #include "i915_trace.h" #include "i915_utils.h" +#include "i915_wait_util.h" #include "intel_clock_gating.h" #include "intel_uncore_trace.h" #include "vlv_suspend.h" -#include "gt/intel_gt_regs.h" - struct vlv_s0ix_state { /* GAM */ u32 wr_watermark; diff --git a/drivers/gpu/drm/imagination/Kconfig b/drivers/gpu/drm/imagination/Kconfig index 3bfa2ac212dc..682dd2633d0c 100644 --- a/drivers/gpu/drm/imagination/Kconfig +++ b/drivers/gpu/drm/imagination/Kconfig @@ -3,8 +3,9 @@ config DRM_POWERVR tristate "Imagination Technologies PowerVR (Series 6 and later) & IMG Graphics" - depends on ARM64 + depends on (ARM64 || RISCV && 64BIT) depends on DRM + depends on MMU depends on PM select DRM_EXEC select DRM_GEM_SHMEM_HELPER diff --git a/drivers/gpu/drm/imagination/pvr_device.c b/drivers/gpu/drm/imagination/pvr_device.c index 8b9ba4983c4c..294b6019b415 100644 --- a/drivers/gpu/drm/imagination/pvr_device.c +++ b/drivers/gpu/drm/imagination/pvr_device.c @@ -23,6 +23,7 @@ #include <linux/firmware.h> #include <linux/gfp.h> #include <linux/interrupt.h> +#include <linux/of.h> #include <linux/platform_device.h> #include <linux/pm_runtime.h> #include <linux/reset.h> @@ -121,21 +122,6 @@ static int pvr_device_clk_init(struct pvr_device *pvr_dev) return 0; } -static int pvr_device_reset_init(struct pvr_device *pvr_dev) -{ - struct drm_device *drm_dev = from_pvr_device(pvr_dev); - struct reset_control *reset; - - reset = devm_reset_control_get_optional_exclusive(drm_dev->dev, NULL); - if (IS_ERR(reset)) - return dev_err_probe(drm_dev->dev, PTR_ERR(reset), - "failed to get gpu reset line\n"); - - pvr_dev->reset = reset; - - return 0; -} - /** * pvr_device_process_active_queues() - Process all queue related events. * @pvr_dev: PowerVR device to check @@ -618,6 +604,9 @@ pvr_device_init(struct pvr_device *pvr_dev) struct device *dev = drm_dev->dev; int err; + /* Get the platform-specific data based on the compatible string. */ + pvr_dev->device_data = of_device_get_match_data(dev); + /* * Setup device parameters. We do this first in case other steps * depend on them. @@ -631,8 +620,7 @@ pvr_device_init(struct pvr_device *pvr_dev) if (err) return err; - /* Get the reset line for the GPU */ - err = pvr_device_reset_init(pvr_dev); + err = pvr_dev->device_data->pwr_ops->init(pvr_dev); if (err) return err; diff --git a/drivers/gpu/drm/imagination/pvr_device.h b/drivers/gpu/drm/imagination/pvr_device.h index 7cb01c38d2a9..ab8f56ae15df 100644 --- a/drivers/gpu/drm/imagination/pvr_device.h +++ b/drivers/gpu/drm/imagination/pvr_device.h @@ -37,6 +37,9 @@ struct clk; /* Forward declaration from <linux/firmware.h>. */ struct firmware; +/* Forward declaration from <linux/pwrseq/consumer.h> */ +struct pwrseq_desc; + /** * struct pvr_gpu_id - Hardware GPU ID information for a PowerVR device * @b: Branch ID. @@ -58,6 +61,14 @@ struct pvr_fw_version { }; /** + * struct pvr_device_data - Platform specific data associated with a compatible string. + * @pwr_ops: Pointer to a structure with platform-specific power functions. 
+ */ +struct pvr_device_data { + const struct pvr_power_sequence_ops *pwr_ops; +}; + +/** * struct pvr_device - powervr-specific wrapper for &struct drm_device */ struct pvr_device { @@ -98,6 +109,9 @@ struct pvr_device { /** @fw_version: Firmware version detected at runtime. */ struct pvr_fw_version fw_version; + /** @device_data: Pointer to platform-specific data. */ + const struct pvr_device_data *device_data; + /** @regs_resource: Resource representing device control registers. */ struct resource *regs_resource; @@ -148,6 +162,9 @@ struct pvr_device { */ struct reset_control *reset; + /** @pwrseq: Pointer to a power sequencer, if one is used. */ + struct pwrseq_desc *pwrseq; + /** @irq: IRQ number. */ int irq; diff --git a/drivers/gpu/drm/imagination/pvr_drv.c b/drivers/gpu/drm/imagination/pvr_drv.c index b058ec183bb3..916b40ced7eb 100644 --- a/drivers/gpu/drm/imagination/pvr_drv.c +++ b/drivers/gpu/drm/imagination/pvr_drv.c @@ -1480,15 +1480,33 @@ static void pvr_remove(struct platform_device *plat_dev) pvr_power_domains_fini(pvr_dev); } +static const struct pvr_device_data pvr_device_data_manual = { + .pwr_ops = &pvr_power_sequence_ops_manual, +}; + +static const struct pvr_device_data pvr_device_data_pwrseq = { + .pwr_ops = &pvr_power_sequence_ops_pwrseq, +}; + static const struct of_device_id dt_match[] = { - { .compatible = "img,img-rogue", .data = NULL }, + { + .compatible = "thead,th1520-gpu", + .data = &pvr_device_data_pwrseq, + }, + { + .compatible = "img,img-rogue", + .data = &pvr_device_data_manual, + }, /* * This legacy compatible string was introduced early on before the more generic * "img,img-rogue" was added. Keep it around here for compatibility, but never use * "img,img-axe" in new devicetrees. */ - { .compatible = "img,img-axe", .data = NULL }, + { + .compatible = "img,img-axe", + .data = &pvr_device_data_manual, + }, {} }; MODULE_DEVICE_TABLE(of, dt_match); @@ -1513,4 +1531,5 @@ MODULE_DESCRIPTION(PVR_DRIVER_DESC); MODULE_LICENSE("Dual MIT/GPL"); MODULE_IMPORT_NS("DMA_BUF"); MODULE_FIRMWARE("powervr/rogue_33.15.11.3_v1.fw"); +MODULE_FIRMWARE("powervr/rogue_36.52.104.182_v1.fw"); MODULE_FIRMWARE("powervr/rogue_36.53.104.796_v1.fw"); diff --git a/drivers/gpu/drm/imagination/pvr_power.c b/drivers/gpu/drm/imagination/pvr_power.c index 41f5d89e78b8..c6e7ff9e935d 100644 --- a/drivers/gpu/drm/imagination/pvr_power.c +++ b/drivers/gpu/drm/imagination/pvr_power.c @@ -18,6 +18,7 @@ #include <linux/platform_device.h> #include <linux/pm_domain.h> #include <linux/pm_runtime.h> +#include <linux/pwrseq/consumer.h> #include <linux/reset.h> #include <linux/timer.h> #include <linux/types.h> @@ -234,6 +235,118 @@ pvr_watchdog_init(struct pvr_device *pvr_dev) return 0; } +static int pvr_power_init_manual(struct pvr_device *pvr_dev) +{ + struct drm_device *drm_dev = from_pvr_device(pvr_dev); + struct reset_control *reset; + + reset = devm_reset_control_get_optional_exclusive(drm_dev->dev, NULL); + if (IS_ERR(reset)) + return dev_err_probe(drm_dev->dev, PTR_ERR(reset), + "failed to get gpu reset line\n"); + + pvr_dev->reset = reset; + + return 0; +} + +static int pvr_power_on_sequence_manual(struct pvr_device *pvr_dev) +{ + int err; + + err = clk_prepare_enable(pvr_dev->core_clk); + if (err) + return err; + + err = clk_prepare_enable(pvr_dev->sys_clk); + if (err) + goto err_core_clk_disable; + + err = clk_prepare_enable(pvr_dev->mem_clk); + if (err) + goto err_sys_clk_disable; + + /* + * According to the hardware manual, a delay of at least 32 clock + * cycles is required between 
de-asserting the clkgen reset and + * de-asserting the GPU reset. Assuming a worst-case scenario with + * a very high GPU clock frequency, a delay of 1 microsecond is + * sufficient to ensure this requirement is met across all + * feasible GPU clock speeds. + */ + udelay(1); + + err = reset_control_deassert(pvr_dev->reset); + if (err) + goto err_mem_clk_disable; + + return 0; + +err_mem_clk_disable: + clk_disable_unprepare(pvr_dev->mem_clk); + +err_sys_clk_disable: + clk_disable_unprepare(pvr_dev->sys_clk); + +err_core_clk_disable: + clk_disable_unprepare(pvr_dev->core_clk); + + return err; +} + +static int pvr_power_off_sequence_manual(struct pvr_device *pvr_dev) +{ + int err; + + err = reset_control_assert(pvr_dev->reset); + + clk_disable_unprepare(pvr_dev->mem_clk); + clk_disable_unprepare(pvr_dev->sys_clk); + clk_disable_unprepare(pvr_dev->core_clk); + + return err; +} + +const struct pvr_power_sequence_ops pvr_power_sequence_ops_manual = { + .init = pvr_power_init_manual, + .power_on = pvr_power_on_sequence_manual, + .power_off = pvr_power_off_sequence_manual, +}; + +static int pvr_power_init_pwrseq(struct pvr_device *pvr_dev) +{ + struct device *dev = from_pvr_device(pvr_dev)->dev; + + pvr_dev->pwrseq = devm_pwrseq_get(dev, "gpu-power"); + if (IS_ERR(pvr_dev->pwrseq)) { + /* + * This platform requires a sequencer. If we can't get it, we + * must return the error (including -EPROBE_DEFER to wait for + * the provider to appear) + */ + return dev_err_probe(dev, PTR_ERR(pvr_dev->pwrseq), + "Failed to get required power sequencer\n"); + } + + return 0; +} + +static int pvr_power_on_sequence_pwrseq(struct pvr_device *pvr_dev) +{ + return pwrseq_power_on(pvr_dev->pwrseq); +} + +static int pvr_power_off_sequence_pwrseq(struct pvr_device *pvr_dev) +{ + return pwrseq_power_off(pvr_dev->pwrseq); +} + +const struct pvr_power_sequence_ops pvr_power_sequence_ops_pwrseq = { + .init = pvr_power_init_pwrseq, + .power_on = pvr_power_on_sequence_pwrseq, + .power_off = pvr_power_off_sequence_pwrseq, +}; + int pvr_power_device_suspend(struct device *dev) { @@ -252,11 +365,7 @@ pvr_power_device_suspend(struct device *dev) goto err_drm_dev_exit; } - clk_disable_unprepare(pvr_dev->mem_clk); - clk_disable_unprepare(pvr_dev->sys_clk); - clk_disable_unprepare(pvr_dev->core_clk); - - err = reset_control_assert(pvr_dev->reset); + err = pvr_dev->device_data->pwr_ops->power_off(pvr_dev); err_drm_dev_exit: drm_dev_exit(idx); @@ -276,53 +385,22 @@ pvr_power_device_resume(struct device *dev) if (!drm_dev_enter(drm_dev, &idx)) return -EIO; - err = clk_prepare_enable(pvr_dev->core_clk); + err = pvr_dev->device_data->pwr_ops->power_on(pvr_dev); if (err) goto err_drm_dev_exit; - err = clk_prepare_enable(pvr_dev->sys_clk); - if (err) - goto err_core_clk_disable; - - err = clk_prepare_enable(pvr_dev->mem_clk); - if (err) - goto err_sys_clk_disable; - - /* - * According to the hardware manual, a delay of at least 32 clock - * cycles is required between de-asserting the clkgen reset and - * de-asserting the GPU reset. Assuming a worst-case scenario with - * a very high GPU clock frequency, a delay of 1 microsecond is - * sufficient to ensure this requirement is met across all - * feasible GPU clock speeds. 
- */ - udelay(1); - - err = reset_control_deassert(pvr_dev->reset); - if (err) - goto err_mem_clk_disable; - if (pvr_dev->fw_dev.booted) { err = pvr_power_fw_enable(pvr_dev); if (err) - goto err_reset_assert; + goto err_power_off; } drm_dev_exit(idx); return 0; -err_reset_assert: - reset_control_assert(pvr_dev->reset); - -err_mem_clk_disable: - clk_disable_unprepare(pvr_dev->mem_clk); - -err_sys_clk_disable: - clk_disable_unprepare(pvr_dev->sys_clk); - -err_core_clk_disable: - clk_disable_unprepare(pvr_dev->core_clk); +err_power_off: + pvr_dev->device_data->pwr_ops->power_off(pvr_dev); err_drm_dev_exit: drm_dev_exit(idx); @@ -340,6 +418,63 @@ pvr_power_device_idle(struct device *dev) return pvr_power_is_idle(pvr_dev) ? 0 : -EBUSY; } +static int +pvr_power_clear_error(struct pvr_device *pvr_dev) +{ + struct device *dev = from_pvr_device(pvr_dev)->dev; + int err; + + /* Ensure the device state is known and nothing is happening past this point */ + pm_runtime_disable(dev); + + /* Attempt to clear the runtime PM error by setting the current state again */ + if (pm_runtime_status_suspended(dev)) + err = pm_runtime_set_suspended(dev); + else + err = pm_runtime_set_active(dev); + + if (err) { + drm_err(from_pvr_device(pvr_dev), + "%s: Failed to clear runtime PM error (new error %d)\n", + __func__, err); + } + + pm_runtime_enable(dev); + + return err; +} + +/** + * pvr_power_get_clear() - Acquire a power reference, correcting any errors + * @pvr_dev: Device pointer + * + * Attempt to acquire a power reference on the device. If the runtime PM + * is in error state, attempt to clear the error and retry. + * + * Returns: + * * 0 on success, or + * * Any error code returned by pvr_power_get() or the runtime PM API. + */ +static int +pvr_power_get_clear(struct pvr_device *pvr_dev) +{ + int err; + + err = pvr_power_get(pvr_dev); + if (err == 0) + return err; + + drm_warn(from_pvr_device(pvr_dev), + "%s: pvr_power_get returned error %d, attempting recovery\n", + __func__, err); + + err = pvr_power_clear_error(pvr_dev); + if (err) + return err; + + return pvr_power_get(pvr_dev); +} + /** * pvr_power_reset() - Reset the GPU * @pvr_dev: Device pointer @@ -364,7 +499,7 @@ pvr_power_reset(struct pvr_device *pvr_dev, bool hard_reset) * Take a power reference during the reset. This should prevent any interference with the * power state during reset. */ - WARN_ON(pvr_power_get(pvr_dev)); + WARN_ON(pvr_power_get_clear(pvr_dev)); down_write(&pvr_dev->reset_sem); @@ -386,13 +521,13 @@ pvr_power_reset(struct pvr_device *pvr_dev, bool hard_reset) if (!err) { if (hard_reset) { pvr_dev->fw_dev.booted = false; - WARN_ON(pm_runtime_force_suspend(from_pvr_device(pvr_dev)->dev)); + WARN_ON(pvr_power_device_suspend(from_pvr_device(pvr_dev)->dev)); err = pvr_fw_hard_reset(pvr_dev); if (err) goto err_device_lost; - err = pm_runtime_force_resume(from_pvr_device(pvr_dev)->dev); + err = pvr_power_device_resume(from_pvr_device(pvr_dev)->dev); pvr_dev->fw_dev.booted = true; if (err) goto err_device_lost; diff --git a/drivers/gpu/drm/imagination/pvr_power.h b/drivers/gpu/drm/imagination/pvr_power.h index ada85674a7ca..b853d092242c 100644 --- a/drivers/gpu/drm/imagination/pvr_power.h +++ b/drivers/gpu/drm/imagination/pvr_power.h @@ -41,4 +41,19 @@ pvr_power_put(struct pvr_device *pvr_dev) int pvr_power_domains_init(struct pvr_device *pvr_dev); void pvr_power_domains_fini(struct pvr_device *pvr_dev); +/** + * struct pvr_power_sequence_ops - Platform specific power sequence operations. 
+ * @init: Pointer to the platform-specific initialization function. + * @power_on: Pointer to the platform-specific power on function. + * @power_off: Pointer to the platform-specific power off function. + */ +struct pvr_power_sequence_ops { + int (*init)(struct pvr_device *pvr_dev); + int (*power_on)(struct pvr_device *pvr_dev); + int (*power_off)(struct pvr_device *pvr_dev); +}; + +extern const struct pvr_power_sequence_ops pvr_power_sequence_ops_manual; +extern const struct pvr_power_sequence_ops pvr_power_sequence_ops_pwrseq; + #endif /* PVR_POWER_H */ diff --git a/drivers/gpu/drm/imagination/pvr_queue.c b/drivers/gpu/drm/imagination/pvr_queue.c index 5a41ee79fed6..fc415dd0d7a7 100644 --- a/drivers/gpu/drm/imagination/pvr_queue.c +++ b/drivers/gpu/drm/imagination/pvr_queue.c @@ -803,7 +803,7 @@ static void pvr_queue_start(struct pvr_queue *queue) * the scheduler, and re-assign parent fences in the middle. * * Return: - * * DRM_GPU_SCHED_STAT_NOMINAL. + * * DRM_GPU_SCHED_STAT_RESET. */ static enum drm_gpu_sched_stat pvr_queue_timedout_job(struct drm_sched_job *s_job) @@ -854,7 +854,7 @@ pvr_queue_timedout_job(struct drm_sched_job *s_job) drm_sched_start(sched, 0); - return DRM_GPU_SCHED_STAT_NOMINAL; + return DRM_GPU_SCHED_STAT_RESET; } /** diff --git a/drivers/gpu/drm/imagination/pvr_vm.c b/drivers/gpu/drm/imagination/pvr_vm.c index 2896fa7501b1..3d97990170bf 100644 --- a/drivers/gpu/drm/imagination/pvr_vm.c +++ b/drivers/gpu/drm/imagination/pvr_vm.c @@ -185,12 +185,17 @@ struct pvr_vm_bind_op { static int pvr_vm_bind_op_exec(struct pvr_vm_bind_op *bind_op) { switch (bind_op->type) { - case PVR_VM_BIND_TYPE_MAP: + case PVR_VM_BIND_TYPE_MAP: { + const struct drm_gpuvm_map_req map_req = { + .map.va.addr = bind_op->device_addr, + .map.va.range = bind_op->size, + .map.gem.obj = gem_from_pvr_gem(bind_op->pvr_obj), + .map.gem.offset = bind_op->offset, + }; + return drm_gpuvm_sm_map(&bind_op->vm_ctx->gpuvm_mgr, - bind_op, bind_op->device_addr, - bind_op->size, - gem_from_pvr_gem(bind_op->pvr_obj), - bind_op->offset); + bind_op, &map_req); + } case PVR_VM_BIND_TYPE_UNMAP: return drm_gpuvm_sm_unmap(&bind_op->vm_ctx->gpuvm_mgr, diff --git a/drivers/gpu/drm/ingenic/ingenic-drm-drv.c b/drivers/gpu/drm/ingenic/ingenic-drm-drv.c index f851e9ffdb28..9db1ceaed518 100644 --- a/drivers/gpu/drm/ingenic/ingenic-drm-drv.c +++ b/drivers/gpu/drm/ingenic/ingenic-drm-drv.c @@ -901,14 +901,15 @@ static void ingenic_drm_disable_vblank(struct drm_crtc *crtc) static struct drm_framebuffer * ingenic_drm_gem_fb_create(struct drm_device *drm, struct drm_file *file, + const struct drm_format_info *info, const struct drm_mode_fb_cmd2 *mode_cmd) { struct ingenic_drm *priv = drm_device_get_priv(drm); if (priv->soc_info->map_noncoherent) - return drm_gem_fb_create_with_dirty(drm, file, mode_cmd); + return drm_gem_fb_create_with_dirty(drm, file, info, mode_cmd); - return drm_gem_fb_create(drm, file, mode_cmd); + return drm_gem_fb_create(drm, file, info, mode_cmd); } static struct drm_gem_object * diff --git a/drivers/gpu/drm/lima/lima_drv.c b/drivers/gpu/drm/lima/lima_drv.c index 11ace5cebf4c..65210ab081bb 100644 --- a/drivers/gpu/drm/lima/lima_drv.c +++ b/drivers/gpu/drm/lima/lima_drv.c @@ -362,8 +362,8 @@ static const struct bin_attribute lima_error_state_attr = { .attr.name = "error", .attr.mode = 0600, .size = 0, - .read_new = lima_error_state_read, - .write_new = lima_error_state_write, + .read = lima_error_state_read, + .write = lima_error_state_write, }; static int lima_pdev_probe(struct platform_device *pdev) diff 
--git a/drivers/gpu/drm/lima/lima_sched.c b/drivers/gpu/drm/lima/lima_sched.c index 954f4325b859..739e8c6c6d90 100644 --- a/drivers/gpu/drm/lima/lima_sched.c +++ b/drivers/gpu/drm/lima/lima_sched.c @@ -412,7 +412,7 @@ static enum drm_gpu_sched_stat lima_sched_timedout_job(struct drm_sched_job *job */ if (dma_fence_is_signaled(task->fence)) { DRM_WARN("%s spurious timeout\n", lima_ip_name(ip)); - return DRM_GPU_SCHED_STAT_NOMINAL; + return DRM_GPU_SCHED_STAT_RESET; } /* @@ -429,7 +429,7 @@ static enum drm_gpu_sched_stat lima_sched_timedout_job(struct drm_sched_job *job if (dma_fence_is_signaled(task->fence)) { DRM_WARN("%s unexpectedly high interrupt latency\n", lima_ip_name(ip)); - return DRM_GPU_SCHED_STAT_NOMINAL; + return DRM_GPU_SCHED_STAT_RESET; } /* @@ -467,7 +467,7 @@ static enum drm_gpu_sched_stat lima_sched_timedout_job(struct drm_sched_job *job drm_sched_resubmit_jobs(&pipe->base); drm_sched_start(&pipe->base, 0); - return DRM_GPU_SCHED_STAT_NOMINAL; + return DRM_GPU_SCHED_STAT_RESET; } static void lima_sched_free_job(struct drm_sched_job *job) diff --git a/drivers/gpu/drm/mediatek/mtk_crtc.c b/drivers/gpu/drm/mediatek/mtk_crtc.c index 8f6fba4217ec..bc7527542fdc 100644 --- a/drivers/gpu/drm/mediatek/mtk_crtc.c +++ b/drivers/gpu/drm/mediatek/mtk_crtc.c @@ -719,6 +719,39 @@ int mtk_crtc_plane_check(struct drm_crtc *crtc, struct drm_plane *plane, return 0; } +void mtk_crtc_plane_disable(struct drm_crtc *crtc, struct drm_plane *plane) +{ +#if IS_REACHABLE(CONFIG_MTK_CMDQ) + struct mtk_crtc *mtk_crtc = to_mtk_crtc(crtc); + struct mtk_plane_state *plane_state = to_mtk_plane_state(plane->state); + int i; + + /* no need to wait for disabling the plane by CPU */ + if (!mtk_crtc->cmdq_client.chan) + return; + + if (!mtk_crtc->enabled) + return; + + /* set pending plane state to disabled */ + for (i = 0; i < mtk_crtc->layer_nr; i++) { + struct drm_plane *mtk_plane = &mtk_crtc->planes[i]; + struct mtk_plane_state *mtk_plane_state = to_mtk_plane_state(mtk_plane->state); + + if (mtk_plane->index == plane->index) { + memcpy(mtk_plane_state, plane_state, sizeof(*plane_state)); + break; + } + } + mtk_crtc_update_config(mtk_crtc, false); + + /* wait for planes to be disabled by CMDQ */ + wait_event_timeout(mtk_crtc->cb_blocking_queue, + mtk_crtc->cmdq_vblank_cnt == 0, + msecs_to_jiffies(500)); +#endif +} + void mtk_crtc_async_update(struct drm_crtc *crtc, struct drm_plane *plane, struct drm_atomic_state *state) { @@ -930,7 +963,8 @@ static int mtk_crtc_init_comp_planes(struct drm_device *drm_dev, mtk_ddp_comp_supported_rotations(comp), mtk_ddp_comp_get_blend_modes(comp), mtk_ddp_comp_get_formats(comp), - mtk_ddp_comp_get_num_formats(comp), i); + mtk_ddp_comp_get_num_formats(comp), + mtk_ddp_comp_is_afbc_supported(comp), i); if (ret) return ret; diff --git a/drivers/gpu/drm/mediatek/mtk_crtc.h b/drivers/gpu/drm/mediatek/mtk_crtc.h index 388e900b6f4d..828f109b83e7 100644 --- a/drivers/gpu/drm/mediatek/mtk_crtc.h +++ b/drivers/gpu/drm/mediatek/mtk_crtc.h @@ -21,6 +21,7 @@ int mtk_crtc_create(struct drm_device *drm_dev, const unsigned int *path, unsigned int num_conn_routes); int mtk_crtc_plane_check(struct drm_crtc *crtc, struct drm_plane *plane, struct mtk_plane_state *state); +void mtk_crtc_plane_disable(struct drm_crtc *crtc, struct drm_plane *plane); void mtk_crtc_async_update(struct drm_crtc *crtc, struct drm_plane *plane, struct drm_atomic_state *plane_state); struct device *mtk_crtc_dma_dev_get(struct drm_crtc *crtc); diff --git a/drivers/gpu/drm/mediatek/mtk_ddp_comp.c 
b/drivers/gpu/drm/mediatek/mtk_ddp_comp.c index edc6417639e6..ac6620e10262 100644 --- a/drivers/gpu/drm/mediatek/mtk_ddp_comp.c +++ b/drivers/gpu/drm/mediatek/mtk_ddp_comp.c @@ -366,6 +366,7 @@ static const struct mtk_ddp_comp_funcs ddp_ovl = { .get_blend_modes = mtk_ovl_get_blend_modes, .get_formats = mtk_ovl_get_formats, .get_num_formats = mtk_ovl_get_num_formats, + .is_afbc_supported = mtk_ovl_is_afbc_supported, }; static const struct mtk_ddp_comp_funcs ddp_postmask = { diff --git a/drivers/gpu/drm/mediatek/mtk_ddp_comp.h b/drivers/gpu/drm/mediatek/mtk_ddp_comp.h index 39720b27f4e9..7289b3dcf22f 100644 --- a/drivers/gpu/drm/mediatek/mtk_ddp_comp.h +++ b/drivers/gpu/drm/mediatek/mtk_ddp_comp.h @@ -83,6 +83,7 @@ struct mtk_ddp_comp_funcs { u32 (*get_blend_modes)(struct device *dev); const u32 *(*get_formats)(struct device *dev); size_t (*get_num_formats)(struct device *dev); + bool (*is_afbc_supported)(struct device *dev); void (*connect)(struct device *dev, struct device *mmsys_dev, unsigned int next); void (*disconnect)(struct device *dev, struct device *mmsys_dev, unsigned int next); void (*add)(struct device *dev, struct mtk_mutex *mutex); @@ -294,6 +295,14 @@ size_t mtk_ddp_comp_get_num_formats(struct mtk_ddp_comp *comp) return 0; } +static inline bool mtk_ddp_comp_is_afbc_supported(struct mtk_ddp_comp *comp) +{ + if (comp->funcs && comp->funcs->is_afbc_supported) + return comp->funcs->is_afbc_supported(comp->dev); + + return false; +} + static inline bool mtk_ddp_comp_add(struct mtk_ddp_comp *comp, struct mtk_mutex *mutex) { if (comp->funcs && comp->funcs->add) { diff --git a/drivers/gpu/drm/mediatek/mtk_disp_drv.h b/drivers/gpu/drm/mediatek/mtk_disp_drv.h index 04217a36939c..679d413bf10b 100644 --- a/drivers/gpu/drm/mediatek/mtk_disp_drv.h +++ b/drivers/gpu/drm/mediatek/mtk_disp_drv.h @@ -106,6 +106,7 @@ void mtk_ovl_disable_vblank(struct device *dev); u32 mtk_ovl_get_blend_modes(struct device *dev); const u32 *mtk_ovl_get_formats(struct device *dev); size_t mtk_ovl_get_num_formats(struct device *dev); +bool mtk_ovl_is_afbc_supported(struct device *dev); void mtk_ovl_adaptor_add_comp(struct device *dev, struct mtk_mutex *mutex); void mtk_ovl_adaptor_remove_comp(struct device *dev, struct mtk_mutex *mutex); diff --git a/drivers/gpu/drm/mediatek/mtk_disp_ovl.c b/drivers/gpu/drm/mediatek/mtk_disp_ovl.c index d0581c4e3c99..e0236353d499 100644 --- a/drivers/gpu/drm/mediatek/mtk_disp_ovl.c +++ b/drivers/gpu/drm/mediatek/mtk_disp_ovl.c @@ -236,6 +236,13 @@ size_t mtk_ovl_get_num_formats(struct device *dev) return ovl->data->num_formats; } +bool mtk_ovl_is_afbc_supported(struct device *dev) +{ + struct mtk_disp_ovl *ovl = dev_get_drvdata(dev); + + return ovl->data->supports_afbc; +} + int mtk_ovl_clk_enable(struct device *dev) { struct mtk_disp_ovl *ovl = dev_get_drvdata(dev); diff --git a/drivers/gpu/drm/mediatek/mtk_dp.c b/drivers/gpu/drm/mediatek/mtk_dp.c index a5b10b2545dc..bef6eeb30d3e 100644 --- a/drivers/gpu/drm/mediatek/mtk_dp.c +++ b/drivers/gpu/drm/mediatek/mtk_dp.c @@ -2118,7 +2118,8 @@ static void mtk_dp_update_plugged_status(struct mtk_dp *mtk_dp) mutex_unlock(&mtk_dp->update_plugged_status_lock); } -static enum drm_connector_status mtk_dp_bdg_detect(struct drm_bridge *bridge) +static enum drm_connector_status +mtk_dp_bdg_detect(struct drm_bridge *bridge, struct drm_connector *connector) { struct mtk_dp *mtk_dp = mtk_dp_from_bridge(bridge); enum drm_connector_status ret = connector_status_disconnected; diff --git a/drivers/gpu/drm/mediatek/mtk_dpi.c 
b/drivers/gpu/drm/mediatek/mtk_dpi.c index 8b58421bc39d..61cab32e213a 100644 --- a/drivers/gpu/drm/mediatek/mtk_dpi.c +++ b/drivers/gpu/drm/mediatek/mtk_dpi.c @@ -1095,7 +1095,6 @@ static const u32 mt8183_output_fmts[] = { }; static const u32 mt8195_dpi_output_fmts[] = { - MEDIA_BUS_FMT_BGR888_1X24, MEDIA_BUS_FMT_RGB888_1X24, MEDIA_BUS_FMT_RGB888_2X12_LE, MEDIA_BUS_FMT_RGB888_2X12_BE, @@ -1103,18 +1102,19 @@ static const u32 mt8195_dpi_output_fmts[] = { MEDIA_BUS_FMT_YUYV8_1X16, MEDIA_BUS_FMT_YUYV10_1X20, MEDIA_BUS_FMT_YUYV12_1X24, + MEDIA_BUS_FMT_BGR888_1X24, MEDIA_BUS_FMT_YUV8_1X24, MEDIA_BUS_FMT_YUV10_1X30, }; static const u32 mt8195_dp_intf_output_fmts[] = { - MEDIA_BUS_FMT_BGR888_1X24, MEDIA_BUS_FMT_RGB888_1X24, MEDIA_BUS_FMT_RGB888_2X12_LE, MEDIA_BUS_FMT_RGB888_2X12_BE, MEDIA_BUS_FMT_RGB101010_1X30, MEDIA_BUS_FMT_YUYV8_1X16, MEDIA_BUS_FMT_YUYV10_1X20, + MEDIA_BUS_FMT_BGR888_1X24, MEDIA_BUS_FMT_YUV8_1X24, MEDIA_BUS_FMT_YUV10_1X30, }; diff --git a/drivers/gpu/drm/mediatek/mtk_drm_drv.c b/drivers/gpu/drm/mediatek/mtk_drm_drv.c index 7c0c12dde488..31ff2922758a 100644 --- a/drivers/gpu/drm/mediatek/mtk_drm_drv.c +++ b/drivers/gpu/drm/mediatek/mtk_drm_drv.c @@ -43,14 +43,13 @@ static const struct drm_mode_config_helper_funcs mtk_drm_mode_config_helpers = { static struct drm_framebuffer * mtk_drm_mode_fb_create(struct drm_device *dev, struct drm_file *file, + const struct drm_format_info *info, const struct drm_mode_fb_cmd2 *cmd) { - const struct drm_format_info *info = drm_get_format_info(dev, cmd); - if (info->num_planes != 1) return ERR_PTR(-EINVAL); - return drm_gem_fb_create(dev, file, cmd); + return drm_gem_fb_create(dev, file, info, cmd); } static const struct drm_mode_config_funcs mtk_drm_mode_config_funcs = { @@ -395,10 +394,12 @@ static bool mtk_drm_get_all_drm_priv(struct device *dev) continue; drm_dev = device_find_child(&pdev->dev, NULL, mtk_drm_match); + put_device(&pdev->dev); if (!drm_dev) continue; temp_drm_priv = dev_get_drvdata(drm_dev); + put_device(drm_dev); if (!temp_drm_priv) continue; @@ -685,10 +686,6 @@ err_free: for (i = 0; i < private->data->mmsys_dev_num; i++) private->all_drm_private[i]->drm = NULL; err_put_dev: - for (i = 0; i < private->data->mmsys_dev_num; i++) { - /* For device_find_child in mtk_drm_get_all_priv() */ - put_device(private->all_drm_private[i]->dev); - } put_device(private->mutex_dev); return ret; } @@ -696,18 +693,12 @@ err_put_dev: static void mtk_drm_unbind(struct device *dev) { struct mtk_drm_private *private = dev_get_drvdata(dev); - int i; /* for multi mmsys dev, unregister drm dev in mmsys master */ if (private->drm_master) { drm_dev_unregister(private->drm); mtk_drm_kms_deinit(private->drm); drm_dev_put(private->drm); - - for (i = 0; i < private->data->mmsys_dev_num; i++) { - /* For device_find_child in mtk_drm_get_all_priv() */ - put_device(private->all_drm_private[i]->dev); - } put_device(private->mutex_dev); } private->mtk_drm_bound = false; diff --git a/drivers/gpu/drm/mediatek/mtk_dsi.c b/drivers/gpu/drm/mediatek/mtk_dsi.c index d7726091819c..0e2bcd5f67b7 100644 --- a/drivers/gpu/drm/mediatek/mtk_dsi.c +++ b/drivers/gpu/drm/mediatek/mtk_dsi.c @@ -1002,6 +1002,12 @@ static int mtk_dsi_host_attach(struct mipi_dsi_host *host, return PTR_ERR(dsi->next_bridge); } + /* + * set flag to request the DSI host bridge be pre-enabled before device bridge + * in the chain, so the DSI host is ready when the device bridge is pre-enabled + */ + dsi->next_bridge->pre_enable_prev_first = true; + drm_bridge_add(&dsi->bridge); ret = 
component_add(host->dev, &mtk_dsi_component_ops); diff --git a/drivers/gpu/drm/mediatek/mtk_hdmi.c b/drivers/gpu/drm/mediatek/mtk_hdmi.c index 6943cdc77dec..b766dd5e6c8d 100644 --- a/drivers/gpu/drm/mediatek/mtk_hdmi.c +++ b/drivers/gpu/drm/mediatek/mtk_hdmi.c @@ -182,8 +182,8 @@ static inline struct mtk_hdmi *hdmi_ctx_from_bridge(struct drm_bridge *b) static void mtk_hdmi_hw_vid_black(struct mtk_hdmi *hdmi, bool black) { - regmap_update_bits(hdmi->regs, VIDEO_SOURCE_SEL, - VIDEO_CFG_4, black ? GEN_RGB : NORMAL_PATH); + regmap_update_bits(hdmi->regs, VIDEO_CFG_4, + VIDEO_SOURCE_SEL, black ? GEN_RGB : NORMAL_PATH); } static void mtk_hdmi_hw_make_reg_writable(struct mtk_hdmi *hdmi, bool enable) @@ -310,8 +310,8 @@ static void mtk_hdmi_hw_send_info_frame(struct mtk_hdmi *hdmi, u8 *buffer, static void mtk_hdmi_hw_send_aud_packet(struct mtk_hdmi *hdmi, bool enable) { - regmap_update_bits(hdmi->regs, AUDIO_PACKET_OFF, - GRL_SHIFT_R2, enable ? 0 : AUDIO_PACKET_OFF); + regmap_update_bits(hdmi->regs, GRL_SHIFT_R2, + AUDIO_PACKET_OFF, enable ? 0 : AUDIO_PACKET_OFF); } static void mtk_hdmi_hw_config_sys(struct mtk_hdmi *hdmi) @@ -1174,7 +1174,8 @@ static void mtk_hdmi_hpd_event(bool hpd, struct device *dev) * Bridge callbacks */ -static enum drm_connector_status mtk_hdmi_bridge_detect(struct drm_bridge *bridge) +static enum drm_connector_status +mtk_hdmi_bridge_detect(struct drm_bridge *bridge, struct drm_connector *connector) { struct mtk_hdmi *hdmi = hdmi_ctx_from_bridge(bridge); diff --git a/drivers/gpu/drm/mediatek/mtk_plane.c b/drivers/gpu/drm/mediatek/mtk_plane.c index 655106bbb76d..02349bd44001 100644 --- a/drivers/gpu/drm/mediatek/mtk_plane.c +++ b/drivers/gpu/drm/mediatek/mtk_plane.c @@ -285,9 +285,15 @@ static void mtk_plane_atomic_disable(struct drm_plane *plane, struct drm_plane_state *new_state = drm_atomic_get_new_plane_state(state, plane); struct mtk_plane_state *mtk_plane_state = to_mtk_plane_state(new_state); + struct drm_plane_state *old_state = drm_atomic_get_old_plane_state(state, + plane); + mtk_plane_state->pending.enable = false; wmb(); /* Make sure the above parameter is set before update */ mtk_plane_state->pending.dirty = true; + + if (old_state && old_state->crtc) + mtk_crtc_plane_disable(old_state->crtc, plane); } static void mtk_plane_atomic_update(struct drm_plane *plane, @@ -321,7 +327,8 @@ static const struct drm_plane_helper_funcs mtk_plane_helper_funcs = { int mtk_plane_init(struct drm_device *dev, struct drm_plane *plane, unsigned long possible_crtcs, enum drm_plane_type type, unsigned int supported_rotations, const u32 blend_modes, - const u32 *formats, size_t num_formats, unsigned int plane_idx) + const u32 *formats, size_t num_formats, + bool supports_afbc, unsigned int plane_idx) { int err; @@ -332,7 +339,9 @@ int mtk_plane_init(struct drm_device *dev, struct drm_plane *plane, err = drm_universal_plane_init(dev, plane, possible_crtcs, &mtk_plane_funcs, formats, - num_formats, modifiers, type, NULL); + num_formats, + supports_afbc ? 
modifiers : NULL, + type, NULL); if (err) { DRM_ERROR("failed to initialize plane\n"); return err; } diff --git a/drivers/gpu/drm/mediatek/mtk_plane.h b/drivers/gpu/drm/mediatek/mtk_plane.h index 3b13b89989c7..95c5fa5295d8 100644 --- a/drivers/gpu/drm/mediatek/mtk_plane.h +++ b/drivers/gpu/drm/mediatek/mtk_plane.h @@ -49,5 +49,6 @@ to_mtk_plane_state(struct drm_plane_state *state) int mtk_plane_init(struct drm_device *dev, struct drm_plane *plane, unsigned long possible_crtcs, enum drm_plane_type type, unsigned int supported_rotations, const u32 blend_modes, - const u32 *formats, size_t num_formats, unsigned int plane_idx); + const u32 *formats, size_t num_formats, + bool supports_afbc, unsigned int plane_idx); #endif diff --git a/drivers/gpu/drm/mgag200/mgag200_ddc.c b/drivers/gpu/drm/mgag200/mgag200_ddc.c index 6d81ea8931e8..c31673eaa554 100644 --- a/drivers/gpu/drm/mgag200/mgag200_ddc.c +++ b/drivers/gpu/drm/mgag200/mgag200_ddc.c @@ -26,7 +26,6 @@ * Authors: Dave Airlie <airlied@redhat.com> */ -#include <linux/export.h> #include <linux/i2c-algo-bit.h> #include <linux/i2c.h> #include <linux/pci.h> diff --git a/drivers/gpu/drm/msm/dp/dp_audio.c b/drivers/gpu/drm/msm/dp/dp_audio.c index 959cf53be4b8..41018e82efa1 100644 --- a/drivers/gpu/drm/msm/dp/dp_audio.c +++ b/drivers/gpu/drm/msm/dp/dp_audio.c @@ -265,8 +265,8 @@ static struct msm_dp_audio_private *msm_dp_audio_get_data(struct msm_dp *msm_dp_ return container_of(msm_dp_audio, struct msm_dp_audio_private, msm_dp_audio); } -int msm_dp_audio_prepare(struct drm_connector *connector, - struct drm_bridge *bridge, +int msm_dp_audio_prepare(struct drm_bridge *bridge, + struct drm_connector *connector, struct hdmi_codec_daifmt *daifmt, struct hdmi_codec_params *params) { @@ -308,8 +308,8 @@ end: return rc; } -void msm_dp_audio_shutdown(struct drm_connector *connector, - struct drm_bridge *bridge) +void msm_dp_audio_shutdown(struct drm_bridge *bridge, + struct drm_connector *connector) { struct msm_dp_audio_private *audio; struct msm_dp *msm_dp_display; diff --git a/drivers/gpu/drm/msm/dp/dp_audio.h b/drivers/gpu/drm/msm/dp/dp_audio.h index 842278516c99..ce2342856adb 100644 --- a/drivers/gpu/drm/msm/dp/dp_audio.h +++ b/drivers/gpu/drm/msm/dp/dp_audio.h @@ -45,12 +45,12 @@ struct msm_dp_audio *msm_dp_audio_get(struct platform_device *pdev, */ void msm_dp_audio_put(struct msm_dp_audio *msm_dp_audio); -int msm_dp_audio_prepare(struct drm_connector *connector, - struct drm_bridge *bridge, +int msm_dp_audio_prepare(struct drm_bridge *bridge, + struct drm_connector *connector, struct hdmi_codec_daifmt *daifmt, struct hdmi_codec_params *params); -void msm_dp_audio_shutdown(struct drm_connector *connector, - struct drm_bridge *bridge); +void msm_dp_audio_shutdown(struct drm_bridge *bridge, + struct drm_connector *connector); #endif /* _DP_AUDIO_H_ */ diff --git a/drivers/gpu/drm/msm/dp/dp_drm.c b/drivers/gpu/drm/msm/dp/dp_drm.c index f222d7ccaa88..9a461ab2f32f 100644 --- a/drivers/gpu/drm/msm/dp/dp_drm.c +++ b/drivers/gpu/drm/msm/dp/dp_drm.c @@ -20,7 +20,8 @@ * @bridge: Pointer to drm bridge structure * Returns: Bridge's 'is connected' status */ -static enum drm_connector_status msm_dp_bridge_detect(struct drm_bridge *bridge) +static enum drm_connector_status +msm_dp_bridge_detect(struct drm_bridge *bridge, struct drm_connector *connector) { struct msm_dp *dp; diff --git a/drivers/gpu/drm/msm/hdmi/hdmi.h b/drivers/gpu/drm/msm/hdmi/hdmi.h index d5e572d10d6a..02cfd46df594 100644 --- a/drivers/gpu/drm/msm/hdmi/hdmi.h +++ b/drivers/gpu/drm/msm/hdmi/hdmi.h @@
-200,12 +200,12 @@ struct hdmi_codec_daifmt; struct hdmi_codec_params; int msm_hdmi_audio_update(struct hdmi *hdmi); -int msm_hdmi_bridge_audio_prepare(struct drm_connector *connector, - struct drm_bridge *bridge, +int msm_hdmi_bridge_audio_prepare(struct drm_bridge *bridge, + struct drm_connector *connector, struct hdmi_codec_daifmt *daifmt, struct hdmi_codec_params *params); -void msm_hdmi_bridge_audio_shutdown(struct drm_connector *connector, - struct drm_bridge *bridge); +void msm_hdmi_bridge_audio_shutdown(struct drm_bridge *bridge, + struct drm_connector *connector); /* * hdmi bridge: @@ -215,7 +215,7 @@ int msm_hdmi_bridge_init(struct hdmi *hdmi); void msm_hdmi_hpd_irq(struct drm_bridge *bridge); enum drm_connector_status msm_hdmi_bridge_detect( - struct drm_bridge *bridge); + struct drm_bridge *bridge, struct drm_connector *connector); void msm_hdmi_hpd_enable(struct drm_bridge *bridge); void msm_hdmi_hpd_disable(struct drm_bridge *bridge); diff --git a/drivers/gpu/drm/msm/hdmi/hdmi_audio.c b/drivers/gpu/drm/msm/hdmi/hdmi_audio.c index b9ec14ef2c20..d9a8dc9dae8f 100644 --- a/drivers/gpu/drm/msm/hdmi/hdmi_audio.c +++ b/drivers/gpu/drm/msm/hdmi/hdmi_audio.c @@ -122,8 +122,8 @@ int msm_hdmi_audio_update(struct hdmi *hdmi) return 0; } -int msm_hdmi_bridge_audio_prepare(struct drm_connector *connector, - struct drm_bridge *bridge, +int msm_hdmi_bridge_audio_prepare(struct drm_bridge *bridge, + struct drm_connector *connector, struct hdmi_codec_daifmt *daifmt, struct hdmi_codec_params *params) { @@ -163,8 +163,8 @@ int msm_hdmi_bridge_audio_prepare(struct drm_connector *connector, return msm_hdmi_audio_update(hdmi); } -void msm_hdmi_bridge_audio_shutdown(struct drm_connector *connector, - struct drm_bridge *bridge) +void msm_hdmi_bridge_audio_shutdown(struct drm_bridge *bridge, + struct drm_connector *connector) { struct hdmi_bridge *hdmi_bridge = to_hdmi_bridge(bridge); struct hdmi *hdmi = hdmi_bridge->hdmi; diff --git a/drivers/gpu/drm/msm/hdmi/hdmi_bridge.c b/drivers/gpu/drm/msm/hdmi/hdmi_bridge.c index 53a7ce8cc7bc..46fd58646d32 100644 --- a/drivers/gpu/drm/msm/hdmi/hdmi_bridge.c +++ b/drivers/gpu/drm/msm/hdmi/hdmi_bridge.c @@ -475,7 +475,7 @@ msm_hdmi_hotplug_work(struct work_struct *work) container_of(work, struct hdmi_bridge, hpd_work); struct drm_bridge *bridge = &hdmi_bridge->base; - drm_bridge_hpd_notify(bridge, drm_bridge_detect(bridge)); + drm_bridge_hpd_notify(bridge, drm_bridge_detect(bridge, hdmi_bridge->hdmi->connector)); } /* initialize bridge */ diff --git a/drivers/gpu/drm/msm/hdmi/hdmi_hpd.c b/drivers/gpu/drm/msm/hdmi/hdmi_hpd.c index 407e6c449ee0..114b0d507700 100644 --- a/drivers/gpu/drm/msm/hdmi/hdmi_hpd.c +++ b/drivers/gpu/drm/msm/hdmi/hdmi_hpd.c @@ -177,8 +177,8 @@ static enum drm_connector_status detect_gpio(struct hdmi *hdmi) connector_status_disconnected; } -enum drm_connector_status msm_hdmi_bridge_detect( - struct drm_bridge *bridge) +enum drm_connector_status +msm_hdmi_bridge_detect(struct drm_bridge *bridge, struct drm_connector *connector) { struct hdmi_bridge *hdmi_bridge = to_hdmi_bridge(bridge); struct hdmi *hdmi = hdmi_bridge->hdmi; diff --git a/drivers/gpu/drm/msm/msm_drv.h b/drivers/gpu/drm/msm/msm_drv.h index f4c20dc91cc6..6d847d593f1a 100644 --- a/drivers/gpu/drm/msm/msm_drv.h +++ b/drivers/gpu/drm/msm/msm_drv.h @@ -260,7 +260,8 @@ uint32_t msm_framebuffer_iova(struct drm_framebuffer *fb, int plane); struct drm_gem_object *msm_framebuffer_bo(struct drm_framebuffer *fb, int plane); const struct msm_format *msm_framebuffer_format(struct 
drm_framebuffer *fb); struct drm_framebuffer *msm_framebuffer_create(struct drm_device *dev, - struct drm_file *file, const struct drm_mode_fb_cmd2 *mode_cmd); + struct drm_file *file, const struct drm_format_info *info, + const struct drm_mode_fb_cmd2 *mode_cmd); struct drm_framebuffer * msm_alloc_stolen_fb(struct drm_device *dev, int w, int h, int p, uint32_t format); diff --git a/drivers/gpu/drm/msm/msm_fb.c b/drivers/gpu/drm/msm/msm_fb.c index bc7c2bb8f01e..1eff615ff9bf 100644 --- a/drivers/gpu/drm/msm/msm_fb.c +++ b/drivers/gpu/drm/msm/msm_fb.c @@ -30,6 +30,7 @@ struct msm_framebuffer { #define to_msm_framebuffer(x) container_of(x, struct msm_framebuffer, base) static struct drm_framebuffer *msm_framebuffer_init(struct drm_device *dev, + const struct drm_format_info *info, const struct drm_mode_fb_cmd2 *mode_cmd, struct drm_gem_object **bos); static int msm_framebuffer_dirtyfb(struct drm_framebuffer *fb, @@ -139,10 +140,9 @@ const struct msm_format *msm_framebuffer_format(struct drm_framebuffer *fb) } struct drm_framebuffer *msm_framebuffer_create(struct drm_device *dev, - struct drm_file *file, const struct drm_mode_fb_cmd2 *mode_cmd) + struct drm_file *file, const struct drm_format_info *info, + const struct drm_mode_fb_cmd2 *mode_cmd) { - const struct drm_format_info *info = drm_get_format_info(dev, - mode_cmd); struct drm_gem_object *bos[4] = {0}; struct drm_framebuffer *fb; int ret, i, n = info->num_planes; @@ -155,7 +155,7 @@ struct drm_framebuffer *msm_framebuffer_create(struct drm_device *dev, } } - fb = msm_framebuffer_init(dev, mode_cmd, bos); + fb = msm_framebuffer_init(dev, info, mode_cmd, bos); if (IS_ERR(fb)) { ret = PTR_ERR(fb); goto out_unref; @@ -170,10 +170,9 @@ out_unref: } static struct drm_framebuffer *msm_framebuffer_init(struct drm_device *dev, + const struct drm_format_info *info, const struct drm_mode_fb_cmd2 *mode_cmd, struct drm_gem_object **bos) { - const struct drm_format_info *info = drm_get_format_info(dev, - mode_cmd); struct msm_drm_private *priv = dev->dev_private; struct msm_kms *kms = priv->kms; struct msm_framebuffer *msm_fb = NULL; @@ -227,7 +226,7 @@ static struct drm_framebuffer *msm_framebuffer_init(struct drm_device *dev, msm_fb->base.obj[i] = bos[i]; } - drm_helper_mode_fill_fb_struct(dev, fb, mode_cmd); + drm_helper_mode_fill_fb_struct(dev, fb, info, mode_cmd); ret = drm_framebuffer_init(dev, fb, &msm_framebuffer_funcs); if (ret) { @@ -276,7 +275,10 @@ msm_alloc_stolen_fb(struct drm_device *dev, int w, int h, int p, uint32_t format msm_gem_object_set_name(bo, "stolenfb"); - fb = msm_framebuffer_init(dev, &mode_cmd, &bo); + fb = msm_framebuffer_init(dev, + drm_get_format_info(dev, mode_cmd.pixel_format, + mode_cmd.modifier[0]), + &mode_cmd, &bo); if (IS_ERR(fb)) { DRM_DEV_ERROR(dev->dev, "failed to allocate fb\n"); /* note: if fb creation failed, we can't rely on fb destroy diff --git a/drivers/gpu/drm/msm/msm_gem.c b/drivers/gpu/drm/msm/msm_gem.c index a9d21a7b8984..9f7fbe577abb 100644 --- a/drivers/gpu/drm/msm/msm_gem.c +++ b/drivers/gpu/drm/msm/msm_gem.c @@ -9,7 +9,6 @@ #include <linux/spinlock.h> #include <linux/shmem_fs.h> #include <linux/dma-buf.h> -#include <linux/pfn_t.h> #include <drm/drm_prime.h> #include <drm/drm_file.h> diff --git a/drivers/gpu/drm/msm/msm_gem_vma.c b/drivers/gpu/drm/msm/msm_gem_vma.c index c112a46a5484..89a95977f41e 100644 --- a/drivers/gpu/drm/msm/msm_gem_vma.c +++ b/drivers/gpu/drm/msm/msm_gem_vma.c @@ -396,7 +396,14 @@ msm_gem_vma_new(struct drm_gpuvm *gpuvm, struct drm_gem_object *obj, if (obj) 
GEM_WARN_ON((range_end - range_start) > obj->size); - drm_gpuva_init(&vma->base, range_start, range_end - range_start, obj, offset); + struct drm_gpuva_op_map op_map = { + .va.addr = range_start, + .va.range = range_end - range_start, + .gem.obj = obj, + .gem.offset = offset, + }; + + drm_gpuva_init_from_op(&vma->base, &op_map); vma->mapped = false; ret = drm_gpuva_insert(&vm->base, &vma->base); @@ -1225,11 +1232,17 @@ vm_bind_job_lock_objects(struct msm_vm_bind_job *job, struct drm_exec *exec) op->obj_offset); break; case MSM_VM_BIND_OP_MAP: - case MSM_VM_BIND_OP_MAP_NULL: - ret = drm_gpuvm_sm_map_exec_lock(job->vm, exec, 1, - op->iova, op->range, - op->obj, op->obj_offset); + case MSM_VM_BIND_OP_MAP_NULL: { + struct drm_gpuvm_map_req map_req = { + .map.va.addr = op->iova, + .map.va.range = op->range, + .map.gem.obj = op->obj, + .map.gem.offset = op->obj_offset, + }; + + ret = drm_gpuvm_sm_map_exec_lock(job->vm, exec, 1, &map_req); break; + } default: /* * lookup_op() should have already thrown an error for @@ -1337,10 +1350,17 @@ vm_bind_job_prepare(struct msm_vm_bind_job *job) if (op->flags & MSM_VM_BIND_OP_DUMP) arg.flags |= MSM_VMA_DUMP; fallthrough; - case MSM_VM_BIND_OP_MAP_NULL: - ret = drm_gpuvm_sm_map(job->vm, &arg, op->iova, - op->range, op->obj, op->obj_offset); + case MSM_VM_BIND_OP_MAP_NULL: { + struct drm_gpuvm_map_req map_req = { + .map.va.addr = op->iova, + .map.va.range = op->range, + .map.gem.obj = op->obj, + .map.gem.offset = op->obj_offset, + }; + + ret = drm_gpuvm_sm_map(job->vm, &arg, &map_req); break; + } default: /* * lookup_op() should have already thrown an error for diff --git a/drivers/gpu/drm/mxsfb/lcdif_kms.c b/drivers/gpu/drm/mxsfb/lcdif_kms.c index dbd42cc1da87..1c3b33be6c40 100644 --- a/drivers/gpu/drm/mxsfb/lcdif_kms.c +++ b/drivers/gpu/drm/mxsfb/lcdif_kms.c @@ -433,7 +433,6 @@ static int lcdif_crtc_atomic_check(struct drm_crtc *crtc, struct drm_connector *connector; struct drm_encoder *encoder; struct drm_bridge_state *bridge_state; - struct drm_bridge *bridge; u32 bus_format, bus_flags; bool format_set = false, flags_set = false; int ret, i; @@ -453,7 +452,8 @@ static int lcdif_crtc_atomic_check(struct drm_crtc *crtc, encoder = connector_state->best_encoder; - bridge = drm_bridge_chain_get_first_bridge(encoder); + struct drm_bridge *bridge __free(drm_bridge_put) = + drm_bridge_chain_get_first_bridge(encoder); if (!bridge) continue; diff --git a/drivers/gpu/drm/mxsfb/mxsfb_drv.c b/drivers/gpu/drm/mxsfb/mxsfb_drv.c index c183b1112bc4..0b756da2fec2 100644 --- a/drivers/gpu/drm/mxsfb/mxsfb_drv.c +++ b/drivers/gpu/drm/mxsfb/mxsfb_drv.c @@ -91,20 +91,15 @@ void mxsfb_disable_axi_clk(struct mxsfb_drm_private *mxsfb) static struct drm_framebuffer * mxsfb_fb_create(struct drm_device *dev, struct drm_file *file_priv, + const struct drm_format_info *info, const struct drm_mode_fb_cmd2 *mode_cmd) { - const struct drm_format_info *info; - - info = drm_get_format_info(dev, mode_cmd); - if (!info) - return ERR_PTR(-EINVAL); - if (mode_cmd->width * info->cpp[0] != mode_cmd->pitches[0]) { dev_dbg(dev->dev, "Invalid pitch: fb width must match pitch\n"); return ERR_PTR(-EINVAL); } - return drm_gem_fb_create(dev, file_priv, mode_cmd); + return drm_gem_fb_create(dev, file_priv, info, mode_cmd); } static const struct drm_mode_config_funcs mxsfb_mode_config_funcs = { diff --git a/drivers/gpu/drm/nouveau/Kconfig b/drivers/gpu/drm/nouveau/Kconfig index d1587639ebb0..c88776d1e784 100644 --- a/drivers/gpu/drm/nouveau/Kconfig +++ b/drivers/gpu/drm/nouveau/Kconfig @@ -102,14 +102,6 
@@ config DRM_NOUVEAU_SVM Say Y here if you want to enable experimental support for Shared Virtual Memory (SVM). -config DRM_NOUVEAU_GSP_DEFAULT - bool "Use GSP firmware for Turing/Ampere (needs firmware installed)" - depends on DRM_NOUVEAU - default n - help - Say Y here if you want to use the GSP codepaths by default on - Turing and Ampere GPUs. - config DRM_NOUVEAU_CH7006 tristate "Chrontel ch7006 TV encoder" depends on DRM_NOUVEAU diff --git a/drivers/gpu/drm/nouveau/dispnv50/disp.c b/drivers/gpu/drm/nouveau/dispnv50/disp.c index e5d37eee4301..e97e39abf3a2 100644 --- a/drivers/gpu/drm/nouveau/dispnv50/disp.c +++ b/drivers/gpu/drm/nouveau/dispnv50/disp.c @@ -1839,7 +1839,7 @@ nv50_sor_atomic_enable(struct drm_encoder *encoder, struct drm_atomic_state *sta backlight = nv_connector->backlight; if (backlight && backlight->uses_dpcd) drm_edp_backlight_enable(&nv_connector->aux, &backlight->edp_info, - (u16)backlight->dev->props.brightness); + backlight->dev->props.brightness); #endif break; diff --git a/drivers/gpu/drm/nouveau/dispnv50/wndw.c b/drivers/gpu/drm/nouveau/dispnv50/wndw.c index 11d5b923d6e7..e2c55f4b9c5a 100644 --- a/drivers/gpu/drm/nouveau/dispnv50/wndw.c +++ b/drivers/gpu/drm/nouveau/dispnv50/wndw.c @@ -795,6 +795,10 @@ static bool nv50_plane_format_mod_supported(struct drm_plane *plane, struct nouveau_drm *drm = nouveau_drm(plane->dev); uint8_t i; + /* All chipsets can display all formats in linear layout */ + if (modifier == DRM_FORMAT_MOD_LINEAR) + return true; + if (drm->client.device.info.chipset < 0xc0) { const struct drm_format_info *info = drm_format_info(format); const uint8_t kind = (modifier >> 12) & 0xff; diff --git a/drivers/gpu/drm/nouveau/gv100_fence.c b/drivers/gpu/drm/nouveau/gv100_fence.c index cccdeca72002..317e516c4ec7 100644 --- a/drivers/gpu/drm/nouveau/gv100_fence.c +++ b/drivers/gpu/drm/nouveau/gv100_fence.c @@ -18,7 +18,7 @@ gv100_fence_emit32(struct nouveau_channel *chan, u64 virtual, u32 sequence) struct nvif_push *push = &chan->chan.push; int ret; - ret = PUSH_WAIT(push, 8); + ret = PUSH_WAIT(push, 13); if (ret) return ret; @@ -32,6 +32,11 @@ gv100_fence_emit32(struct nouveau_channel *chan, u64 virtual, u32 sequence) NVDEF(NVC36F, SEM_EXECUTE, PAYLOAD_SIZE, 32BIT) | NVDEF(NVC36F, SEM_EXECUTE, RELEASE_TIMESTAMP, DIS)); + PUSH_MTHD(push, NVC36F, MEM_OP_A, 0, + MEM_OP_B, 0, + MEM_OP_C, NVDEF(NVC36F, MEM_OP_C, MEMBAR_TYPE, SYS_MEMBAR), + MEM_OP_D, NVDEF(NVC36F, MEM_OP_D, OPERATION, MEMBAR)); + PUSH_MTHD(push, NVC36F, NON_STALL_INTERRUPT, 0); PUSH_KICK(push); diff --git a/drivers/gpu/drm/nouveau/include/nvhw/class/clc36f.h b/drivers/gpu/drm/nouveau/include/nvhw/class/clc36f.h index 8735dda4c8a7..338f74b9f501 100644 --- a/drivers/gpu/drm/nouveau/include/nvhw/class/clc36f.h +++ b/drivers/gpu/drm/nouveau/include/nvhw/class/clc36f.h @@ -7,6 +7,91 @@ #define NVC36F_NON_STALL_INTERRUPT (0x00000020) #define NVC36F_NON_STALL_INTERRUPT_HANDLE 31:0 +// NOTE - MEM_OP_A and MEM_OP_B have been replaced in gp100 with methods for +// specifying the page address for a targeted TLB invalidate and the uTLB for +// a targeted REPLAY_CANCEL for UVM. +// The previous MEM_OP_A/B functionality is in MEM_OP_C/D, with slightly +// rearranged fields. 
+#define NVC36F_MEM_OP_A (0x00000028) +#define NVC36F_MEM_OP_A_TLB_INVALIDATE_CANCEL_TARGET_CLIENT_UNIT_ID 5:0 // only relevant for REPLAY_CANCEL_TARGETED +#define NVC36F_MEM_OP_A_TLB_INVALIDATE_INVALIDATION_SIZE 5:0 // Used to specify size of invalidate, used for invalidates which are not of the REPLAY_CANCEL_TARGETED type +#define NVC36F_MEM_OP_A_TLB_INVALIDATE_CANCEL_TARGET_GPC_ID 10:6 // only relevant for REPLAY_CANCEL_TARGETED +#define NVC36F_MEM_OP_A_TLB_INVALIDATE_CANCEL_MMU_ENGINE_ID 6:0 // only relevant for REPLAY_CANCEL_VA_GLOBAL +#define NVC36F_MEM_OP_A_TLB_INVALIDATE_SYSMEMBAR 11:11 +#define NVC36F_MEM_OP_A_TLB_INVALIDATE_SYSMEMBAR_EN 0x00000001 +#define NVC36F_MEM_OP_A_TLB_INVALIDATE_SYSMEMBAR_DIS 0x00000000 +#define NVC36F_MEM_OP_A_TLB_INVALIDATE_TARGET_ADDR_LO 31:12 +#define NVC36F_MEM_OP_B (0x0000002c) +#define NVC36F_MEM_OP_B_TLB_INVALIDATE_TARGET_ADDR_HI 31:0 +#define NVC36F_MEM_OP_C (0x00000030) +#define NVC36F_MEM_OP_C_MEMBAR_TYPE 2:0 +#define NVC36F_MEM_OP_C_MEMBAR_TYPE_SYS_MEMBAR 0x00000000 +#define NVC36F_MEM_OP_C_MEMBAR_TYPE_MEMBAR 0x00000001 +#define NVC36F_MEM_OP_C_TLB_INVALIDATE_PDB 0:0 +#define NVC36F_MEM_OP_C_TLB_INVALIDATE_PDB_ONE 0x00000000 +#define NVC36F_MEM_OP_C_TLB_INVALIDATE_PDB_ALL 0x00000001 // Probably nonsensical for MMU_TLB_INVALIDATE_TARGETED +#define NVC36F_MEM_OP_C_TLB_INVALIDATE_GPC 1:1 +#define NVC36F_MEM_OP_C_TLB_INVALIDATE_GPC_ENABLE 0x00000000 +#define NVC36F_MEM_OP_C_TLB_INVALIDATE_GPC_DISABLE 0x00000001 +#define NVC36F_MEM_OP_C_TLB_INVALIDATE_REPLAY 4:2 // only relevant if GPC ENABLE +#define NVC36F_MEM_OP_C_TLB_INVALIDATE_REPLAY_NONE 0x00000000 +#define NVC36F_MEM_OP_C_TLB_INVALIDATE_REPLAY_START 0x00000001 +#define NVC36F_MEM_OP_C_TLB_INVALIDATE_REPLAY_START_ACK_ALL 0x00000002 +#define NVC36F_MEM_OP_C_TLB_INVALIDATE_REPLAY_CANCEL_TARGETED 0x00000003 +#define NVC36F_MEM_OP_C_TLB_INVALIDATE_REPLAY_CANCEL_GLOBAL 0x00000004 +#define NVC36F_MEM_OP_C_TLB_INVALIDATE_REPLAY_CANCEL_VA_GLOBAL 0x00000005 +#define NVC36F_MEM_OP_C_TLB_INVALIDATE_ACK_TYPE 6:5 // only relevant if GPC ENABLE +#define NVC36F_MEM_OP_C_TLB_INVALIDATE_ACK_TYPE_NONE 0x00000000 +#define NVC36F_MEM_OP_C_TLB_INVALIDATE_ACK_TYPE_GLOBALLY 0x00000001 +#define NVC36F_MEM_OP_C_TLB_INVALIDATE_ACK_TYPE_INTRANODE 0x00000002 +#define NVC36F_MEM_OP_C_TLB_INVALIDATE_ACCESS_TYPE 9:7 //only relevant for REPLAY_CANCEL_VA_GLOBAL +#define NVC36F_MEM_OP_C_TLB_INVALIDATE_ACCESS_TYPE_VIRT_READ 0 +#define NVC36F_MEM_OP_C_TLB_INVALIDATE_ACCESS_TYPE_VIRT_WRITE 1 +#define NVC36F_MEM_OP_C_TLB_INVALIDATE_ACCESS_TYPE_VIRT_ATOMIC_STRONG 2 +#define NVC36F_MEM_OP_C_TLB_INVALIDATE_ACCESS_TYPE_VIRT_RSVRVD 3 +#define NVC36F_MEM_OP_C_TLB_INVALIDATE_ACCESS_TYPE_VIRT_ATOMIC_WEAK 4 +#define NVC36F_MEM_OP_C_TLB_INVALIDATE_ACCESS_TYPE_VIRT_ATOMIC_ALL 5 +#define NVC36F_MEM_OP_C_TLB_INVALIDATE_ACCESS_TYPE_VIRT_WRITE_AND_ATOMIC 6 +#define NVC36F_MEM_OP_C_TLB_INVALIDATE_ACCESS_TYPE_VIRT_ALL 7 +#define NVC36F_MEM_OP_C_TLB_INVALIDATE_PAGE_TABLE_LEVEL 9:7 // Invalidate affects this level and all below +#define NVC36F_MEM_OP_C_TLB_INVALIDATE_PAGE_TABLE_LEVEL_ALL 0x00000000 // Invalidate tlb caches at all levels of the page table +#define NVC36F_MEM_OP_C_TLB_INVALIDATE_PAGE_TABLE_LEVEL_PTE_ONLY 0x00000001 +#define NVC36F_MEM_OP_C_TLB_INVALIDATE_PAGE_TABLE_LEVEL_UP_TO_PDE0 0x00000002 +#define NVC36F_MEM_OP_C_TLB_INVALIDATE_PAGE_TABLE_LEVEL_UP_TO_PDE1 0x00000003 +#define NVC36F_MEM_OP_C_TLB_INVALIDATE_PAGE_TABLE_LEVEL_UP_TO_PDE2 0x00000004 +#define NVC36F_MEM_OP_C_TLB_INVALIDATE_PAGE_TABLE_LEVEL_UP_TO_PDE3 0x00000005 +#define 
NVC36F_MEM_OP_C_TLB_INVALIDATE_PAGE_TABLE_LEVEL_UP_TO_PDE4 0x00000006 +#define NVC36F_MEM_OP_C_TLB_INVALIDATE_PAGE_TABLE_LEVEL_UP_TO_PDE5 0x00000007 +#define NVC36F_MEM_OP_C_TLB_INVALIDATE_PDB_APERTURE 11:10 // only relevant if PDB_ONE +#define NVC36F_MEM_OP_C_TLB_INVALIDATE_PDB_APERTURE_VID_MEM 0x00000000 +#define NVC36F_MEM_OP_C_TLB_INVALIDATE_PDB_APERTURE_SYS_MEM_COHERENT 0x00000002 +#define NVC36F_MEM_OP_C_TLB_INVALIDATE_PDB_APERTURE_SYS_MEM_NONCOHERENT 0x00000003 +#define NVC36F_MEM_OP_C_TLB_INVALIDATE_PDB_ADDR_LO 31:12 // only relevant if PDB_ONE +#define NVC36F_MEM_OP_C_ACCESS_COUNTER_CLR_TARGETED_NOTIFY_TAG 19:0 +// MEM_OP_D MUST be preceded by MEM_OPs A-C. +#define NVC36F_MEM_OP_D (0x00000034) +#define NVC36F_MEM_OP_D_TLB_INVALIDATE_PDB_ADDR_HI 26:0 // only relevant if PDB_ONE +#define NVC36F_MEM_OP_D_OPERATION 31:27 +#define NVC36F_MEM_OP_D_OPERATION_MEMBAR 0x00000005 +#define NVC36F_MEM_OP_D_OPERATION_MMU_TLB_INVALIDATE 0x00000009 +#define NVC36F_MEM_OP_D_OPERATION_MMU_TLB_INVALIDATE_TARGETED 0x0000000a +#define NVC36F_MEM_OP_D_OPERATION_L2_PEERMEM_INVALIDATE 0x0000000d +#define NVC36F_MEM_OP_D_OPERATION_L2_SYSMEM_INVALIDATE 0x0000000e +// CLEAN_LINES is an alias for Tegra/GPU IP usage +#define NVC36F_MEM_OP_B_OPERATION_L2_INVALIDATE_CLEAN_LINES 0x0000000e +#define NVC36F_MEM_OP_D_OPERATION_L2_CLEAN_COMPTAGS 0x0000000f +#define NVC36F_MEM_OP_D_OPERATION_L2_FLUSH_DIRTY 0x00000010 +#define NVC36F_MEM_OP_D_OPERATION_L2_WAIT_FOR_SYS_PENDING_READS 0x00000015 +#define NVC36F_MEM_OP_D_OPERATION_ACCESS_COUNTER_CLR 0x00000016 +#define NVC36F_MEM_OP_D_ACCESS_COUNTER_CLR_TYPE 1:0 +#define NVC36F_MEM_OP_D_ACCESS_COUNTER_CLR_TYPE_MIMC 0x00000000 +#define NVC36F_MEM_OP_D_ACCESS_COUNTER_CLR_TYPE_MOMC 0x00000001 +#define NVC36F_MEM_OP_D_ACCESS_COUNTER_CLR_TYPE_ALL 0x00000002 +#define NVC36F_MEM_OP_D_ACCESS_COUNTER_CLR_TYPE_TARGETED 0x00000003 +#define NVC36F_MEM_OP_D_ACCESS_COUNTER_CLR_TARGETED_TYPE 2:2 +#define NVC36F_MEM_OP_D_ACCESS_COUNTER_CLR_TARGETED_TYPE_MIMC 0x00000000 +#define NVC36F_MEM_OP_D_ACCESS_COUNTER_CLR_TARGETED_TYPE_MOMC 0x00000001 +#define NVC36F_MEM_OP_D_ACCESS_COUNTER_CLR_TARGETED_BANK 6:3 #define NVC36F_SEM_ADDR_LO (0x0000005c) #define NVC36F_SEM_ADDR_LO_OFFSET 31:2 #define NVC36F_SEM_ADDR_HI (0x00000060) diff --git a/drivers/gpu/drm/nouveau/nouveau_backlight.c b/drivers/gpu/drm/nouveau/nouveau_backlight.c index d47442125fa1..4a75d146a171 100644 --- a/drivers/gpu/drm/nouveau/nouveau_backlight.c +++ b/drivers/gpu/drm/nouveau/nouveau_backlight.c @@ -42,7 +42,7 @@ #include "nouveau_acpi.h" static struct ida bl_ida; -#define BL_NAME_SIZE 15 // 12 for name + 2 for digits + 1 for '\0' +#define BL_NAME_SIZE 24 // 12 for name + 11 for digits + 1 for '\0' static bool nouveau_get_backlight_name(char backlight_name[BL_NAME_SIZE], @@ -245,7 +245,7 @@ nv50_backlight_init(struct nouveau_backlight *bl, if (nv_conn->type == DCB_CONNECTOR_eDP) { int ret; - u16 current_level; + u32 current_level; u8 edp_dpcd[EDP_DISPLAY_CTL_CAP_SIZE]; u8 current_mode; @@ -261,8 +261,9 @@ nv50_backlight_init(struct nouveau_backlight *bl, NV_DEBUG(drm, "DPCD backlight controls supported on %s\n", nv_conn->base.name); - ret = drm_edp_backlight_init(&nv_conn->aux, &bl->edp_info, 0, edp_dpcd, - ¤t_level, ¤t_mode); + ret = drm_edp_backlight_init(&nv_conn->aux, &bl->edp_info, + 0, 0, edp_dpcd, + ¤t_level, ¤t_mode, false); if (ret < 0) return ret; diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c b/drivers/gpu/drm/nouveau/nouveau_bo.c index b96f0555ca14..f26562eafffc 100644 --- a/drivers/gpu/drm/nouveau/nouveau_bo.c 
+++ b/drivers/gpu/drm/nouveau/nouveau_bo.c @@ -929,7 +929,7 @@ done: nvif_vmm_put(vmm, &old_mem->vma[1]); nvif_vmm_put(vmm, &old_mem->vma[0]); } - return 0; + return ret; } static int diff --git a/drivers/gpu/drm/nouveau/nouveau_chan.h b/drivers/gpu/drm/nouveau/nouveau_chan.h index 561877725aac..bb34b0a6082d 100644 --- a/drivers/gpu/drm/nouveau/nouveau_chan.h +++ b/drivers/gpu/drm/nouveau/nouveau_chan.h @@ -31,8 +31,6 @@ struct nouveau_channel { u64 addr; } push; - /* TODO: this will be reworked in the near future */ - bool accel_done; void *fence; struct { int max; diff --git a/drivers/gpu/drm/nouveau/nouveau_debugfs.c b/drivers/gpu/drm/nouveau/nouveau_debugfs.c index 200e65a7cefc..c7869a639bef 100644 --- a/drivers/gpu/drm/nouveau/nouveau_debugfs.c +++ b/drivers/gpu/drm/nouveau/nouveau_debugfs.c @@ -314,14 +314,10 @@ nouveau_debugfs_fini(struct nouveau_drm *drm) drm->debugfs = NULL; } -int +void nouveau_module_debugfs_init(void) { nouveau_debugfs_root = debugfs_create_dir("nouveau", NULL); - if (IS_ERR(nouveau_debugfs_root)) - return PTR_ERR(nouveau_debugfs_root); - - return 0; } void diff --git a/drivers/gpu/drm/nouveau/nouveau_debugfs.h b/drivers/gpu/drm/nouveau/nouveau_debugfs.h index b7617b344ee2..d05ed0e641c4 100644 --- a/drivers/gpu/drm/nouveau/nouveau_debugfs.h +++ b/drivers/gpu/drm/nouveau/nouveau_debugfs.h @@ -24,7 +24,7 @@ extern void nouveau_debugfs_fini(struct nouveau_drm *); extern struct dentry *nouveau_debugfs_root; -int nouveau_module_debugfs_init(void); +void nouveau_module_debugfs_init(void); void nouveau_module_debugfs_fini(void); #else static inline void @@ -42,10 +42,9 @@ nouveau_debugfs_fini(struct nouveau_drm *drm) { } -static inline int +static inline void nouveau_module_debugfs_init(void) { - return 0; } static inline void diff --git a/drivers/gpu/drm/nouveau/nouveau_display.c b/drivers/gpu/drm/nouveau/nouveau_display.c index c50ec347b30a..805d0a87aa54 100644 --- a/drivers/gpu/drm/nouveau/nouveau_display.c +++ b/drivers/gpu/drm/nouveau/nouveau_display.c @@ -253,6 +253,7 @@ nouveau_check_bl_size(struct nouveau_drm *drm, struct nouveau_bo *nvbo, int nouveau_framebuffer_new(struct drm_device *dev, + const struct drm_format_info *info, const struct drm_mode_fb_cmd2 *mode_cmd, struct drm_gem_object *gem, struct drm_framebuffer **pfb) @@ -260,7 +261,6 @@ nouveau_framebuffer_new(struct drm_device *dev, struct nouveau_drm *drm = nouveau_drm(dev); struct nouveau_bo *nvbo = nouveau_gem_object(gem); struct drm_framebuffer *fb; - const struct drm_format_info *info; unsigned int height, i; uint32_t tile_mode; uint8_t kind; @@ -295,8 +295,6 @@ nouveau_framebuffer_new(struct drm_device *dev, kind = nvbo->kind; } - info = drm_get_format_info(dev, mode_cmd); - for (i = 0; i < info->num_planes; i++) { height = drm_format_info_plane_height(info, mode_cmd->height, @@ -320,7 +318,7 @@ nouveau_framebuffer_new(struct drm_device *dev, if (!(fb = *pfb = kzalloc(sizeof(*fb), GFP_KERNEL))) return -ENOMEM; - drm_helper_mode_fill_fb_struct(dev, fb, mode_cmd); + drm_helper_mode_fill_fb_struct(dev, fb, info, mode_cmd); fb->obj[0] = gem; ret = drm_framebuffer_init(dev, fb, &nouveau_framebuffer_funcs); @@ -332,6 +330,7 @@ nouveau_framebuffer_new(struct drm_device *dev, struct drm_framebuffer * nouveau_user_framebuffer_create(struct drm_device *dev, struct drm_file *file_priv, + const struct drm_format_info *info, const struct drm_mode_fb_cmd2 *mode_cmd) { struct drm_framebuffer *fb; @@ -342,7 +341,7 @@ nouveau_user_framebuffer_create(struct drm_device *dev, if (!gem) return ERR_PTR(-ENOENT); - 
ret = nouveau_framebuffer_new(dev, mode_cmd, gem, &fb); + ret = nouveau_framebuffer_new(dev, info, mode_cmd, gem, &fb); if (ret == 0) return fb; diff --git a/drivers/gpu/drm/nouveau/nouveau_display.h b/drivers/gpu/drm/nouveau/nouveau_display.h index 1f506f8b289c..470e0910d484 100644 --- a/drivers/gpu/drm/nouveau/nouveau_display.h +++ b/drivers/gpu/drm/nouveau/nouveau_display.h @@ -8,8 +8,11 @@ #include <drm/drm_framebuffer.h> +struct drm_format_info; + int nouveau_framebuffer_new(struct drm_device *dev, + const struct drm_format_info *info, const struct drm_mode_fb_cmd2 *mode_cmd, struct drm_gem_object *gem, struct drm_framebuffer **pfb); @@ -67,5 +70,6 @@ nouveau_framebuffer_get_layout(struct drm_framebuffer *fb, uint32_t *tile_mode, struct drm_framebuffer * nouveau_user_framebuffer_create(struct drm_device *, struct drm_file *, + const struct drm_format_info *, const struct drm_mode_fb_cmd2 *); #endif diff --git a/drivers/gpu/drm/nouveau/nouveau_dma.h b/drivers/gpu/drm/nouveau/nouveau_dma.h index 0e27b76d1e1c..c25ef9a54b9f 100644 --- a/drivers/gpu/drm/nouveau/nouveau_dma.h +++ b/drivers/gpu/drm/nouveau/nouveau_dma.h @@ -90,7 +90,6 @@ FIRE_RING(struct nouveau_channel *chan) { if (chan->dma.cur == chan->dma.put) return; - chan->accel_done = true; WRITE_PUT(chan->dma.cur); diff --git a/drivers/gpu/drm/nouveau/nouveau_drm.c b/drivers/gpu/drm/nouveau/nouveau_drm.c index 0c82a63cd49d..1527b801f013 100644 --- a/drivers/gpu/drm/nouveau/nouveau_drm.c +++ b/drivers/gpu/drm/nouveau/nouveau_drm.c @@ -1461,9 +1461,7 @@ nouveau_drm_init(void) if (!nouveau_modeset) return 0; - ret = nouveau_module_debugfs_init(); - if (ret) - return ret; + nouveau_module_debugfs_init(); #ifdef CONFIG_NOUVEAU_PLATFORM_DRIVER platform_driver_register(&nouveau_platform_driver); diff --git a/drivers/gpu/drm/nouveau/nouveau_exec.c b/drivers/gpu/drm/nouveau/nouveau_exec.c index 41b7c608c905..c4949e815eb3 100644 --- a/drivers/gpu/drm/nouveau/nouveau_exec.c +++ b/drivers/gpu/drm/nouveau/nouveau_exec.c @@ -60,14 +60,14 @@ * virtual address in the GPU's VA space there is no guarantee that the actual * mappings are created in the GPU's MMU. If the given memory is swapped out * at the time the bind operation is executed the kernel will stash the mapping - * details into it's internal alloctor and create the actual MMU mappings once + * details into it's internal allocator and create the actual MMU mappings once * the memory is swapped back in. While this is transparent for userspace, it is * guaranteed that all the backing memory is swapped back in and all the memory * mappings, as requested by userspace previously, are actually mapped once the * DRM_NOUVEAU_EXEC ioctl is called to submit an exec job. * * A VM_BIND job can be executed either synchronously or asynchronously. If - * exectued asynchronously, userspace may provide a list of syncobjs this job + * executed asynchronously, userspace may provide a list of syncobjs this job * will wait for and/or a list of syncobj the kernel will signal once the * VM_BIND job finished execution. If executed synchronously the ioctl will * block until the bind job is finished. For synchronous jobs the kernel will @@ -82,7 +82,7 @@ * Since VM_BIND jobs update the GPU's VA space on job submit, EXEC jobs do have * an up to date view of the VA space. However, the actual mappings might still * be pending. Hence, EXEC jobs require to have the particular fences - of - * the corresponding VM_BIND jobs they depent on - attached to them. 
+ * the corresponding VM_BIND jobs they depend on - attached to them. */ static int @@ -189,7 +189,7 @@ nouveau_exec_job_timeout(struct nouveau_job *job) NV_PRINTK(warn, job->cli, "job timeout, channel %d killed!\n", chan->chid); - return DRM_GPU_SCHED_STAT_NOMINAL; + return DRM_GPU_SCHED_STAT_RESET; } static const struct nouveau_job_ops nouveau_exec_job_ops = { diff --git a/drivers/gpu/drm/nouveau/nouveau_fence.c b/drivers/gpu/drm/nouveau/nouveau_fence.c index d5654e26d5bc..869d4335c0f4 100644 --- a/drivers/gpu/drm/nouveau/nouveau_fence.c +++ b/drivers/gpu/drm/nouveau/nouveau_fence.c @@ -38,12 +38,6 @@ static const struct dma_fence_ops nouveau_fence_ops_uevent; static const struct dma_fence_ops nouveau_fence_ops_legacy; -static inline struct nouveau_fence * -from_fence(struct dma_fence *fence) -{ - return container_of(fence, struct nouveau_fence, base); -} - static inline struct nouveau_fence_chan * nouveau_fctx(struct nouveau_fence *fence) { @@ -77,7 +71,7 @@ nouveau_local_fence(struct dma_fence *fence, struct nouveau_drm *drm) fence->ops != &nouveau_fence_ops_uevent) return NULL; - return from_fence(fence); + return to_nouveau_fence(fence); } void @@ -268,7 +262,7 @@ nouveau_fence_done(struct nouveau_fence *fence) static long nouveau_fence_wait_legacy(struct dma_fence *f, bool intr, long wait) { - struct nouveau_fence *fence = from_fence(f); + struct nouveau_fence *fence = to_nouveau_fence(f); unsigned long sleep_time = NSEC_PER_MSEC / 1000; unsigned long t = jiffies, timeout = t + wait; @@ -448,7 +442,7 @@ static const char *nouveau_fence_get_get_driver_name(struct dma_fence *fence) static const char *nouveau_fence_get_timeline_name(struct dma_fence *f) { - struct nouveau_fence *fence = from_fence(f); + struct nouveau_fence *fence = to_nouveau_fence(f); struct nouveau_fence_chan *fctx = nouveau_fctx(fence); return !fctx->dead ? 
fctx->name : "dead channel"; @@ -462,7 +456,7 @@ static const char *nouveau_fence_get_timeline_name(struct dma_fence *f) */ static bool nouveau_fence_is_signaled(struct dma_fence *f) { - struct nouveau_fence *fence = from_fence(f); + struct nouveau_fence *fence = to_nouveau_fence(f); struct nouveau_fence_chan *fctx = nouveau_fctx(fence); struct nouveau_channel *chan; bool ret = false; @@ -478,7 +472,7 @@ static bool nouveau_fence_is_signaled(struct dma_fence *f) static bool nouveau_fence_no_signaling(struct dma_fence *f) { - struct nouveau_fence *fence = from_fence(f); + struct nouveau_fence *fence = to_nouveau_fence(f); /* * caller should have a reference on the fence, @@ -503,7 +497,7 @@ static bool nouveau_fence_no_signaling(struct dma_fence *f) static void nouveau_fence_release(struct dma_fence *f) { - struct nouveau_fence *fence = from_fence(f); + struct nouveau_fence *fence = to_nouveau_fence(f); struct nouveau_fence_chan *fctx = nouveau_fctx(fence); kref_put(&fctx->fence_ref, nouveau_fence_context_put); @@ -521,7 +515,7 @@ static const struct dma_fence_ops nouveau_fence_ops_legacy = { static bool nouveau_fence_enable_signaling(struct dma_fence *f) { - struct nouveau_fence *fence = from_fence(f); + struct nouveau_fence *fence = to_nouveau_fence(f); struct nouveau_fence_chan *fctx = nouveau_fctx(fence); bool ret; diff --git a/drivers/gpu/drm/nouveau/nouveau_fence.h b/drivers/gpu/drm/nouveau/nouveau_fence.h index 6a983dd9f7b9..183dd43ecfff 100644 --- a/drivers/gpu/drm/nouveau/nouveau_fence.h +++ b/drivers/gpu/drm/nouveau/nouveau_fence.h @@ -17,6 +17,12 @@ struct nouveau_fence { unsigned long timeout; }; +static inline struct nouveau_fence * +to_nouveau_fence(struct dma_fence *fence) +{ + return container_of(fence, struct nouveau_fence, base); +} + int nouveau_fence_create(struct nouveau_fence **, struct nouveau_channel *); int nouveau_fence_new(struct nouveau_fence **, struct nouveau_channel *); void nouveau_fence_unref(struct nouveau_fence **); diff --git a/drivers/gpu/drm/nouveau/nouveau_platform.c b/drivers/gpu/drm/nouveau/nouveau_platform.c index a5ce8eb4a3be..8d5853deeee4 100644 --- a/drivers/gpu/drm/nouveau/nouveau_platform.c +++ b/drivers/gpu/drm/nouveau/nouveau_platform.c @@ -30,10 +30,7 @@ static int nouveau_platform_probe(struct platform_device *pdev) func = of_device_get_match_data(&pdev->dev); drm = nouveau_platform_device_create(func, pdev, &device); - if (IS_ERR(drm)) - return PTR_ERR(drm); - - return 0; + return PTR_ERR_OR_ZERO(drm); } static void nouveau_platform_remove(struct platform_device *pdev) diff --git a/drivers/gpu/drm/nouveau/nouveau_prime.c b/drivers/gpu/drm/nouveau/nouveau_prime.c index cd95446d6851..caab60fc62f6 100644 --- a/drivers/gpu/drm/nouveau/nouveau_prime.c +++ b/drivers/gpu/drm/nouveau/nouveau_prime.c @@ -108,9 +108,21 @@ struct dma_buf *nouveau_gem_prime_export(struct drm_gem_object *gobj, int flags) { struct nouveau_bo *nvbo = nouveau_gem_object(gobj); + struct ttm_operation_ctx ctx = { + .interruptible = true, + .no_wait_gpu = true, + /* We opt to avoid OOM on system pages allocations */ + .gfp_retry_mayfail = true, + .allow_res_evict = false, + }; + int ret; if (nvbo->no_share) return ERR_PTR(-EPERM); + ret = ttm_bo_setup_export(&nvbo->bo, &ctx); + if (ret) + return ERR_PTR(ret); + return drm_gem_prime_export(gobj, flags); } diff --git a/drivers/gpu/drm/nouveau/nouveau_sched.c b/drivers/gpu/drm/nouveau/nouveau_sched.c index 460a5fb02412..e60f7892f5ce 100644 --- a/drivers/gpu/drm/nouveau/nouveau_sched.c +++ 
b/drivers/gpu/drm/nouveau/nouveau_sched.c @@ -371,7 +371,7 @@ nouveau_sched_timedout_job(struct drm_sched_job *sched_job) { struct drm_gpu_scheduler *sched = sched_job->sched; struct nouveau_job *job = to_nouveau_job(sched_job); - enum drm_gpu_sched_stat stat = DRM_GPU_SCHED_STAT_NOMINAL; + enum drm_gpu_sched_stat stat = DRM_GPU_SCHED_STAT_RESET; drm_sched_stop(sched, sched_job); diff --git a/drivers/gpu/drm/nouveau/nouveau_uvmm.c b/drivers/gpu/drm/nouveau/nouveau_uvmm.c index 48f105239f42..79eefdfd08a2 100644 --- a/drivers/gpu/drm/nouveau/nouveau_uvmm.c +++ b/drivers/gpu/drm/nouveau/nouveau_uvmm.c @@ -1276,6 +1276,12 @@ nouveau_uvmm_bind_job_submit(struct nouveau_job *job, break; case OP_MAP: { struct nouveau_uvma_region *reg; + struct drm_gpuvm_map_req map_req = { + .map.va.addr = op->va.addr, + .map.va.range = op->va.range, + .map.gem.obj = op->gem.obj, + .map.gem.offset = op->gem.offset, + }; reg = nouveau_uvma_region_find_first(uvmm, op->va.addr, @@ -1301,10 +1307,7 @@ nouveau_uvmm_bind_job_submit(struct nouveau_job *job, } op->ops = drm_gpuvm_sm_map_ops_create(&uvmm->base, - op->va.addr, - op->va.range, - op->gem.obj, - op->gem.offset); + &map_req); if (IS_ERR(op->ops)) { ret = PTR_ERR(op->ops); goto unwind_continue; diff --git a/drivers/gpu/drm/nouveau/nvif/chan.c b/drivers/gpu/drm/nouveau/nvif/chan.c index baa10227d51a..80c01017d642 100644 --- a/drivers/gpu/drm/nouveau/nvif/chan.c +++ b/drivers/gpu/drm/nouveau/nvif/chan.c @@ -39,6 +39,9 @@ nvif_chan_gpfifo_post(struct nvif_chan *chan) const u32 pbptr = (chan->push.cur - map) + chan->func->gpfifo.post_size; const u32 gpptr = (chan->gpfifo.cur + 1) & chan->gpfifo.max; + if (!chan->func->gpfifo.post) + return 0; + return chan->func->gpfifo.post(chan, gpptr, pbptr); } diff --git a/drivers/gpu/drm/nouveau/nvif/vmm.c b/drivers/gpu/drm/nouveau/nvif/vmm.c index 99296f03371a..07c1ebc2a941 100644 --- a/drivers/gpu/drm/nouveau/nvif/vmm.c +++ b/drivers/gpu/drm/nouveau/nvif/vmm.c @@ -219,7 +219,8 @@ nvif_vmm_ctor(struct nvif_mmu *mmu, const char *name, s32 oclass, case RAW: args->type = NVIF_VMM_V0_TYPE_RAW; break; default: WARN_ON(1); - return -EINVAL; + ret = -EINVAL; + goto done; } memcpy(args->data, argv, argc); diff --git a/drivers/gpu/drm/nouveau/nvkm/core/enum.c b/drivers/gpu/drm/nouveau/nvkm/core/enum.c index b9581feb24cc..a23b40b27b81 100644 --- a/drivers/gpu/drm/nouveau/nvkm/core/enum.c +++ b/drivers/gpu/drm/nouveau/nvkm/core/enum.c @@ -44,7 +44,7 @@ nvkm_snprintbf(char *data, int size, const struct nvkm_bitfield *bf, u32 value) bool space = false; while (size >= 1 && bf->name) { if (value & bf->mask) { - int this = snprintf(data, size, "%s%s", + int this = scnprintf(data, size, "%s%s", space ? 
" " : "", bf->name); size -= this; data += this; diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/base.c b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/base.c index fdffa0391b31..6fd4e60634fb 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/base.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/base.c @@ -350,6 +350,8 @@ nvkm_fifo_dtor(struct nvkm_engine *engine) nvkm_chid_unref(&fifo->chid); nvkm_event_fini(&fifo->nonstall.event); + if (fifo->func->nonstall_dtor) + fifo->func->nonstall_dtor(fifo); mutex_destroy(&fifo->mutex); if (fifo->func->dtor) diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/ga100.c b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/ga100.c index e74493a4569e..6848a56f20c0 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/ga100.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/ga100.c @@ -517,19 +517,11 @@ ga100_fifo_nonstall_intr(struct nvkm_inth *inth) static void ga100_fifo_nonstall_block(struct nvkm_event *event, int type, int index) { - struct nvkm_fifo *fifo = container_of(event, typeof(*fifo), nonstall.event); - struct nvkm_runl *runl = nvkm_runl_get(fifo, index, 0); - - nvkm_inth_block(&runl->nonstall.inth); } static void ga100_fifo_nonstall_allow(struct nvkm_event *event, int type, int index) { - struct nvkm_fifo *fifo = container_of(event, typeof(*fifo), nonstall.event); - struct nvkm_runl *runl = nvkm_runl_get(fifo, index, 0); - - nvkm_inth_allow(&runl->nonstall.inth); } const struct nvkm_event_func @@ -564,12 +556,26 @@ ga100_fifo_nonstall_ctor(struct nvkm_fifo *fifo) if (ret) return ret; + nvkm_inth_allow(&runl->nonstall.inth); + nr = max(nr, runl->id + 1); } return nr; } +void +ga100_fifo_nonstall_dtor(struct nvkm_fifo *fifo) +{ + struct nvkm_runl *runl; + + nvkm_runl_foreach(runl, fifo) { + if (runl->nonstall.vector < 0) + continue; + nvkm_inth_block(&runl->nonstall.inth); + } +} + int ga100_fifo_runl_ctor(struct nvkm_fifo *fifo) { @@ -599,6 +605,7 @@ ga100_fifo = { .runl_ctor = ga100_fifo_runl_ctor, .mmu_fault = &tu102_fifo_mmu_fault, .nonstall_ctor = ga100_fifo_nonstall_ctor, + .nonstall_dtor = ga100_fifo_nonstall_dtor, .nonstall = &ga100_fifo_nonstall, .runl = &ga100_runl, .runq = &ga100_runq, diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/ga102.c b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/ga102.c index 755235f55b3a..18a0b1f4eab7 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/ga102.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/ga102.c @@ -30,6 +30,7 @@ ga102_fifo = { .runl_ctor = ga100_fifo_runl_ctor, .mmu_fault = &tu102_fifo_mmu_fault, .nonstall_ctor = ga100_fifo_nonstall_ctor, + .nonstall_dtor = ga100_fifo_nonstall_dtor, .nonstall = &ga100_fifo_nonstall, .runl = &ga100_runl, .runq = &ga100_runq, diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/priv.h b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/priv.h index 5e81ae195329..fff1428ef267 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/priv.h +++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/priv.h @@ -41,6 +41,7 @@ struct nvkm_fifo_func { void (*start)(struct nvkm_fifo *, unsigned long *); int (*nonstall_ctor)(struct nvkm_fifo *); + void (*nonstall_dtor)(struct nvkm_fifo *); const struct nvkm_event_func *nonstall; const struct nvkm_runl_func *runl; @@ -200,6 +201,7 @@ u32 tu102_chan_doorbell_handle(struct nvkm_chan *); int ga100_fifo_runl_ctor(struct nvkm_fifo *); int ga100_fifo_nonstall_ctor(struct nvkm_fifo *); +void ga100_fifo_nonstall_dtor(struct nvkm_fifo *); extern const struct nvkm_event_func ga100_fifo_nonstall; extern const struct nvkm_runl_func ga100_runl; extern 
const struct nvkm_runq_func ga100_runq; diff --git a/drivers/gpu/drm/nouveau/nvkm/falcon/gm200.c b/drivers/gpu/drm/nouveau/nvkm/falcon/gm200.c index b7da3ab44c27..7c43397c19e6 100644 --- a/drivers/gpu/drm/nouveau/nvkm/falcon/gm200.c +++ b/drivers/gpu/drm/nouveau/nvkm/falcon/gm200.c @@ -103,7 +103,7 @@ gm200_flcn_pio_imem_wr_init(struct nvkm_falcon *falcon, u8 port, bool sec, u32 i static void gm200_flcn_pio_imem_wr(struct nvkm_falcon *falcon, u8 port, const u8 *img, int len, u16 tag) { - nvkm_falcon_wr32(falcon, 0x188 + (port * 0x10), tag++); + nvkm_falcon_wr32(falcon, 0x188 + (port * 0x10), tag); while (len >= 4) { nvkm_falcon_wr32(falcon, 0x184 + (port * 0x10), *(u32 *)img); img += 4; @@ -249,9 +249,11 @@ int gm200_flcn_fw_load(struct nvkm_falcon_fw *fw) { struct nvkm_falcon *falcon = fw->falcon; - int target, ret; + int ret; if (fw->inst) { + int target; + nvkm_falcon_mask(falcon, 0x048, 0x00000001, 0x00000001); switch (nvkm_memory_target(fw->inst)) { @@ -285,15 +287,6 @@ gm200_flcn_fw_load(struct nvkm_falcon_fw *fw) } if (fw->boot) { - switch (nvkm_memory_target(&fw->fw.mem.memory)) { - case NVKM_MEM_TARGET_VRAM: target = 4; break; - case NVKM_MEM_TARGET_HOST: target = 5; break; - case NVKM_MEM_TARGET_NCOH: target = 6; break; - default: - WARN_ON(1); - return -EINVAL; - } - ret = nvkm_falcon_pio_wr(falcon, fw->boot, 0, 0, IMEM, falcon->code.limit - fw->boot_size, fw->boot_size, fw->boot_addr >> 8, false); diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/ad102.c b/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/ad102.c index eb765da0876e..35d1fcef520b 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/ad102.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/ad102.c @@ -41,8 +41,8 @@ ad102_gsp = { static struct nvkm_gsp_fwif ad102_gsps[] = { - { 1, tu102_gsp_load, &ad102_gsp, &r570_rm_ga102, "570.144", true }, - { 0, tu102_gsp_load, &ad102_gsp, &r535_rm_ga102, "535.113.01", true }, + { 1, tu102_gsp_load, &ad102_gsp, &r570_rm_ga102, "570.144" }, + { 0, tu102_gsp_load, &ad102_gsp, &r535_rm_ga102, "535.113.01" }, {} }; diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/base.c b/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/base.c index d23243a83a4c..7ccb41761066 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/base.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/base.c @@ -138,8 +138,10 @@ nvkm_gsp_new_(const struct nvkm_gsp_fwif *fwif, struct nvkm_device *device, nvkm_subdev_ctor(&nvkm_gsp, device, type, inst, &gsp->subdev); fwif = nvkm_firmware_load(&gsp->subdev, fwif, "Gsp", gsp); - if (IS_ERR(fwif)) + if (IS_ERR(fwif)) { + nvkm_error(&gsp->subdev, "Failed to load required firmware for device."); return PTR_ERR(fwif); + } gsp->func = fwif->func; diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/fwsec.c b/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/fwsec.c index 52412965fac1..5b721bd9d799 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/fwsec.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/fwsec.c @@ -209,11 +209,12 @@ nvkm_gsp_fwsec_v2(struct nvkm_gsp *gsp, const char *name, fw->boot_addr = bld->start_tag << 8; fw->boot_size = bld->code_size; fw->boot = kmemdup(bl->data + hdr->data_offset + bld->code_off, fw->boot_size, GFP_KERNEL); - if (!fw->boot) - ret = -ENOMEM; nvkm_firmware_put(bl); + if (!fw->boot) + return -ENOMEM; + /* Patch in interface data. 
*/ return nvkm_gsp_fwsec_patch(gsp, fw, desc->InterfaceOffset, init_cmd); } diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/gb100.c b/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/gb100.c index 12a3f2c1ed82..1b3b31b95ce4 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/gb100.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/gb100.c @@ -20,7 +20,7 @@ gb100_gsp = { static struct nvkm_gsp_fwif gb100_gsps[] = { - { 0, gh100_gsp_load, &gb100_gsp, &r570_rm_gb10x, "570.144", true }, + { 0, gh100_gsp_load, &gb100_gsp, &r570_rm_gb10x, "570.144" }, {} }; diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/gb202.c b/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/gb202.c index c1d718172ddf..51384c63148c 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/gb202.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/gb202.c @@ -20,7 +20,7 @@ gb202_gsp = { static struct nvkm_gsp_fwif gb202_gsps[] = { - { 0, gh100_gsp_load, &gb202_gsp, &r570_rm_gb20x, "570.144", true }, + { 0, gh100_gsp_load, &gb202_gsp, &r570_rm_gb20x, "570.144" }, {} }; diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/gh100.c b/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/gh100.c index ce31e8248807..b0dd5fce7bad 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/gh100.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/gh100.c @@ -344,7 +344,7 @@ done: static struct nvkm_gsp_fwif gh100_gsps[] = { - { 0, gh100_gsp_load, &gh100_gsp, &r570_rm_gh100, "570.144", true }, + { 0, gh100_gsp_load, &gh100_gsp, &r570_rm_gh100, "570.144" }, {} }; diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/priv.h b/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/priv.h index 4f14e85fc69e..c3494b7ac572 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/priv.h +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/priv.h @@ -14,7 +14,6 @@ struct nvkm_gsp_fwif { const struct nvkm_gsp_func *func; const struct nvkm_rm_impl *rm; const char *ver; - bool enable; }; int nvkm_gsp_load_fw(struct nvkm_gsp *, const char *name, const char *ver, diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r535/fifo.c b/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r535/fifo.c index 1ac5628c5140..4ed54b386a60 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r535/fifo.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r535/fifo.c @@ -601,6 +601,7 @@ r535_fifo_new(const struct nvkm_fifo_func *hw, struct nvkm_device *device, rm->chan.func = &r535_chan; rm->nonstall = &ga100_fifo_nonstall; rm->nonstall_ctor = ga100_fifo_nonstall_ctor; + rm->nonstall_dtor = ga100_fifo_nonstall_dtor; return nvkm_fifo_new_(rm, device, type, inst, pfifo); } diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r535/gsp.c b/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r535/gsp.c index baf42339f93e..32e6a065d6d7 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r535/gsp.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r535/gsp.c @@ -582,10 +582,13 @@ struct nv_gsp_registry_entries { * RMSecBusResetEnable - enables PCI secondary bus reset * RMForcePcieConfigSave - forces GSP-RM to preserve PCI configuration * registers on any PCI reset. + * RMDevidCheckIgnore - allows GSP-RM to boot even if the PCI dev ID + * is not found in the internal product name database. 
*/ static const struct nv_gsp_registry_entries r535_registry_entries[] = { { "RMSecBusResetEnable", 1 }, { "RMForcePcieConfigSave", 1 }, + { "RMDevidCheckIgnore", 1 }, }; #define NV_GSP_REG_NUM_ENTRIES ARRAY_SIZE(r535_registry_entries) @@ -719,7 +722,6 @@ r535_gsp_acpi_caps(acpi_handle handle, CAPS_METHOD_DATA *caps) union acpi_object argv4 = { .buffer.type = ACPI_TYPE_BUFFER, .buffer.length = 4, - .buffer.pointer = kmalloc(argv4.buffer.length, GFP_KERNEL), }, *obj; caps->status = 0xffff; @@ -727,17 +729,22 @@ r535_gsp_acpi_caps(acpi_handle handle, CAPS_METHOD_DATA *caps) if (!acpi_check_dsm(handle, &NVOP_DSM_GUID, NVOP_DSM_REV, BIT_ULL(0x1a))) return; + argv4.buffer.pointer = kmalloc(argv4.buffer.length, GFP_KERNEL); + if (!argv4.buffer.pointer) + return; + obj = acpi_evaluate_dsm(handle, &NVOP_DSM_GUID, NVOP_DSM_REV, 0x1a, &argv4); if (!obj) - return; + goto done; if (WARN_ON(obj->type != ACPI_TYPE_BUFFER) || WARN_ON(obj->buffer.length != 4)) - return; + goto done; caps->status = 0; caps->optimusCaps = *(u32 *)obj->buffer.pointer; +done: ACPI_FREE(obj); kfree(argv4.buffer.pointer); @@ -754,24 +761,28 @@ r535_gsp_acpi_jt(acpi_handle handle, JT_METHOD_DATA *jt) union acpi_object argv4 = { .buffer.type = ACPI_TYPE_BUFFER, .buffer.length = sizeof(caps), - .buffer.pointer = kmalloc(argv4.buffer.length, GFP_KERNEL), }, *obj; jt->status = 0xffff; + argv4.buffer.pointer = kmalloc(argv4.buffer.length, GFP_KERNEL); + if (!argv4.buffer.pointer) + return; + obj = acpi_evaluate_dsm(handle, &JT_DSM_GUID, JT_DSM_REV, 0x1, &argv4); if (!obj) - return; + goto done; if (WARN_ON(obj->type != ACPI_TYPE_BUFFER) || WARN_ON(obj->buffer.length != 4)) - return; + goto done; jt->status = 0; jt->jtCaps = *(u32 *)obj->buffer.pointer; jt->jtRevId = (jt->jtCaps & 0xfff00000) >> 20; jt->bSBIOSCaps = 0; +done: ACPI_FREE(obj); kfree(argv4.buffer.pointer); @@ -1744,6 +1755,13 @@ r535_gsp_fini(struct nvkm_gsp *gsp, bool suspend) nvkm_gsp_sg_free(gsp->subdev.device, &gsp->sr.sgt); return ret; } + + /* + * TODO: Debug the GSP firmware / RPC handling to find out why + * without this Turing (but none of the other architectures) + * ends up resetting all channels after resume. + */ + msleep(50); } ret = r535_gsp_rpc_unloading_guest_driver(gsp, suspend); diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r535/rpc.c b/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r535/rpc.c index 5acb98d137bd..0dc4782df8c0 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r535/rpc.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r535/rpc.c @@ -325,7 +325,7 @@ r535_gsp_msgq_recv(struct nvkm_gsp *gsp, u32 gsp_rpc_len, int *retries) rpc = r535_gsp_msgq_peek(gsp, sizeof(*rpc), info.retries); if (IS_ERR_OR_NULL(rpc)) { - kfree(buf); + kvfree(buf); return rpc; } @@ -334,7 +334,7 @@ r535_gsp_msgq_recv(struct nvkm_gsp *gsp, u32 gsp_rpc_len, int *retries) rpc = r535_gsp_msgq_recv_one_elem(gsp, &info); if (IS_ERR_OR_NULL(rpc)) { - kfree(buf); + kvfree(buf); return rpc; } @@ -637,12 +637,18 @@ r535_gsp_rpc_push(struct nvkm_gsp *gsp, void *payload, if (payload_size > max_payload_size) { const u32 fn = rpc->function; u32 remain_payload_size = payload_size; + void *next; - /* Adjust length, and send initial RPC. */ - rpc->length = sizeof(*rpc) + max_payload_size; - msg->checksum = rpc->length; + /* Send initial RPC. 
*/ + next = r535_gsp_rpc_get(gsp, fn, max_payload_size); + if (IS_ERR(next)) { + repv = next; + goto done; + } - repv = r535_gsp_rpc_send(gsp, payload, NVKM_GSP_RPC_REPLY_NOWAIT, 0); + memcpy(next, payload, max_payload_size); + + repv = r535_gsp_rpc_send(gsp, next, NVKM_GSP_RPC_REPLY_NOWAIT, 0); if (IS_ERR(repv)) goto done; @@ -653,7 +659,6 @@ r535_gsp_rpc_push(struct nvkm_gsp *gsp, void *payload, while (remain_payload_size) { u32 size = min(remain_payload_size, max_payload_size); - void *next; next = r535_gsp_rpc_get(gsp, NV_VGPU_MSG_FUNCTION_CONTINUATION_RECORD, size); if (IS_ERR(next)) { @@ -674,6 +679,8 @@ r535_gsp_rpc_push(struct nvkm_gsp *gsp, void *payload, /* Wait for reply. */ repv = r535_gsp_rpc_handle_reply(gsp, fn, policy, payload_size + sizeof(*rpc)); + if (!IS_ERR(repv)) + kvfree(msg); } else { repv = r535_gsp_rpc_send(gsp, payload, policy, gsp_rpc_len); } diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r535/vmm.c b/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r535/vmm.c index 52f2e5f14517..f25ea610cd99 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r535/vmm.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r535/vmm.c @@ -121,7 +121,7 @@ r535_mmu_vaspace_new(struct nvkm_vmm *vmm, u32 handle, bool external) page_shift -= desc->bits; ctrl->levels[i].physAddress = pd->pt[0]->addr; - ctrl->levels[i].size = (1 << desc->bits) * desc->size; + ctrl->levels[i].size = BIT_ULL(desc->bits) * desc->size; ctrl->levels[i].aperture = 1; ctrl->levels[i].pageShift = page_shift; diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/tu102.c b/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/tu102.c index 58e233bc53b1..81e56da0474a 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/tu102.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/tu102.c @@ -383,13 +383,9 @@ int tu102_gsp_load_rm(struct nvkm_gsp *gsp, const struct nvkm_gsp_fwif *fwif) { struct nvkm_subdev *subdev = &gsp->subdev; - bool enable_gsp = fwif->enable; int ret; -#if IS_ENABLED(CONFIG_DRM_NOUVEAU_GSP_DEFAULT) - enable_gsp = true; -#endif - if (!nvkm_boolopt(subdev->device->cfgopt, "NvGspRm", enable_gsp)) + if (!nvkm_boolopt(subdev->device->cfgopt, "NvGspRm", true)) return -EINVAL; ret = nvkm_gsp_load_fw(gsp, "gsp", fwif->ver, &gsp->fws.rm); diff --git a/drivers/gpu/drm/nova/driver.rs b/drivers/gpu/drm/nova/driver.rs index b28b2e05cc15..91b7380f83ab 100644 --- a/drivers/gpu/drm/nova/driver.rs +++ b/drivers/gpu/drm/nova/driver.rs @@ -1,6 +1,8 @@ // SPDX-License-Identifier: GPL-2.0 -use kernel::{auxiliary, c_str, device::Core, drm, drm::gem, drm::ioctl, prelude::*, types::ARef}; +use kernel::{ + auxiliary, c_str, device::Core, drm, drm::gem, drm::ioctl, prelude::*, sync::aref::ARef, +}; use crate::file::File; use crate::gem::NovaObject; diff --git a/drivers/gpu/drm/nova/file.rs b/drivers/gpu/drm/nova/file.rs index 7e59a34b830d..90b9d2d0ec4a 100644 --- a/drivers/gpu/drm/nova/file.rs +++ b/drivers/gpu/drm/nova/file.rs @@ -2,13 +2,11 @@ use crate::driver::{NovaDevice, NovaDriver}; use crate::gem::NovaObject; -use crate::uapi::{GemCreate, GemInfo, Getparam}; use kernel::{ alloc::flags::*, drm::{self, gem::BaseObject}, pci, prelude::*, - types::Opaque, uapi, }; @@ -26,20 +24,19 @@ impl File { /// IOCTL: get_param: Query GPU / driver metadata. 
pub(crate) fn get_param( dev: &NovaDevice, - getparam: &Opaque<uapi::drm_nova_getparam>, + getparam: &mut uapi::drm_nova_getparam, _file: &drm::File<File>, ) -> Result<u32> { let adev = &dev.adev; let parent = adev.parent().ok_or(ENOENT)?; let pdev: &pci::Device = parent.try_into()?; - let getparam: &Getparam = getparam.into(); - let value = match getparam.param() as u32 { + let value = match getparam.param as u32 { uapi::NOVA_GETPARAM_VRAM_BAR_SIZE => pdev.resource_len(1)?, _ => return Err(EINVAL), }; - getparam.set_value(value); + getparam.value = Into::<u64>::into(value); Ok(0) } @@ -47,13 +44,12 @@ impl File { /// IOCTL: gem_create: Create a new DRM GEM object. pub(crate) fn gem_create( dev: &NovaDevice, - req: &Opaque<uapi::drm_nova_gem_create>, + req: &mut uapi::drm_nova_gem_create, file: &drm::File<File>, ) -> Result<u32> { - let req: &GemCreate = req.into(); - let obj = NovaObject::new(dev, req.size().try_into()?)?; + let obj = NovaObject::new(dev, req.size.try_into()?)?; - req.set_handle(obj.create_handle(file)?); + req.handle = obj.create_handle(file)?; Ok(0) } @@ -61,13 +57,12 @@ impl File { /// IOCTL: gem_info: Query GEM metadata. pub(crate) fn gem_info( _dev: &NovaDevice, - req: &Opaque<uapi::drm_nova_gem_info>, + req: &mut uapi::drm_nova_gem_info, file: &drm::File<File>, ) -> Result<u32> { - let req: &GemInfo = req.into(); - let bo = NovaObject::lookup_handle(file, req.handle())?; + let bo = NovaObject::lookup_handle(file, req.handle)?; - req.set_size(bo.size().try_into()?); + req.size = bo.size().try_into()?; Ok(0) } diff --git a/drivers/gpu/drm/nova/gem.rs b/drivers/gpu/drm/nova/gem.rs index 33b62d21400c..2760ba4f3450 100644 --- a/drivers/gpu/drm/nova/gem.rs +++ b/drivers/gpu/drm/nova/gem.rs @@ -4,7 +4,7 @@ use kernel::{ drm, drm::{gem, gem::BaseObject}, prelude::*, - types::ARef, + sync::aref::ARef, }; use crate::{ @@ -16,16 +16,14 @@ use crate::{ #[pin_data] pub(crate) struct NovaObject {} -impl gem::BaseDriverObject<gem::Object<NovaObject>> for NovaObject { +impl gem::DriverObject for NovaObject { + type Driver = NovaDriver; + fn new(_dev: &NovaDevice, _size: usize) -> impl PinInit<Self, Error> { try_pin_init!(NovaObject {}) } } -impl gem::DriverObject for NovaObject { - type Driver = NovaDriver; -} - impl NovaObject { /// Create a new DRM GEM object. pub(crate) fn new(dev: &NovaDevice, size: usize) -> Result<ARef<gem::Object<Self>>> { diff --git a/drivers/gpu/drm/nova/nova.rs b/drivers/gpu/drm/nova/nova.rs index 902876aa14d1..8893e58ee0db 100644 --- a/drivers/gpu/drm/nova/nova.rs +++ b/drivers/gpu/drm/nova/nova.rs @@ -5,14 +5,13 @@ mod driver; mod file; mod gem; -mod uapi; use crate::driver::NovaDriver; kernel::module_auxiliary_driver! { type: NovaDriver, name: "Nova", - author: "Danilo Krummrich", + authors: ["Danilo Krummrich"], description: "Nova GPU driver", license: "GPL v2", } diff --git a/drivers/gpu/drm/nova/uapi.rs b/drivers/gpu/drm/nova/uapi.rs deleted file mode 100644 index eb228a58d423..000000000000 --- a/drivers/gpu/drm/nova/uapi.rs +++ /dev/null @@ -1,61 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 - -use kernel::uapi; - -// TODO Work out some common infrastructure to avoid boilerplate code for uAPI abstractions. - -macro_rules! define_uapi_abstraction { - ($name:ident <= $inner:ty) => { - #[repr(transparent)] - pub struct $name(::kernel::types::Opaque<$inner>); - - impl ::core::convert::From<&::kernel::types::Opaque<$inner>> for &$name { - fn from(value: &::kernel::types::Opaque<$inner>) -> Self { - // SAFETY: `Self` is a transparent wrapper of `$inner`. 
- unsafe { ::core::mem::transmute(value) } - } - } - }; -} - -define_uapi_abstraction!(Getparam <= uapi::drm_nova_getparam); - -impl Getparam { - pub fn param(&self) -> u64 { - // SAFETY: `self.get()` is a valid pointer to a `struct drm_nova_getparam`. - unsafe { (*self.0.get()).param } - } - - pub fn set_value(&self, v: u64) { - // SAFETY: `self.get()` is a valid pointer to a `struct drm_nova_getparam`. - unsafe { (*self.0.get()).value = v }; - } -} - -define_uapi_abstraction!(GemCreate <= uapi::drm_nova_gem_create); - -impl GemCreate { - pub fn size(&self) -> u64 { - // SAFETY: `self.get()` is a valid pointer to a `struct drm_nova_gem_create`. - unsafe { (*self.0.get()).size } - } - - pub fn set_handle(&self, handle: u32) { - // SAFETY: `self.get()` is a valid pointer to a `struct drm_nova_gem_create`. - unsafe { (*self.0.get()).handle = handle }; - } -} - -define_uapi_abstraction!(GemInfo <= uapi::drm_nova_gem_info); - -impl GemInfo { - pub fn handle(&self) -> u32 { - // SAFETY: `self.get()` is a valid pointer to a `struct drm_nova_gem_info`. - unsafe { (*self.0.get()).handle } - } - - pub fn set_size(&self, size: u64) { - // SAFETY: `self.get()` is a valid pointer to a `struct drm_nova_gem_info`. - unsafe { (*self.0.get()).size = size }; - } -} diff --git a/drivers/gpu/drm/omapdrm/omap_drv.c b/drivers/gpu/drm/omapdrm/omap_drv.c index 054b71dba6a7..794267f0f007 100644 --- a/drivers/gpu/drm/omapdrm/omap_drv.c +++ b/drivers/gpu/drm/omapdrm/omap_drv.c @@ -378,10 +378,8 @@ static int omap_display_id(struct omap_dss_device *output) struct device_node *node = NULL; if (output->bridge) { - struct drm_bridge *bridge = output->bridge; - - while (drm_bridge_get_next_bridge(bridge)) - bridge = drm_bridge_get_next_bridge(bridge); + struct drm_bridge *bridge __free(drm_bridge_put) = + drm_bridge_chain_get_last_bridge(output->bridge->encoder); node = bridge->of_node; } diff --git a/drivers/gpu/drm/omapdrm/omap_fb.c b/drivers/gpu/drm/omapdrm/omap_fb.c index 449d521c78fe..bb3105556f19 100644 --- a/drivers/gpu/drm/omapdrm/omap_fb.c +++ b/drivers/gpu/drm/omapdrm/omap_fb.c @@ -335,10 +335,9 @@ void omap_framebuffer_describe(struct drm_framebuffer *fb, struct seq_file *m) #endif struct drm_framebuffer *omap_framebuffer_create(struct drm_device *dev, - struct drm_file *file, const struct drm_mode_fb_cmd2 *mode_cmd) + struct drm_file *file, const struct drm_format_info *info, + const struct drm_mode_fb_cmd2 *mode_cmd) { - const struct drm_format_info *info = drm_get_format_info(dev, - mode_cmd); unsigned int num_planes = info->num_planes; struct drm_gem_object *bos[4]; struct drm_framebuffer *fb; @@ -352,7 +351,7 @@ struct drm_framebuffer *omap_framebuffer_create(struct drm_device *dev, } } - fb = omap_framebuffer_init(dev, mode_cmd, bos); + fb = omap_framebuffer_init(dev, info, mode_cmd, bos); if (IS_ERR(fb)) goto error; @@ -366,9 +365,9 @@ error: } struct drm_framebuffer *omap_framebuffer_init(struct drm_device *dev, + const struct drm_format_info *info, const struct drm_mode_fb_cmd2 *mode_cmd, struct drm_gem_object **bos) { - const struct drm_format_info *format = NULL; struct omap_framebuffer *omap_fb = NULL; struct drm_framebuffer *fb = NULL; unsigned int pitch = mode_cmd->pitches[0]; @@ -378,14 +377,12 @@ struct drm_framebuffer *omap_framebuffer_init(struct drm_device *dev, dev, mode_cmd, mode_cmd->width, mode_cmd->height, (char *)&mode_cmd->pixel_format); - format = drm_get_format_info(dev, mode_cmd); - for (i = 0; i < ARRAY_SIZE(formats); i++) { if (formats[i] == mode_cmd->pixel_format) break; } - if 
(!format || i == ARRAY_SIZE(formats)) { + if (i == ARRAY_SIZE(formats)) { dev_dbg(dev->dev, "unsupported pixel format: %4.4s\n", (char *)&mode_cmd->pixel_format); ret = -EINVAL; @@ -399,7 +396,7 @@ struct drm_framebuffer *omap_framebuffer_init(struct drm_device *dev, } fb = &omap_fb->base; - omap_fb->format = format; + omap_fb->format = info; mutex_init(&omap_fb->lock); /* @@ -407,23 +404,23 @@ struct drm_framebuffer *omap_framebuffer_init(struct drm_device *dev, * that the two planes of multiplane formats need the same number of * bytes per pixel. */ - if (format->num_planes == 2 && pitch != mode_cmd->pitches[1]) { + if (info->num_planes == 2 && pitch != mode_cmd->pitches[1]) { dev_dbg(dev->dev, "pitches differ between planes 0 and 1\n"); ret = -EINVAL; goto fail; } - if (pitch % format->cpp[0]) { + if (pitch % info->cpp[0]) { dev_dbg(dev->dev, "buffer pitch (%u bytes) is not a multiple of pixel size (%u bytes)\n", - pitch, format->cpp[0]); + pitch, info->cpp[0]); ret = -EINVAL; goto fail; } - for (i = 0; i < format->num_planes; i++) { + for (i = 0; i < info->num_planes; i++) { struct plane *plane = &omap_fb->planes[i]; - unsigned int vsub = i == 0 ? 1 : format->vsub; + unsigned int vsub = i == 0 ? 1 : info->vsub; unsigned int size; size = pitch * mode_cmd->height / vsub; @@ -440,7 +437,7 @@ struct drm_framebuffer *omap_framebuffer_init(struct drm_device *dev, plane->dma_addr = 0; } - drm_helper_mode_fill_fb_struct(dev, fb, mode_cmd); + drm_helper_mode_fill_fb_struct(dev, fb, info, mode_cmd); ret = drm_framebuffer_init(dev, fb, &omap_framebuffer_funcs); if (ret) { diff --git a/drivers/gpu/drm/omapdrm/omap_fb.h b/drivers/gpu/drm/omapdrm/omap_fb.h index b75f0b5ef1d8..e6010302a22b 100644 --- a/drivers/gpu/drm/omapdrm/omap_fb.h +++ b/drivers/gpu/drm/omapdrm/omap_fb.h @@ -13,6 +13,7 @@ struct drm_connector; struct drm_device; struct drm_file; struct drm_framebuffer; +struct drm_format_info; struct drm_gem_object; struct drm_mode_fb_cmd2; struct drm_plane_state; @@ -20,8 +21,10 @@ struct omap_overlay_info; struct seq_file; struct drm_framebuffer *omap_framebuffer_create(struct drm_device *dev, - struct drm_file *file, const struct drm_mode_fb_cmd2 *mode_cmd); + struct drm_file *file, const struct drm_format_info *info, + const struct drm_mode_fb_cmd2 *mode_cmd); struct drm_framebuffer *omap_framebuffer_init(struct drm_device *dev, + const struct drm_format_info *info, const struct drm_mode_fb_cmd2 *mode_cmd, struct drm_gem_object **bos); int omap_framebuffer_pin(struct drm_framebuffer *fb); void omap_framebuffer_unpin(struct drm_framebuffer *fb); diff --git a/drivers/gpu/drm/omapdrm/omap_fbdev.c b/drivers/gpu/drm/omapdrm/omap_fbdev.c index 7b6396890681..948af7ec1130 100644 --- a/drivers/gpu/drm/omapdrm/omap_fbdev.c +++ b/drivers/gpu/drm/omapdrm/omap_fbdev.c @@ -197,7 +197,10 @@ int omap_fbdev_driver_fbdev_probe(struct drm_fb_helper *helper, goto fail; } - fb = omap_framebuffer_init(dev, &mode_cmd, &bo); + fb = omap_framebuffer_init(dev, + drm_get_format_info(dev, mode_cmd.pixel_format, + mode_cmd.modifier[0]), + &mode_cmd, &bo); if (IS_ERR(fb)) { dev_err(dev->dev, "failed to allocate fb\n"); /* note: if fb creation failed, we can't rely on fb destroy diff --git a/drivers/gpu/drm/omapdrm/omap_gem.c b/drivers/gpu/drm/omapdrm/omap_gem.c index b9c67e4ca360..381552bfb409 100644 --- a/drivers/gpu/drm/omapdrm/omap_gem.c +++ b/drivers/gpu/drm/omapdrm/omap_gem.c @@ -8,7 +8,6 @@ #include <linux/seq_file.h> #include <linux/shmem_fs.h> #include <linux/spinlock.h> -#include <linux/pfn_t.h> #include 
<linux/vmalloc.h> #include <drm/drm_prime.h> @@ -371,8 +370,7 @@ static vm_fault_t omap_gem_fault_1d(struct drm_gem_object *obj, VERB("Inserting %p pfn %lx, pa %lx", (void *)vmf->address, pfn, pfn << PAGE_SHIFT); - return vmf_insert_mixed(vma, vmf->address, - __pfn_to_pfn_t(pfn, PFN_DEV)); + return vmf_insert_mixed(vma, vmf->address, pfn); } /* Special handling for the case of faulting in 2d tiled buffers */ @@ -467,8 +465,7 @@ static vm_fault_t omap_gem_fault_2d(struct drm_gem_object *obj, pfn, pfn << PAGE_SHIFT); for (i = n; i > 0; i--) { - ret = vmf_insert_mixed(vma, - vaddr, __pfn_to_pfn_t(pfn, PFN_DEV)); + ret = vmf_insert_mixed(vma, vaddr, pfn); if (ret & VM_FAULT_ERROR) break; pfn += priv->usergart[fmt].stride_pfn; diff --git a/drivers/gpu/drm/panel/Kconfig b/drivers/gpu/drm/panel/Kconfig index d5aa1c95c6a4..407c5f6a268b 100644 --- a/drivers/gpu/drm/panel/Kconfig +++ b/drivers/gpu/drm/panel/Kconfig @@ -193,6 +193,16 @@ config DRM_PANEL_HIMAX_HX83112A Say Y here if you want to enable support for Himax HX83112A-based display panels, such as the one found in the Fairphone 4 smartphone. +config DRM_PANEL_HIMAX_HX83112B + tristate "Himax HX83112B-based DSI panel" + depends on OF + depends on DRM_MIPI_DSI + depends on BACKLIGHT_CLASS_DEVICE + select DRM_KMS_HELPER + help + Say Y here if you want to enable support for Himax HX83112B-based + display panels, such as the one found in the Fairphone 3 smartphone. + config DRM_PANEL_HIMAX_HX8394 tristate "HIMAX HX8394 MIPI-DSI LCD panels" depends on OF @@ -205,6 +215,19 @@ config DRM_PANEL_HIMAX_HX8394 If M is selected the module will be called panel-himax-hx8394. +config DRM_PANEL_HYDIS_HV101HD1 + tristate "Hydis HV101HD1 panel" + depends on OF + depends on DRM_MIPI_DSI + depends on BACKLIGHT_CLASS_DEVICE + help + Say Y here if you want to enable support for the Hydis HV101HD1 + 2-lane 1366x768 MIPI DSI panel found in ASUS VivoTab RT TF600T. + HV101HD1 is a color active matrix TFT LCD module using amorphous + silicon TFT's (Thin Film Transistors) as an active switching devices. + + If M is selected the module will be called panel-hydis-hv101hd1 + config DRM_PANEL_ILITEK_IL9322 tristate "Ilitek ILI9322 320x240 QVGA panels" depends on OF && SPI @@ -833,6 +856,17 @@ config DRM_PANEL_SAMSUNG_S6E8AA0 select DRM_MIPI_DSI select VIDEOMODE_HELPERS +config DRM_PANEL_SAMSUNG_S6E8AA5X01_AMS561RA01 + tristate "Samsung AMS561RA01 panel with S6E8AA5X01 controller" + depends on GPIOLIB && OF && REGULATOR + depends on DRM_MIPI_DSI + depends on BACKLIGHT_CLASS_DEVICE + help + Say Y here if you want to enable support for Samsung AMS561RA01 + panel, which uses Samsung's S6E8AA5X01 controller. The panel has a + ~5.6 inch AMOLED display, and the controller is driven by the MIPI + DSI protocol with 4 lanes. + config DRM_PANEL_SAMSUNG_SOFEF00 tristate "Samsung sofef00/s6e3fc2x01 OnePlus 6/6T DSI cmd mode panels" depends on OF @@ -961,7 +995,7 @@ config DRM_PANEL_STARTEK_KD070FHFID015 depends on BACKLIGHT_CLASS_DEVICE help Say Y here if you want to enable support for STARTEK KD070FHFID015 DSI panel - based on RENESAS-R69429 controller. The pannel is a 7-inch TFT LCD display + based on RENESAS-R69429 controller. The panel is a 7-inch TFT LCD display with a resolution of 1024 x 600 pixels. It provides a MIPI DSI interface to the host, a built-in LED backlight and touch controller. 
diff --git a/drivers/gpu/drm/panel/Makefile b/drivers/gpu/drm/panel/Makefile index 73a39bc72604..3615a761b44f 100644 --- a/drivers/gpu/drm/panel/Makefile +++ b/drivers/gpu/drm/panel/Makefile @@ -20,7 +20,9 @@ obj-$(CONFIG_DRM_PANEL_FEIYANG_FY07024DI26A30D) += panel-feiyang-fy07024di26a30d obj-$(CONFIG_DRM_PANEL_HIMAX_HX8279) += panel-himax-hx8279.o obj-$(CONFIG_DRM_PANEL_HIMAX_HX83102) += panel-himax-hx83102.o obj-$(CONFIG_DRM_PANEL_HIMAX_HX83112A) += panel-himax-hx83112a.o +obj-$(CONFIG_DRM_PANEL_HIMAX_HX83112B) += panel-himax-hx83112b.o obj-$(CONFIG_DRM_PANEL_HIMAX_HX8394) += panel-himax-hx8394.o +obj-$(CONFIG_DRM_PANEL_HYDIS_HV101HD1) += panel-hydis-hv101hd1.o obj-$(CONFIG_DRM_PANEL_ILITEK_IL9322) += panel-ilitek-ili9322.o obj-$(CONFIG_DRM_PANEL_ILITEK_ILI9341) += panel-ilitek-ili9341.o obj-$(CONFIG_DRM_PANEL_ILITEK_ILI9805) += panel-ilitek-ili9805.o @@ -86,6 +88,7 @@ obj-$(CONFIG_DRM_PANEL_SAMSUNG_S6E63M0_DSI) += panel-samsung-s6e63m0-dsi.o obj-$(CONFIG_DRM_PANEL_SAMSUNG_S6E88A0_AMS427AP24) += panel-samsung-s6e88a0-ams427ap24.o obj-$(CONFIG_DRM_PANEL_SAMSUNG_S6E88A0_AMS452EF01) += panel-samsung-s6e88a0-ams452ef01.o obj-$(CONFIG_DRM_PANEL_SAMSUNG_S6E8AA0) += panel-samsung-s6e8aa0.o +obj-$(CONFIG_DRM_PANEL_SAMSUNG_S6E8AA5X01_AMS561RA01) += panel-samsung-s6e8aa5x01-ams561ra01.o obj-$(CONFIG_DRM_PANEL_SAMSUNG_SOFEF00) += panel-samsung-sofef00.o obj-$(CONFIG_DRM_PANEL_SEIKO_43WVF1G) += panel-seiko-43wvf1g.o obj-$(CONFIG_DRM_PANEL_SHARP_LQ101R1SX01) += panel-sharp-lq101r1sx01.o diff --git a/drivers/gpu/drm/panel/panel-boe-himax8279d.c b/drivers/gpu/drm/panel/panel-boe-himax8279d.c index df746baae301..4a8560b4b899 100644 --- a/drivers/gpu/drm/panel/panel-boe-himax8279d.c +++ b/drivers/gpu/drm/panel/panel-boe-himax8279d.c @@ -847,9 +847,6 @@ static int panel_add(struct panel_info *pinfo) "failed to get enable gpio\n"); } - drm_panel_init(&pinfo->base, dev, &panel_funcs, - DRM_MODE_CONNECTOR_DSI); - ret = drm_panel_of_backlight(&pinfo->base); if (ret) return ret; @@ -865,9 +862,11 @@ static int panel_probe(struct mipi_dsi_device *dsi) const struct panel_desc *desc; int err; - pinfo = devm_kzalloc(&dsi->dev, sizeof(*pinfo), GFP_KERNEL); - if (!pinfo) - return -ENOMEM; + pinfo = devm_drm_panel_alloc(&dsi->dev, __typeof(*pinfo), base, + &panel_funcs, DRM_MODE_CONNECTOR_DSI); + + if (IS_ERR(pinfo)) + return PTR_ERR(pinfo); desc = of_device_get_match_data(&dsi->dev); dsi->mode_flags = desc->mode_flags; diff --git a/drivers/gpu/drm/panel/panel-boe-tv101wum-nl6.c b/drivers/gpu/drm/panel/panel-boe-tv101wum-nl6.c index 3e5b0d8636d0..d5fe105bdbdd 100644 --- a/drivers/gpu/drm/panel/panel-boe-tv101wum-nl6.c +++ b/drivers/gpu/drm/panel/panel-boe-tv101wum-nl6.c @@ -1720,8 +1720,6 @@ static int boe_panel_add(struct boe_panel *boe) boe->base.prepare_prev_first = true; - drm_panel_init(&boe->base, dev, &boe_panel_funcs, - DRM_MODE_CONNECTOR_DSI); err = of_drm_get_panel_orientation(dev->of_node, &boe->orientation); if (err < 0) { dev_err(dev, "%pOF: failed to get orientation %d\n", dev->of_node, err); @@ -1746,9 +1744,11 @@ static int boe_panel_probe(struct mipi_dsi_device *dsi) int ret; const struct panel_desc *desc; - boe = devm_kzalloc(&dsi->dev, sizeof(*boe), GFP_KERNEL); - if (!boe) - return -ENOMEM; + boe = devm_drm_panel_alloc(&dsi->dev, __typeof(*boe), base, + &boe_panel_funcs, DRM_MODE_CONNECTOR_DSI); + + if (IS_ERR(boe)) + return PTR_ERR(boe); desc = of_device_get_match_data(&dsi->dev); dsi->lanes = desc->lanes; diff --git a/drivers/gpu/drm/panel/panel-edp.c b/drivers/gpu/drm/panel/panel-edp.c index 
6c45c9e879ec..62435e3cd9f4 100644 --- a/drivers/gpu/drm/panel/panel-edp.c +++ b/drivers/gpu/drm/panel/panel-edp.c @@ -1736,10 +1736,11 @@ static const struct panel_delay delay_200_500_e50 = { .enable = 50, }; -static const struct panel_delay delay_200_500_e50_p2e200 = { +static const struct panel_delay delay_200_500_e50_d50_p2e200 = { .hpd_absent = 200, .unprepare = 500, .enable = 50, + .disable = 50, .prepare_to_enable = 200, }; @@ -1795,6 +1796,13 @@ static const struct panel_delay delay_200_500_e200_d10 = { .disable = 10, }; +static const struct panel_delay delay_200_500_e200_d50 = { + .hpd_absent = 200, + .unprepare = 500, + .enable = 200, + .disable = 50, +}; + static const struct panel_delay delay_200_150_e200 = { .hpd_absent = 200, .unprepare = 150, @@ -1828,6 +1836,20 @@ static const struct panel_delay delay_50_500_e200_d200_po2e335 = { .powered_on_to_enable = 335, }; +static const struct panel_delay delay_200_500_e50_d100 = { + .hpd_absent = 200, + .unprepare = 500, + .enable = 50, + .disable = 100, +}; + +static const struct panel_delay delay_80_500_e50_d50 = { + .hpd_absent = 80, + .unprepare = 500, + .enable = 50, + .disable = 50, +}; + #define EDP_PANEL_ENTRY(vend_chr_0, vend_chr_1, vend_chr_2, product_id, _delay, _name) \ { \ .ident = { \ @@ -1857,6 +1879,7 @@ static const struct panel_delay delay_50_500_e200_d200_po2e335 = { * Sort first by vendor, then by product ID. */ static const struct edp_panel_entry edp_panels[] = { + EDP_PANEL_ENTRY('A', 'U', 'O', 0x04a4, &delay_200_500_e50, "B122UAN01.0"), EDP_PANEL_ENTRY('A', 'U', 'O', 0x105c, &delay_200_500_e50, "B116XTN01.0"), EDP_PANEL_ENTRY('A', 'U', 'O', 0x1062, &delay_200_500_e50, "B120XAN01.0"), EDP_PANEL_ENTRY('A', 'U', 'O', 0x125c, &delay_200_500_e50, "Unknown"), @@ -1875,6 +1898,7 @@ static const struct edp_panel_entry edp_panels[] = { EDP_PANEL_ENTRY2('A', 'U', 'O', 0x405c, &auo_b116xak01.delay, "B116XAK01.0", &auo_b116xa3_mode), EDP_PANEL_ENTRY('A', 'U', 'O', 0x435c, &delay_200_500_e50, "Unknown"), + EDP_PANEL_ENTRY('A', 'U', 'O', 0x52b0, &delay_200_500_e50, "B116XAK02.0"), EDP_PANEL_ENTRY('A', 'U', 'O', 0x582d, &delay_200_500_e50, "B133UAN01.0"), EDP_PANEL_ENTRY('A', 'U', 'O', 0x615c, &delay_200_500_e50, "B116XAN06.1"), EDP_PANEL_ENTRY('A', 'U', 'O', 0x635c, &delay_200_500_e50, "B116XAN06.3"), @@ -1882,10 +1906,12 @@ static const struct edp_panel_entry edp_panels[] = { EDP_PANEL_ENTRY('A', 'U', 'O', 0x723c, &delay_200_500_e50, "B140XTN07.2"), EDP_PANEL_ENTRY('A', 'U', 'O', 0x73aa, &delay_200_500_e50, "B116XTN02.3"), EDP_PANEL_ENTRY('A', 'U', 'O', 0x8594, &delay_200_500_e50, "B133UAN01.0"), + EDP_PANEL_ENTRY('A', 'U', 'O', 0x8bba, &delay_200_500_e50, "B140UAN08.5"), EDP_PANEL_ENTRY('A', 'U', 'O', 0xa199, &delay_200_500_e50, "B116XAN06.1"), EDP_PANEL_ENTRY('A', 'U', 'O', 0xa7b3, &delay_200_500_e50, "B140UAN04.4"), EDP_PANEL_ENTRY('A', 'U', 'O', 0xc4b4, &delay_200_500_e50, "B116XAT04.1"), EDP_PANEL_ENTRY('A', 'U', 'O', 0xc9a8, &delay_200_500_e50, "B140QAN08.H"), + EDP_PANEL_ENTRY('A', 'U', 'O', 0xcdba, &delay_200_500_e50, "B140UAX01.2"), EDP_PANEL_ENTRY('A', 'U', 'O', 0xd497, &delay_200_500_e50, "B120XAN01.0"), EDP_PANEL_ENTRY('A', 'U', 'O', 0xf390, &delay_200_500_e50, "B140XTN07.7"), @@ -1934,20 +1960,25 @@ static const struct edp_panel_entry edp_panels[] = { EDP_PANEL_ENTRY('B', 'O', 'E', 0x09dd, &delay_200_500_e50, "NT116WHM-N21"), EDP_PANEL_ENTRY('B', 'O', 'E', 0x0a1b, &delay_200_500_e50, "NV133WUM-N63"), EDP_PANEL_ENTRY('B', 'O', 'E', 0x0a36, &delay_200_500_e200, "Unknown"), - EDP_PANEL_ENTRY('B', 'O', 'E', 0x0a3e, 
&delay_200_500_e80, "NV116WHM-N49"), + EDP_PANEL_ENTRY('B', 'O', 'E', 0x0a3e, &delay_200_500_e80_d50, "NV116WHM-N49"), EDP_PANEL_ENTRY('B', 'O', 'E', 0x0a5d, &delay_200_500_e50, "NV116WHM-N45"), + EDP_PANEL_ENTRY('B', 'O', 'E', 0x0a6a, &delay_200_500_e80, "NV140WUM-N44"), EDP_PANEL_ENTRY('B', 'O', 'E', 0x0ac5, &delay_200_500_e50, "NV116WHM-N4C"), EDP_PANEL_ENTRY('B', 'O', 'E', 0x0ae8, &delay_200_500_e50_p2e80, "NV140WUM-N41"), EDP_PANEL_ENTRY('B', 'O', 'E', 0x0b09, &delay_200_500_e50_po2e200, "NV140FHM-NZ"), - EDP_PANEL_ENTRY('B', 'O', 'E', 0x0b34, &delay_200_500_e80, "NV122WUM-N41"), + EDP_PANEL_ENTRY('B', 'O', 'E', 0x0b1e, &delay_200_500_e80, "NE140QDM-N6A"), + EDP_PANEL_ENTRY('B', 'O', 'E', 0x0b34, &delay_200_500_e80_d50, "NV122WUM-N41"), EDP_PANEL_ENTRY('B', 'O', 'E', 0x0b43, &delay_200_500_e200, "NV140FHM-T09"), EDP_PANEL_ENTRY('B', 'O', 'E', 0x0b56, &delay_200_500_e80, "NT140FHM-N47"), EDP_PANEL_ENTRY('B', 'O', 'E', 0x0b66, &delay_200_500_e80, "NE140WUM-N6G"), EDP_PANEL_ENTRY('B', 'O', 'E', 0x0c20, &delay_200_500_e80, "NT140FHM-N47"), EDP_PANEL_ENTRY('B', 'O', 'E', 0x0c93, &delay_200_500_e200, "Unknown"), EDP_PANEL_ENTRY('B', 'O', 'E', 0x0cb6, &delay_200_500_e200, "NT116WHM-N44"), + EDP_PANEL_ENTRY('B', 'O', 'E', 0x0cf6, &delay_200_500_e200, "NV140WUM-N64"), EDP_PANEL_ENTRY('B', 'O', 'E', 0x0cfa, &delay_200_500_e50, "NV116WHM-A4D"), + EDP_PANEL_ENTRY('B', 'O', 'E', 0x0d45, &delay_200_500_e80, "NV116WHM-N4B"), EDP_PANEL_ENTRY('B', 'O', 'E', 0x0d73, &delay_200_500_e80, "NE140WUM-N6S"), + EDP_PANEL_ENTRY('B', 'O', 'E', 0x0ddf, &delay_200_500_e80, "NV116WHM-T01"), EDP_PANEL_ENTRY('C', 'M', 'N', 0x1130, &delay_200_500_e50, "N116BGE-EB2"), EDP_PANEL_ENTRY('C', 'M', 'N', 0x1132, &delay_200_500_e80_d50, "N116BGE-EA2"), @@ -1965,26 +1996,36 @@ static const struct edp_panel_entry edp_panels[] = { EDP_PANEL_ENTRY('C', 'M', 'N', 0x115b, &delay_200_500_e80_d50, "N116BCN-EB1"), EDP_PANEL_ENTRY('C', 'M', 'N', 0x115d, &delay_200_500_e80_d50, "N116BCA-EA2"), EDP_PANEL_ENTRY('C', 'M', 'N', 0x115e, &delay_200_500_e80_d50, "N116BCA-EA1"), + EDP_PANEL_ENTRY('C', 'M', 'N', 0x115f, &delay_200_500_e80_d50, "N116BCL-EAK"), EDP_PANEL_ENTRY('C', 'M', 'N', 0x1160, &delay_200_500_e80_d50, "N116BCJ-EAK"), EDP_PANEL_ENTRY('C', 'M', 'N', 0x1161, &delay_200_500_e80, "N116BCP-EA2"), + EDP_PANEL_ENTRY('C', 'M', 'N', 0x1163, &delay_200_500_e80_d50, "N116BCJ-EAK"), EDP_PANEL_ENTRY('C', 'M', 'N', 0x1247, &delay_200_500_e80_d50, "N120ACA-EA1"), + EDP_PANEL_ENTRY('C', 'M', 'N', 0x124c, &delay_200_500_e80_d50, "N122JCA-ENK"), EDP_PANEL_ENTRY('C', 'M', 'N', 0x142b, &delay_200_500_e80_d50, "N140HCA-EAC"), EDP_PANEL_ENTRY('C', 'M', 'N', 0x142e, &delay_200_500_e80_d50, "N140BGA-EA4"), + EDP_PANEL_ENTRY('C', 'M', 'N', 0x1441, &delay_200_500_e80_d50, "N140JCA-ELK"), EDP_PANEL_ENTRY('C', 'M', 'N', 0x144f, &delay_200_500_e80_d50, "N140HGA-EA1"), EDP_PANEL_ENTRY('C', 'M', 'N', 0x1468, &delay_200_500_e80, "N140HGA-EA1"), + EDP_PANEL_ENTRY('C', 'M', 'N', 0x14a8, &delay_200_500_e80, "N140JCA-ELP"), EDP_PANEL_ENTRY('C', 'M', 'N', 0x14d4, &delay_200_500_e80_d50, "N140HCA-EAC"), EDP_PANEL_ENTRY('C', 'M', 'N', 0x14d6, &delay_200_500_e80_d50, "N140BGA-EA4"), EDP_PANEL_ENTRY('C', 'M', 'N', 0x14e5, &delay_200_500_e80_d50, "N140HGA-EA1"), EDP_PANEL_ENTRY('C', 'M', 'N', 0x162b, &delay_200_500_e80_d50, "N160JCE-ELL"), + EDP_PANEL_ENTRY('C', 'M', 'N', 0x7402, &delay_200_500_e200_d50, "N116BCA-EAK"), - EDP_PANEL_ENTRY('C', 'S', 'O', 0x1200, &delay_200_500_e50_p2e200, "MNC207QS1-1"), - EDP_PANEL_ENTRY('C', 'S', 'O', 0x1413, 
&delay_200_500_e50_p2e200, "MNE007JA1-2"), + EDP_PANEL_ENTRY('C', 'S', 'O', 0x1200, &delay_200_500_e50_d50_p2e200, "MNC207QS1-1"), + EDP_PANEL_ENTRY('C', 'S', 'O', 0x1413, &delay_200_500_e50_d50_p2e200, "MNE007JA1-2"), EDP_PANEL_ENTRY('C', 'S', 'W', 0x1100, &delay_200_500_e80_d50, "MNB601LS1-1"), EDP_PANEL_ENTRY('C', 'S', 'W', 0x1103, &delay_200_500_e80_d50, "MNB601LS1-3"), - EDP_PANEL_ENTRY('C', 'S', 'W', 0x1104, &delay_200_500_e50, "MNB601LS1-4"), + EDP_PANEL_ENTRY('C', 'S', 'W', 0x1104, &delay_200_500_e50_d100, "MNB601LS1-4"), + EDP_PANEL_ENTRY('C', 'S', 'W', 0x143f, &delay_200_500_e50, "MNE007QS3-6"), EDP_PANEL_ENTRY('C', 'S', 'W', 0x1448, &delay_200_500_e50, "MNE007QS3-7"), EDP_PANEL_ENTRY('C', 'S', 'W', 0x1457, &delay_80_500_e80_p2e200, "MNE007QS3-8"), + EDP_PANEL_ENTRY('C', 'S', 'W', 0x1462, &delay_200_500_e50, "MNE007QS5-2"), + EDP_PANEL_ENTRY('C', 'S', 'W', 0x1468, &delay_200_500_e50, "MNE007QB2-2"), + EDP_PANEL_ENTRY('C', 'S', 'W', 0x146e, &delay_80_500_e50_d50, "MNE007QB3-1"), EDP_PANEL_ENTRY('E', 'T', 'C', 0x0000, &delay_50_500_e200_d200_po2e335, "LP079QX1-SP0V"), @@ -2025,12 +2066,16 @@ static const struct edp_panel_entry edp_panels[] = { EDP_PANEL_ENTRY('S', 'H', 'P', 0x1523, &delay_80_500_e50, "LQ140M1JW46"), EDP_PANEL_ENTRY('S', 'H', 'P', 0x153a, &delay_200_500_e50, "LQ140T1JH01"), EDP_PANEL_ENTRY('S', 'H', 'P', 0x154c, &delay_200_500_p2e100, "LQ116M1JW10"), + EDP_PANEL_ENTRY('S', 'H', 'P', 0x158f, &delay_200_500_p2e100, "LQ134Z1"), EDP_PANEL_ENTRY('S', 'H', 'P', 0x1593, &delay_200_500_p2e100, "LQ134N1"), EDP_PANEL_ENTRY('S', 'T', 'A', 0x0004, &delay_200_500_e200, "116KHD024006"), EDP_PANEL_ENTRY('S', 'T', 'A', 0x0009, &delay_200_500_e250, "116QHD024002"), EDP_PANEL_ENTRY('S', 'T', 'A', 0x0100, &delay_100_500_e200, "2081116HHD028001-51D"), + EDP_PANEL_ENTRY('T', 'M', 'A', 0x0811, &delay_200_500_e80_d50, "TM140VDXP01-04"), + EDP_PANEL_ENTRY('T', 'M', 'A', 0x2094, &delay_200_500_e50_d100, "TL140VDMS03-01"), + { /* sentinal */ } }; diff --git a/drivers/gpu/drm/panel/panel-himax-hx8279.c b/drivers/gpu/drm/panel/panel-himax-hx8279.c index fb302d1f91b9..9e443c719843 100644 --- a/drivers/gpu/drm/panel/panel-himax-hx8279.c +++ b/drivers/gpu/drm/panel/panel-himax-hx8279.c @@ -935,7 +935,7 @@ static int hx8279_check_dig_gamma(struct hx8279 *hx, struct device *dev, const u j++; x++; } while (x < 4); - }; + } return 0; } diff --git a/drivers/gpu/drm/panel/panel-himax-hx83102.c b/drivers/gpu/drm/panel/panel-himax-hx83102.c index 66abfc44e424..4c432d207634 100644 --- a/drivers/gpu/drm/panel/panel-himax-hx83102.c +++ b/drivers/gpu/drm/panel/panel-himax-hx83102.c @@ -989,8 +989,6 @@ static int hx83102_panel_add(struct hx83102 *ctx) ctx->base.prepare_prev_first = true; - drm_panel_init(&ctx->base, dev, &hx83102_drm_funcs, - DRM_MODE_CONNECTOR_DSI); err = of_drm_get_panel_orientation(dev->of_node, &ctx->orientation); if (err < 0) return dev_err_probe(dev, err, "failed to get orientation\n"); @@ -1013,9 +1011,11 @@ static int hx83102_probe(struct mipi_dsi_device *dsi) int ret; const struct hx83102_panel_desc *desc; - ctx = devm_kzalloc(&dsi->dev, sizeof(*ctx), GFP_KERNEL); - if (!ctx) - return -ENOMEM; + ctx = devm_drm_panel_alloc(&dsi->dev, __typeof(*ctx), base, + &hx83102_drm_funcs, DRM_MODE_CONNECTOR_DSI); + + if (IS_ERR(ctx)) + return PTR_ERR(ctx); desc = of_device_get_match_data(&dsi->dev); dsi->lanes = 4; diff --git a/drivers/gpu/drm/panel/panel-himax-hx83112b.c b/drivers/gpu/drm/panel/panel-himax-hx83112b.c new file mode 100644 index 000000000000..263f79a967de --- /dev/null +++ 
b/drivers/gpu/drm/panel/panel-himax-hx83112b.c @@ -0,0 +1,430 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Generated with linux-mdss-dsi-panel-driver-generator from vendor device tree. + * Copyright (c) 2025 Luca Weiss <luca@lucaweiss.eu> + */ + +#include <linux/backlight.h> +#include <linux/delay.h> +#include <linux/gpio/consumer.h> +#include <linux/mod_devicetable.h> +#include <linux/module.h> +#include <linux/regulator/consumer.h> + +#include <video/mipi_display.h> + +#include <drm/drm_mipi_dsi.h> +#include <drm/drm_modes.h> +#include <drm/drm_panel.h> +#include <drm/drm_probe_helper.h> + +/* Manufacturer specific DSI commands */ +#define HX83112B_SETPOWER1 0xb1 +#define HX83112B_SETDISP 0xb2 +#define HX83112B_SETDRV 0xb4 +#define HX83112B_SETEXTC 0xb9 +#define HX83112B_SETBANK 0xbd +#define HX83112B_SETDGCLUT 0xc1 +#define HX83112B_SETDISMO 0xc2 +#define HX83112B_UNKNOWN1 0xc6 +#define HX83112B_SETPANEL 0xcc +#define HX83112B_UNKNOWN2 0xd1 +#define HX83112B_SETPOWER2 0xd2 +#define HX83112B_SETGIP0 0xd3 +#define HX83112B_SETGIP1 0xd5 +#define HX83112B_SETGIP2 0xd6 +#define HX83112B_SETGIP3 0xd8 +#define HX83112B_SETIDLE 0xdd +#define HX83112B_UNKNOWN3 0xe7 +#define HX83112B_UNKNOWN4 0xe9 + +struct hx83112b_panel { + struct drm_panel panel; + struct mipi_dsi_device *dsi; + struct regulator_bulk_data *supplies; + struct gpio_desc *reset_gpio; +}; + +static const struct regulator_bulk_data hx83112b_supplies[] = { + { .supply = "iovcc" }, + { .supply = "vsn" }, + { .supply = "vsp" }, +}; + +static inline struct hx83112b_panel *to_hx83112b_panel(struct drm_panel *panel) +{ + return container_of(panel, struct hx83112b_panel, panel); +} + +static void hx83112b_reset(struct hx83112b_panel *ctx) +{ + gpiod_set_value_cansleep(ctx->reset_gpio, 0); + usleep_range(10000, 11000); + gpiod_set_value_cansleep(ctx->reset_gpio, 1); + usleep_range(10000, 11000); + gpiod_set_value_cansleep(ctx->reset_gpio, 0); + usleep_range(10000, 11000); +} + +static int hx83112b_on(struct hx83112b_panel *ctx) +{ + struct mipi_dsi_multi_context dsi_ctx = { .dsi = ctx->dsi }; + + mipi_dsi_dcs_write_seq_multi(&dsi_ctx, HX83112B_SETEXTC, 0x83, 0x11, 0x2b); + mipi_dsi_dcs_write_seq_multi(&dsi_ctx, HX83112B_SETBANK, 0x01); + mipi_dsi_dcs_write_seq_multi(&dsi_ctx, HX83112B_SETDISMO, 0x08, 0x70); + mipi_dsi_dcs_write_seq_multi(&dsi_ctx, HX83112B_SETBANK, 0x03); + mipi_dsi_dcs_write_seq_multi(&dsi_ctx, HX83112B_SETDISP, 0x04, 0x38, 0x08, 0x70); + mipi_dsi_dcs_write_seq_multi(&dsi_ctx, HX83112B_SETBANK, 0x00); + mipi_dsi_dcs_write_seq_multi(&dsi_ctx, HX83112B_SETPOWER1, + 0xf8, 0x27, 0x27, 0x00, 0x00, 0x0b, 0x0e, + 0x0b, 0x0e, 0x33); + mipi_dsi_dcs_write_seq_multi(&dsi_ctx, HX83112B_SETPOWER2, 0x2d, 0x2d); + mipi_dsi_dcs_write_seq_multi(&dsi_ctx, HX83112B_SETDISP, + 0x80, 0x02, 0x18, 0x80, 0x70, 0x00, 0x08, + 0x1c, 0x08, 0x11, 0x05); + mipi_dsi_dcs_write_seq_multi(&dsi_ctx, HX83112B_UNKNOWN4, 0xd1); + mipi_dsi_dcs_write_seq_multi(&dsi_ctx, HX83112B_SETDISP, 0x00, 0x08); + mipi_dsi_dcs_write_seq_multi(&dsi_ctx, HX83112B_UNKNOWN4, 0x00); + mipi_dsi_dcs_write_seq_multi(&dsi_ctx, HX83112B_SETBANK, 0x02); + mipi_dsi_dcs_write_seq_multi(&dsi_ctx, HX83112B_SETDISP, 0xb5, 0x0a); + mipi_dsi_dcs_write_seq_multi(&dsi_ctx, HX83112B_SETBANK, 0x00); + mipi_dsi_dcs_write_seq_multi(&dsi_ctx, HX83112B_SETIDLE, + 0x00, 0x00, 0x08, 0x1c, 0x08, 0x34, 0x34, + 0x88); + mipi_dsi_dcs_write_seq_multi(&dsi_ctx, HX83112B_SETDRV, + 0x65, 0x6b, 0x00, 0x00, 0xd0, 0xd4, 0x36, + 0xcf, 0x06, 0xce, 0x00, 0xce, 0x00, 0x00, + 0x00, 0x07, 0x00, 0x2a, 0x07, 
0x01, 0x07, + 0x00, 0x00, 0x2a); + mipi_dsi_dcs_write_seq_multi(&dsi_ctx, HX83112B_SETBANK, 0x03); + mipi_dsi_dcs_write_seq_multi(&dsi_ctx, HX83112B_UNKNOWN4, 0xc3); + mipi_dsi_dcs_write_seq_multi(&dsi_ctx, HX83112B_SETDRV, 0x01, 0x67, 0x2a); + mipi_dsi_dcs_write_seq_multi(&dsi_ctx, HX83112B_UNKNOWN4, 0x00); + mipi_dsi_dcs_write_seq_multi(&dsi_ctx, HX83112B_SETBANK, 0x00); + mipi_dsi_dcs_write_seq_multi(&dsi_ctx, HX83112B_SETDGCLUT, 0x01); + mipi_dsi_dcs_write_seq_multi(&dsi_ctx, HX83112B_SETBANK, 0x01); + mipi_dsi_dcs_write_seq_multi(&dsi_ctx, HX83112B_SETDGCLUT, + 0xff, 0xfb, 0xf9, 0xf6, 0xf4, 0xf1, 0xef, + 0xea, 0xe7, 0xe5, 0xe2, 0xdf, 0xdd, 0xda, + 0xd8, 0xd5, 0xd2, 0xcf, 0xcc, 0xc5, 0xbe, + 0xb7, 0xb0, 0xa8, 0xa0, 0x98, 0x8e, 0x85, + 0x7b, 0x72, 0x69, 0x5e, 0x53, 0x48, 0x3e, + 0x35, 0x2b, 0x22, 0x17, 0x0d, 0x09, 0x07, + 0x05, 0x01, 0x00, 0x26, 0xf0, 0x86, 0x25, + 0x6e, 0xb6, 0xdd, 0xf3, 0xd8, 0xcc, 0x9b, + 0x00); + mipi_dsi_dcs_write_seq_multi(&dsi_ctx, HX83112B_SETBANK, 0x02); + mipi_dsi_dcs_write_seq_multi(&dsi_ctx, HX83112B_SETDGCLUT, + 0xff, 0xfb, 0xf9, 0xf6, 0xf4, 0xf1, 0xef, + 0xea, 0xe7, 0xe5, 0xe2, 0xdf, 0xdd, 0xda, + 0xd8, 0xd5, 0xd2, 0xcf, 0xcc, 0xc5, 0xbe, + 0xb7, 0xb0, 0xa8, 0xa0, 0x98, 0x8e, 0x85, + 0x7b, 0x72, 0x69, 0x5e, 0x53, 0x48, 0x3e, + 0x35, 0x2b, 0x22, 0x17, 0x0d, 0x09, 0x07, + 0x05, 0x01, 0x00, 0x26, 0xf0, 0x86, 0x25, + 0x6e, 0xb6, 0xdd, 0xf3, 0xd8, 0xcc, 0x9b, + 0x00); + mipi_dsi_dcs_write_seq_multi(&dsi_ctx, HX83112B_SETBANK, 0x03); + mipi_dsi_dcs_write_seq_multi(&dsi_ctx, HX83112B_SETDGCLUT, + 0xff, 0xfb, 0xf9, 0xf6, 0xf4, 0xf1, 0xef, + 0xea, 0xe7, 0xe5, 0xe2, 0xdf, 0xdd, 0xda, + 0xd8, 0xd5, 0xd2, 0xcf, 0xcc, 0xc5, 0xbe, + 0xb7, 0xb0, 0xa8, 0xa0, 0x98, 0x8e, 0x85, + 0x7b, 0x72, 0x69, 0x5e, 0x53, 0x48, 0x3e, + 0x35, 0x2b, 0x22, 0x17, 0x0d, 0x09, 0x07, + 0x05, 0x01, 0x00, 0x26, 0xf0, 0x86, 0x25, + 0x6e, 0xb6, 0xdd, 0xf3, 0xd8, 0xcc, 0x9b, + 0x00); + mipi_dsi_dcs_write_seq_multi(&dsi_ctx, HX83112B_SETBANK, 0x00); + mipi_dsi_dcs_write_seq_multi(&dsi_ctx, HX83112B_SETDISMO, 0xc8); + mipi_dsi_dcs_write_seq_multi(&dsi_ctx, HX83112B_SETPANEL, 0x08); + mipi_dsi_dcs_write_seq_multi(&dsi_ctx, HX83112B_SETGIP0, + 0x81, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, + 0x04, 0x00, 0x01, 0x13, 0x40, 0x04, 0x09, + 0x09, 0x0b, 0x0b, 0x32, 0x10, 0x08, 0x00, + 0x08, 0x32, 0x10, 0x08, 0x00, 0x08, 0x32, + 0x10, 0x08, 0x00, 0x08, 0x00, 0x00, 0x0a, + 0x08, 0x7b); + mipi_dsi_dcs_write_seq_multi(&dsi_ctx, HX83112B_UNKNOWN4, 0xc5); + mipi_dsi_dcs_write_seq_multi(&dsi_ctx, HX83112B_UNKNOWN1, 0xf7); + mipi_dsi_dcs_write_seq_multi(&dsi_ctx, HX83112B_UNKNOWN4, 0x00); + mipi_dsi_dcs_write_seq_multi(&dsi_ctx, HX83112B_UNKNOWN4, 0xd4); + mipi_dsi_dcs_write_seq_multi(&dsi_ctx, HX83112B_UNKNOWN1, 0x6e); + mipi_dsi_dcs_write_seq_multi(&dsi_ctx, HX83112B_UNKNOWN4, 0x00); + mipi_dsi_dcs_write_seq_multi(&dsi_ctx, HX83112B_UNKNOWN4, 0xef); + mipi_dsi_dcs_write_seq_multi(&dsi_ctx, HX83112B_SETGIP0, 0x0c); + mipi_dsi_dcs_write_seq_multi(&dsi_ctx, HX83112B_UNKNOWN4, 0x00); + mipi_dsi_dcs_write_seq_multi(&dsi_ctx, HX83112B_SETBANK, 0x01); + mipi_dsi_dcs_write_seq_multi(&dsi_ctx, HX83112B_UNKNOWN4, 0xc8); + mipi_dsi_dcs_write_seq_multi(&dsi_ctx, HX83112B_SETGIP0, 0xa1); + mipi_dsi_dcs_write_seq_multi(&dsi_ctx, HX83112B_UNKNOWN4, 0x00); + mipi_dsi_dcs_write_seq_multi(&dsi_ctx, HX83112B_SETBANK, 0x00); + mipi_dsi_dcs_write_seq_multi(&dsi_ctx, HX83112B_SETGIP1, + 0x18, 0x18, 0x19, 0x18, 0x18, 0x20, 0x18, + 0x18, 0x18, 0x10, 0x10, 0x18, 0x18, 0x00, + 0x00, 0x18, 0x18, 0x01, 0x01, 0x18, 0x18, + 0x28, 0x28, 0x18, 0x18, 
0x18, 0x18, 0x18, + 0x2f, 0x2f, 0x30, 0x30, 0x31, 0x31, 0x35, + 0x35, 0x36, 0x36, 0x37, 0x37, 0x18, 0x18, + 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0xfc, + 0xfc, 0x00, 0x00, 0xfc, 0xfc, 0x00, 0x00); + mipi_dsi_dcs_write_seq_multi(&dsi_ctx, HX83112B_SETGIP2, + 0x18, 0x18, 0x19, 0x18, 0x18, 0x20, 0x19, + 0x18, 0x18, 0x10, 0x10, 0x18, 0x18, 0x00, + 0x00, 0x18, 0x18, 0x01, 0x01, 0x18, 0x18, + 0x28, 0x28, 0x18, 0x18, 0x18, 0x18, 0x18, + 0x2f, 0x2f, 0x30, 0x30, 0x31, 0x31, 0x35, + 0x35, 0x36, 0x36, 0x37, 0x37, 0x18, 0x18, + 0x18, 0x18, 0x18, 0x18, 0x18, 0x18); + mipi_dsi_dcs_write_seq_multi(&dsi_ctx, HX83112B_SETGIP3, + 0xaa, 0xaa, 0xaa, 0xaf, 0xea, 0xaa, 0xaa, + 0xaa, 0xaa, 0xaf, 0xea, 0xaa, 0xaa, 0xaa, + 0xab, 0xaf, 0xef, 0xaa, 0xaa, 0xaa, 0xaa, + 0xaf, 0xea, 0xaa); + mipi_dsi_dcs_write_seq_multi(&dsi_ctx, HX83112B_SETBANK, 0x01); + mipi_dsi_dcs_write_seq_multi(&dsi_ctx, HX83112B_SETGIP3, + 0xaa, 0xaa, 0xab, 0xaf, 0xea, 0xaa, 0xaa, + 0xaa, 0xae, 0xaf, 0xea, 0xaa); + mipi_dsi_dcs_write_seq_multi(&dsi_ctx, HX83112B_SETBANK, 0x02); + mipi_dsi_dcs_write_seq_multi(&dsi_ctx, HX83112B_SETGIP3, + 0xaa, 0xaa, 0xaa, 0xaf, 0xea, 0xaa, 0xaa, + 0xaa, 0xaa, 0xaf, 0xea, 0xaa); + mipi_dsi_dcs_write_seq_multi(&dsi_ctx, HX83112B_SETBANK, 0x03); + mipi_dsi_dcs_write_seq_multi(&dsi_ctx, HX83112B_SETGIP3, + 0xba, 0xaa, 0xaa, 0xaf, 0xea, 0xaa, 0xaa, + 0xaa, 0xaa, 0xaf, 0xea, 0xaa, 0xba, 0xaa, + 0xaa, 0xaf, 0xea, 0xaa, 0xaa, 0xaa, 0xaa, + 0xaf, 0xea, 0xaa); + mipi_dsi_dcs_write_seq_multi(&dsi_ctx, HX83112B_SETBANK, 0x00); + mipi_dsi_dcs_write_seq_multi(&dsi_ctx, HX83112B_UNKNOWN4, 0xe4); + mipi_dsi_dcs_write_seq_multi(&dsi_ctx, HX83112B_UNKNOWN3, 0x17, 0x69); + mipi_dsi_dcs_write_seq_multi(&dsi_ctx, HX83112B_UNKNOWN4, 0x00); + mipi_dsi_dcs_write_seq_multi(&dsi_ctx, HX83112B_UNKNOWN3, + 0x09, 0x09, 0x00, 0x07, 0xe8, 0x00, 0x26, + 0x00, 0x07, 0x00, 0x00, 0xe8, 0x32, 0x00, + 0xe9, 0x0a, 0x0a, 0x00, 0x00, 0x00, 0x01, + 0x01, 0x00, 0x12, 0x04); + mipi_dsi_dcs_write_seq_multi(&dsi_ctx, HX83112B_SETBANK, 0x01); + mipi_dsi_dcs_write_seq_multi(&dsi_ctx, HX83112B_UNKNOWN3, + 0x02, 0x00, 0x01, 0x20, 0x01, 0x18, 0x08, + 0xa8, 0x09); + mipi_dsi_dcs_write_seq_multi(&dsi_ctx, HX83112B_SETBANK, 0x02); + mipi_dsi_dcs_write_seq_multi(&dsi_ctx, HX83112B_UNKNOWN3, 0x20, 0x20, 0x00); + mipi_dsi_dcs_write_seq_multi(&dsi_ctx, HX83112B_SETBANK, 0x03); + mipi_dsi_dcs_write_seq_multi(&dsi_ctx, HX83112B_UNKNOWN3, + 0x00, 0xdc, 0x11, 0x70, 0x00, 0x20); + mipi_dsi_dcs_write_seq_multi(&dsi_ctx, HX83112B_UNKNOWN4, 0xc9); + mipi_dsi_dcs_write_seq_multi(&dsi_ctx, HX83112B_UNKNOWN3, + 0x2a, 0xce, 0x02, 0x70, 0x01, 0x04); + mipi_dsi_dcs_write_seq_multi(&dsi_ctx, HX83112B_UNKNOWN4, 0x00); + mipi_dsi_dcs_write_seq_multi(&dsi_ctx, HX83112B_SETBANK, 0x00); + mipi_dsi_dcs_write_seq_multi(&dsi_ctx, HX83112B_UNKNOWN2, 0x27); + mipi_dsi_dcs_exit_sleep_mode_multi(&dsi_ctx); + mipi_dsi_msleep(&dsi_ctx, 120); + mipi_dsi_dcs_set_display_on_multi(&dsi_ctx); + mipi_dsi_msleep(&dsi_ctx, 20); + mipi_dsi_dcs_set_display_brightness_multi(&dsi_ctx, 0x0000); + mipi_dsi_dcs_write_seq_multi(&dsi_ctx, MIPI_DCS_WRITE_CONTROL_DISPLAY, + 0x24); + mipi_dsi_dcs_set_tear_on_multi(&dsi_ctx, MIPI_DSI_DCS_TEAR_MODE_VBLANK); + + return dsi_ctx.accum_err; +} + +static int hx83112b_off(struct hx83112b_panel *ctx) +{ + struct mipi_dsi_multi_context dsi_ctx = { .dsi = ctx->dsi }; + + mipi_dsi_dcs_set_display_off_multi(&dsi_ctx); + mipi_dsi_msleep(&dsi_ctx, 20); + mipi_dsi_dcs_enter_sleep_mode_multi(&dsi_ctx); + mipi_dsi_msleep(&dsi_ctx, 120); + + return dsi_ctx.accum_err; +} + +static int 
hx83112b_prepare(struct drm_panel *panel) +{ + struct hx83112b_panel *ctx = to_hx83112b_panel(panel); + struct device *dev = &ctx->dsi->dev; + int ret; + + ret = regulator_bulk_enable(ARRAY_SIZE(hx83112b_supplies), ctx->supplies); + if (ret < 0) { + dev_err(dev, "Failed to enable regulators: %d\n", ret); + return ret; + } + + hx83112b_reset(ctx); + + ret = hx83112b_on(ctx); + if (ret < 0) { + dev_err(dev, "Failed to initialize panel: %d\n", ret); + gpiod_set_value_cansleep(ctx->reset_gpio, 1); + regulator_bulk_disable(ARRAY_SIZE(hx83112b_supplies), ctx->supplies); + return ret; + } + + return 0; +} + +static int hx83112b_unprepare(struct drm_panel *panel) +{ + struct hx83112b_panel *ctx = to_hx83112b_panel(panel); + struct device *dev = &ctx->dsi->dev; + int ret; + + ret = hx83112b_off(ctx); + if (ret < 0) + dev_err(dev, "Failed to un-initialize panel: %d\n", ret); + + gpiod_set_value_cansleep(ctx->reset_gpio, 1); + regulator_bulk_disable(ARRAY_SIZE(hx83112b_supplies), ctx->supplies); + + return 0; +} + +static const struct drm_display_mode hx83112b_mode = { + .clock = (1080 + 40 + 4 + 12) * (2160 + 32 + 2 + 2) * 60 / 1000, + .hdisplay = 1080, + .hsync_start = 1080 + 40, + .hsync_end = 1080 + 40 + 4, + .htotal = 1080 + 40 + 4 + 12, + .vdisplay = 2160, + .vsync_start = 2160 + 32, + .vsync_end = 2160 + 32 + 2, + .vtotal = 2160 + 32 + 2 + 2, + .width_mm = 65, + .height_mm = 128, + .type = DRM_MODE_TYPE_DRIVER, +}; + +static int hx83112b_get_modes(struct drm_panel *panel, + struct drm_connector *connector) +{ + return drm_connector_helper_get_modes_fixed(connector, &hx83112b_mode); +} + +static const struct drm_panel_funcs hx83112b_panel_funcs = { + .prepare = hx83112b_prepare, + .unprepare = hx83112b_unprepare, + .get_modes = hx83112b_get_modes, +}; + +static int hx83112b_bl_update_status(struct backlight_device *bl) +{ + struct mipi_dsi_device *dsi = bl_get_data(bl); + u16 brightness = backlight_get_brightness(bl); + int ret; + + dsi->mode_flags &= ~MIPI_DSI_MODE_LPM; + + ret = mipi_dsi_dcs_set_display_brightness_large(dsi, brightness); + if (ret < 0) + return ret; + + dsi->mode_flags |= MIPI_DSI_MODE_LPM; + + return 0; +} + +static const struct backlight_ops hx83112b_bl_ops = { + .update_status = hx83112b_bl_update_status, +}; + +static struct backlight_device * +hx83112b_create_backlight(struct mipi_dsi_device *dsi) +{ + struct device *dev = &dsi->dev; + const struct backlight_properties props = { + .type = BACKLIGHT_RAW, + .brightness = 4095, + .max_brightness = 4095, + }; + + return devm_backlight_device_register(dev, dev_name(dev), dev, dsi, + &hx83112b_bl_ops, &props); +} + +static int hx83112b_probe(struct mipi_dsi_device *dsi) +{ + struct device *dev = &dsi->dev; + struct hx83112b_panel *ctx; + int ret; + + ctx = devm_drm_panel_alloc(dev, struct hx83112b_panel, panel, + &hx83112b_panel_funcs, + DRM_MODE_CONNECTOR_DSI); + if (IS_ERR(ctx)) + return PTR_ERR(ctx); + + ret = devm_regulator_bulk_get_const(dev, + ARRAY_SIZE(hx83112b_supplies), + hx83112b_supplies, + &ctx->supplies); + if (ret < 0) + return ret; + + ctx->reset_gpio = devm_gpiod_get(dev, "reset", GPIOD_OUT_HIGH); + if (IS_ERR(ctx->reset_gpio)) + return dev_err_probe(dev, PTR_ERR(ctx->reset_gpio), + "Failed to get reset-gpios\n"); + + ctx->dsi = dsi; + mipi_dsi_set_drvdata(dsi, ctx); + + dsi->lanes = 4; + dsi->format = MIPI_DSI_FMT_RGB888; + dsi->mode_flags = MIPI_DSI_MODE_VIDEO_BURST | + MIPI_DSI_CLOCK_NON_CONTINUOUS | + MIPI_DSI_MODE_VIDEO_NO_HSA | MIPI_DSI_MODE_LPM; + + ctx->panel.prepare_prev_first = true; + + 
ctx->panel.backlight = hx83112b_create_backlight(dsi); + if (IS_ERR(ctx->panel.backlight)) + return dev_err_probe(dev, PTR_ERR(ctx->panel.backlight), + "Failed to create backlight\n"); + + drm_panel_add(&ctx->panel); + + ret = mipi_dsi_attach(dsi); + if (ret < 0) { + drm_panel_remove(&ctx->panel); + return dev_err_probe(dev, ret, "Failed to attach to DSI host\n"); + } + + return 0; +} + +static void hx83112b_remove(struct mipi_dsi_device *dsi) +{ + struct hx83112b_panel *ctx = mipi_dsi_get_drvdata(dsi); + int ret; + + ret = mipi_dsi_detach(dsi); + if (ret < 0) + dev_err(&dsi->dev, "Failed to detach from DSI host: %d\n", ret); + + drm_panel_remove(&ctx->panel); +} + +static const struct of_device_id hx83112b_of_match[] = { + { .compatible = "djn,98-03057-6598b-i" }, + { /* sentinel */ } +}; +MODULE_DEVICE_TABLE(of, hx83112b_of_match); + +static struct mipi_dsi_driver hx83112b_driver = { + .probe = hx83112b_probe, + .remove = hx83112b_remove, + .driver = { + .name = "panel-himax-hx83112b", + .of_match_table = hx83112b_of_match, + }, +}; +module_mipi_dsi_driver(hx83112b_driver); + +MODULE_DESCRIPTION("DRM driver for hx83112b-equipped DSI panels"); +MODULE_LICENSE("GPL"); diff --git a/drivers/gpu/drm/panel/panel-hydis-hv101hd1.c b/drivers/gpu/drm/panel/panel-hydis-hv101hd1.c new file mode 100644 index 000000000000..46426c388932 --- /dev/null +++ b/drivers/gpu/drm/panel/panel-hydis-hv101hd1.c @@ -0,0 +1,188 @@ +// SPDX-License-Identifier: GPL-2.0-only + +#include <linux/array_size.h> +#include <linux/delay.h> +#include <linux/err.h> +#include <linux/gpio/consumer.h> +#include <linux/mod_devicetable.h> +#include <linux/module.h> +#include <linux/property.h> +#include <linux/regulator/consumer.h> + +#include <video/mipi_display.h> + +#include <drm/drm_mipi_dsi.h> +#include <drm/drm_modes.h> +#include <drm/drm_panel.h> + +struct hv101hd1 { + struct drm_panel panel; + struct mipi_dsi_device *dsi; + struct regulator_bulk_data *supplies; +}; + +static const struct regulator_bulk_data hv101hd1_supplies[] = { + { .supply = "vdd" }, + { .supply = "vio" }, +}; + +static inline struct hv101hd1 *to_hv101hd1(struct drm_panel *panel) +{ + return container_of(panel, struct hv101hd1, panel); +} + +static int hv101hd1_prepare(struct drm_panel *panel) +{ + struct hv101hd1 *hv = to_hv101hd1(panel); + struct mipi_dsi_multi_context ctx = { .dsi = hv->dsi }; + struct device *dev = &hv->dsi->dev; + int ret; + + ret = regulator_bulk_enable(ARRAY_SIZE(hv101hd1_supplies), hv->supplies); + if (ret) { + dev_err(dev, "error enabling regulators (%d)\n", ret); + return ret; + } + + mipi_dsi_dcs_exit_sleep_mode_multi(&ctx); + mipi_dsi_msleep(&ctx, 20); + + mipi_dsi_dcs_set_display_on_multi(&ctx); + mipi_dsi_msleep(&ctx, 20); + + return 0; +} + +static int hv101hd1_disable(struct drm_panel *panel) +{ + struct hv101hd1 *hv = to_hv101hd1(panel); + struct mipi_dsi_multi_context ctx = { .dsi = hv->dsi }; + + mipi_dsi_dcs_set_display_off_multi(&ctx); + mipi_dsi_msleep(&ctx, 120); + mipi_dsi_dcs_enter_sleep_mode_multi(&ctx); + mipi_dsi_msleep(&ctx, 20); + + return 0; +} + +static int hv101hd1_unprepare(struct drm_panel *panel) +{ + struct hv101hd1 *hv = to_hv101hd1(panel); + + return regulator_bulk_disable(ARRAY_SIZE(hv101hd1_supplies), + hv->supplies); +} + +static const struct drm_display_mode hv101hd1_mode = { + .clock = (1366 + 74 + 36 + 24) * (768 + 21 + 7 + 4) * 60 / 1000, + .hdisplay = 1366, + .hsync_start = 1366 + 74, + .hsync_end = 1366 + 74 + 36, + .htotal = 1366 + 74 + 36 + 24, + .vdisplay = 768, + .vsync_start = 768 + 
21, + .vsync_end = 768 + 21 + 7, + .vtotal = 768 + 21 + 7 + 4, + .width_mm = 140, + .height_mm = 220, +}; + +static int hv101hd1_get_modes(struct drm_panel *panel, struct drm_connector *connector) +{ + struct drm_display_mode *mode; + + mode = drm_mode_duplicate(connector->dev, &hv101hd1_mode); + if (!mode) + return -ENOMEM; + + drm_mode_set_name(mode); + + mode->type = DRM_MODE_TYPE_DRIVER | DRM_MODE_TYPE_PREFERRED; + + connector->display_info.width_mm = mode->width_mm; + connector->display_info.height_mm = mode->height_mm; + + drm_mode_probed_add(connector, mode); + + return 1; +} + +static const struct drm_panel_funcs hv101hd1_panel_funcs = { + .prepare = hv101hd1_prepare, + .disable = hv101hd1_disable, + .unprepare = hv101hd1_unprepare, + .get_modes = hv101hd1_get_modes, +}; + +static int hv101hd1_probe(struct mipi_dsi_device *dsi) +{ + struct device *dev = &dsi->dev; + struct hv101hd1 *hv; + int ret; + + hv = devm_drm_panel_alloc(dev, struct hv101hd1, panel, + &hv101hd1_panel_funcs, + DRM_MODE_CONNECTOR_DSI); + if (IS_ERR(hv)) + return PTR_ERR(hv); + + ret = devm_regulator_bulk_get_const(dev, ARRAY_SIZE(hv101hd1_supplies), + hv101hd1_supplies, &hv->supplies); + if (ret) + return dev_err_probe(dev, ret, "failed to get regulators\n"); + + hv->dsi = dsi; + mipi_dsi_set_drvdata(dsi, hv); + + dsi->lanes = 2; + dsi->format = MIPI_DSI_FMT_RGB888; + dsi->mode_flags = MIPI_DSI_MODE_VIDEO | MIPI_DSI_MODE_LPM; + + ret = drm_panel_of_backlight(&hv->panel); + if (ret) + return dev_err_probe(dev, ret, "Failed to get backlight\n"); + + drm_panel_add(&hv->panel); + + ret = mipi_dsi_attach(dsi); + if (ret) { + drm_panel_remove(&hv->panel); + return dev_err_probe(dev, ret, "Failed to attach to DSI host\n"); + } + + return 0; +} + +static void hv101hd1_remove(struct mipi_dsi_device *dsi) +{ + struct hv101hd1 *hv = mipi_dsi_get_drvdata(dsi); + int ret; + + ret = mipi_dsi_detach(dsi); + if (ret < 0) + dev_err(&dsi->dev, + "Failed to detach from DSI host: %d\n", ret); + + drm_panel_remove(&hv->panel); +} + +static const struct of_device_id hv101hd1_of_match[] = { + { .compatible = "hydis,hv101hd1" }, + { /* sentinel */ } +}; +MODULE_DEVICE_TABLE(of, hv101hd1_of_match); + +static struct mipi_dsi_driver hv101hd1_driver = { + .driver = { + .name = "panel-hv101hd1", + .of_match_table = hv101hd1_of_match, + }, + .probe = hv101hd1_probe, + .remove = hv101hd1_remove, +}; +module_mipi_dsi_driver(hv101hd1_driver); + +MODULE_AUTHOR("Svyatoslav Ryhel <clamor95@gmail.com>"); +MODULE_DESCRIPTION("DRM driver for Hydis HV101HD1 panel"); +MODULE_LICENSE("GPL"); diff --git a/drivers/gpu/drm/panel/panel-ilitek-ili9881c.c b/drivers/gpu/drm/panel/panel-ilitek-ili9881c.c index ac433345a179..ad4993b2f92a 100644 --- a/drivers/gpu/drm/panel/panel-ilitek-ili9881c.c +++ b/drivers/gpu/drm/panel/panel-ilitek-ili9881c.c @@ -1417,6 +1417,200 @@ static const struct ili9881c_instr rpi_7inch_init[] = { ILI9881C_COMMAND_INSTR(0xD3, 0x39), }; +static const struct ili9881c_instr bsd1218_a101kl68_init[] = { + ILI9881C_SWITCH_PAGE_INSTR(3), + ILI9881C_COMMAND_INSTR(0x01, 0x00), + ILI9881C_COMMAND_INSTR(0x02, 0x00), + ILI9881C_COMMAND_INSTR(0x03, 0x55), + ILI9881C_COMMAND_INSTR(0x04, 0x55), + ILI9881C_COMMAND_INSTR(0x05, 0x03), + ILI9881C_COMMAND_INSTR(0x06, 0x06), + ILI9881C_COMMAND_INSTR(0x07, 0x00), + ILI9881C_COMMAND_INSTR(0x08, 0x07), + ILI9881C_COMMAND_INSTR(0x09, 0x00), + ILI9881C_COMMAND_INSTR(0x0a, 0x00), + ILI9881C_COMMAND_INSTR(0x0b, 0x00), + ILI9881C_COMMAND_INSTR(0x0c, 0x00), + ILI9881C_COMMAND_INSTR(0x0d, 0x00), + 
ILI9881C_COMMAND_INSTR(0x0e, 0x00), + ILI9881C_COMMAND_INSTR(0x0f, 0x00), + ILI9881C_COMMAND_INSTR(0x10, 0x00), + ILI9881C_COMMAND_INSTR(0x11, 0x00), + ILI9881C_COMMAND_INSTR(0x12, 0x00), + ILI9881C_COMMAND_INSTR(0x13, 0x00), + ILI9881C_COMMAND_INSTR(0x14, 0x00), + ILI9881C_COMMAND_INSTR(0x15, 0x00), + ILI9881C_COMMAND_INSTR(0x16, 0x00), + ILI9881C_COMMAND_INSTR(0x17, 0x00), + ILI9881C_COMMAND_INSTR(0x18, 0x00), + ILI9881C_COMMAND_INSTR(0x19, 0x00), + ILI9881C_COMMAND_INSTR(0x1a, 0x00), + ILI9881C_COMMAND_INSTR(0x1b, 0x00), + ILI9881C_COMMAND_INSTR(0x1c, 0x00), + ILI9881C_COMMAND_INSTR(0x1d, 0x00), + ILI9881C_COMMAND_INSTR(0x1e, 0xc0), + ILI9881C_COMMAND_INSTR(0x1f, 0x80), + ILI9881C_COMMAND_INSTR(0x20, 0x04), + ILI9881C_COMMAND_INSTR(0x21, 0x03), + ILI9881C_COMMAND_INSTR(0x22, 0x00), + ILI9881C_COMMAND_INSTR(0x23, 0x00), + ILI9881C_COMMAND_INSTR(0x24, 0x00), + ILI9881C_COMMAND_INSTR(0x25, 0x00), + ILI9881C_COMMAND_INSTR(0x26, 0x00), + ILI9881C_COMMAND_INSTR(0x27, 0x00), + ILI9881C_COMMAND_INSTR(0x28, 0x33), + ILI9881C_COMMAND_INSTR(0x29, 0x33), + ILI9881C_COMMAND_INSTR(0x2a, 0x00), + ILI9881C_COMMAND_INSTR(0x2b, 0x00), + ILI9881C_COMMAND_INSTR(0x2c, 0x00), + ILI9881C_COMMAND_INSTR(0x2d, 0x00), + ILI9881C_COMMAND_INSTR(0x2e, 0x00), + ILI9881C_COMMAND_INSTR(0x2f, 0x00), + ILI9881C_COMMAND_INSTR(0x30, 0x00), + ILI9881C_COMMAND_INSTR(0x31, 0x00), + ILI9881C_COMMAND_INSTR(0x32, 0x00), + ILI9881C_COMMAND_INSTR(0x33, 0x00), + ILI9881C_COMMAND_INSTR(0x34, 0x04), + ILI9881C_COMMAND_INSTR(0x35, 0x00), + ILI9881C_COMMAND_INSTR(0x36, 0x00), + ILI9881C_COMMAND_INSTR(0x37, 0x00), + ILI9881C_COMMAND_INSTR(0x38, 0x3c), + ILI9881C_COMMAND_INSTR(0x39, 0x00), + ILI9881C_COMMAND_INSTR(0x3a, 0x00), + ILI9881C_COMMAND_INSTR(0x3b, 0x00), + ILI9881C_COMMAND_INSTR(0x3c, 0x00), + ILI9881C_COMMAND_INSTR(0x3d, 0x00), + ILI9881C_COMMAND_INSTR(0x3e, 0x00), + ILI9881C_COMMAND_INSTR(0x3f, 0x00), + ILI9881C_COMMAND_INSTR(0x40, 0x00), + ILI9881C_COMMAND_INSTR(0x41, 0x00), + ILI9881C_COMMAND_INSTR(0x42, 0x00), + ILI9881C_COMMAND_INSTR(0x43, 0x00), + ILI9881C_COMMAND_INSTR(0x44, 0x00), + ILI9881C_COMMAND_INSTR(0x50, 0x00), + ILI9881C_COMMAND_INSTR(0x51, 0x11), + ILI9881C_COMMAND_INSTR(0x52, 0x44), + ILI9881C_COMMAND_INSTR(0x53, 0x55), + ILI9881C_COMMAND_INSTR(0x54, 0x88), + ILI9881C_COMMAND_INSTR(0x55, 0xab), + ILI9881C_COMMAND_INSTR(0x56, 0x00), + ILI9881C_COMMAND_INSTR(0x57, 0x11), + ILI9881C_COMMAND_INSTR(0x58, 0x22), + ILI9881C_COMMAND_INSTR(0x59, 0x33), + ILI9881C_COMMAND_INSTR(0x5a, 0x44), + ILI9881C_COMMAND_INSTR(0x5b, 0x55), + ILI9881C_COMMAND_INSTR(0x5c, 0x66), + ILI9881C_COMMAND_INSTR(0x5d, 0x77), + ILI9881C_COMMAND_INSTR(0x5e, 0x00), + ILI9881C_COMMAND_INSTR(0x5f, 0x02), + ILI9881C_COMMAND_INSTR(0x60, 0x02), + ILI9881C_COMMAND_INSTR(0x61, 0x0a), + ILI9881C_COMMAND_INSTR(0x62, 0x09), + ILI9881C_COMMAND_INSTR(0x63, 0x08), + ILI9881C_COMMAND_INSTR(0x64, 0x13), + ILI9881C_COMMAND_INSTR(0x65, 0x12), + ILI9881C_COMMAND_INSTR(0x66, 0x11), + ILI9881C_COMMAND_INSTR(0x67, 0x10), + ILI9881C_COMMAND_INSTR(0x68, 0x0f), + ILI9881C_COMMAND_INSTR(0x69, 0x0e), + ILI9881C_COMMAND_INSTR(0x6a, 0x0d), + ILI9881C_COMMAND_INSTR(0x6b, 0x0c), + ILI9881C_COMMAND_INSTR(0x6c, 0x06), + ILI9881C_COMMAND_INSTR(0x6d, 0x07), + ILI9881C_COMMAND_INSTR(0x6e, 0x02), + ILI9881C_COMMAND_INSTR(0x6f, 0x02), + ILI9881C_COMMAND_INSTR(0x70, 0x02), + ILI9881C_COMMAND_INSTR(0x71, 0x02), + ILI9881C_COMMAND_INSTR(0x72, 0x02), + ILI9881C_COMMAND_INSTR(0x73, 0x02), + ILI9881C_COMMAND_INSTR(0x74, 0x02), + ILI9881C_COMMAND_INSTR(0x75, 0x02), + 
ILI9881C_COMMAND_INSTR(0x76, 0x02), + ILI9881C_COMMAND_INSTR(0x77, 0x0a), + ILI9881C_COMMAND_INSTR(0x78, 0x06), + ILI9881C_COMMAND_INSTR(0x79, 0x07), + ILI9881C_COMMAND_INSTR(0x7a, 0x10), + ILI9881C_COMMAND_INSTR(0x7b, 0x11), + ILI9881C_COMMAND_INSTR(0x7c, 0x12), + ILI9881C_COMMAND_INSTR(0x7d, 0x13), + ILI9881C_COMMAND_INSTR(0x7e, 0x0c), + ILI9881C_COMMAND_INSTR(0x7f, 0x0d), + ILI9881C_COMMAND_INSTR(0x80, 0x0e), + ILI9881C_COMMAND_INSTR(0x81, 0x0f), + ILI9881C_COMMAND_INSTR(0x82, 0x09), + ILI9881C_COMMAND_INSTR(0x83, 0x08), + ILI9881C_COMMAND_INSTR(0x84, 0x02), + ILI9881C_COMMAND_INSTR(0x85, 0x02), + ILI9881C_COMMAND_INSTR(0x86, 0x02), + ILI9881C_COMMAND_INSTR(0x87, 0x02), + ILI9881C_COMMAND_INSTR(0x88, 0x02), + ILI9881C_COMMAND_INSTR(0x89, 0x02), + ILI9881C_COMMAND_INSTR(0x8a, 0x02), + + ILI9881C_SWITCH_PAGE_INSTR(4), + ILI9881C_COMMAND_INSTR(0x6e, 0x2a), + ILI9881C_COMMAND_INSTR(0x6f, 0x37), + ILI9881C_COMMAND_INSTR(0x3a, 0x24), + ILI9881C_COMMAND_INSTR(0x8d, 0x19), + ILI9881C_COMMAND_INSTR(0x87, 0xba), + ILI9881C_COMMAND_INSTR(0xb2, 0xd1), + ILI9881C_COMMAND_INSTR(0x88, 0x0b), + ILI9881C_COMMAND_INSTR(0x38, 0x01), + ILI9881C_COMMAND_INSTR(0x39, 0x00), + ILI9881C_COMMAND_INSTR(0xb5, 0x02), + ILI9881C_COMMAND_INSTR(0x31, 0x25), + ILI9881C_COMMAND_INSTR(0x3b, 0x98), + + ILI9881C_SWITCH_PAGE_INSTR(1), + ILI9881C_COMMAND_INSTR(0x22, 0x0a), + ILI9881C_COMMAND_INSTR(0x31, 0x0c), + ILI9881C_COMMAND_INSTR(0x53, 0x40), + ILI9881C_COMMAND_INSTR(0x55, 0x45), + ILI9881C_COMMAND_INSTR(0x50, 0xb7), + ILI9881C_COMMAND_INSTR(0x51, 0xb2), + ILI9881C_COMMAND_INSTR(0x60, 0x07), + ILI9881C_COMMAND_INSTR(0xa0, 0x22), + ILI9881C_COMMAND_INSTR(0xa1, 0x3f), + ILI9881C_COMMAND_INSTR(0xa2, 0x4e), + ILI9881C_COMMAND_INSTR(0xa3, 0x17), + ILI9881C_COMMAND_INSTR(0xa4, 0x1a), + ILI9881C_COMMAND_INSTR(0xa5, 0x2d), + ILI9881C_COMMAND_INSTR(0xa6, 0x21), + ILI9881C_COMMAND_INSTR(0xa7, 0x22), + ILI9881C_COMMAND_INSTR(0xa8, 0xc4), + ILI9881C_COMMAND_INSTR(0xa9, 0x1b), + ILI9881C_COMMAND_INSTR(0xaa, 0x25), + ILI9881C_COMMAND_INSTR(0xab, 0xa7), + ILI9881C_COMMAND_INSTR(0xac, 0x1a), + ILI9881C_COMMAND_INSTR(0xad, 0x19), + ILI9881C_COMMAND_INSTR(0xae, 0x4b), + ILI9881C_COMMAND_INSTR(0xaf, 0x1f), + ILI9881C_COMMAND_INSTR(0xb0, 0x2a), + ILI9881C_COMMAND_INSTR(0xb1, 0x59), + ILI9881C_COMMAND_INSTR(0xb2, 0x64), + ILI9881C_COMMAND_INSTR(0xb3, 0x3f), + ILI9881C_COMMAND_INSTR(0xc0, 0x22), + ILI9881C_COMMAND_INSTR(0xc1, 0x48), + ILI9881C_COMMAND_INSTR(0xc2, 0x59), + ILI9881C_COMMAND_INSTR(0xc3, 0x15), + ILI9881C_COMMAND_INSTR(0xc4, 0x15), + ILI9881C_COMMAND_INSTR(0xc5, 0x28), + ILI9881C_COMMAND_INSTR(0xc6, 0x1c), + ILI9881C_COMMAND_INSTR(0xc7, 0x1e), + ILI9881C_COMMAND_INSTR(0xc8, 0xc4), + ILI9881C_COMMAND_INSTR(0xc9, 0x1c), + ILI9881C_COMMAND_INSTR(0xca, 0x2b), + ILI9881C_COMMAND_INSTR(0xcb, 0xa3), + ILI9881C_COMMAND_INSTR(0xcc, 0x1f), + ILI9881C_COMMAND_INSTR(0xcd, 0x1e), + ILI9881C_COMMAND_INSTR(0xce, 0x52), + ILI9881C_COMMAND_INSTR(0xcf, 0x24), + ILI9881C_COMMAND_INSTR(0xd0, 0x2a), + ILI9881C_COMMAND_INSTR(0xd1, 0x58), + ILI9881C_COMMAND_INSTR(0xd2, 0x68), + ILI9881C_COMMAND_INSTR(0xd3, 0x3f), +}; + static inline struct ili9881c *panel_to_ili9881c(struct drm_panel *panel) { return container_of(panel, struct ili9881c, panel); @@ -1433,33 +1627,24 @@ static inline struct ili9881c *panel_to_ili9881c(struct drm_panel *panel) * So before any attempt at sending a command or data, we have to be * sure if we're in the right page or not. 
*/ -static int ili9881c_switch_page(struct ili9881c *ctx, u8 page) +static void ili9881c_switch_page(struct mipi_dsi_multi_context *mctx, u8 page) { u8 buf[4] = { 0xff, 0x98, 0x81, page }; - int ret; - - ret = mipi_dsi_dcs_write_buffer(ctx->dsi, buf, sizeof(buf)); - if (ret < 0) - return ret; - return 0; + mipi_dsi_dcs_write_buffer_multi(mctx, buf, sizeof(buf)); } -static int ili9881c_send_cmd_data(struct ili9881c *ctx, u8 cmd, u8 data) +static void ili9881c_send_cmd_data(struct mipi_dsi_multi_context *mctx, u8 cmd, u8 data) { u8 buf[2] = { cmd, data }; - int ret; - - ret = mipi_dsi_dcs_write_buffer(ctx->dsi, buf, sizeof(buf)); - if (ret < 0) - return ret; - return 0; + mipi_dsi_dcs_write_buffer_multi(mctx, buf, sizeof(buf)); } static int ili9881c_prepare(struct drm_panel *panel) { struct ili9881c *ctx = panel_to_ili9881c(panel); + struct mipi_dsi_multi_context mctx = { .dsi = ctx->dsi }; unsigned int i; int ret; @@ -1480,61 +1665,39 @@ static int ili9881c_prepare(struct drm_panel *panel) const struct ili9881c_instr *instr = &ctx->desc->init[i]; if (instr->op == ILI9881C_SWITCH_PAGE) - ret = ili9881c_switch_page(ctx, instr->arg.page); + ili9881c_switch_page(&mctx, instr->arg.page); else if (instr->op == ILI9881C_COMMAND) - ret = ili9881c_send_cmd_data(ctx, instr->arg.cmd.cmd, - instr->arg.cmd.data); - - if (ret) - return ret; + ili9881c_send_cmd_data(&mctx, instr->arg.cmd.cmd, + instr->arg.cmd.data); } - ret = ili9881c_switch_page(ctx, 0); - if (ret) - return ret; - - if (ctx->address_mode) { - ret = mipi_dsi_dcs_write(ctx->dsi, MIPI_DCS_SET_ADDRESS_MODE, - &ctx->address_mode, - sizeof(ctx->address_mode)); - if (ret < 0) - return ret; - } - - ret = mipi_dsi_dcs_set_tear_on(ctx->dsi, MIPI_DSI_DCS_TEAR_MODE_VBLANK); - if (ret) - return ret; - - ret = mipi_dsi_dcs_exit_sleep_mode(ctx->dsi); - if (ret) - return ret; + ili9881c_switch_page(&mctx, 0); - return 0; -} + if (ctx->address_mode) + ili9881c_send_cmd_data(&mctx, MIPI_DCS_SET_ADDRESS_MODE, + ctx->address_mode); -static int ili9881c_enable(struct drm_panel *panel) -{ - struct ili9881c *ctx = panel_to_ili9881c(panel); - - msleep(120); - - mipi_dsi_dcs_set_display_on(ctx->dsi); + mipi_dsi_dcs_set_tear_on_multi(&mctx, MIPI_DSI_DCS_TEAR_MODE_VBLANK); + mipi_dsi_dcs_exit_sleep_mode_multi(&mctx); + mipi_dsi_msleep(&mctx, 120); + mipi_dsi_dcs_set_display_on_multi(&mctx); + if (mctx.accum_err) + goto disable_power; return 0; -} -static int ili9881c_disable(struct drm_panel *panel) -{ - struct ili9881c *ctx = panel_to_ili9881c(panel); - - return mipi_dsi_dcs_set_display_off(ctx->dsi); +disable_power: + regulator_disable(ctx->power); + return mctx.accum_err; } static int ili9881c_unprepare(struct drm_panel *panel) { struct ili9881c *ctx = panel_to_ili9881c(panel); + struct mipi_dsi_multi_context mctx = { .dsi = ctx->dsi }; - mipi_dsi_dcs_enter_sleep_mode(ctx->dsi); + mipi_dsi_dcs_set_display_off_multi(&mctx); + mipi_dsi_dcs_enter_sleep_mode_multi(&mctx); regulator_disable(ctx->power); gpiod_set_value_cansleep(ctx->reset, 1); @@ -1660,6 +1823,23 @@ static const struct drm_display_mode rpi_7inch_default_mode = { .height_mm = 151, }; +static const struct drm_display_mode bsd1218_a101kl68_default_mode = { + .clock = 70000, + + .hdisplay = 800, + .hsync_start = 800 + 40, + .hsync_end = 800 + 40 + 20, + .htotal = 800 + 40 + 20 + 20, + + .vdisplay = 1280, + .vsync_start = 1280 + 20, + .vsync_end = 1280 + 20 + 4, + .vtotal = 1280 + 20 + 4 + 20, + + .width_mm = 120, + .height_mm = 170, +}; + static int ili9881c_get_modes(struct drm_panel *panel, struct 
drm_connector *connector) { @@ -1706,8 +1886,6 @@ static enum drm_panel_orientation ili9881c_get_orientation(struct drm_panel *pan static const struct drm_panel_funcs ili9881c_funcs = { .prepare = ili9881c_prepare, .unprepare = ili9881c_unprepare, - .enable = ili9881c_enable, - .disable = ili9881c_disable, .get_modes = ili9881c_get_modes, .get_orientation = ili9881c_get_orientation, }; @@ -1830,8 +2008,18 @@ static const struct ili9881c_desc rpi_7inch_desc = { .lanes = 2, }; +static const struct ili9881c_desc bsd1218_a101kl68_desc = { + .init = bsd1218_a101kl68_init, + .init_length = ARRAY_SIZE(bsd1218_a101kl68_init), + .mode = &bsd1218_a101kl68_default_mode, + .mode_flags = MIPI_DSI_MODE_VIDEO | MIPI_DSI_MODE_VIDEO_BURST | + MIPI_DSI_MODE_LPM | MIPI_DSI_MODE_NO_EOT_PACKET, + .lanes = 4, +}; + static const struct of_device_id ili9881c_of_match[] = { { .compatible = "bananapi,lhr050h41", .data = &lhr050h41_desc }, + { .compatible = "bestar,bsd1218-a101kl68", .data = &bsd1218_a101kl68_desc }, { .compatible = "feixin,k101-im2byl02", .data = &k101_im2byl02_desc }, { .compatible = "startek,kd050hdfia020", .data = &kd050hdfia020_desc }, { .compatible = "tdo,tl050hdv35", .data = &tl050hdv35_desc }, diff --git a/drivers/gpu/drm/panel/panel-ilitek-ili9882t.c b/drivers/gpu/drm/panel/panel-ilitek-ili9882t.c index 3c24a63b6be8..85c7059be214 100644 --- a/drivers/gpu/drm/panel/panel-ilitek-ili9882t.c +++ b/drivers/gpu/drm/panel/panel-ilitek-ili9882t.c @@ -614,8 +614,6 @@ static int ili9882t_add(struct ili9882t *ili) gpiod_set_value(ili->enable_gpio, 0); - drm_panel_init(&ili->base, dev, &ili9882t_funcs, - DRM_MODE_CONNECTOR_DSI); err = of_drm_get_panel_orientation(dev->of_node, &ili->orientation); if (err < 0) { dev_err(dev, "%pOF: failed to get orientation %d\n", dev->of_node, err); @@ -640,9 +638,11 @@ static int ili9882t_probe(struct mipi_dsi_device *dsi) int ret; const struct panel_desc *desc; - ili = devm_kzalloc(&dsi->dev, sizeof(*ili), GFP_KERNEL); - if (!ili) - return -ENOMEM; + ili = devm_drm_panel_alloc(&dsi->dev, __typeof(*ili), base, + &ili9882t_funcs, DRM_MODE_CONNECTOR_DSI); + + if (IS_ERR(ili)) + return PTR_ERR(ili); desc = of_device_get_match_data(&dsi->dev); dsi->lanes = desc->lanes; diff --git a/drivers/gpu/drm/panel/panel-jdi-lpm102a188a.c b/drivers/gpu/drm/panel/panel-jdi-lpm102a188a.c index 5b5082efb282..23462065d726 100644 --- a/drivers/gpu/drm/panel/panel-jdi-lpm102a188a.c +++ b/drivers/gpu/drm/panel/panel-jdi-lpm102a188a.c @@ -81,25 +81,25 @@ static int jdi_panel_disable(struct drm_panel *panel) static int jdi_panel_unprepare(struct drm_panel *panel) { struct jdi_panel *jdi = to_panel_jdi(panel); - int ret; - ret = mipi_dsi_dcs_set_display_off(jdi->link1); - if (ret < 0) - dev_err(panel->dev, "failed to set display off: %d\n", ret); + /* + * One context per panel since we'll continue trying to shut down the + * other panel even if one isn't responding. 
+ */ + struct mipi_dsi_multi_context dsi_ctx1 = { .dsi = jdi->link1 }; + struct mipi_dsi_multi_context dsi_ctx2 = { .dsi = jdi->link2 }; - ret = mipi_dsi_dcs_set_display_off(jdi->link2); - if (ret < 0) - dev_err(panel->dev, "failed to set display off: %d\n", ret); + mipi_dsi_dcs_set_display_off_multi(&dsi_ctx1); + mipi_dsi_dcs_set_display_off_multi(&dsi_ctx2); /* Specified by JDI @ 50ms, subject to change */ msleep(50); - ret = mipi_dsi_dcs_enter_sleep_mode(jdi->link1); - if (ret < 0) - dev_err(panel->dev, "failed to enter sleep mode: %d\n", ret); - ret = mipi_dsi_dcs_enter_sleep_mode(jdi->link2); - if (ret < 0) - dev_err(panel->dev, "failed to enter sleep mode: %d\n", ret); + /* Doesn't hurt to try sleep mode even if display off fails */ + dsi_ctx1.accum_err = 0; + dsi_ctx2.accum_err = 0; + mipi_dsi_dcs_enter_sleep_mode_multi(&dsi_ctx1); + mipi_dsi_dcs_enter_sleep_mode_multi(&dsi_ctx2); /* Specified by JDI @ 150ms, subject to change */ msleep(150); @@ -123,72 +123,46 @@ static int jdi_panel_unprepare(struct drm_panel *panel) /* Specified by JDI @ 20ms, subject to change */ msleep(20); - return ret; + return 0; } -static int jdi_setup_symmetrical_split(struct mipi_dsi_device *left, - struct mipi_dsi_device *right, - const struct drm_display_mode *mode) +static void jdi_setup_symmetrical_split(struct mipi_dsi_multi_context *dsi_ctx, + struct mipi_dsi_device *left, + struct mipi_dsi_device *right, + const struct drm_display_mode *mode) { - int err; - - err = mipi_dsi_dcs_set_column_address(left, 0, mode->hdisplay / 2 - 1); - if (err < 0) { - dev_err(&left->dev, "failed to set column address: %d\n", err); - return err; - } - - err = mipi_dsi_dcs_set_column_address(right, 0, mode->hdisplay / 2 - 1); - if (err < 0) { - dev_err(&right->dev, "failed to set column address: %d\n", err); - return err; - } - - err = mipi_dsi_dcs_set_page_address(left, 0, mode->vdisplay - 1); - if (err < 0) { - dev_err(&left->dev, "failed to set page address: %d\n", err); - return err; - } - - err = mipi_dsi_dcs_set_page_address(right, 0, mode->vdisplay - 1); - if (err < 0) { - dev_err(&right->dev, "failed to set page address: %d\n", err); - return err; - } - - return 0; + mipi_dsi_dual(mipi_dsi_dcs_set_column_address_multi, + dsi_ctx, left, right, + 0, mode->hdisplay / 2 - 1); + mipi_dsi_dual(mipi_dsi_dcs_set_page_address_multi, + dsi_ctx, left, right, + 0, mode->vdisplay - 1); } -static int jdi_write_dcdc_registers(struct jdi_panel *jdi) +static void jdi_write_dcdc_registers(struct mipi_dsi_multi_context *dsi_ctx, + struct jdi_panel *jdi) { /* Clear the manufacturer command access protection */ - mipi_dsi_generic_write_seq(jdi->link1, MCS_CMD_ACS_PROT, - MCS_CMD_ACS_PROT_OFF); - mipi_dsi_generic_write_seq(jdi->link2, MCS_CMD_ACS_PROT, - MCS_CMD_ACS_PROT_OFF); + mipi_dsi_dual_generic_write_seq_multi(dsi_ctx, jdi->link1, jdi->link2, + MCS_CMD_ACS_PROT, + MCS_CMD_ACS_PROT_OFF); /* - * Change the VGH/VGL divide rations to move the noise generated by the + * Change the VGH/VGL divide ratios to move the noise generated by the * TCONN. This should hopefully avoid interaction with the backlight * controller. 
*/ - mipi_dsi_generic_write_seq(jdi->link1, MCS_PWR_CTRL_FUNC, - MCS_PWR_CTRL_PARAM1_VGH_330_DIV | - MCS_PWR_CTRL_PARAM1_DEFAULT, - MCS_PWR_CTRL_PARAM2_VGL_410_DIV | - MCS_PWR_CTRL_PARAM2_DEFAULT); - - mipi_dsi_generic_write_seq(jdi->link2, MCS_PWR_CTRL_FUNC, - MCS_PWR_CTRL_PARAM1_VGH_330_DIV | - MCS_PWR_CTRL_PARAM1_DEFAULT, - MCS_PWR_CTRL_PARAM2_VGL_410_DIV | - MCS_PWR_CTRL_PARAM2_DEFAULT); - - return 0; + mipi_dsi_dual_generic_write_seq_multi(dsi_ctx, jdi->link1, jdi->link2, + MCS_PWR_CTRL_FUNC, + MCS_PWR_CTRL_PARAM1_VGH_330_DIV | + MCS_PWR_CTRL_PARAM1_DEFAULT, + MCS_PWR_CTRL_PARAM2_VGL_410_DIV | + MCS_PWR_CTRL_PARAM2_DEFAULT); } static int jdi_panel_prepare(struct drm_panel *panel) { struct jdi_panel *jdi = to_panel_jdi(panel); + struct mipi_dsi_multi_context dsi_ctx = {}; int err; /* Disable backlight to avoid showing random pixels @@ -231,86 +205,36 @@ static int jdi_panel_prepare(struct drm_panel *panel) * put in place to communicate the configuration back to the DSI host * controller. */ - err = jdi_setup_symmetrical_split(jdi->link1, jdi->link2, - jdi->mode); - if (err < 0) { - dev_err(panel->dev, "failed to set up symmetrical split: %d\n", - err); - goto poweroff; - } + jdi_setup_symmetrical_split(&dsi_ctx, jdi->link1, jdi->link2, + jdi->mode); - err = mipi_dsi_dcs_set_tear_scanline(jdi->link1, - jdi->mode->vdisplay - 16); - if (err < 0) { - dev_err(panel->dev, "failed to set tear scanline: %d\n", err); - goto poweroff; - } + mipi_dsi_dual(mipi_dsi_dcs_set_tear_scanline_multi, + &dsi_ctx, jdi->link1, jdi->link2, + jdi->mode->vdisplay - 16); - err = mipi_dsi_dcs_set_tear_scanline(jdi->link2, - jdi->mode->vdisplay - 16); - if (err < 0) { - dev_err(panel->dev, "failed to set tear scanline: %d\n", err); - goto poweroff; - } + mipi_dsi_dual(mipi_dsi_dcs_set_tear_on_multi, + &dsi_ctx, jdi->link1, jdi->link2, + MIPI_DSI_DCS_TEAR_MODE_VBLANK); - err = mipi_dsi_dcs_set_tear_on(jdi->link1, - MIPI_DSI_DCS_TEAR_MODE_VBLANK); - if (err < 0) { - dev_err(panel->dev, "failed to set tear on: %d\n", err); - goto poweroff; - } - - err = mipi_dsi_dcs_set_tear_on(jdi->link2, - MIPI_DSI_DCS_TEAR_MODE_VBLANK); - if (err < 0) { - dev_err(panel->dev, "failed to set tear on: %d\n", err); - goto poweroff; - } - - err = mipi_dsi_dcs_set_pixel_format(jdi->link1, MIPI_DCS_PIXEL_FMT_24BIT); - if (err < 0) { - dev_err(panel->dev, "failed to set pixel format: %d\n", err); - goto poweroff; - } - - err = mipi_dsi_dcs_set_pixel_format(jdi->link2, MIPI_DCS_PIXEL_FMT_24BIT); - if (err < 0) { - dev_err(panel->dev, "failed to set pixel format: %d\n", err); - goto poweroff; - } - - err = mipi_dsi_dcs_exit_sleep_mode(jdi->link1); - if (err < 0) { - dev_err(panel->dev, "failed to exit sleep mode: %d\n", err); - goto poweroff; - } + mipi_dsi_dual(mipi_dsi_dcs_set_pixel_format_multi, + &dsi_ctx, jdi->link1, jdi->link2, + MIPI_DCS_PIXEL_FMT_24BIT); - err = mipi_dsi_dcs_exit_sleep_mode(jdi->link2); - if (err < 0) { - dev_err(panel->dev, "failed to exit sleep mode: %d\n", err); - goto poweroff; - } + mipi_dsi_dual(mipi_dsi_dcs_exit_sleep_mode_multi, + &dsi_ctx, jdi->link1, jdi->link2); - err = jdi_write_dcdc_registers(jdi); - if (err < 0) { - dev_err(panel->dev, "failed to write dcdc registers: %d\n", err); - goto poweroff; - } + jdi_write_dcdc_registers(&dsi_ctx, jdi); /* - * We need to wait 150ms between mipi_dsi_dcs_exit_sleep_mode() and - * mipi_dsi_dcs_set_display_on(). + * We need to wait 150ms between mipi_dsi_dcs_exit_sleep_mode_multi() + * and mipi_dsi_dcs_set_display_on_multi(). 
*/ - msleep(150); + mipi_dsi_msleep(&dsi_ctx, 150); - err = mipi_dsi_dcs_set_display_on(jdi->link1); - if (err < 0) { - dev_err(panel->dev, "failed to set display on: %d\n", err); - goto poweroff; - } + mipi_dsi_dual(mipi_dsi_dcs_set_display_on_multi, + &dsi_ctx, jdi->link1, jdi->link2); - err = mipi_dsi_dcs_set_display_on(jdi->link2); - if (err < 0) { - dev_err(panel->dev, "failed to set display on: %d\n", err); + if (dsi_ctx.accum_err < 0) { + err = dsi_ctx.accum_err; goto poweroff; } @@ -435,9 +359,6 @@ static int jdi_panel_add(struct jdi_panel *jdi) return dev_err_probe(dev, PTR_ERR(jdi->backlight), "failed to create backlight\n"); - drm_panel_init(&jdi->base, &jdi->link1->dev, &jdi_panel_funcs, - DRM_MODE_CONNECTOR_DSI); - drm_panel_add(&jdi->base); return 0; @@ -475,10 +396,13 @@ static int jdi_panel_dsi_probe(struct mipi_dsi_device *dsi) /* register a panel for only the DSI-LINK1 interface */ if (secondary) { - jdi = devm_kzalloc(&dsi->dev, sizeof(*jdi), GFP_KERNEL); - if (!jdi) { + jdi = devm_drm_panel_alloc(&dsi->dev, __typeof(*jdi), + base, &jdi_panel_funcs, + DRM_MODE_CONNECTOR_DSI); + + if (IS_ERR(jdi)) { put_device(&secondary->dev); - return -ENOMEM; + return PTR_ERR(jdi); } mipi_dsi_set_drvdata(dsi, jdi); diff --git a/drivers/gpu/drm/panel/panel-jdi-lt070me05000.c b/drivers/gpu/drm/panel/panel-jdi-lt070me05000.c index b1ce186de261..3513e5c4dd8c 100644 --- a/drivers/gpu/drm/panel/panel-jdi-lt070me05000.c +++ b/drivers/gpu/drm/panel/panel-jdi-lt070me05000.c @@ -402,9 +402,6 @@ static int jdi_panel_add(struct jdi_panel *jdi) return dev_err_probe(dev, PTR_ERR(jdi->backlight), "failed to register backlight %d\n", ret); - drm_panel_init(&jdi->base, &jdi->dsi->dev, &jdi_panel_funcs, - DRM_MODE_CONNECTOR_DSI); - drm_panel_add(&jdi->base); return 0; @@ -426,9 +423,11 @@ static int jdi_panel_probe(struct mipi_dsi_device *dsi) dsi->mode_flags = MIPI_DSI_MODE_VIDEO_HSE | MIPI_DSI_MODE_VIDEO | MIPI_DSI_CLOCK_NON_CONTINUOUS; - jdi = devm_kzalloc(&dsi->dev, sizeof(*jdi), GFP_KERNEL); - if (!jdi) - return -ENOMEM; + jdi = devm_drm_panel_alloc(&dsi->dev, __typeof(*jdi), base, + &jdi_panel_funcs, DRM_MODE_CONNECTOR_DSI); + + if (IS_ERR(jdi)) + return PTR_ERR(jdi); mipi_dsi_set_drvdata(dsi, jdi); diff --git a/drivers/gpu/drm/panel/panel-khadas-ts050.c b/drivers/gpu/drm/panel/panel-khadas-ts050.c index 0e5e8e57bd1e..67ca055f06f3 100644 --- a/drivers/gpu/drm/panel/panel-khadas-ts050.c +++ b/drivers/gpu/drm/panel/panel-khadas-ts050.c @@ -821,9 +821,6 @@ static int khadas_ts050_panel_add(struct khadas_ts050_panel *khadas_ts050) return dev_err_probe(dev, PTR_ERR(khadas_ts050->enable_gpio), "failed to get enable gpio"); - drm_panel_init(&khadas_ts050->base, &khadas_ts050->link->dev, - &khadas_ts050_panel_funcs, DRM_MODE_CONNECTOR_DSI); - err = drm_panel_of_backlight(&khadas_ts050->base); if (err) return err; @@ -850,10 +847,12 @@ static int khadas_ts050_panel_probe(struct mipi_dsi_device *dsi) dsi->mode_flags = MIPI_DSI_MODE_VIDEO | MIPI_DSI_MODE_VIDEO_BURST | MIPI_DSI_MODE_LPM | MIPI_DSI_MODE_NO_EOT_PACKET; - khadas_ts050 = devm_kzalloc(&dsi->dev, sizeof(*khadas_ts050), - GFP_KERNEL); - if (!khadas_ts050) - return -ENOMEM; + khadas_ts050 = devm_drm_panel_alloc(&dsi->dev, __typeof(*khadas_ts050), + base, &khadas_ts050_panel_funcs, + DRM_MODE_CONNECTOR_DSI); + + if (IS_ERR(khadas_ts050)) + return PTR_ERR(khadas_ts050); khadas_ts050->panel_data = (struct khadas_ts050_panel_data *)data; mipi_dsi_set_drvdata(dsi, khadas_ts050); diff --git a/drivers/gpu/drm/panel/panel-kingdisplay-kd097d04.c 
b/drivers/gpu/drm/panel/panel-kingdisplay-kd097d04.c index d6b912277196..2fc7b0779b37 100644 --- a/drivers/gpu/drm/panel/panel-kingdisplay-kd097d04.c +++ b/drivers/gpu/drm/panel/panel-kingdisplay-kd097d04.c @@ -337,9 +337,6 @@ static int kingdisplay_panel_add(struct kingdisplay_panel *kingdisplay) kingdisplay->enable_gpio = NULL; } - drm_panel_init(&kingdisplay->base, &kingdisplay->link->dev, - &kingdisplay_panel_funcs, DRM_MODE_CONNECTOR_DSI); - err = drm_panel_of_backlight(&kingdisplay->base); if (err) return err; @@ -364,9 +361,12 @@ static int kingdisplay_panel_probe(struct mipi_dsi_device *dsi) dsi->mode_flags = MIPI_DSI_MODE_VIDEO | MIPI_DSI_MODE_VIDEO_BURST | MIPI_DSI_MODE_LPM; - kingdisplay = devm_kzalloc(&dsi->dev, sizeof(*kingdisplay), GFP_KERNEL); - if (!kingdisplay) - return -ENOMEM; + kingdisplay = devm_drm_panel_alloc(&dsi->dev, __typeof(*kingdisplay), base, + &kingdisplay_panel_funcs, + DRM_MODE_CONNECTOR_DSI); + + if (IS_ERR(kingdisplay)) + return PTR_ERR(kingdisplay); mipi_dsi_set_drvdata(dsi, kingdisplay); kingdisplay->link = dsi; diff --git a/drivers/gpu/drm/panel/panel-lg-sw43408.c b/drivers/gpu/drm/panel/panel-lg-sw43408.c index f3dcc39670ea..46a56ea92ad9 100644 --- a/drivers/gpu/drm/panel/panel-lg-sw43408.c +++ b/drivers/gpu/drm/panel/panel-lg-sw43408.c @@ -246,8 +246,6 @@ static int sw43408_add(struct sw43408_panel *ctx) ctx->base.prepare_prev_first = true; - drm_panel_init(&ctx->base, dev, &sw43408_funcs, DRM_MODE_CONNECTOR_DSI); - drm_panel_add(&ctx->base); return ret; } @@ -257,9 +255,11 @@ static int sw43408_probe(struct mipi_dsi_device *dsi) struct sw43408_panel *ctx; int ret; - ctx = devm_kzalloc(&dsi->dev, sizeof(*ctx), GFP_KERNEL); - if (!ctx) - return -ENOMEM; + ctx = devm_drm_panel_alloc(&dsi->dev, __typeof(*ctx), base, + &sw43408_funcs, DRM_MODE_CONNECTOR_DSI); + + if (IS_ERR(ctx)) + return PTR_ERR(ctx); dsi->mode_flags = MIPI_DSI_MODE_LPM; dsi->format = MIPI_DSI_FMT_RGB888; diff --git a/drivers/gpu/drm/panel/panel-lvds.c b/drivers/gpu/drm/panel/panel-lvds.c index 23fd535d8f47..46b07f38559f 100644 --- a/drivers/gpu/drm/panel/panel-lvds.c +++ b/drivers/gpu/drm/panel/panel-lvds.c @@ -28,8 +28,6 @@ struct panel_lvds { struct device *dev; const char *label; - unsigned int width; - unsigned int height; struct drm_display_mode dmode; u32 bus_flags; unsigned int bus_format; diff --git a/drivers/gpu/drm/panel/panel-novatek-nt35560.c b/drivers/gpu/drm/panel/panel-novatek-nt35560.c index 98f0782c8411..561e6643dcbb 100644 --- a/drivers/gpu/drm/panel/panel-novatek-nt35560.c +++ b/drivers/gpu/drm/panel/panel-novatek-nt35560.c @@ -148,24 +148,20 @@ static inline struct nt35560 *panel_to_nt35560(struct drm_panel *panel) static int nt35560_set_brightness(struct backlight_device *bl) { struct nt35560 *nt = bl_get_data(bl); - struct mipi_dsi_device *dsi = to_mipi_dsi_device(nt->dev); - int period_ns = 1023; + struct mipi_dsi_multi_context dsi_ctx = { + .dsi = to_mipi_dsi_device(nt->dev) + }; int duty_ns = bl->props.brightness; + int period_ns = 1023; u8 pwm_ratio; u8 pwm_div; - u8 par; - int ret; if (backlight_is_blank(bl)) { /* Disable backlight */ - par = 0x00; - ret = mipi_dsi_dcs_write(dsi, MIPI_DCS_WRITE_CONTROL_DISPLAY, - &par, 1); - if (ret) { - dev_err(nt->dev, "failed to disable display backlight (%d)\n", ret); - return ret; - } - return 0; + mipi_dsi_dcs_write_seq_multi(&dsi_ctx, + MIPI_DCS_WRITE_CONTROL_DISPLAY, + 0x00); + return dsi_ctx.accum_err; } /* Calculate the PWM duty cycle in n/256's */ @@ -176,12 +172,6 @@ static int nt35560_set_brightness(struct 
backlight_device *bl) /* Set up PWM dutycycle ONE byte (differs from the standard) */ dev_dbg(nt->dev, "calculated duty cycle %02x\n", pwm_ratio); - ret = mipi_dsi_dcs_write(dsi, MIPI_DCS_SET_DISPLAY_BRIGHTNESS, - &pwm_ratio, 1); - if (ret < 0) { - dev_err(nt->dev, "failed to set display PWM ratio (%d)\n", ret); - return ret; - } /* * Sequence to write PWMDIV: @@ -192,46 +182,23 @@ static int nt35560_set_brightness(struct backlight_device *bl) * 0x22 PWMDIV * 0x7F 0xAA CMD2 page 1 lock */ - par = 0xaa; - ret = mipi_dsi_dcs_write(dsi, 0xf3, &par, 1); - if (ret < 0) { - dev_err(nt->dev, "failed to unlock CMD 2 (%d)\n", ret); - return ret; - } - par = 0x01; - ret = mipi_dsi_dcs_write(dsi, 0x00, &par, 1); - if (ret < 0) { - dev_err(nt->dev, "failed to enter page 1 (%d)\n", ret); - return ret; - } - par = 0x01; - ret = mipi_dsi_dcs_write(dsi, 0x7d, &par, 1); - if (ret < 0) { - dev_err(nt->dev, "failed to disable MTP reload (%d)\n", ret); - return ret; - } - ret = mipi_dsi_dcs_write(dsi, 0x22, &pwm_div, 1); - if (ret < 0) { - dev_err(nt->dev, "failed to set PWM divisor (%d)\n", ret); - return ret; - } - par = 0xaa; - ret = mipi_dsi_dcs_write(dsi, 0x7f, &par, 1); - if (ret < 0) { - dev_err(nt->dev, "failed to lock CMD 2 (%d)\n", ret); - return ret; - } + mipi_dsi_dcs_write_var_seq_multi(&dsi_ctx, + MIPI_DCS_SET_DISPLAY_BRIGHTNESS, + pwm_ratio); + + mipi_dsi_dcs_write_seq_multi(&dsi_ctx, 0xf3, 0xaa); + mipi_dsi_dcs_write_seq_multi(&dsi_ctx, 0x00, 0x01); + mipi_dsi_dcs_write_seq_multi(&dsi_ctx, 0x7d, 0x01); + + mipi_dsi_dcs_write_var_seq_multi(&dsi_ctx, 0x22, pwm_div); + + mipi_dsi_dcs_write_seq_multi(&dsi_ctx, 0x7f, 0xaa); /* Enable backlight */ - par = 0x24; - ret = mipi_dsi_dcs_write(dsi, MIPI_DCS_WRITE_CONTROL_DISPLAY, - &par, 1); - if (ret < 0) { - dev_err(nt->dev, "failed to enable display backlight (%d)\n", ret); - return ret; - } + mipi_dsi_dcs_write_seq_multi(&dsi_ctx, MIPI_DCS_WRITE_CONTROL_DISPLAY, + 0x24); - return 0; + return dsi_ctx.accum_err; } static const struct backlight_ops nt35560_bl_ops = { @@ -244,32 +211,23 @@ static const struct backlight_properties nt35560_bl_props = { .max_brightness = 1023, }; -static int nt35560_read_id(struct nt35560 *nt) +static void nt35560_read_id(struct mipi_dsi_multi_context *dsi_ctx) { - struct mipi_dsi_device *dsi = to_mipi_dsi_device(nt->dev); + struct device dev = dsi_ctx->dsi->dev; u8 vendor, version, panel; u16 val; - int ret; - ret = mipi_dsi_dcs_read(dsi, NT35560_DCS_READ_ID1, &vendor, 1); - if (ret < 0) { - dev_err(nt->dev, "could not vendor ID byte\n"); - return ret; - } - ret = mipi_dsi_dcs_read(dsi, NT35560_DCS_READ_ID2, &version, 1); - if (ret < 0) { - dev_err(nt->dev, "could not read device version byte\n"); - return ret; - } - ret = mipi_dsi_dcs_read(dsi, NT35560_DCS_READ_ID3, &panel, 1); - if (ret < 0) { - dev_err(nt->dev, "could not read panel ID byte\n"); - return ret; - } + mipi_dsi_dcs_read_multi(dsi_ctx, NT35560_DCS_READ_ID1, &vendor, 1); + mipi_dsi_dcs_read_multi(dsi_ctx, NT35560_DCS_READ_ID2, &version, 1); + mipi_dsi_dcs_read_multi(dsi_ctx, NT35560_DCS_READ_ID3, &panel, 1); + + if (dsi_ctx->accum_err < 0) + return; if (vendor == 0x00) { - dev_err(nt->dev, "device vendor ID is zero\n"); - return -ENODEV; + dev_err(&dev, "device vendor ID is zero\n"); + dsi_ctx->accum_err = -ENODEV; + return; } val = (vendor << 8) | panel; @@ -278,16 +236,16 @@ static int nt35560_read_id(struct nt35560 *nt) case DISPLAY_SONY_ACX424AKP_ID2: case DISPLAY_SONY_ACX424AKP_ID3: case DISPLAY_SONY_ACX424AKP_ID4: - dev_info(nt->dev, "MTP vendor: %02x, 
version: %02x, panel: %02x\n", + dev_info(&dev, + "MTP vendor: %02x, version: %02x, panel: %02x\n", vendor, version, panel); break; default: - dev_info(nt->dev, "unknown vendor: %02x, version: %02x, panel: %02x\n", + dev_info(&dev, + "unknown vendor: %02x, version: %02x, panel: %02x\n", vendor, version, panel); break; } - - return 0; } static int nt35560_power_on(struct nt35560 *nt) @@ -322,92 +280,56 @@ static void nt35560_power_off(struct nt35560 *nt) static int nt35560_prepare(struct drm_panel *panel) { struct nt35560 *nt = panel_to_nt35560(panel); - struct mipi_dsi_device *dsi = to_mipi_dsi_device(nt->dev); - const u8 mddi = 3; + struct mipi_dsi_multi_context dsi_ctx = { + .dsi = to_mipi_dsi_device(nt->dev) + }; int ret; ret = nt35560_power_on(nt); if (ret) return ret; - ret = nt35560_read_id(nt); - if (ret) { - dev_err(nt->dev, "failed to read panel ID (%d)\n", ret); - goto err_power_off; - } + nt35560_read_id(&dsi_ctx); - /* Enabe tearing mode: send TE (tearing effect) at VBLANK */ - ret = mipi_dsi_dcs_set_tear_on(dsi, + /* Enable tearing mode: send TE (tearing effect) at VBLANK */ + mipi_dsi_dcs_set_tear_on_multi(&dsi_ctx, MIPI_DSI_DCS_TEAR_MODE_VBLANK); - if (ret) { - dev_err(nt->dev, "failed to enable vblank TE (%d)\n", ret); - goto err_power_off; - } /* * Set MDDI * * This presumably deactivates the Qualcomm MDDI interface and * selects DSI, similar code is found in other drivers such as the - * Sharp LS043T1LE01 which makes us suspect that this panel may be - * using a Novatek NT35565 or similar display driver chip that shares - * this command. Due to the lack of documentation we cannot know for - * sure. + * Sharp LS043T1LE01. */ - ret = mipi_dsi_dcs_write(dsi, NT35560_DCS_SET_MDDI, - &mddi, sizeof(mddi)); - if (ret < 0) { - dev_err(nt->dev, "failed to set MDDI (%d)\n", ret); - goto err_power_off; - } + mipi_dsi_dcs_write_seq_multi(&dsi_ctx, NT35560_DCS_SET_MDDI, 3); - /* Exit sleep mode */ - ret = mipi_dsi_dcs_exit_sleep_mode(dsi); - if (ret) { - dev_err(nt->dev, "failed to exit sleep mode (%d)\n", ret); - goto err_power_off; - } - msleep(140); + mipi_dsi_dcs_exit_sleep_mode_multi(&dsi_ctx); + mipi_dsi_msleep(&dsi_ctx, 140); - ret = mipi_dsi_dcs_set_display_on(dsi); - if (ret) { - dev_err(nt->dev, "failed to turn display on (%d)\n", ret); - goto err_power_off; - } + mipi_dsi_dcs_set_display_on_multi(&dsi_ctx); if (nt->video_mode) { - /* In video mode turn peripheral on */ - ret = mipi_dsi_turn_on_peripheral(dsi); - if (ret) { - dev_err(nt->dev, "failed to turn on peripheral\n"); - goto err_power_off; - } + mipi_dsi_turn_on_peripheral_multi(&dsi_ctx); } - return 0; - -err_power_off: - nt35560_power_off(nt); - return ret; + if (dsi_ctx.accum_err < 0) + nt35560_power_off(nt); + return dsi_ctx.accum_err; } static int nt35560_unprepare(struct drm_panel *panel) { struct nt35560 *nt = panel_to_nt35560(panel); - struct mipi_dsi_device *dsi = to_mipi_dsi_device(nt->dev); - int ret; + struct mipi_dsi_multi_context dsi_ctx = { + .dsi = to_mipi_dsi_device(nt->dev) + }; - ret = mipi_dsi_dcs_set_display_off(dsi); - if (ret) { - dev_err(nt->dev, "failed to turn display off (%d)\n", ret); - return ret; - } + mipi_dsi_dcs_set_display_off_multi(&dsi_ctx); + mipi_dsi_dcs_enter_sleep_mode_multi(&dsi_ctx); + + if (dsi_ctx.accum_err < 0) + return dsi_ctx.accum_err; - /* Enter sleep mode */ - ret = mipi_dsi_dcs_enter_sleep_mode(dsi); - if (ret) { - dev_err(nt->dev, "failed to enter sleep mode (%d)\n", ret); - return ret; - } msleep(85); nt35560_power_off(nt); diff --git 
a/drivers/gpu/drm/panel/panel-novatek-nt36523.c b/drivers/gpu/drm/panel/panel-novatek-nt36523.c index 32cf64c7c18b..226d91daf8c7 100644 --- a/drivers/gpu/drm/panel/panel-novatek-nt36523.c +++ b/drivers/gpu/drm/panel/panel-novatek-nt36523.c @@ -23,14 +23,6 @@ #define DSI_NUM_MIN 1 -#define mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, cmd, seq...) \ - do { \ - dsi_ctx.dsi = dsi0; \ - mipi_dsi_dcs_write_seq_multi(&dsi_ctx, cmd, seq); \ - dsi_ctx.dsi = dsi1; \ - mipi_dsi_dcs_write_seq_multi(&dsi_ctx, cmd, seq); \ - } while (0) - struct panel_info { struct drm_panel panel; struct mipi_dsi_device *dsi[2]; @@ -71,217 +63,217 @@ static int elish_boe_init_sequence(struct panel_info *pinfo) struct mipi_dsi_device *dsi1 = pinfo->dsi[1]; struct mipi_dsi_multi_context dsi_ctx = { .dsi = NULL }; /* No datasheet, so write magic init sequence directly */ - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0xff, 0x10); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0xfb, 0x01); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0xb9, 0x05); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0xff, 0x20); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0xfb, 0x01); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0x18, 0x40); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0xff, 0x10); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0xfb, 0x01); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0xb9, 0x02); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0xff, 0x23); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0xfb, 0x01); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0x00, 0x80); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0x01, 0x84); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0x05, 0x2d); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0x06, 0x00); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0x07, 0x00); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0x08, 0x01); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0x09, 0x45); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0x11, 0x02); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0x12, 0x80); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0x15, 0x83); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0x16, 0x0c); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0x29, 0x0a); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0x30, 0xff); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0x31, 0xfe); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0x32, 0xfd); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0x33, 0xfb); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0x34, 0xf8); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0x35, 0xf5); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0x36, 0xf3); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0x37, 0xf2); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0x38, 0xf2); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0x39, 0xf2); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0x3a, 0xef); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0x3b, 0xec); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0x3d, 0xe9); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0x3f, 0xe5); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0x40, 0xe5); - 
mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0x41, 0xe5); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0x2a, 0x13); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0x45, 0xff); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0x46, 0xf4); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0x47, 0xe7); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0x48, 0xda); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0x49, 0xcd); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0x4a, 0xc0); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0x4b, 0xb3); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0x4c, 0xb2); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0x4d, 0xb2); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0x4e, 0xb2); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0x4f, 0x99); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0x50, 0x80); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0x51, 0x68); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0x52, 0x66); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0x53, 0x66); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0x54, 0x66); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0x2b, 0x0e); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0x58, 0xff); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0x59, 0xfb); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0x5a, 0xf7); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0x5b, 0xf3); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0x5c, 0xef); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0x5d, 0xe3); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0x5e, 0xda); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0x5f, 0xd8); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0x60, 0xd8); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0x61, 0xd8); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0x62, 0xcb); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0x63, 0xbf); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0x64, 0xb3); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0x65, 0xb2); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0x66, 0xb2); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0x67, 0xb2); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0xff, 0x2a); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0xfb, 0x01); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0x25, 0x47); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0x30, 0x47); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0x39, 0x47); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0xff, 0x26); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0xfb, 0x01); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0x19, 0x10); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0x1a, 0xe0); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0x1b, 0x10); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0x1c, 0x00); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0x2a, 0x10); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0x2b, 0xe0); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0xff, 0x10); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0xfb, 0x01); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 
0xff, 0xf0); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0xfb, 0x01); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0x84, 0x08); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0x85, 0x0c); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0xff, 0x20); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0xfb, 0x01); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0x51, 0x00); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0xff, 0x25); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0xfb, 0x01); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0x91, 0x1f); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0x92, 0x0f); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0x93, 0x01); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0x94, 0x18); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0x95, 0x03); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0x96, 0x01); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0xff, 0x10); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0xb0, 0x01); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0xff, 0x25); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0xfb, 0x01); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0x19, 0x1f); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0x1b, 0x1b); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0xff, 0x24); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0xfb, 0x01); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0xb8, 0x28); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0xff, 0x27); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0xfb, 0x01); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0xd0, 0x31); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0xd1, 0x20); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0xd2, 0x30); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0xd4, 0x08); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0xde, 0x80); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0xdf, 0x02); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0xff, 0x26); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0xfb, 0x01); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0x00, 0x81); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0x01, 0xb0); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0xff, 0x22); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0xfb, 0x01); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0x9f, 0x50); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0x6f, 0x01); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0x70, 0x11); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0x73, 0x01); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0x74, 0x49); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0x76, 0x01); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0x77, 0x49); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0xa0, 0x3f); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0xa9, 0x50); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0xaa, 0x28); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0xab, 0x28); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0xad, 0x10); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0xb8, 0x00); - 
mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0xb9, 0x49); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0xba, 0x49); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0xbb, 0x49); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0xbe, 0x04); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0xbf, 0x49); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0xc0, 0x04); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0xc1, 0x59); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0xc2, 0x00); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0xc5, 0x00); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0xc6, 0x01); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0xc7, 0x48); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0xca, 0x43); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0xcb, 0x3c); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0xce, 0x00); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0xcf, 0x43); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0xd0, 0x3c); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0xd3, 0x43); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0xd4, 0x3c); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0xd7, 0x00); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0xdc, 0x43); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0xdd, 0x3c); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0xe1, 0x43); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0xe2, 0x3c); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0xf2, 0x00); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0xf3, 0x01); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0xf4, 0x48); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0xff, 0x25); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0xfb, 0x01); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0x13, 0x01); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0x14, 0x23); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0xbc, 0x01); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0xbd, 0x23); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0xff, 0x2a); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0xfb, 0x01); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0x97, 0x3c); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0x98, 0x02); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0x99, 0x95); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0x9a, 0x03); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0x9b, 0x00); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0x9c, 0x0b); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0x9d, 0x0a); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0x9e, 0x90); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0xff, 0x22); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0xfb, 0x01); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0x9f, 0x50); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0xff, 0x23); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0xfb, 0x01); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0xa3, 0x50); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0xff, 0xe0); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0xfb, 0x01); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 
0x14, 0x60); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0x16, 0xc0); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0x4f, 0x02); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0xff, 0xf0); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0xfb, 0x01); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0x3a, 0x08); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0xff, 0xd0); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0xfb, 0x01); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0x02, 0xaf); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0x09, 0xee); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0x1c, 0x99); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0x1d, 0x09); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0xff, 0x10); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0xfb, 0x01); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0x51, 0x0f, 0xff); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0x53, 0x2c); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0x35, 0x00); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0xbb, 0x13); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0x3b, 0x03, 0xac, 0x1a, 0x04, 0x04); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0x11); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0xff, 0x10); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0xfb, 0x01); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0xb9, 0x05); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0xff, 0x20); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0xfb, 0x01); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0x18, 0x40); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0xff, 0x10); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0xfb, 0x01); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0xb9, 0x02); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0xff, 0x23); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0xfb, 0x01); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0x00, 0x80); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0x01, 0x84); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0x05, 0x2d); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0x06, 0x00); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0x07, 0x00); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0x08, 0x01); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0x09, 0x45); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0x11, 0x02); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0x12, 0x80); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0x15, 0x83); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0x16, 0x0c); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0x29, 0x0a); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0x30, 0xff); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0x31, 0xfe); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0x32, 0xfd); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0x33, 0xfb); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0x34, 0xf8); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0x35, 0xf5); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0x36, 0xf3); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 
0x37, 0xf2); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0x38, 0xf2); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0x39, 0xf2); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0x3a, 0xef); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0x3b, 0xec); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0x3d, 0xe9); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0x3f, 0xe5); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0x40, 0xe5); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0x41, 0xe5); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0x2a, 0x13); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0x45, 0xff); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0x46, 0xf4); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0x47, 0xe7); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0x48, 0xda); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0x49, 0xcd); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0x4a, 0xc0); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0x4b, 0xb3); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0x4c, 0xb2); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0x4d, 0xb2); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0x4e, 0xb2); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0x4f, 0x99); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0x50, 0x80); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0x51, 0x68); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0x52, 0x66); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0x53, 0x66); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0x54, 0x66); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0x2b, 0x0e); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0x58, 0xff); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0x59, 0xfb); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0x5a, 0xf7); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0x5b, 0xf3); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0x5c, 0xef); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0x5d, 0xe3); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0x5e, 0xda); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0x5f, 0xd8); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0x60, 0xd8); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0x61, 0xd8); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0x62, 0xcb); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0x63, 0xbf); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0x64, 0xb3); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0x65, 0xb2); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0x66, 0xb2); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0x67, 0xb2); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0xff, 0x2a); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0xfb, 0x01); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0x25, 0x47); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0x30, 0x47); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0x39, 0x47); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0xff, 0x26); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0xfb, 0x01); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 
0x19, 0x10); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0x1a, 0xe0); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0x1b, 0x10); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0x1c, 0x00); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0x2a, 0x10); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0x2b, 0xe0); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0xff, 0x10); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0xfb, 0x01); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0xff, 0xf0); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0xfb, 0x01); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0x84, 0x08); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0x85, 0x0c); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0xff, 0x20); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0xfb, 0x01); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0x51, 0x00); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0xff, 0x25); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0xfb, 0x01); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0x91, 0x1f); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0x92, 0x0f); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0x93, 0x01); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0x94, 0x18); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0x95, 0x03); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0x96, 0x01); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0xff, 0x10); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0xb0, 0x01); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0xff, 0x25); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0xfb, 0x01); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0x19, 0x1f); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0x1b, 0x1b); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0xff, 0x24); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0xfb, 0x01); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0xb8, 0x28); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0xff, 0x27); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0xfb, 0x01); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0xd0, 0x31); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0xd1, 0x20); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0xd2, 0x30); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0xd4, 0x08); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0xde, 0x80); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0xdf, 0x02); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0xff, 0x26); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0xfb, 0x01); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0x00, 0x81); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0x01, 0xb0); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0xff, 0x22); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0xfb, 0x01); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0x9f, 0x50); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0x6f, 0x01); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0x70, 0x11); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0x73, 0x01); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 
0x74, 0x49); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0x76, 0x01); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0x77, 0x49); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0xa0, 0x3f); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0xa9, 0x50); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0xaa, 0x28); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0xab, 0x28); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0xad, 0x10); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0xb8, 0x00); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0xb9, 0x49); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0xba, 0x49); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0xbb, 0x49); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0xbe, 0x04); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0xbf, 0x49); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0xc0, 0x04); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0xc1, 0x59); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0xc2, 0x00); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0xc5, 0x00); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0xc6, 0x01); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0xc7, 0x48); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0xca, 0x43); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0xcb, 0x3c); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0xce, 0x00); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0xcf, 0x43); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0xd0, 0x3c); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0xd3, 0x43); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0xd4, 0x3c); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0xd7, 0x00); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0xdc, 0x43); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0xdd, 0x3c); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0xe1, 0x43); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0xe2, 0x3c); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0xf2, 0x00); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0xf3, 0x01); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0xf4, 0x48); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0xff, 0x25); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0xfb, 0x01); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0x13, 0x01); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0x14, 0x23); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0xbc, 0x01); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0xbd, 0x23); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0xff, 0x2a); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0xfb, 0x01); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0x97, 0x3c); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0x98, 0x02); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0x99, 0x95); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0x9a, 0x03); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0x9b, 0x00); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0x9c, 0x0b); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0x9d, 0x0a); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 
0x9e, 0x90); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0xff, 0x22); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0xfb, 0x01); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0x9f, 0x50); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0xff, 0x23); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0xfb, 0x01); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0xa3, 0x50); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0xff, 0xe0); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0xfb, 0x01); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0x14, 0x60); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0x16, 0xc0); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0x4f, 0x02); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0xff, 0xf0); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0xfb, 0x01); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0x3a, 0x08); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0xff, 0xd0); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0xfb, 0x01); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0x02, 0xaf); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0x09, 0xee); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0x1c, 0x99); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0x1d, 0x09); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0xff, 0x10); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0xfb, 0x01); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0x51, 0x0f, 0xff); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0x53, 0x2c); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0x35, 0x00); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0xbb, 0x13); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0x3b, 0x03, 0xac, 0x1a, 0x04, 0x04); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0x11); mipi_dsi_msleep(&dsi_ctx, 70); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0x29); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0x29); return dsi_ctx.accum_err; } @@ -292,195 +284,195 @@ static int elish_csot_init_sequence(struct panel_info *pinfo) struct mipi_dsi_device *dsi1 = pinfo->dsi[1]; struct mipi_dsi_multi_context dsi_ctx = { .dsi = NULL }; /* No datasheet, so write magic init sequence directly */ - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0xff, 0x10); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0xfb, 0x01); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0xb9, 0x05); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0xff, 0x20); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0xfb, 0x01); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0x18, 0x40); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0xff, 0x10); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0xfb, 0x01); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0xb9, 0x02); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0xff, 0xd0); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0xfb, 0x01); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0x02, 0xaf); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0x00, 0x30); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0x09, 0xee); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0x1c, 0x99); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, 
dsi0, dsi1, 0x1d, 0x09); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0xff, 0xf0); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0xfb, 0x01); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0x3a, 0x08); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0xff, 0xe0); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0xfb, 0x01); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0x4f, 0x02); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0xff, 0x20); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0xfb, 0x01); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0x58, 0x40); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0xff, 0x10); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0xfb, 0x01); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0x35, 0x00); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0xff, 0x23); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0xfb, 0x01); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0x00, 0x80); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0x01, 0x84); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0x05, 0x2d); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0x06, 0x00); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0x07, 0x00); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0x08, 0x01); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0x09, 0x45); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0x11, 0x02); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0x12, 0x80); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0x15, 0x83); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0x16, 0x0c); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0x29, 0x0a); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0x30, 0xff); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0x31, 0xfe); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0x32, 0xfd); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0x33, 0xfb); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0x34, 0xf8); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0x35, 0xf5); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0x36, 0xf3); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0x37, 0xf2); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0x38, 0xf2); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0x39, 0xf2); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0x3a, 0xef); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0x3b, 0xec); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0x3d, 0xe9); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0x3f, 0xe5); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0x40, 0xe5); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0x41, 0xe5); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0x2a, 0x13); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0x45, 0xff); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0x46, 0xf4); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0x47, 0xe7); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0x48, 0xda); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0x49, 0xcd); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0x4a, 0xc0); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0x4b, 0xb3); - 
mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0x4c, 0xb2); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0x4d, 0xb2); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0x4e, 0xb2); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0x4f, 0x99); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0x50, 0x80); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0x51, 0x68); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0x52, 0x66); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0x53, 0x66); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0x54, 0x66); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0x2b, 0x0e); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0x58, 0xff); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0x59, 0xfb); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0x5a, 0xf7); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0x5b, 0xf3); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0x5c, 0xef); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0x5d, 0xe3); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0x5e, 0xda); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0x5f, 0xd8); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0x60, 0xd8); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0x61, 0xd8); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0x62, 0xcb); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0x63, 0xbf); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0x64, 0xb3); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0x65, 0xb2); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0x66, 0xb2); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0x67, 0xb2); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0xff, 0x10); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0xfb, 0x01); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0x51, 0x0f, 0xff); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0x53, 0x2c); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0x55, 0x00); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0xbb, 0x13); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0x3b, 0x03, 0xac, 0x1a, 0x04, 0x04); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0xff, 0x2a); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0xfb, 0x01); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0x25, 0x46); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0x30, 0x46); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0x39, 0x46); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0xff, 0x26); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0xfb, 0x01); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0x01, 0xb0); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0x19, 0x10); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0x1a, 0xe0); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0x1b, 0x10); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0x1c, 0x00); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0x2a, 0x10); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0x2b, 0xe0); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0xff, 0xf0); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0xfb, 0x01); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0x84, 0x08); - 
mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0x85, 0x0c); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0xff, 0x20); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0xfb, 0x01); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0x51, 0x00); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0xff, 0x25); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0xfb, 0x01); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0x91, 0x1f); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0x92, 0x0f); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0x93, 0x01); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0x94, 0x18); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0x95, 0x03); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0x96, 0x01); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0xff, 0x10); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0xb0, 0x01); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0xff, 0x25); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0xfb, 0x01); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0x19, 0x1f); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0x1b, 0x1b); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0xff, 0x24); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0xfb, 0x01); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0xb8, 0x28); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0xff, 0x27); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0xfb, 0x01); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0xd0, 0x31); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0xd1, 0x20); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0xd4, 0x08); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0xde, 0x80); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0xdf, 0x02); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0xff, 0x26); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0xfb, 0x01); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0x00, 0x81); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0x01, 0xb0); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0xff, 0x22); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0xfb, 0x01); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0x6f, 0x01); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0x70, 0x11); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0x73, 0x01); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0x74, 0x4d); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0xa0, 0x3f); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0xa9, 0x50); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0xaa, 0x28); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0xab, 0x28); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0xad, 0x10); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0xb8, 0x00); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0xb9, 0x4b); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0xba, 0x96); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0xbb, 0x4b); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0xbe, 0x07); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0xbf, 0x4b); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0xc0, 0x07); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 
0xc1, 0x5c); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0xc2, 0x00); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0xc5, 0x00); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0xc6, 0x3f); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0xc7, 0x00); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0xca, 0x08); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0xcb, 0x40); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0xce, 0x00); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0xcf, 0x08); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0xd0, 0x40); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0xd3, 0x08); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0xd4, 0x40); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0xff, 0x25); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0xfb, 0x01); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0xbc, 0x01); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0xbd, 0x1c); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0xff, 0x2a); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0xfb, 0x01); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0x9a, 0x03); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0xff, 0x10); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0x11); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0xff, 0x10); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0xfb, 0x01); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0xb9, 0x05); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0xff, 0x20); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0xfb, 0x01); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0x18, 0x40); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0xff, 0x10); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0xfb, 0x01); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0xb9, 0x02); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0xff, 0xd0); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0xfb, 0x01); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0x02, 0xaf); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0x00, 0x30); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0x09, 0xee); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0x1c, 0x99); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0x1d, 0x09); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0xff, 0xf0); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0xfb, 0x01); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0x3a, 0x08); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0xff, 0xe0); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0xfb, 0x01); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0x4f, 0x02); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0xff, 0x20); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0xfb, 0x01); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0x58, 0x40); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0xff, 0x10); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0xfb, 0x01); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0x35, 0x00); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0xff, 0x23); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0xfb, 0x01); + 
mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0x00, 0x80); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0x01, 0x84); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0x05, 0x2d); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0x06, 0x00); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0x07, 0x00); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0x08, 0x01); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0x09, 0x45); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0x11, 0x02); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0x12, 0x80); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0x15, 0x83); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0x16, 0x0c); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0x29, 0x0a); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0x30, 0xff); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0x31, 0xfe); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0x32, 0xfd); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0x33, 0xfb); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0x34, 0xf8); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0x35, 0xf5); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0x36, 0xf3); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0x37, 0xf2); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0x38, 0xf2); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0x39, 0xf2); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0x3a, 0xef); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0x3b, 0xec); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0x3d, 0xe9); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0x3f, 0xe5); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0x40, 0xe5); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0x41, 0xe5); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0x2a, 0x13); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0x45, 0xff); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0x46, 0xf4); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0x47, 0xe7); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0x48, 0xda); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0x49, 0xcd); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0x4a, 0xc0); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0x4b, 0xb3); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0x4c, 0xb2); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0x4d, 0xb2); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0x4e, 0xb2); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0x4f, 0x99); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0x50, 0x80); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0x51, 0x68); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0x52, 0x66); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0x53, 0x66); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0x54, 0x66); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0x2b, 0x0e); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0x58, 0xff); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0x59, 0xfb); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0x5a, 0xf7); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0x5b, 0xf3); + 
mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0x5c, 0xef); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0x5d, 0xe3); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0x5e, 0xda); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0x5f, 0xd8); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0x60, 0xd8); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0x61, 0xd8); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0x62, 0xcb); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0x63, 0xbf); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0x64, 0xb3); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0x65, 0xb2); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0x66, 0xb2); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0x67, 0xb2); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0xff, 0x10); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0xfb, 0x01); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0x51, 0x0f, 0xff); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0x53, 0x2c); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0x55, 0x00); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0xbb, 0x13); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0x3b, 0x03, 0xac, 0x1a, 0x04, 0x04); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0xff, 0x2a); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0xfb, 0x01); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0x25, 0x46); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0x30, 0x46); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0x39, 0x46); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0xff, 0x26); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0xfb, 0x01); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0x01, 0xb0); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0x19, 0x10); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0x1a, 0xe0); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0x1b, 0x10); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0x1c, 0x00); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0x2a, 0x10); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0x2b, 0xe0); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0xff, 0xf0); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0xfb, 0x01); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0x84, 0x08); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0x85, 0x0c); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0xff, 0x20); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0xfb, 0x01); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0x51, 0x00); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0xff, 0x25); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0xfb, 0x01); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0x91, 0x1f); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0x92, 0x0f); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0x93, 0x01); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0x94, 0x18); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0x95, 0x03); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0x96, 0x01); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0xff, 0x10); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, 
dsi0, dsi1, 0xb0, 0x01); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0xff, 0x25); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0xfb, 0x01); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0x19, 0x1f); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0x1b, 0x1b); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0xff, 0x24); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0xfb, 0x01); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0xb8, 0x28); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0xff, 0x27); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0xfb, 0x01); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0xd0, 0x31); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0xd1, 0x20); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0xd4, 0x08); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0xde, 0x80); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0xdf, 0x02); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0xff, 0x26); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0xfb, 0x01); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0x00, 0x81); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0x01, 0xb0); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0xff, 0x22); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0xfb, 0x01); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0x6f, 0x01); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0x70, 0x11); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0x73, 0x01); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0x74, 0x4d); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0xa0, 0x3f); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0xa9, 0x50); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0xaa, 0x28); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0xab, 0x28); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0xad, 0x10); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0xb8, 0x00); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0xb9, 0x4b); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0xba, 0x96); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0xbb, 0x4b); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0xbe, 0x07); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0xbf, 0x4b); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0xc0, 0x07); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0xc1, 0x5c); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0xc2, 0x00); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0xc5, 0x00); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0xc6, 0x3f); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0xc7, 0x00); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0xca, 0x08); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0xcb, 0x40); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0xce, 0x00); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0xcf, 0x08); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0xd0, 0x40); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0xd3, 0x08); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0xd4, 0x40); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0xff, 0x25); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, 
dsi0, dsi1, 0xfb, 0x01); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0xbc, 0x01); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0xbd, 0x1c); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0xff, 0x2a); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0xfb, 0x01); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0x9a, 0x03); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0xff, 0x10); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0x11); mipi_dsi_msleep(&dsi_ctx, 70); - mipi_dsi_dual_dcs_write_seq_multi(dsi_ctx, dsi0, dsi1, 0x29); + mipi_dsi_dual_dcs_write_seq_multi(&dsi_ctx, dsi0, dsi1, 0x29); return dsi_ctx.accum_err; } diff --git a/drivers/gpu/drm/panel/panel-novatek-nt36672a.c b/drivers/gpu/drm/panel/panel-novatek-nt36672a.c index c2abd20e0734..29e1f6aea480 100644 --- a/drivers/gpu/drm/panel/panel-novatek-nt36672a.c +++ b/drivers/gpu/drm/panel/panel-novatek-nt36672a.c @@ -608,8 +608,6 @@ static int nt36672a_panel_add(struct nt36672a_panel *pinfo) return dev_err_probe(dev, PTR_ERR(pinfo->reset_gpio), "failed to get reset gpio from DT\n"); - drm_panel_init(&pinfo->base, dev, &panel_funcs, DRM_MODE_CONNECTOR_DSI); - ret = drm_panel_of_backlight(&pinfo->base); if (ret) return dev_err_probe(dev, ret, "Failed to get backlight\n"); @@ -625,9 +623,11 @@ static int nt36672a_panel_probe(struct mipi_dsi_device *dsi) const struct nt36672a_panel_desc *desc; int err; - pinfo = devm_kzalloc(&dsi->dev, sizeof(*pinfo), GFP_KERNEL); - if (!pinfo) - return -ENOMEM; + pinfo = devm_drm_panel_alloc(&dsi->dev, __typeof(*pinfo), base, + &panel_funcs, DRM_MODE_CONNECTOR_DSI); + + if (IS_ERR(pinfo)) + return PTR_ERR(pinfo); desc = of_device_get_match_data(&dsi->dev); dsi->mode_flags = desc->mode_flags; diff --git a/drivers/gpu/drm/panel/panel-orisetech-ota5601a.c b/drivers/gpu/drm/panel/panel-orisetech-ota5601a.c index 3231e84dc66c..8a608972fc41 100644 --- a/drivers/gpu/drm/panel/panel-orisetech-ota5601a.c +++ b/drivers/gpu/drm/panel/panel-orisetech-ota5601a.c @@ -276,11 +276,8 @@ static int ota5601a_probe(struct spi_device *spi) } err = drm_panel_of_backlight(&panel->drm_panel); - if (err) { - if (err != -EPROBE_DEFER) - dev_err(dev, "Failed to get backlight handle\n"); - return err; - } + if (err) + return dev_err_probe(dev, err, "Failed to get backlight handle\n"); drm_panel_add(&panel->drm_panel); diff --git a/drivers/gpu/drm/panel/panel-osd-osd101t2587-53ts.c b/drivers/gpu/drm/panel/panel-osd-osd101t2587-53ts.c index dbea84f51514..2334b77f348c 100644 --- a/drivers/gpu/drm/panel/panel-osd-osd101t2587-53ts.c +++ b/drivers/gpu/drm/panel/panel-osd-osd101t2587-53ts.c @@ -132,9 +132,6 @@ static int osd101t2587_panel_add(struct osd101t2587_panel *osd101t2587) if (IS_ERR(osd101t2587->supply)) return PTR_ERR(osd101t2587->supply); - drm_panel_init(&osd101t2587->base, &osd101t2587->dsi->dev, - &osd101t2587_panel_funcs, DRM_MODE_CONNECTOR_DSI); - ret = drm_panel_of_backlight(&osd101t2587->base); if (ret) return ret; @@ -161,9 +158,12 @@ static int osd101t2587_panel_probe(struct mipi_dsi_device *dsi) MIPI_DSI_MODE_VIDEO_SYNC_PULSE | MIPI_DSI_MODE_NO_EOT_PACKET; - osd101t2587 = devm_kzalloc(&dsi->dev, sizeof(*osd101t2587), GFP_KERNEL); - if (!osd101t2587) - return -ENOMEM; + osd101t2587 = devm_drm_panel_alloc(&dsi->dev, __typeof(*osd101t2587), base, + &osd101t2587_panel_funcs, + DRM_MODE_CONNECTOR_DSI); + + if (IS_ERR(osd101t2587)) + return PTR_ERR(osd101t2587); mipi_dsi_set_drvdata(dsi, osd101t2587); diff --git 
a/drivers/gpu/drm/panel/panel-panasonic-vvx10f034n00.c b/drivers/gpu/drm/panel/panel-panasonic-vvx10f034n00.c index d1c5c9bc3c56..3c3308fc55df 100644 --- a/drivers/gpu/drm/panel/panel-panasonic-vvx10f034n00.c +++ b/drivers/gpu/drm/panel/panel-panasonic-vvx10f034n00.c @@ -166,9 +166,6 @@ static int wuxga_nt_panel_add(struct wuxga_nt_panel *wuxga_nt) if (IS_ERR(wuxga_nt->supply)) return PTR_ERR(wuxga_nt->supply); - drm_panel_init(&wuxga_nt->base, &wuxga_nt->dsi->dev, - &wuxga_nt_panel_funcs, DRM_MODE_CONNECTOR_DSI); - ret = drm_panel_of_backlight(&wuxga_nt->base); if (ret) return ret; @@ -196,9 +193,12 @@ static int wuxga_nt_panel_probe(struct mipi_dsi_device *dsi) MIPI_DSI_CLOCK_NON_CONTINUOUS | MIPI_DSI_MODE_LPM; - wuxga_nt = devm_kzalloc(&dsi->dev, sizeof(*wuxga_nt), GFP_KERNEL); - if (!wuxga_nt) - return -ENOMEM; + wuxga_nt = devm_drm_panel_alloc(&dsi->dev, __typeof(*wuxga_nt), base, + &wuxga_nt_panel_funcs, + DRM_MODE_CONNECTOR_DSI); + + if (IS_ERR(wuxga_nt)) + return PTR_ERR(wuxga_nt); mipi_dsi_set_drvdata(dsi, wuxga_nt); diff --git a/drivers/gpu/drm/panel/panel-raspberrypi-touchscreen.c b/drivers/gpu/drm/panel/panel-raspberrypi-touchscreen.c index e10e469aa7a6..dc4bb8ad9131 100644 --- a/drivers/gpu/drm/panel/panel-raspberrypi-touchscreen.c +++ b/drivers/gpu/drm/panel/panel-raspberrypi-touchscreen.c @@ -373,9 +373,12 @@ static int rpi_touchscreen_probe(struct i2c_client *i2c) .node = NULL, }; - ts = devm_kzalloc(dev, sizeof(*ts), GFP_KERNEL); - if (!ts) - return -ENOMEM; + ts = devm_drm_panel_alloc(dev, __typeof(*ts), base, + &rpi_touchscreen_funcs, + DRM_MODE_CONNECTOR_DSI); + + if (IS_ERR(ts)) + return PTR_ERR(ts); i2c_set_clientdata(i2c, ts); @@ -428,9 +431,6 @@ static int rpi_touchscreen_probe(struct i2c_client *i2c) return PTR_ERR(ts->dsi); } - drm_panel_init(&ts->base, dev, &rpi_touchscreen_funcs, - DRM_MODE_CONNECTOR_DSI); - /* This appears last, as it's what will unblock the DSI host * driver's component bind function. 
*/ diff --git a/drivers/gpu/drm/panel/panel-raydium-rm67200.c b/drivers/gpu/drm/panel/panel-raydium-rm67200.c index 1da8225e6f7a..333faed62da7 100644 --- a/drivers/gpu/drm/panel/panel-raydium-rm67200.c +++ b/drivers/gpu/drm/panel/panel-raydium-rm67200.c @@ -36,12 +36,14 @@ static inline struct raydium_rm67200 *to_raydium_rm67200(struct drm_panel *panel static void raydium_rm67200_reset(struct raydium_rm67200 *ctx) { - gpiod_set_value_cansleep(ctx->reset_gpio, 0); - msleep(60); - gpiod_set_value_cansleep(ctx->reset_gpio, 1); - msleep(60); - gpiod_set_value_cansleep(ctx->reset_gpio, 0); - msleep(60); + if (ctx->reset_gpio) { + gpiod_set_value_cansleep(ctx->reset_gpio, 0); + msleep(60); + gpiod_set_value_cansleep(ctx->reset_gpio, 1); + msleep(60); + gpiod_set_value_cansleep(ctx->reset_gpio, 0); + msleep(60); + } } static void raydium_rm67200_write(struct mipi_dsi_multi_context *ctx, @@ -318,6 +320,7 @@ static void w552793baa_setup(struct mipi_dsi_multi_context *ctx) static int raydium_rm67200_prepare(struct drm_panel *panel) { struct raydium_rm67200 *ctx = to_raydium_rm67200(panel); + struct mipi_dsi_multi_context mctx = { .dsi = ctx->dsi }; int ret; ret = regulator_bulk_enable(ctx->num_supplies, ctx->supplies); @@ -328,6 +331,12 @@ static int raydium_rm67200_prepare(struct drm_panel *panel) msleep(60); + ctx->panel_info->panel_setup(&mctx); + mipi_dsi_dcs_exit_sleep_mode_multi(&mctx); + mipi_dsi_msleep(&mctx, 120); + mipi_dsi_dcs_set_display_on_multi(&mctx); + mipi_dsi_msleep(&mctx, 30); + return 0; } @@ -343,20 +352,6 @@ static int raydium_rm67200_unprepare(struct drm_panel *panel) return 0; } -static int raydium_rm67200_enable(struct drm_panel *panel) -{ - struct raydium_rm67200 *rm67200 = to_raydium_rm67200(panel); - struct mipi_dsi_multi_context ctx = { .dsi = rm67200->dsi }; - - rm67200->panel_info->panel_setup(&ctx); - mipi_dsi_dcs_exit_sleep_mode_multi(&ctx); - mipi_dsi_msleep(&ctx, 120); - mipi_dsi_dcs_set_display_on_multi(&ctx); - mipi_dsi_msleep(&ctx, 30); - - return ctx.accum_err; -} - static int raydium_rm67200_disable(struct drm_panel *panel) { struct raydium_rm67200 *rm67200 = to_raydium_rm67200(panel); @@ -381,7 +376,6 @@ static const struct drm_panel_funcs raydium_rm67200_funcs = { .prepare = raydium_rm67200_prepare, .unprepare = raydium_rm67200_unprepare, .get_modes = raydium_rm67200_get_modes, - .enable = raydium_rm67200_enable, .disable = raydium_rm67200_disable, }; @@ -409,7 +403,7 @@ static int raydium_rm67200_probe(struct mipi_dsi_device *dsi) if (ret < 0) return ret; - ctx->reset_gpio = devm_gpiod_get(dev, "reset", GPIOD_OUT_LOW); + ctx->reset_gpio = devm_gpiod_get_optional(dev, "reset", GPIOD_OUT_LOW); if (IS_ERR(ctx->reset_gpio)) return dev_err_probe(dev, PTR_ERR(ctx->reset_gpio), "Failed to get reset-gpios\n"); @@ -470,6 +464,7 @@ static const struct raydium_rm67200_panel_info w552793baa_info = { .vtotal = 1952, .width_mm = 68, /* 68.04mm */ .height_mm = 121, /* 120.96mm */ + .flags = DRM_MODE_FLAG_NVSYNC | DRM_MODE_FLAG_NHSYNC, .type = DRM_MODE_TYPE_DRIVER, }, .regulators = w552793baa_regulators, diff --git a/drivers/gpu/drm/panel/panel-samsung-s6d7aa0.c b/drivers/gpu/drm/panel/panel-samsung-s6d7aa0.c index b5b9e80690f6..692020081524 100644 --- a/drivers/gpu/drm/panel/panel-samsung-s6d7aa0.c +++ b/drivers/gpu/drm/panel/panel-samsung-s6d7aa0.c @@ -244,7 +244,7 @@ static const struct s6d7aa0_panel_desc s6d7aa0_lsl080al02_desc = { .init_func = s6d7aa0_lsl080al02_init, .off_func = s6d7aa0_lsl080al02_off, .drm_mode = &s6d7aa0_lsl080al02_mode, - .mode_flags = 
MIPI_DSI_MODE_VSYNC_FLUSH | MIPI_DSI_MODE_VIDEO_NO_HFP, + .mode_flags = MIPI_DSI_MODE_VIDEO_NO_HFP, .bus_flags = 0, .has_backlight = false, diff --git a/drivers/gpu/drm/panel/panel-samsung-s6e88a0-ams427ap24.c b/drivers/gpu/drm/panel/panel-samsung-s6e88a0-ams427ap24.c index e91f50662997..7e2f4e043d62 100644 --- a/drivers/gpu/drm/panel/panel-samsung-s6e88a0-ams427ap24.c +++ b/drivers/gpu/drm/panel/panel-samsung-s6e88a0-ams427ap24.c @@ -7,7 +7,9 @@ #include <linux/backlight.h> #include <linux/delay.h> #include <linux/gpio/consumer.h> +#include <linux/mod_devicetable.h> #include <linux/module.h> +#include <linux/property.h> #include <linux/regulator/consumer.h> #include <video/mipi_display.h> diff --git a/drivers/gpu/drm/panel/panel-samsung-s6e8aa0.c b/drivers/gpu/drm/panel/panel-samsung-s6e8aa0.c index 897df195f2f3..1b5c500d4f4e 100644 --- a/drivers/gpu/drm/panel/panel-samsung-s6e8aa0.c +++ b/drivers/gpu/drm/panel/panel-samsung-s6e8aa0.c @@ -992,7 +992,7 @@ static int s6e8aa0_probe(struct mipi_dsi_device *dsi) dsi->lanes = 4; dsi->format = MIPI_DSI_FMT_RGB888; dsi->mode_flags = MIPI_DSI_MODE_VIDEO | MIPI_DSI_MODE_VIDEO_BURST - | MIPI_DSI_MODE_VSYNC_FLUSH | MIPI_DSI_MODE_VIDEO_AUTO_VERT; + | MIPI_DSI_MODE_VIDEO_AUTO_VERT; ret = s6e8aa0_parse_dt(ctx); if (ret < 0) diff --git a/drivers/gpu/drm/panel/panel-samsung-s6e8aa5x01-ams561ra01.c b/drivers/gpu/drm/panel/panel-samsung-s6e8aa5x01-ams561ra01.c new file mode 100644 index 000000000000..56e10c7c3a76 --- /dev/null +++ b/drivers/gpu/drm/panel/panel-samsung-s6e8aa5x01-ams561ra01.c @@ -0,0 +1,981 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Samsung AMS561RA01 panel with S6E8AA5X01 controller. + * + * Copyright (C) 2025 Kaustabh Chakraborty <kauschluss@disroot.org> + */ + +#include <linux/backlight.h> +#include <linux/gpio/consumer.h> +#include <linux/module.h> +#include <linux/of.h> +#include <linux/regulator/consumer.h> + +#include <drm/drm_mipi_dsi.h> +#include <drm/drm_modes.h> +#include <drm/drm_panel.h> +#include <drm/drm_probe_helper.h> + +/* Manufacturer Command Set */ +#define MCS_AIDCTL 0xb2 +#define MCS_ADAPTIVECTL 0xb5 +#define MCS_ELVSS 0xb6 +#define MCS_TEMPERCTL 0xb8 +#define MCS_PENTILE 0xc0 +#define MCS_GAMMACTL 0xca +#define MCS_LTPSCTL 0xcb +#define MCS_PCD 0xcc +#define MCS_ERRFLAG 0xe7 +#define MCS_ACCESSPROT 0xf0 +#define MCS_DISPCTL 0xf2 +#define MCS_GAMMAUPD 0xf7 + +#define GAMMA_CMD_LEN 34 +#define AID_CMD_LEN 3 + +static const struct { + u8 gamma[GAMMA_CMD_LEN]; + u8 aid[AID_CMD_LEN]; +} s6e8aa5x01_ams561ra01_cmds[] = { + { + /* 5 nits */ + { MCS_GAMMACTL, + 0x00, 0x98, 0x00, 0xa4, 0x00, 0x94, + 0x88, 0x89, 0x8a, 0x87, 0x87, 0x89, + 0x8d, 0x8c, 0x8d, 0x89, 0x8c, 0x8e, + 0x8e, 0x8f, 0x90, 0xa3, 0xa2, 0x9a, + 0xcf, 0xca, 0x9f, 0xe6, 0xff, 0xb4, + 0x00, 0x00, 0x00, }, + { MCS_AIDCTL, 0x05, 0xa5 }, + }, { + /* 6 nits */ + { MCS_GAMMACTL, + 0x00, 0x98, 0x00, 0xa4, 0x00, 0x95, + 0x88, 0x89, 0x8b, 0x87, 0x87, 0x89, + 0x8c, 0x8a, 0x8c, 0x85, 0x88, 0x8c, + 0x8b, 0x8c, 0x8e, 0xa2, 0xa2, 0x9a, + 0xd0, 0xcc, 0xa2, 0xed, 0xff, 0xb7, + 0x00, 0x00, 0x00, }, + { MCS_AIDCTL, 0x05, 0x95 }, + }, { + /* 7 nits */ + { MCS_GAMMACTL, + 0x00, 0x98, 0x00, 0xa4, 0x00, 0x95, + 0x88, 0x89, 0x8b, 0x87, 0x87, 0x89, + 0x8c, 0x8a, 0x8c, 0x85, 0x88, 0x8c, + 0x8b, 0x8c, 0x8e, 0xa2, 0xa2, 0x99, + 0xc8, 0xc4, 0x9d, 0xed, 0xff, 0xb7, + 0x00, 0x00, 0x00, }, + { MCS_AIDCTL, 0x05, 0x89 }, + }, { + /* 8 nits */ + { MCS_GAMMACTL, + 0x00, 0x98, 0x00, 0xa4, 0x00, 0x96, + 0x88, 0x89, 0x8a, 0x87, 0x87, 0x89, + 0x8a, 0x88, 0x8b, 0x83, 0x86, 0x8b, + 0x8c, 0x8b, 
0x8d, 0x9d, 0x9f, 0x97, + 0xc7, 0xc3, 0x9c, 0xf5, 0xff, 0xbb, + 0x00, 0x00, 0x00, }, + { MCS_AIDCTL, 0x05, 0x7e }, + }, { + /* 9 nits */ + { MCS_GAMMACTL, + 0x00, 0x98, 0x00, 0xa4, 0x00, 0x96, + 0x88, 0x89, 0x8a, 0x87, 0x87, 0x89, + 0x89, 0x86, 0x8a, 0x82, 0x84, 0x88, + 0x90, 0x8f, 0x91, 0x95, 0x97, 0x94, + 0xc6, 0xc2, 0x9d, 0xf5, 0xff, 0xbb, + 0x00, 0x00, 0x00, }, + { MCS_AIDCTL, 0x05, 0x73 }, + }, { + /* 10 nits */ + { MCS_GAMMACTL, + 0x00, 0x98, 0x00, 0xa4, 0x00, 0x96, + 0x88, 0x89, 0x8a, 0x87, 0x87, 0x89, + 0x89, 0x86, 0x8a, 0x82, 0x84, 0x88, + 0x90, 0x8f, 0x91, 0x94, 0x97, 0x93, + 0xc6, 0xc2, 0x9e, 0xec, 0xff, 0xb7, + 0x00, 0x00, 0x00, }, + { MCS_AIDCTL, 0x05, 0x67 }, + }, { + /* 11 nits */ + { MCS_GAMMACTL, + 0x00, 0x98, 0x00, 0xa4, 0x00, 0x96, + 0x88, 0x89, 0x8a, 0x87, 0x87, 0x89, + 0x89, 0x86, 0x8a, 0x82, 0x84, 0x88, + 0x8b, 0x8b, 0x8d, 0x90, 0x93, 0x92, + 0xc5, 0xc1, 0x9c, 0xf5, 0xff, 0xbb, + 0x00, 0x00, 0x00, }, + { MCS_AIDCTL, 0x05, 0x56 }, + }, { + /* 12 nits */ + { MCS_GAMMACTL, + 0x00, 0x98, 0x00, 0xa4, 0x00, 0x96, + 0x88, 0x89, 0x8b, 0x87, 0x87, 0x89, + 0x89, 0x86, 0x89, 0x82, 0x84, 0x88, + 0x87, 0x86, 0x8a, 0x8c, 0x90, 0x8f, + 0xcd, 0xc9, 0xa1, 0xec, 0xff, 0xb7, + 0x00, 0x00, 0x00, }, + { MCS_AIDCTL, 0x05, 0x4a }, + }, { + /* 13 nits */ + { MCS_GAMMACTL, + 0x00, 0x98, 0x00, 0xa4, 0x00, 0x96, + 0x88, 0x89, 0x8b, 0x87, 0x87, 0x89, + 0x89, 0x86, 0x89, 0x82, 0x84, 0x88, + 0x87, 0x86, 0x8a, 0x8c, 0x90, 0x8e, + 0xc4, 0xbf, 0x9c, 0xf5, 0xff, 0xbb, + 0x00, 0x00, 0x00, }, + { MCS_AIDCTL, 0x05, 0x3b }, + }, { + /* 14 nits */ + { MCS_GAMMACTL, + 0x00, 0x98, 0x00, 0xa4, 0x00, 0x96, + 0x88, 0x89, 0x8b, 0x87, 0x87, 0x89, + 0x89, 0x86, 0x89, 0x82, 0x84, 0x88, + 0x87, 0x86, 0x89, 0x8c, 0x90, 0x8f, + 0xc2, 0xbf, 0x9c, 0xec, 0xff, 0xb7, + 0x00, 0x00, 0x00, }, + { MCS_AIDCTL, 0x05, 0x35 }, + }, { + /* 15 nits */ + { MCS_GAMMACTL, + 0x00, 0x98, 0x00, 0xa4, 0x00, 0x96, + 0x88, 0x89, 0x8b, 0x87, 0x87, 0x89, + 0x89, 0x86, 0x89, 0x82, 0x84, 0x88, + 0x87, 0x86, 0x89, 0x8c, 0x90, 0x8f, + 0xb7, 0xb6, 0x96, 0xec, 0xff, 0xb7, + 0x00, 0x00, 0x00, }, + { MCS_AIDCTL, 0x05, 0x25 }, + }, { + /* 16 nits */ + { MCS_GAMMACTL, + 0x00, 0x98, 0x00, 0xa4, 0x00, 0x96, + 0x88, 0x89, 0x8b, 0x87, 0x87, 0x89, + 0x89, 0x86, 0x89, 0x82, 0x84, 0x88, + 0x88, 0x86, 0x89, 0x8c, 0x90, 0x8f, + 0xb7, 0xb6, 0x96, 0xec, 0xff, 0xb7, + 0x00, 0x00, 0x00, }, + { MCS_AIDCTL, 0x05, 0x20 }, + }, { + /* 17 nits */ + { MCS_GAMMACTL, + 0x00, 0x98, 0x00, 0xa4, 0x00, 0x96, + 0x88, 0x89, 0x8b, 0x87, 0x87, 0x89, + 0x89, 0x86, 0x89, 0x7f, 0x80, 0x86, + 0x86, 0x85, 0x89, 0x88, 0x8c, 0x8e, + 0xbf, 0xbe, 0x9c, 0xec, 0xff, 0xb7, + 0x00, 0x00, 0x00, }, + { MCS_AIDCTL, 0x05, 0x11 }, + }, { + /* 19 nits */ + { MCS_GAMMACTL, + 0x00, 0x98, 0x00, 0xa4, 0x00, 0x96, + 0x88, 0x89, 0x8b, 0x87, 0x87, 0x89, + 0x89, 0x86, 0x89, 0x7f, 0x80, 0x86, + 0x87, 0x85, 0x89, 0x88, 0x8c, 0x8e, + 0xb3, 0xb4, 0x97, 0xeb, 0xff, 0xb7, + 0x00, 0x00, 0x00, }, + { MCS_AIDCTL, 0x04, 0xf2 }, + }, { + /* 20 nits */ + { MCS_GAMMACTL, + 0x00, 0x98, 0x00, 0xa4, 0x00, 0x95, + 0x88, 0x89, 0x8b, 0x87, 0x87, 0x89, + 0x89, 0x86, 0x89, 0x7f, 0x80, 0x86, + 0x87, 0x85, 0x89, 0x89, 0x8c, 0x8e, + 0xb3, 0xb4, 0x97, 0xeb, 0xff, 0xb7, + 0x00, 0x00, 0x00, }, + { MCS_AIDCTL, 0x04, 0xe4 }, + }, { + /* 21 nits */ + { MCS_GAMMACTL, + 0x00, 0x98, 0x00, 0xa4, 0x00, 0x96, + 0x88, 0x89, 0x8b, 0x87, 0x87, 0x89, + 0x8a, 0x88, 0x8b, 0x7d, 0x7e, 0x84, + 0x8c, 0x8a, 0x8c, 0x8e, 0x90, 0x8f, + 0xb6, 0xb6, 0x97, 0xe3, 0xff, 0xb3, + 0x00, 0x00, 0x00, }, + { MCS_AIDCTL, 0x04, 0xd5 }, + }, { + /* 22 nits */ + { 
MCS_GAMMACTL, + 0x00, 0x98, 0x00, 0xa4, 0x00, 0x97, + 0x88, 0x89, 0x8b, 0x87, 0x87, 0x89, + 0x8a, 0x88, 0x8b, 0x81, 0x82, 0x86, + 0x87, 0x86, 0x88, 0x8e, 0x90, 0x8f, + 0xb6, 0xb6, 0x95, 0xe3, 0xff, 0xb3, + 0x00, 0x00, 0x00, }, + { MCS_AIDCTL, 0x04, 0xc5 }, + }, { + /* 24 nits */ + { MCS_GAMMACTL, + 0x00, 0x98, 0x00, 0xa4, 0x00, 0x97, + 0x88, 0x89, 0x8b, 0x88, 0x88, 0x8a, + 0x8a, 0x87, 0x8a, 0x81, 0x82, 0x86, + 0x87, 0x86, 0x88, 0x8e, 0x90, 0x8f, + 0xb6, 0xb6, 0x94, 0xe3, 0xff, 0xb3, + 0x00, 0x00, 0x00, }, + { MCS_AIDCTL, 0x04, 0xa7 }, + }, { + /* 25 nits */ + { MCS_GAMMACTL, + 0x00, 0x98, 0x00, 0xa4, 0x00, 0x98, + 0x88, 0x89, 0x8b, 0x88, 0x88, 0x8a, + 0x8a, 0x87, 0x8a, 0x81, 0x82, 0x86, + 0x87, 0x86, 0x87, 0x8e, 0x90, 0x8f, + 0xbf, 0xbf, 0x9a, 0xda, 0xfa, 0xaf, + 0x00, 0x00, 0x00, }, + { MCS_AIDCTL, 0x04, 0x95 }, + }, { + /* 27 nits */ + { MCS_GAMMACTL, + 0x00, 0x98, 0x00, 0xa4, 0x00, 0x99, + 0x88, 0x89, 0x8b, 0x88, 0x88, 0x8a, + 0x8a, 0x87, 0x8a, 0x83, 0x86, 0x8a, + 0x88, 0x87, 0x87, 0x88, 0x8b, 0x8c, + 0xbf, 0xbf, 0x9a, 0xda, 0xfa, 0xaf, + 0x00, 0x00, 0x00, }, + { MCS_AIDCTL, 0x04, 0x76 }, + }, { + /* 29 nits */ + { MCS_GAMMACTL, + 0x00, 0x98, 0x00, 0xa4, 0x00, 0x99, + 0x88, 0x89, 0x8b, 0x88, 0x88, 0x8a, + 0x8a, 0x87, 0x8b, 0x83, 0x86, 0x89, + 0x88, 0x87, 0x88, 0x88, 0x8b, 0x8b, + 0xbf, 0xbf, 0x9a, 0xda, 0xfa, 0xaf, + 0x00, 0x00, 0x00, }, + { MCS_AIDCTL, 0x04, 0x54 }, + }, { + /* 30 nits */ + { MCS_GAMMACTL, + 0x00, 0x98, 0x00, 0xa4, 0x00, 0x9a, + 0x88, 0x89, 0x8b, 0x88, 0x88, 0x8a, + 0x8a, 0x87, 0x8a, 0x84, 0x86, 0x8a, + 0x87, 0x87, 0x87, 0x88, 0x8b, 0x8b, + 0xbf, 0xbf, 0x99, 0xda, 0xfa, 0xaf, + 0x00, 0x00, 0x00, }, + { MCS_AIDCTL, 0x04, 0x44 }, + }, { + /* 32 nits */ + { MCS_GAMMACTL, + 0x00, 0x98, 0x00, 0xa4, 0x00, 0x9a, + 0x89, 0x89, 0x8c, 0x88, 0x88, 0x8a, + 0x89, 0x87, 0x8a, 0x84, 0x86, 0x8a, + 0x87, 0x87, 0x87, 0x89, 0x8b, 0x8b, + 0xbf, 0xbf, 0x98, 0xd2, 0xf2, 0xac, + 0x00, 0x00, 0x00, }, + { MCS_AIDCTL, 0x04, 0x1f }, + }, { + /* 34 nits */ + { MCS_GAMMACTL, + 0x00, 0x98, 0x00, 0xa4, 0x00, 0x9b, + 0x88, 0x89, 0x8b, 0x88, 0x88, 0x8a, + 0x8b, 0x87, 0x8b, 0x83, 0x86, 0x89, + 0x87, 0x87, 0x88, 0x88, 0x8b, 0x8a, + 0xbf, 0xbf, 0x98, 0xd2, 0xf2, 0xac, + 0x00, 0x00, 0x00, }, + { MCS_AIDCTL, 0x03, 0xff }, + }, { + /* 37 nits */ + { MCS_GAMMACTL, + 0x00, 0x98, 0x00, 0xa4, 0x00, 0x9b, + 0x89, 0x89, 0x8c, 0x88, 0x88, 0x8a, + 0x8a, 0x87, 0x8a, 0x81, 0x82, 0x86, + 0x86, 0x86, 0x86, 0x8d, 0x90, 0x8d, + 0xc0, 0xbf, 0x9a, 0xd2, 0xf2, 0xac, + 0x00, 0x00, 0x00, }, + { MCS_AIDCTL, 0x03, 0xd3 }, + }, { + /* 39 nits */ + { MCS_GAMMACTL, + 0x00, 0x98, 0x00, 0xa4, 0x00, 0x9b, + 0x89, 0x89, 0x8c, 0x88, 0x88, 0x8a, + 0x8a, 0x87, 0x8a, 0x81, 0x82, 0x86, + 0x87, 0x86, 0x87, 0x8d, 0x90, 0x8d, + 0xb6, 0xb6, 0x93, 0xda, 0xf9, 0xaf, + 0x00, 0x00, 0x00, }, + { MCS_AIDCTL, 0x03, 0xb3 }, + }, { + /* 41 nits */ + { MCS_GAMMACTL, + 0x00, 0x98, 0x00, 0xa4, 0x00, 0x9b, + 0x89, 0x89, 0x8c, 0x88, 0x88, 0x8a, + 0x8a, 0x87, 0x8b, 0x81, 0x82, 0x85, + 0x87, 0x86, 0x87, 0x8d, 0x90, 0x8d, + 0xb6, 0xb6, 0x94, 0xda, 0xf9, 0xaf, + 0x00, 0x00, 0x00, }, + { MCS_AIDCTL, 0x03, 0x93 }, + }, { + /* 44 nits */ + { MCS_GAMMACTL, + 0x00, 0x98, 0x00, 0xa4, 0x00, 0x9b, + 0x89, 0x89, 0x8c, 0x88, 0x88, 0x8a, + 0x8a, 0x87, 0x8b, 0x81, 0x82, 0x86, + 0x87, 0x86, 0x86, 0x85, 0x87, 0x8a, + 0xbe, 0xbe, 0x99, 0xda, 0xf9, 0xaf, + 0x00, 0x00, 0x00, }, + { MCS_AIDCTL, 0x03, 0x66 }, + }, { + /* 47 nits */ + { MCS_GAMMACTL, + 0x00, 0x98, 0x00, 0xa4, 0x00, 0x9b, + 0x89, 0x89, 0x8c, 0x88, 0x88, 0x8a, + 0x8a, 0x87, 0x8b, 0x81, 0x82, 0x86, + 0x88, 0x86, 
0x87, 0x84, 0x87, 0x89, + 0xb4, 0xb4, 0x94, 0xe2, 0xff, 0xb3, + 0x00, 0x00, 0x00, }, + { MCS_AIDCTL, 0x03, 0x40 }, + }, { + /* 50 nits */ + { MCS_GAMMACTL, + 0x00, 0x98, 0x00, 0xa4, 0x00, 0x9c, + 0x89, 0x89, 0x8b, 0x88, 0x88, 0x8a, + 0x8a, 0x87, 0x8b, 0x81, 0x82, 0x86, + 0x88, 0x86, 0x87, 0x84, 0x87, 0x89, + 0xb4, 0xb4, 0x95, 0xe2, 0xff, 0xb3, + 0x00, 0x00, 0x00, }, + { MCS_AIDCTL, 0x03, 0x0e }, + }, { + /* 53 nits */ + { MCS_GAMMACTL, + 0x00, 0x98, 0x00, 0xa4, 0x00, 0x9c, + 0x89, 0x89, 0x8b, 0x88, 0x88, 0x8a, + 0x8a, 0x87, 0x8b, 0x81, 0x82, 0x86, + 0x88, 0x86, 0x87, 0x85, 0x87, 0x8a, + 0xb4, 0xb4, 0x96, 0xe2, 0xff, 0xb3, + 0x00, 0x00, 0x00, }, + { MCS_AIDCTL, 0x02, 0xe2 }, + }, { + /* 56 nits */ + { MCS_GAMMACTL, + 0x00, 0x98, 0x00, 0xa4, 0x00, 0x9c, + 0x89, 0x89, 0x8b, 0x88, 0x88, 0x8a, + 0x8a, 0x87, 0x8b, 0x81, 0x82, 0x86, + 0x88, 0x86, 0x87, 0x85, 0x87, 0x8a, + 0xab, 0xab, 0x90, 0xdd, 0xf7, 0xaf, + 0x00, 0x00, 0x00, }, + { MCS_AIDCTL, 0x02, 0xb5 }, + }, { + /* 60 nits */ + { MCS_GAMMACTL, + 0x00, 0x98, 0x00, 0xa4, 0x00, 0x9c, + 0x89, 0x89, 0x8b, 0x88, 0x88, 0x8a, + 0x8a, 0x87, 0x8b, 0x82, 0x82, 0x87, + 0x83, 0x81, 0x84, 0x81, 0x84, 0x88, + 0xb3, 0xb3, 0x96, 0xcf, 0xe5, 0xa8, + 0x00, 0x00, 0x00, }, + { MCS_AIDCTL, 0x02, 0x77 }, + }, { + /* 64 nits */ + { MCS_GAMMACTL, + 0x00, 0x98, 0x00, 0xa4, 0x00, 0x9c, + 0x89, 0x89, 0x8b, 0x88, 0x88, 0x8a, + 0x8a, 0x87, 0x8b, 0x82, 0x82, 0x87, + 0x83, 0x81, 0x84, 0x82, 0x84, 0x88, + 0xb2, 0xb3, 0x97, 0xcf, 0xe5, 0xa8, + 0x00, 0x00, 0x00, }, + { MCS_AIDCTL, 0x02, 0x36 }, + }, { + /* 68 nits */ + { MCS_GAMMACTL, + 0x00, 0x9b, 0x00, 0xa6, 0x00, 0x9d, + 0x88, 0x88, 0x89, 0x89, 0x89, 0x8b, + 0x8a, 0x88, 0x8b, 0x7f, 0x80, 0x86, + 0x88, 0x86, 0x87, 0x7d, 0x7f, 0x85, + 0xb2, 0xb3, 0x97, 0xcf, 0xe5, 0xa8, + 0x00, 0x00, 0x00, }, + { MCS_AIDCTL, 0x02, 0x15 }, + }, { + /* 72 nits */ + { MCS_GAMMACTL, + 0x00, 0x9c, 0x00, 0xa9, 0x00, 0xa0, + 0x88, 0x88, 0x89, 0x88, 0x88, 0x8a, + 0x8c, 0x8a, 0x8d, 0x7f, 0x81, 0x85, + 0x84, 0x82, 0x84, 0x85, 0x87, 0x8a, + 0xaa, 0xab, 0x93, 0xcf, 0xe5, 0xa8, + 0x00, 0x00, 0x00, }, + { MCS_AIDCTL, 0x02, 0x15 }, + }, { + /* 77 nits */ + { MCS_GAMMACTL, + 0x00, 0xa1, 0x00, 0xad, 0x00, 0xa5, + 0x89, 0x89, 0x8a, 0x88, 0x87, 0x89, + 0x8c, 0x89, 0x8d, 0x7f, 0x81, 0x85, + 0x84, 0x83, 0x84, 0x81, 0x83, 0x86, + 0xaa, 0xab, 0x93, 0xc0, 0xd3, 0xa1, + 0x00, 0x00, 0x00, }, + { MCS_AIDCTL, 0x02, 0x15 }, + }, { + /* 82 nits */ + { MCS_GAMMACTL, + 0x00, 0xa5, 0x00, 0xb0, 0x00, 0xa9, + 0x88, 0x89, 0x89, 0x85, 0x86, 0x89, + 0x8a, 0x88, 0x8b, 0x82, 0x82, 0x87, + 0x81, 0x80, 0x82, 0x89, 0x8b, 0x8b, + 0xa2, 0xa3, 0x8e, 0xc0, 0xd3, 0xa1, + 0x00, 0x00, 0x00, }, + { MCS_AIDCTL, 0x02, 0x15 }, + }, { + /* 87 nits */ + { MCS_GAMMACTL, + 0x00, 0xab, 0x00, 0xb4, 0x00, 0xad, + 0x88, 0x89, 0x8a, 0x84, 0x86, 0x88, + 0x8a, 0x88, 0x8b, 0x7f, 0x7f, 0x84, + 0x86, 0x84, 0x85, 0x85, 0x86, 0x88, + 0xa2, 0xa3, 0x8f, 0xc0, 0xd3, 0xa1, + 0x00, 0x00, 0x00, }, + { MCS_AIDCTL, 0x02, 0x15 }, + }, { + /* 93 nits */ + { MCS_GAMMACTL, + 0x00, 0xaf, 0x00, 0xb9, 0x00, 0xb1, + 0x88, 0x89, 0x8a, 0x84, 0x85, 0x87, + 0x8a, 0x89, 0x8b, 0x7e, 0x7e, 0x83, + 0x87, 0x86, 0x86, 0x88, 0x8a, 0x89, + 0x9c, 0x9c, 0x8b, 0xc0, 0xd3, 0xa1, + 0x00, 0x00, 0x00, }, + { MCS_AIDCTL, 0x02, 0x15 }, + }, { + /* 98 nits */ + { MCS_GAMMACTL, + 0x00, 0xb3, 0x00, 0xbc, 0x00, 0xb5, + 0x88, 0x88, 0x88, 0x84, 0x84, 0x86, + 0x8a, 0x88, 0x8a, 0x7f, 0x7f, 0x84, + 0x84, 0x83, 0x84, 0x88, 0x8a, 0x89, + 0x9c, 0x9c, 0x8b, 0xc0, 0xd3, 0xa1, + 0x00, 0x00, 0x00, }, + { MCS_AIDCTL, 0x02, 0x15 }, + }, { + /* 105 nits */ + { 
MCS_GAMMACTL, + 0x00, 0xb7, 0x00, 0xc0, 0x00, 0xba, + 0x87, 0x87, 0x88, 0x85, 0x85, 0x87, + 0x89, 0x88, 0x89, 0x7f, 0x7f, 0x83, + 0x81, 0x80, 0x82, 0x88, 0x8a, 0x89, + 0x9c, 0x9c, 0x8c, 0xb2, 0xc2, 0x9a, + 0x00, 0x00, 0x00, }, + { MCS_AIDCTL, 0x02, 0x15 }, + }, { + /* 111 nits */ + { MCS_GAMMACTL, + 0x00, 0xbb, 0x00, 0xc3, 0x00, 0xbe, + 0x87, 0x87, 0x88, 0x85, 0x85, 0x88, + 0x88, 0x87, 0x89, 0x80, 0x80, 0x84, + 0x81, 0x81, 0x82, 0x85, 0x86, 0x87, + 0x9c, 0x9c, 0x8b, 0xb2, 0xc2, 0x9a, + 0x00, 0x00, 0x00, }, + { MCS_AIDCTL, 0x02, 0x15 }, + }, { + /* 119 nits */ + { MCS_GAMMACTL, + 0x00, 0xc0, 0x00, 0xc8, 0x00, 0xc4, + 0x87, 0x87, 0x88, 0x82, 0x84, 0x86, + 0x87, 0x85, 0x87, 0x82, 0x81, 0x84, + 0x83, 0x82, 0x83, 0x80, 0x81, 0x84, + 0x9c, 0x9c, 0x8c, 0xb2, 0xc2, 0x9a, + 0x00, 0x00, 0x00, }, + { MCS_AIDCTL, 0x02, 0x14 }, + }, { + /* 126 nits */ + { MCS_GAMMACTL, + 0x00, 0xc0, 0x00, 0xc8, 0x00, 0xc4, + 0x87, 0x87, 0x88, 0x82, 0x84, 0x86, + 0x87, 0x85, 0x87, 0x82, 0x81, 0x84, + 0x83, 0x82, 0x83, 0x80, 0x81, 0x84, + 0x9c, 0x9c, 0x8d, 0xb2, 0xc2, 0x9a, + 0x00, 0x00, 0x00, }, + { MCS_AIDCTL, 0x01, 0xde }, + }, { + /* 134 nits */ + { MCS_GAMMACTL, + 0x00, 0xc0, 0x00, 0xc8, 0x00, 0xc4, + 0x87, 0x87, 0x88, 0x82, 0x84, 0x86, + 0x87, 0x85, 0x87, 0x82, 0x81, 0x84, + 0x83, 0x82, 0x83, 0x80, 0x81, 0x84, + 0x9c, 0x9c, 0x8d, 0xa4, 0xb0, 0x92, + 0x00, 0x00, 0x00, }, + { MCS_AIDCTL, 0x01, 0x94 }, + }, { + /* 143 nits */ + { MCS_GAMMACTL, + 0x00, 0xc0, 0x00, 0xc8, 0x00, 0xc3, + 0x87, 0x87, 0x88, 0x82, 0x84, 0x86, + 0x87, 0x85, 0x87, 0x82, 0x81, 0x85, + 0x83, 0x82, 0x83, 0x80, 0x81, 0x84, + 0x92, 0x92, 0x89, 0xab, 0xb6, 0x96, + 0x00, 0x00, 0x00, }, + { MCS_AIDCTL, 0x01, 0x46 }, + }, { + /* 152 nits */ + { MCS_GAMMACTL, + 0x00, 0xc0, 0x00, 0xc8, 0x00, 0xc3, + 0x87, 0x87, 0x88, 0x83, 0x84, 0x86, + 0x87, 0x85, 0x87, 0x81, 0x81, 0x85, + 0x84, 0x82, 0x83, 0x80, 0x81, 0x83, + 0x92, 0x92, 0x8b, 0xab, 0xb6, 0x96, + 0x00, 0x00, 0x00, }, + { MCS_AIDCTL, 0x00, 0xfa }, + }, { + /* 162 nits */ + { MCS_GAMMACTL, + 0x00, 0xc0, 0x00, 0xc8, 0x00, 0xc3, + 0x87, 0x87, 0x88, 0x83, 0x84, 0x86, + 0x87, 0x85, 0x87, 0x81, 0x81, 0x84, + 0x84, 0x82, 0x84, 0x80, 0x81, 0x83, + 0x92, 0x92, 0x8b, 0x9d, 0xa4, 0x8e, + 0x00, 0x00, 0x00, }, + { MCS_AIDCTL, 0x00, 0xac }, + }, { + /* 172 nits */ + { MCS_GAMMACTL, + 0x00, 0xc0, 0x00, 0xc8, 0x00, 0xc3, + 0x87, 0x87, 0x88, 0x83, 0x84, 0x86, + 0x87, 0x85, 0x87, 0x81, 0x81, 0x84, + 0x84, 0x82, 0x83, 0x80, 0x81, 0x84, + 0x93, 0x92, 0x8c, 0x9d, 0xa4, 0x8e, + 0x00, 0x00, 0x00, }, + { MCS_AIDCTL, 0x00, 0x57 }, + }, { + /* 183 nits */ + { MCS_GAMMACTL, + 0x00, 0xc2, 0x00, 0xca, 0x00, 0xc5, + 0x86, 0x86, 0x87, 0x85, 0x84, 0x87, + 0x87, 0x86, 0x88, 0x7e, 0x80, 0x83, + 0x84, 0x82, 0x83, 0x80, 0x81, 0x83, + 0x93, 0x92, 0x8c, 0x9d, 0xa4, 0x8e, + 0x00, 0x00, 0x00, }, + { MCS_AIDCTL, 0x00, 0x10 }, + }, { + /* 195 nits */ + { MCS_GAMMACTL, + 0x00, 0xc7, 0x00, 0xce, 0x00, 0xc9, + 0x86, 0x87, 0x86, 0x83, 0x83, 0x85, + 0x85, 0x84, 0x86, 0x82, 0x82, 0x85, + 0x80, 0x80, 0x81, 0x81, 0x81, 0x84, + 0x93, 0x92, 0x8c, 0x9d, 0xa4, 0x8e, + 0x00, 0x00, 0x00, }, + { MCS_AIDCTL, 0x00, 0x10 }, + }, { + /* 207 nits */ + { MCS_GAMMACTL, + 0x00, 0xcc, 0x00, 0xd2, 0x00, 0xce, + 0x86, 0x86, 0x87, 0x81, 0x83, 0x84, + 0x84, 0x82, 0x84, 0x83, 0x83, 0x85, + 0x81, 0x81, 0x82, 0x7c, 0x7d, 0x81, + 0x93, 0x92, 0x8c, 0x9d, 0xa4, 0x8e, + 0x00, 0x00, 0x00, }, + { MCS_AIDCTL, 0x00, 0x10 }, + }, { + /* 220 nits */ + { MCS_GAMMACTL, + 0x00, 0xd1, 0x00, 0xd6, 0x00, 0xd3, + 0x86, 0x86, 0x86, 0x81, 0x83, 0x84, + 0x84, 0x82, 0x84, 0x80, 0x80, 0x83, + 
0x81, 0x81, 0x82, 0x7c, 0x7d, 0x81, + 0x93, 0x92, 0x8c, 0x9d, 0xa4, 0x8e, + 0x00, 0x00, 0x00, }, + { MCS_AIDCTL, 0x00, 0x10 }, + }, { + /* 234 nits */ + { MCS_GAMMACTL, + 0x00, 0xd6, 0x00, 0xdb, 0x00, 0xd8, + 0x85, 0x85, 0x85, 0x81, 0x83, 0x84, + 0x83, 0x82, 0x83, 0x80, 0x80, 0x82, + 0x84, 0x82, 0x83, 0x79, 0x79, 0x7e, + 0x93, 0x92, 0x8d, 0x9d, 0xa4, 0x8e, + 0x00, 0x00, 0x00, }, + { MCS_AIDCTL, 0x00, 0x10 }, + }, { + /* 249 nits */ + { MCS_GAMMACTL, + 0x00, 0xdc, 0x00, 0xe0, 0x00, 0xdd, + 0x84, 0x84, 0x84, 0x81, 0x82, 0x83, + 0x84, 0x82, 0x84, 0x7f, 0x7f, 0x82, + 0x81, 0x80, 0x81, 0x80, 0x81, 0x82, + 0x8c, 0x8c, 0x86, 0x9d, 0xa4, 0x8e, + 0x00, 0x00, 0x00, }, + { MCS_AIDCTL, 0x00, 0x10 }, + }, { + /* 265 nits */ + { MCS_GAMMACTL, + 0x00, 0xe2, 0x00, 0xe5, 0x00, 0xe3, + 0x83, 0x83, 0x83, 0x81, 0x82, 0x83, + 0x82, 0x82, 0x83, 0x82, 0x81, 0x83, + 0x7f, 0x7e, 0x80, 0x7c, 0x7d, 0x80, + 0x8c, 0x8c, 0x86, 0x8e, 0x92, 0x87, + 0x00, 0x00, 0x00, }, + { MCS_AIDCTL, 0x00, 0x10 }, + }, { + /* 282 nits */ + { MCS_GAMMACTL, + 0x00, 0xe8, 0x00, 0xea, 0x00, 0xe9, + 0x83, 0x83, 0x83, 0x80, 0x82, 0x82, + 0x81, 0x82, 0x82, 0x82, 0x81, 0x82, + 0x81, 0x80, 0x81, 0x80, 0x80, 0x81, + 0x85, 0x85, 0x83, 0x8e, 0x92, 0x87, + 0x00, 0x00, 0x00, }, + { MCS_AIDCTL, 0x00, 0x10 }, + }, { + /* 300 nits */ + { MCS_GAMMACTL, + 0x00, 0xed, 0x00, 0xef, 0x00, 0xed, + 0x81, 0x82, 0x81, 0x81, 0x81, 0x82, + 0x82, 0x82, 0x83, 0x80, 0x80, 0x81, + 0x81, 0x81, 0x82, 0x83, 0x83, 0x83, + 0x80, 0x80, 0x7f, 0x8e, 0x92, 0x87, + 0x00, 0x00, 0x00, }, + { MCS_AIDCTL, 0x00, 0x10 }, + }, { + /* 316 nits */ + { MCS_GAMMACTL, + 0x00, 0xf3, 0x00, 0xf4, 0x00, 0xf3, + 0x80, 0x81, 0x80, 0x81, 0x81, 0x81, + 0x82, 0x82, 0x82, 0x81, 0x80, 0x81, + 0x82, 0x82, 0x83, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x7f, 0x80, 0x80, 0x80, + 0x00, 0x00, 0x00, }, + { MCS_AIDCTL, 0x00, 0x10 }, + }, { + /* 333 nits */ + { MCS_GAMMACTL, + 0x00, 0xf8, 0x00, 0xf8, 0x00, 0xf8, + 0x80, 0x81, 0x80, 0x81, 0x80, 0x81, + 0x81, 0x82, 0x82, 0x81, 0x80, 0x81, + 0x83, 0x83, 0x83, 0x7e, 0x7d, 0x7e, + 0x80, 0x80, 0x7f, 0x80, 0x80, 0x80, + 0x00, 0x00, 0x00, }, + { MCS_AIDCTL, 0x00, 0x10 }, + }, { + /* 360 nits */ + { MCS_GAMMACTL, + 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, + 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x00, 0x00, 0x00, }, + { MCS_AIDCTL, 0x00, 0x10 }, + }, { + /* 378 nits */ + { MCS_GAMMACTL, + 0x01, 0x04, 0x01, 0x03, 0x01, 0x04, + 0x7f, 0x7f, 0x80, 0x7f, 0x7f, 0x7f, + 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, + 0x80, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, + 0x80, 0x80, 0x7f, 0x7f, 0x7f, 0x7f, + 0x00, 0x00, 0x00, }, + { MCS_AIDCTL, 0x00, 0x10 }, + }, { + /* 395 nits */ + { MCS_GAMMACTL, + 0x01, 0x09, 0x01, 0x07, 0x01, 0x08, + 0x7e, 0x7f, 0x80, 0x7f, 0x7f, 0x7f, + 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, + 0x80, 0x7f, 0x7f, 0x7e, 0x7e, 0x7e, + 0x80, 0x80, 0x7f, 0x7e, 0x7e, 0x7f, + 0x00, 0x00, 0x00, }, + { MCS_AIDCTL, 0x00, 0x10 }, + }, { + /* 413 nits */ + { MCS_GAMMACTL, + 0x01, 0x0e, 0x01, 0x0b, 0x01, 0x0c, + 0x7e, 0x7f, 0x80, 0x7e, 0x7e, 0x7e, + 0x7e, 0x7e, 0x7e, 0x7f, 0x7f, 0x7f, + 0x80, 0x7f, 0x7f, 0x7d, 0x7d, 0x7d, + 0x80, 0x80, 0x7f, 0x7d, 0x7e, 0x7e, + 0x00, 0x00, 0x00, }, + { MCS_AIDCTL, 0x00, 0x10 }, + }, { + /* 430 nits */ + { MCS_GAMMACTL, + 0x01, 0x13, 0x01, 0x0f, 0x01, 0x10, + 0x7d, 0x7f, 0x80, 0x7e, 0x7e, 0x7e, + 0x7e, 0x7e, 0x7e, 0x7f, 0x7f, 0x7f, + 0x80, 0x7f, 0x7f, 0x7d, 0x7d, 0x7d, + 0x80, 0x80, 0x7f, 0x7c, 0x7d, 0x7e, + 0x00, 0x00, 0x00, }, + { MCS_AIDCTL, 0x00, 0x10 }, + }, { 
+ /* 448 nits */ + { MCS_GAMMACTL, + 0x01, 0x18, 0x01, 0x13, 0x01, 0x14, + 0x7c, 0x7e, 0x80, 0x7e, 0x7e, 0x7e, + 0x7e, 0x7e, 0x7d, 0x7e, 0x7f, 0x7e, + 0x80, 0x7f, 0x7f, 0x7c, 0x7c, 0x7c, + 0x80, 0x80, 0x7e, 0x7b, 0x7c, 0x7d, + 0x00, 0x00, 0x00, }, + { MCS_AIDCTL, 0x00, 0x10 }, + }, { + /* 465 nits */ + { MCS_GAMMACTL, + 0x01, 0x1d, 0x01, 0x17, 0x01, 0x18, + 0x7c, 0x7e, 0x80, 0x7d, 0x7d, 0x7d, + 0x7d, 0x7d, 0x7d, 0x7e, 0x7f, 0x7e, + 0x80, 0x7f, 0x7f, 0x7b, 0x7b, 0x7b, + 0x80, 0x80, 0x7e, 0x7a, 0x7c, 0x7d, + 0x00, 0x00, 0x00, }, + { MCS_AIDCTL, 0x00, 0x10 }, + }, { + /* 483 nits */ + { MCS_GAMMACTL, + 0x01, 0x22, 0x01, 0x1b, 0x01, 0x1c, + 0x7b, 0x7e, 0x80, 0x7d, 0x7d, 0x7d, + 0x7d, 0x7d, 0x7c, 0x7e, 0x7f, 0x7e, + 0x80, 0x7f, 0x7f, 0x7a, 0x7a, 0x7a, + 0x80, 0x80, 0x7e, 0x79, 0x7b, 0x7c, + 0x00, 0x00, 0x00, }, + { MCS_AIDCTL, 0x00, 0x10 }, + }, { + /* 500 nits */ + { MCS_GAMMACTL, + 0x01, 0x27, 0x01, 0x1f, 0x01, 0x20, + 0x7b, 0x7e, 0x80, 0x7d, 0x7d, 0x7d, + 0x7d, 0x7d, 0x7c, 0x7e, 0x7f, 0x7e, + 0x80, 0x7f, 0x7f, 0x7a, 0x7a, 0x7a, + 0x81, 0x80, 0x7e, 0x79, 0x7b, 0x7c, + 0x00, 0x00, 0x00, }, + { MCS_AIDCTL, 0x00, 0x10 }, + }, +}; + +struct s6e8aa5x01_ams561ra01_ctx { + struct drm_panel panel; + struct mipi_dsi_device *dsi; + struct backlight_device *bl; + struct gpio_desc *reset_gpio; + struct regulator_bulk_data *supplies; + u32 nr_supplies; +}; + +static const struct regulator_bulk_data s6e8aa5x01_ams561ra01_supplies[] = { + { .supply = "vdd" }, + { .supply = "vci" }, +}; + +static inline struct s6e8aa5x01_ams561ra01_ctx *to_ctx(struct drm_panel *panel) +{ + return container_of(panel, struct s6e8aa5x01_ams561ra01_ctx, panel); +} + +static int s6e8aa5x01_ams561ra01_update_status(struct backlight_device *bl) +{ + struct s6e8aa5x01_ams561ra01_ctx *ctx = bl_get_data(bl); + struct mipi_dsi_multi_context dsi = { .dsi = ctx->dsi }; + u16 lvl = backlight_get_brightness(bl); + + if (!ctx->panel.enabled) + return 0; + + mipi_dsi_dcs_write_seq_multi(&dsi, MCS_ACCESSPROT, 0x5a, 0x5a); + + mipi_dsi_dcs_write_buffer_multi(&dsi, + s6e8aa5x01_ams561ra01_cmds[lvl].gamma, + GAMMA_CMD_LEN); + mipi_dsi_dcs_write_buffer_multi(&dsi, + s6e8aa5x01_ams561ra01_cmds[lvl].aid, + AID_CMD_LEN); + mipi_dsi_dcs_write_seq_multi(&dsi, MCS_GAMMAUPD, 0x03); + + mipi_dsi_dcs_write_seq_multi(&dsi, MCS_ACCESSPROT, 0xa5, 0xa5); + + return dsi.accum_err; +} + +static int s6e8aa5x01_ams561ra01_prepare(struct drm_panel *panel) +{ + struct s6e8aa5x01_ams561ra01_ctx *ctx = to_ctx(panel); + struct device *dev = &ctx->dsi->dev; + int ret; + + ret = regulator_bulk_enable(ctx->nr_supplies, ctx->supplies); + if (ret < 0) { + dev_err(dev, "failed to enable regulators: %d\n", ret); + return ret; + } + + gpiod_set_value_cansleep(ctx->reset_gpio, 0); + usleep_range(5000, 6000); + gpiod_set_value_cansleep(ctx->reset_gpio, 1); + usleep_range(5000, 6000); + gpiod_set_value_cansleep(ctx->reset_gpio, 0); + usleep_range(10000, 11000); + + return 0; +} + +static int s6e8aa5x01_ams561ra01_unprepare(struct drm_panel *panel) +{ + struct s6e8aa5x01_ams561ra01_ctx *ctx = to_ctx(panel); + + gpiod_set_value_cansleep(ctx->reset_gpio, 1); + usleep_range(5000, 6000); + + regulator_bulk_disable(ctx->nr_supplies, ctx->supplies); + + return 0; +} + +static int s6e8aa5x01_ams561ra01_enable(struct drm_panel *panel) +{ + struct s6e8aa5x01_ams561ra01_ctx *ctx = to_ctx(panel); + struct mipi_dsi_multi_context dsi = { .dsi = ctx->dsi }; + + mipi_dsi_dcs_exit_sleep_mode_multi(&dsi); + mipi_dsi_msleep(&dsi, 100); + + mipi_dsi_dcs_write_seq_multi(&dsi, MCS_ACCESSPROT, 0x5a, 
0x5a); + + mipi_dsi_dcs_write_seq_multi(&dsi, MCS_PENTILE, 0xd8, 0xd8, 0x00); + mipi_dsi_dcs_write_seq_multi(&dsi, MCS_PCD, 0x5c); + mipi_dsi_dcs_write_seq_multi(&dsi, MCS_ERRFLAG, 0xed, 0xc7, 0x23, 0x67); + mipi_dsi_dcs_write_seq_multi(&dsi, MCS_DISPCTL, 0x0c, 0x0c, 0xb9, 0x01); + mipi_dsi_dcs_write_seq_multi(&dsi, MCS_LTPSCTL, + 0x00, 0x45, 0x10, 0x10, 0x08, 0x32, 0x54, 0x00, + 0x00, 0x00, 0x00, 0x07, 0x06, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x48, 0x5e, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x03, 0x00, 0x00, 0x00, 0xad, 0x00, 0x00, + 0x08, 0x05, 0x2a, 0x54, 0x03, 0xcc, 0x00, 0xff, + 0xfb, 0x03, 0x0d, 0x00, 0x11, 0x0f, 0x02, 0x03, + 0x0b, 0x0c, 0x13, 0x13, 0x13, 0x13, 0x13, 0x13, + 0x13, 0x13, 0x13, 0x13, 0x00, 0x02, 0x03, 0x0b, + 0x0c, 0x13, 0x13, 0x13, 0x13, 0x13, 0x13, 0x13, + 0x13, 0x13); + + mipi_dsi_dcs_write_seq_multi(&dsi, MCS_ACCESSPROT, 0xa5, 0xa5); + + mipi_dsi_dcs_set_display_on_multi(&dsi); + + return dsi.accum_err; +} + +static int s6e8aa5x01_ams561ra01_disable(struct drm_panel *panel) +{ + struct s6e8aa5x01_ams561ra01_ctx *ctx = to_ctx(panel); + struct mipi_dsi_multi_context dsi = { .dsi = ctx->dsi }; + + mipi_dsi_dcs_set_display_off_multi(&dsi); + mipi_dsi_msleep(&dsi, 100); + + mipi_dsi_dcs_enter_sleep_mode_multi(&dsi); + mipi_dsi_msleep(&dsi, 150); + + return dsi.accum_err; +} + +static const struct drm_display_mode s6e8aa5x01_ams561ra01_mode = { + .clock = (720 + 62 + 2 + 26) * (1480 + 12 + 2 + 10) * 60 / 1000, + .hdisplay = 720, + .hsync_start = 720 + 62, + .hsync_end = 720 + 62 + 2, + .htotal = 720 + 62 + 2 + 26, + .vdisplay = 1480, + .vsync_start = 1480 + 12, + .vsync_end = 1480 + 12 + 2, + .vtotal = 1480 + 12 + 2 + 10, + .width_mm = 62, + .height_mm = 128, + .type = DRM_MODE_TYPE_DRIVER | DRM_MODE_TYPE_PREFERRED, +}; + +static int s6e8aa5x01_ams561ra01_get_modes(struct drm_panel *panel, + struct drm_connector *connector) +{ + return drm_connector_helper_get_modes_fixed(connector, + &s6e8aa5x01_ams561ra01_mode); +} + +static const struct backlight_ops s6e8aa5x01_ams561ra01_bl_ops = { + .update_status = s6e8aa5x01_ams561ra01_update_status, +}; + +static const struct drm_panel_funcs s6e8aa5x01_ams561ra01_panel_funcs = { + .prepare = s6e8aa5x01_ams561ra01_prepare, + .unprepare = s6e8aa5x01_ams561ra01_unprepare, + .enable = s6e8aa5x01_ams561ra01_enable, + .disable = s6e8aa5x01_ams561ra01_disable, + .get_modes = s6e8aa5x01_ams561ra01_get_modes, +}; + +static int s6e8aa5x01_ams561ra01_probe(struct mipi_dsi_device *dsi) +{ + struct device *dev = &dsi->dev; + struct s6e8aa5x01_ams561ra01_ctx *ctx; + int ret; + + ctx = devm_drm_panel_alloc(dev, struct s6e8aa5x01_ams561ra01_ctx, panel, + &s6e8aa5x01_ams561ra01_panel_funcs, + DRM_MODE_CONNECTOR_DSI); + if (IS_ERR(ctx)) + return PTR_ERR(ctx); + + ctx->dsi = dsi; + mipi_dsi_set_drvdata(dsi, ctx); + + ctx->nr_supplies = ARRAY_SIZE(s6e8aa5x01_ams561ra01_supplies); + ret = devm_regulator_bulk_get_const(dev, ctx->nr_supplies, + s6e8aa5x01_ams561ra01_supplies, + &ctx->supplies); + if (ret < 0) + return dev_err_probe(dev, ret, "failed to get regulators\n"); + + ctx->reset_gpio = devm_gpiod_get_optional(dev, "reset", GPIOD_ASIS); + if (IS_ERR(ctx->reset_gpio)) + return dev_err_probe(dev, PTR_ERR(ctx->reset_gpio), + "failed to get reset-gpios\n"); + + ctx->bl = devm_backlight_device_register(dev, dev_name(dev), dev, ctx, + &s6e8aa5x01_ams561ra01_bl_ops, + NULL); + if (IS_ERR(ctx->bl)) + return dev_err_probe(dev, PTR_ERR(ctx->bl), + "failed to register backlight device\n"); + + ctx->bl->props.type = BACKLIGHT_PLATFORM; + ctx->bl->props.brightness 
= ARRAY_SIZE(s6e8aa5x01_ams561ra01_cmds) - 1; + ctx->bl->props.max_brightness = ctx->bl->props.brightness; + + dsi->lanes = 4; + dsi->format = MIPI_DSI_FMT_RGB888; + dsi->mode_flags = MIPI_DSI_MODE_VIDEO | MIPI_DSI_MODE_VIDEO_BURST | + MIPI_DSI_MODE_VIDEO_NO_HFP; + + ctx->panel.prepare_prev_first = true; + drm_panel_add(&ctx->panel); + + ret = devm_mipi_dsi_attach(dev, dsi); + if (ret < 0) { + drm_panel_remove(&ctx->panel); + return dev_err_probe(dev, ret, "failed to attach to DSI host\n"); + } + + return 0; +} + +static void s6e8aa5x01_ams561ra01_remove(struct mipi_dsi_device *dsi) +{ + struct s6e8aa5x01_ams561ra01_ctx *ctx = mipi_dsi_get_drvdata(dsi); + + drm_panel_remove(&ctx->panel); +} + +static const struct of_device_id s6e8aa5x01_ams561ra01_of_device_id[] = { + { .compatible = "samsung,s6e8aa5x01-ams561ra01" }, + { } +}; +MODULE_DEVICE_TABLE(of, s6e8aa5x01_ams561ra01_of_device_id); + +static struct mipi_dsi_driver s6e8aa5x01_ams561ra01_dsi_driver = { + .probe = s6e8aa5x01_ams561ra01_probe, + .remove = s6e8aa5x01_ams561ra01_remove, + .driver = { + .name = "panel-samsung-s6e8aa5x01-ams561ra01", + .of_match_table = s6e8aa5x01_ams561ra01_of_device_id, + }, +}; +module_mipi_dsi_driver(s6e8aa5x01_ams561ra01_dsi_driver); + +MODULE_AUTHOR("Kaustabh Chakraborty <kauschluss@disroot.org>"); +MODULE_DESCRIPTION("Samsung AMS561RA01 Panel with S6E8AA5X01 Controller"); +MODULE_LICENSE("GPL"); diff --git a/drivers/gpu/drm/panel/panel-sharp-lq101r1sx01.c b/drivers/gpu/drm/panel/panel-sharp-lq101r1sx01.c index a0d76d588da1..d159b0e4fdb6 100644 --- a/drivers/gpu/drm/panel/panel-sharp-lq101r1sx01.c +++ b/drivers/gpu/drm/panel/panel-sharp-lq101r1sx01.c @@ -279,9 +279,6 @@ static int sharp_panel_add(struct sharp_panel *sharp) if (IS_ERR(sharp->supply)) return PTR_ERR(sharp->supply); - drm_panel_init(&sharp->base, &sharp->link1->dev, &sharp_panel_funcs, - DRM_MODE_CONNECTOR_DSI); - ret = drm_panel_of_backlight(&sharp->base); if (ret) return ret; @@ -323,10 +320,12 @@ static int sharp_panel_probe(struct mipi_dsi_device *dsi) /* register a panel for only the DSI-LINK1 interface */ if (secondary) { - sharp = devm_kzalloc(&dsi->dev, sizeof(*sharp), GFP_KERNEL); - if (!sharp) { + sharp = devm_drm_panel_alloc(&dsi->dev, __typeof(*sharp), base, + &sharp_panel_funcs, + DRM_MODE_CONNECTOR_DSI); + if (IS_ERR(sharp)) { put_device(&secondary->dev); - return -ENOMEM; + return PTR_ERR(sharp); } mipi_dsi_set_drvdata(dsi, sharp); diff --git a/drivers/gpu/drm/panel/panel-simple.c b/drivers/gpu/drm/panel/panel-simple.c index 47222d2d8129..0019de93be1b 100644 --- a/drivers/gpu/drm/panel/panel-simple.c +++ b/drivers/gpu/drm/panel/panel-simple.c @@ -26,6 +26,7 @@ #include <linux/i2c.h> #include <linux/media-bus-format.h> #include <linux/module.h> +#include <linux/of_device.h> #include <linux/of_platform.h> #include <linux/platform_device.h> #include <linux/pm_runtime.h> @@ -136,6 +137,14 @@ struct panel_desc { int connector_type; }; +struct panel_desc_dsi { + struct panel_desc desc; + + unsigned long flags; + enum mipi_dsi_pixel_format format; + unsigned int lanes; +}; + struct panel_simple { struct drm_panel base; @@ -430,10 +439,7 @@ static const struct drm_panel_funcs panel_simple_funcs = { .get_timings = panel_simple_get_timings, }; -static struct panel_desc panel_dpi; - -static int panel_dpi_probe(struct device *dev, - struct panel_simple *panel) +static struct panel_desc *panel_dpi_probe(struct device *dev) { struct display_timing *timing; const struct device_node *np; @@ -445,17 +451,17 @@ static int 
panel_dpi_probe(struct device *dev, np = dev->of_node; desc = devm_kzalloc(dev, sizeof(*desc), GFP_KERNEL); if (!desc) - return -ENOMEM; + return ERR_PTR(-ENOMEM); timing = devm_kzalloc(dev, sizeof(*timing), GFP_KERNEL); if (!timing) - return -ENOMEM; + return ERR_PTR(-ENOMEM); ret = of_get_display_timing(np, "panel-timing", timing); if (ret < 0) { dev_err(dev, "%pOF: no panel-timing node found for \"panel-dpi\" binding\n", np); - return ret; + return ERR_PTR(ret); } desc->timings = timing; @@ -473,9 +479,7 @@ static int panel_dpi_probe(struct device *dev, /* We do not know the connector for the DT node, so guess it */ desc->connector_type = DRM_MODE_CONNECTOR_DPI; - panel->desc = desc; - - return 0; + return desc; } #define PANEL_SIMPLE_BOUNDS_CHECK(to_check, bounds, field) \ @@ -570,8 +574,44 @@ static int panel_simple_override_nondefault_lvds_datamapping(struct device *dev, return 0; } -static int panel_simple_probe(struct device *dev, const struct panel_desc *desc) +static const struct panel_desc *panel_simple_get_desc(struct device *dev) +{ + if (IS_ENABLED(CONFIG_DRM_MIPI_DSI) && + dev_is_mipi_dsi(dev)) { + const struct panel_desc_dsi *dsi_desc; + + dsi_desc = of_device_get_match_data(dev); + if (!dsi_desc) + return ERR_PTR(-ENODEV); + + return &dsi_desc->desc; + } + + if (dev_is_platform(dev)) { + const struct panel_desc *desc; + + desc = of_device_get_match_data(dev); + if (!desc) { + /* + * panel-dpi probes without a descriptor and + * panel_dpi_probe() will initialize one for us + * based on the device tree. + */ + if (of_device_is_compatible(dev->of_node, "panel-dpi")) + return panel_dpi_probe(dev); + else + return ERR_PTR(-ENODEV); + } + + return desc; + } + + return ERR_PTR(-ENODEV); +} + +static struct panel_simple *panel_simple_probe(struct device *dev) { + const struct panel_desc *desc; struct panel_simple *panel; struct display_timing dt; struct device_node *ddc; @@ -579,27 +619,31 @@ static int panel_simple_probe(struct device *dev, const struct panel_desc *desc) u32 bus_flags; int err; + desc = panel_simple_get_desc(dev); + if (IS_ERR(desc)) + return ERR_CAST(desc); + panel = devm_drm_panel_alloc(dev, struct panel_simple, base, &panel_simple_funcs, desc->connector_type); if (IS_ERR(panel)) - return PTR_ERR(panel); + return ERR_CAST(panel); panel->desc = desc; panel->supply = devm_regulator_get(dev, "power"); if (IS_ERR(panel->supply)) - return PTR_ERR(panel->supply); + return ERR_CAST(panel->supply); panel->enable_gpio = devm_gpiod_get_optional(dev, "enable", GPIOD_OUT_LOW); if (IS_ERR(panel->enable_gpio)) - return dev_err_probe(dev, PTR_ERR(panel->enable_gpio), - "failed to request GPIO\n"); + return dev_err_cast_probe(dev, panel->enable_gpio, + "failed to request GPIO\n"); err = of_drm_get_panel_orientation(dev->of_node, &panel->orientation); if (err) { dev_err(dev, "%pOF: failed to get orientation %d\n", dev->of_node, err); - return err; + return ERR_PTR(err); } ddc = of_parse_phandle(dev->of_node, "ddc-i2c-bus", 0); @@ -608,19 +652,12 @@ static int panel_simple_probe(struct device *dev, const struct panel_desc *desc) of_node_put(ddc); if (!panel->ddc) - return -EPROBE_DEFER; + return ERR_PTR(-EPROBE_DEFER); } - if (desc == &panel_dpi) { - /* Handle the generic panel-dpi binding */ - err = panel_dpi_probe(dev, panel); - if (err) - goto free_ddc; - desc = panel->desc; - } else { - if (!of_get_display_timing(dev->of_node, "panel-timing", &dt)) - panel_simple_parse_panel_timing_node(dev, panel, &dt); - } + if (!of_device_is_compatible(dev->of_node, "panel-dpi") && + 
!of_get_display_timing(dev->of_node, "panel-timing", &dt)) + panel_simple_parse_panel_timing_node(dev, panel, &dt); if (desc->connector_type == DRM_MODE_CONNECTOR_LVDS) { /* Optional data-mapping property for overriding bus format */ @@ -703,7 +740,7 @@ static int panel_simple_probe(struct device *dev, const struct panel_desc *desc) drm_panel_add(&panel->base); - return 0; + return panel; disable_pm_runtime: pm_runtime_dont_use_autosuspend(dev); @@ -712,7 +749,7 @@ free_ddc: if (panel->ddc) put_device(&panel->ddc->dev); - return err; + return ERR_PTR(err); } static void panel_simple_shutdown(struct device *dev) @@ -3679,6 +3716,29 @@ static const struct panel_desc olimex_lcd_olinuxino_43ts = { .bus_format = MEDIA_BUS_FMT_RGB888_1X24, }; +static const struct drm_display_mode olimex_lcd_olinuxino_5cts_mode = { + .clock = 33300, + .hdisplay = 800, + .hsync_start = 800 + 210, + .hsync_end = 800 + 210 + 20, + .htotal = 800 + 210 + 20 + 26, + .vdisplay = 480, + .vsync_start = 480 + 22, + .vsync_end = 480 + 22 + 10, + .vtotal = 480 + 22 + 10 + 13, +}; + +static const struct panel_desc olimex_lcd_olinuxino_5cts = { + .modes = &olimex_lcd_olinuxino_5cts_mode, + .num_modes = 1, + .size = { + .width = 154, + .height = 86, + }, + .bus_format = MEDIA_BUS_FMT_RGB888_1X24, +}; + + static const struct display_timing ontat_kd50g21_40nt_a1_timing = { .pixelclock = { 30000000, 30000000, 50000000 }, .hactive = { 800, 800, 800 }, @@ -5242,6 +5302,9 @@ static const struct of_device_id platform_of_match[] = { .compatible = "olimex,lcd-olinuxino-43-ts", .data = &olimex_lcd_olinuxino_43ts, }, { + .compatible = "olimex,lcd-olinuxino-5-cts", + .data = &olimex_lcd_olinuxino_5cts, + }, { .compatible = "ontat,kd50g21-40nt-a1", .data = &ontat_kd50g21_40nt_a1, }, { @@ -5394,7 +5457,12 @@ static const struct of_device_id platform_of_match[] = { }, { /* Must be the last entry */ .compatible = "panel-dpi", - .data = &panel_dpi, + + /* + * Explicitly NULL, the panel_desc structure will be + * allocated by panel_dpi_probe(). 
+ */ + .data = NULL, }, { /* sentinel */ } @@ -5403,13 +5471,13 @@ MODULE_DEVICE_TABLE(of, platform_of_match); static int panel_simple_platform_probe(struct platform_device *pdev) { - const struct panel_desc *desc; + struct panel_simple *panel; - desc = of_device_get_match_data(&pdev->dev); - if (!desc) - return -ENODEV; + panel = panel_simple_probe(&pdev->dev); + if (IS_ERR(panel)) + return PTR_ERR(panel); - return panel_simple_probe(&pdev->dev, desc); + return 0; } static void panel_simple_platform_remove(struct platform_device *pdev) @@ -5439,14 +5507,6 @@ static struct platform_driver panel_simple_platform_driver = { .shutdown = panel_simple_platform_shutdown, }; -struct panel_desc_dsi { - struct panel_desc desc; - - unsigned long flags; - enum mipi_dsi_pixel_format format; - unsigned int lanes; -}; - static const struct drm_display_mode auo_b080uan01_mode = { .clock = 154500, .hdisplay = 1200, @@ -5680,16 +5740,14 @@ MODULE_DEVICE_TABLE(of, dsi_of_match); static int panel_simple_dsi_probe(struct mipi_dsi_device *dsi) { const struct panel_desc_dsi *desc; + struct panel_simple *panel; int err; - desc = of_device_get_match_data(&dsi->dev); - if (!desc) - return -ENODEV; - - err = panel_simple_probe(&dsi->dev, &desc->desc); - if (err < 0) - return err; + panel = panel_simple_probe(&dsi->dev); + if (IS_ERR(panel)) + return PTR_ERR(panel); + desc = container_of(panel->desc, struct panel_desc_dsi, desc); dsi->mode_flags = desc->flags; dsi->format = desc->format; dsi->lanes = desc->lanes; diff --git a/drivers/gpu/drm/panel/panel-sitronix-st7703.c b/drivers/gpu/drm/panel/panel-sitronix-st7703.c index 1a007a244d84..6c348fe28955 100644 --- a/drivers/gpu/drm/panel/panel-sitronix-st7703.c +++ b/drivers/gpu/drm/panel/panel-sitronix-st7703.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 /* - * Driver for panels based on Sitronix ST7703 controller, souch as: + * Driver for panels based on Sitronix ST7703 controller, such as: * * - Rocktech jh057n00900 5.5" MIPI-DSI panel * diff --git a/drivers/gpu/drm/panel/panel-summit.c b/drivers/gpu/drm/panel/panel-summit.c index 4854437e2899..6d40b9ddfe02 100644 --- a/drivers/gpu/drm/panel/panel-summit.c +++ b/drivers/gpu/drm/panel/panel-summit.c @@ -1,6 +1,8 @@ // SPDX-License-Identifier: GPL-2.0-only #include <linux/backlight.h> +#include <linux/mod_devicetable.h> +#include <linux/property.h> #include <drm/drm_device.h> #include <drm/drm_mipi_dsi.h> #include <drm/drm_mode.h> diff --git a/drivers/gpu/drm/panfrost/panfrost_gem.c b/drivers/gpu/drm/panfrost/panfrost_gem.c index bb73f2a68a12..85d6289a6eda 100644 --- a/drivers/gpu/drm/panfrost/panfrost_gem.c +++ b/drivers/gpu/drm/panfrost/panfrost_gem.c @@ -432,7 +432,7 @@ static void panfrost_gem_debugfs_bo_print(struct panfrost_gem_object *bo, if (!refcount) return; - resident_size = bo->base.pages ? bo->base.base.size : 0; + resident_size = panfrost_gem_rss(&bo->base.base); snprintf(creator_info, sizeof(creator_info), "%s/%d", bo->debugfs.creator.process_name, bo->debugfs.creator.tgid); diff --git a/drivers/gpu/drm/panfrost/panfrost_job.c b/drivers/gpu/drm/panfrost/panfrost_job.c index 5657106c2f7d..82acabb21b27 100644 --- a/drivers/gpu/drm/panfrost/panfrost_job.c +++ b/drivers/gpu/drm/panfrost/panfrost_job.c @@ -751,11 +751,11 @@ static enum drm_gpu_sched_stat panfrost_job_timedout(struct drm_sched_job int js = panfrost_job_get_slot(job); /* - * If the GPU managed to complete this jobs fence, the timeout is - * spurious. Bail out. 
+ * If the GPU managed to complete this jobs fence, the timeout has + * fired before free-job worker. The timeout is spurious, so bail out. */ if (dma_fence_is_signaled(job->done_fence)) - return DRM_GPU_SCHED_STAT_NOMINAL; + return DRM_GPU_SCHED_STAT_NO_HANG; /* * Panfrost IRQ handler may take a long time to process an interrupt @@ -770,7 +770,7 @@ static enum drm_gpu_sched_stat panfrost_job_timedout(struct drm_sched_job if (dma_fence_is_signaled(job->done_fence)) { dev_warn(pfdev->dev, "unexpectedly high interrupt latency\n"); - return DRM_GPU_SCHED_STAT_NOMINAL; + return DRM_GPU_SCHED_STAT_NO_HANG; } dev_err(pfdev->dev, "gpu sched timeout, js=%d, config=0x%x, status=0x%x, head=0x%x, tail=0x%x, sched_job=%p", @@ -786,7 +786,7 @@ static enum drm_gpu_sched_stat panfrost_job_timedout(struct drm_sched_job atomic_set(&pfdev->reset.pending, 1); panfrost_reset(pfdev, sched_job); - return DRM_GPU_SCHED_STAT_NOMINAL; + return DRM_GPU_SCHED_STAT_RESET; } static void panfrost_reset_work(struct work_struct *work) @@ -841,7 +841,6 @@ int panfrost_job_init(struct panfrost_device *pfdev) .num_rqs = DRM_SCHED_PRIORITY_COUNT, .credit_limit = 2, .timeout = msecs_to_jiffies(JOB_TIMEOUT_MS), - .timeout_wq = pfdev->reset.wq, .name = "pan_js", .dev = pfdev->dev, }; @@ -879,6 +878,7 @@ int panfrost_job_init(struct panfrost_device *pfdev) pfdev->reset.wq = alloc_ordered_workqueue("panfrost-reset", 0); if (!pfdev->reset.wq) return -ENOMEM; + args.timeout_wq = pfdev->reset.wq; for (j = 0; j < NUM_JOB_SLOTS; j++) { js->queue[j].fence_context = dma_fence_context_alloc(1); diff --git a/drivers/gpu/drm/panfrost/panfrost_perfcnt.c b/drivers/gpu/drm/panfrost/panfrost_perfcnt.c index 563f16bae543..0dd62e8b2fa7 100644 --- a/drivers/gpu/drm/panfrost/panfrost_perfcnt.c +++ b/drivers/gpu/drm/panfrost/panfrost_perfcnt.c @@ -203,7 +203,6 @@ static int panfrost_perfcnt_disable_locked(struct panfrost_device *pfdev, panfrost_mmu_as_put(pfdev, perfcnt->mapping->mmu); panfrost_gem_mapping_put(perfcnt->mapping); perfcnt->mapping = NULL; - pm_runtime_mark_last_busy(pfdev->dev); pm_runtime_put_autosuspend(pfdev->dev); return 0; @@ -279,7 +278,6 @@ void panfrost_perfcnt_close(struct drm_file *file_priv) if (perfcnt->user == pfile) panfrost_perfcnt_disable_locked(pfdev, file_priv); mutex_unlock(&perfcnt->lock); - pm_runtime_mark_last_busy(pfdev->dev); pm_runtime_put_autosuspend(pfdev->dev); } diff --git a/drivers/gpu/drm/panthor/Makefile b/drivers/gpu/drm/panthor/Makefile index 15294719b09c..02db21748c12 100644 --- a/drivers/gpu/drm/panthor/Makefile +++ b/drivers/gpu/drm/panthor/Makefile @@ -8,6 +8,7 @@ panthor-y := \ panthor_gem.o \ panthor_gpu.o \ panthor_heap.o \ + panthor_hw.o \ panthor_mmu.o \ panthor_sched.o diff --git a/drivers/gpu/drm/panthor/panthor_device.c b/drivers/gpu/drm/panthor/panthor_device.c index f0b2da5b2b96..81df49880bd8 100644 --- a/drivers/gpu/drm/panthor/panthor_device.c +++ b/drivers/gpu/drm/panthor/panthor_device.c @@ -18,6 +18,7 @@ #include "panthor_device.h" #include "panthor_fw.h" #include "panthor_gpu.h" +#include "panthor_hw.h" #include "panthor_mmu.h" #include "panthor_regs.h" #include "panthor_sched.h" @@ -244,6 +245,10 @@ int panthor_device_init(struct panthor_device *ptdev) return ret; } + ret = panthor_hw_init(ptdev); + if (ret) + goto err_rpm_put; + ret = panthor_gpu_init(ptdev); if (ret) goto err_rpm_put; diff --git a/drivers/gpu/drm/panthor/panthor_drv.c b/drivers/gpu/drm/panthor/panthor_drv.c index 1116f2d2826e..4c202fc5ce05 100644 --- a/drivers/gpu/drm/panthor/panthor_drv.c +++ 
b/drivers/gpu/drm/panthor/panthor_drv.c @@ -1094,7 +1094,7 @@ static int panthor_ioctl_group_create(struct drm_device *ddev, void *data, struct drm_panthor_queue_create *queue_args; int ret; - if (!args->queues.count) + if (!args->queues.count || args->queues.count > MAX_CS_PER_CSG) return -EINVAL; ret = PANTHOR_UOBJ_GET_ARRAY(queue_args, &args->queues); @@ -1103,14 +1103,15 @@ static int panthor_ioctl_group_create(struct drm_device *ddev, void *data, ret = group_priority_permit(file, args->priority); if (ret) - return ret; + goto out; ret = panthor_group_create(pfile, args, queue_args); - if (ret >= 0) { - args->group_handle = ret; - ret = 0; - } + if (ret < 0) + goto out; + args->group_handle = ret; + ret = 0; +out: kvfree(queue_args); return ret; } @@ -1400,14 +1401,9 @@ panthor_open(struct drm_device *ddev, struct drm_file *file) struct panthor_file *pfile; int ret; - if (!try_module_get(THIS_MODULE)) - return -EINVAL; - pfile = kzalloc(sizeof(*pfile), GFP_KERNEL); - if (!pfile) { - ret = -ENOMEM; - goto err_put_mod; - } + if (!pfile) + return -ENOMEM; pfile->ptdev = ptdev; pfile->user_mmio.offset = DRM_PANTHOR_USER_MMIO_OFFSET; @@ -1439,9 +1435,6 @@ err_destroy_vm_pool: err_free_file: kfree(pfile); - -err_put_mod: - module_put(THIS_MODULE); return ret; } @@ -1454,7 +1447,6 @@ panthor_postclose(struct drm_device *ddev, struct drm_file *file) panthor_vm_pool_destroy(pfile); kfree(pfile); - module_put(THIS_MODULE); } static const struct drm_ioctl_desc panthor_drm_driver_ioctls[] = { @@ -1555,6 +1547,7 @@ static void panthor_show_fdinfo(struct drm_printer *p, struct drm_file *file) } static const struct file_operations panthor_drm_driver_fops = { + .owner = THIS_MODULE, .open = drm_open, .release = drm_release, .unlocked_ioctl = drm_ioctl, diff --git a/drivers/gpu/drm/panthor/panthor_fw.c b/drivers/gpu/drm/panthor/panthor_fw.c index 36f1034839c2..df767e82148a 100644 --- a/drivers/gpu/drm/panthor/panthor_fw.c +++ b/drivers/gpu/drm/panthor/panthor_fw.c @@ -1099,6 +1099,7 @@ void panthor_fw_pre_reset(struct panthor_device *ptdev, bool on_hang) } panthor_job_irq_suspend(&ptdev->fw->irq); + panthor_fw_stop(ptdev); } /** @@ -1402,3 +1403,8 @@ err_unplug_fw: } MODULE_FIRMWARE("arm/mali/arch10.8/mali_csffw.bin"); +MODULE_FIRMWARE("arm/mali/arch10.10/mali_csffw.bin"); +MODULE_FIRMWARE("arm/mali/arch10.12/mali_csffw.bin"); +MODULE_FIRMWARE("arm/mali/arch11.8/mali_csffw.bin"); +MODULE_FIRMWARE("arm/mali/arch12.8/mali_csffw.bin"); +MODULE_FIRMWARE("arm/mali/arch13.8/mali_csffw.bin"); diff --git a/drivers/gpu/drm/panthor/panthor_gem.c b/drivers/gpu/drm/panthor/panthor_gem.c index 7c00fd77758b..156c7a0b62a2 100644 --- a/drivers/gpu/drm/panthor/panthor_gem.c +++ b/drivers/gpu/drm/panthor/panthor_gem.c @@ -16,10 +16,15 @@ #include "panthor_mmu.h" #ifdef CONFIG_DEBUG_FS -static void panthor_gem_debugfs_bo_add(struct panthor_device *ptdev, - struct panthor_gem_object *bo) +static void panthor_gem_debugfs_bo_init(struct panthor_gem_object *bo) { INIT_LIST_HEAD(&bo->debugfs.node); +} + +static void panthor_gem_debugfs_bo_add(struct panthor_gem_object *bo) +{ + struct panthor_device *ptdev = container_of(bo->base.base.dev, + struct panthor_device, base); bo->debugfs.creator.tgid = current->group_leader->pid; get_task_comm(bo->debugfs.creator.process_name, current->group_leader); @@ -44,14 +49,13 @@ static void panthor_gem_debugfs_bo_rm(struct panthor_gem_object *bo) static void panthor_gem_debugfs_set_usage_flags(struct panthor_gem_object *bo, u32 usage_flags) { - bo->debugfs.flags = usage_flags | 
PANTHOR_DEBUGFS_GEM_USAGE_FLAG_INITIALIZED; + bo->debugfs.flags = usage_flags; + panthor_gem_debugfs_bo_add(bo); } #else -static void panthor_gem_debugfs_bo_add(struct panthor_device *ptdev, - struct panthor_gem_object *bo) -{} static void panthor_gem_debugfs_bo_rm(struct panthor_gem_object *bo) {} static void panthor_gem_debugfs_set_usage_flags(struct panthor_gem_object *bo, u32 usage_flags) {} +static void panthor_gem_debugfs_bo_init(struct panthor_gem_object *bo) {} #endif static void panthor_gem_free_object(struct drm_gem_object *obj) @@ -70,7 +74,6 @@ static void panthor_gem_free_object(struct drm_gem_object *obj) mutex_destroy(&bo->label.lock); drm_gem_free_mmap_offset(&bo->base.base); - mutex_destroy(&bo->gpuva_list_lock); drm_gem_shmem_free(&bo->base); drm_gem_object_put(vm_root_gem); } @@ -242,11 +245,9 @@ struct drm_gem_object *panthor_gem_create_object(struct drm_device *ddev, size_t obj->base.base.funcs = &panthor_gem_funcs; obj->base.map_wc = !ptdev->coherent; - mutex_init(&obj->gpuva_list_lock); - drm_gem_gpuva_set_lock(&obj->base.base, &obj->gpuva_list_lock); mutex_init(&obj->label.lock); - panthor_gem_debugfs_bo_add(ptdev, obj); + panthor_gem_debugfs_bo_init(obj); return &obj->base.base; } @@ -285,6 +286,8 @@ panthor_gem_create_with_handle(struct drm_file *file, bo->base.base.resv = bo->exclusive_vm_root_gem->resv; } + panthor_gem_debugfs_set_usage_flags(bo, 0); + /* * Allocate an id of idr table where the obj is registered * and handle has the id what user can see. @@ -296,12 +299,6 @@ panthor_gem_create_with_handle(struct drm_file *file, /* drop reference from allocate - handle holds it now. */ drm_gem_object_put(&shmem->base); - /* - * No explicit flags are needed in the call below, since the - * function internally sets the INITIALIZED bit for us. - */ - panthor_gem_debugfs_set_usage_flags(bo, 0); - return ret; } @@ -387,7 +384,7 @@ static void panthor_gem_debugfs_bo_print(struct panthor_gem_object *bo, unsigned int refcount = kref_read(&bo->base.base.refcount); char creator_info[32] = {}; size_t resident_size; - u32 gem_usage_flags = bo->debugfs.flags & (u32)~PANTHOR_DEBUGFS_GEM_USAGE_FLAG_INITIALIZED; + u32 gem_usage_flags = bo->debugfs.flags; u32 gem_state_flags = 0; /* Skip BOs being destroyed. */ @@ -436,8 +433,7 @@ void panthor_gem_debugfs_print_bos(struct panthor_device *ptdev, scoped_guard(mutex, &ptdev->gems.lock) { list_for_each_entry(bo, &ptdev->gems.node, debugfs.node) { - if (bo->debugfs.flags & PANTHOR_DEBUGFS_GEM_USAGE_FLAG_INITIALIZED) - panthor_gem_debugfs_bo_print(bo, m, &totals); + panthor_gem_debugfs_bo_print(bo, m, &totals); } } diff --git a/drivers/gpu/drm/panthor/panthor_gem.h b/drivers/gpu/drm/panthor/panthor_gem.h index 4dd732dcd59f..80c6e24112d0 100644 --- a/drivers/gpu/drm/panthor/panthor_gem.h +++ b/drivers/gpu/drm/panthor/panthor_gem.h @@ -35,9 +35,6 @@ enum panthor_debugfs_gem_usage_flags { /** @PANTHOR_DEBUGFS_GEM_USAGE_FLAG_FW_MAPPED: BO is mapped on the FW VM. */ PANTHOR_DEBUGFS_GEM_USAGE_FLAG_FW_MAPPED = BIT(PANTHOR_DEBUGFS_GEM_USAGE_FW_MAPPED_BIT), - - /** @PANTHOR_DEBUGFS_GEM_USAGE_FLAG_INITIALIZED: BO is ready for DebugFS display. */ - PANTHOR_DEBUGFS_GEM_USAGE_FLAG_INITIALIZED = BIT(31), }; /** @@ -82,18 +79,6 @@ struct panthor_gem_object { */ struct drm_gem_object *exclusive_vm_root_gem; - /** - * @gpuva_list_lock: Custom GPUVA lock. - * - * Used to protect insertion of drm_gpuva elements to the - * drm_gem_object.gpuva.list list. 
- * - * We can't use the GEM resv for that, because drm_gpuva_link() is - * called in a dma-signaling path, where we're not allowed to take - * resv locks. - */ - struct mutex gpuva_list_lock; - /** @flags: Combination of drm_panthor_bo_flags flags. */ u32 flags; diff --git a/drivers/gpu/drm/panthor/panthor_gpu.c b/drivers/gpu/drm/panthor/panthor_gpu.c index 534735518824..db69449a5be0 100644 --- a/drivers/gpu/drm/panthor/panthor_gpu.c +++ b/drivers/gpu/drm/panthor/panthor_gpu.c @@ -35,40 +35,9 @@ struct panthor_gpu { /** @reqs_acked: GPU request wait queue. */ wait_queue_head_t reqs_acked; -}; - -/** - * struct panthor_model - GPU model description - */ -struct panthor_model { - /** @name: Model name. */ - const char *name; - - /** @arch_major: Major version number of architecture. */ - u8 arch_major; - /** @product_major: Major version number of product. */ - u8 product_major; -}; - -/** - * GPU_MODEL() - Define a GPU model. A GPU product can be uniquely identified - * by a combination of the major architecture version and the major product - * version. - * @_name: Name for the GPU model. - * @_arch_major: Architecture major. - * @_product_major: Product major. - */ -#define GPU_MODEL(_name, _arch_major, _product_major) \ -{\ - .name = __stringify(_name), \ - .arch_major = _arch_major, \ - .product_major = _product_major, \ -} - -static const struct panthor_model gpu_models[] = { - GPU_MODEL(g610, 10, 7), - {}, + /** @cache_flush_lock: Lock to serialize cache flushes */ + struct mutex cache_flush_lock; }; #define GPU_INTERRUPTS_MASK \ @@ -83,66 +52,6 @@ static void panthor_gpu_coherency_set(struct panthor_device *ptdev) ptdev->coherent ? GPU_COHERENCY_PROT_BIT(ACE_LITE) : GPU_COHERENCY_NONE); } -static void panthor_gpu_init_info(struct panthor_device *ptdev) -{ - const struct panthor_model *model; - u32 arch_major, product_major; - u32 major, minor, status; - unsigned int i; - - ptdev->gpu_info.gpu_id = gpu_read(ptdev, GPU_ID); - ptdev->gpu_info.csf_id = gpu_read(ptdev, GPU_CSF_ID); - ptdev->gpu_info.gpu_rev = gpu_read(ptdev, GPU_REVID); - ptdev->gpu_info.core_features = gpu_read(ptdev, GPU_CORE_FEATURES); - ptdev->gpu_info.l2_features = gpu_read(ptdev, GPU_L2_FEATURES); - ptdev->gpu_info.tiler_features = gpu_read(ptdev, GPU_TILER_FEATURES); - ptdev->gpu_info.mem_features = gpu_read(ptdev, GPU_MEM_FEATURES); - ptdev->gpu_info.mmu_features = gpu_read(ptdev, GPU_MMU_FEATURES); - ptdev->gpu_info.thread_features = gpu_read(ptdev, GPU_THREAD_FEATURES); - ptdev->gpu_info.max_threads = gpu_read(ptdev, GPU_THREAD_MAX_THREADS); - ptdev->gpu_info.thread_max_workgroup_size = gpu_read(ptdev, GPU_THREAD_MAX_WORKGROUP_SIZE); - ptdev->gpu_info.thread_max_barrier_size = gpu_read(ptdev, GPU_THREAD_MAX_BARRIER_SIZE); - ptdev->gpu_info.coherency_features = gpu_read(ptdev, GPU_COHERENCY_FEATURES); - for (i = 0; i < 4; i++) - ptdev->gpu_info.texture_features[i] = gpu_read(ptdev, GPU_TEXTURE_FEATURES(i)); - - ptdev->gpu_info.as_present = gpu_read(ptdev, GPU_AS_PRESENT); - - ptdev->gpu_info.shader_present = gpu_read64(ptdev, GPU_SHADER_PRESENT); - ptdev->gpu_info.tiler_present = gpu_read64(ptdev, GPU_TILER_PRESENT); - ptdev->gpu_info.l2_present = gpu_read64(ptdev, GPU_L2_PRESENT); - - arch_major = GPU_ARCH_MAJOR(ptdev->gpu_info.gpu_id); - product_major = GPU_PROD_MAJOR(ptdev->gpu_info.gpu_id); - major = GPU_VER_MAJOR(ptdev->gpu_info.gpu_id); - minor = GPU_VER_MINOR(ptdev->gpu_info.gpu_id); - status = GPU_VER_STATUS(ptdev->gpu_info.gpu_id); - - for (model = gpu_models; model->name; model++) { - if 
(model->arch_major == arch_major && - model->product_major == product_major) - break; - } - - drm_info(&ptdev->base, - "mali-%s id 0x%x major 0x%x minor 0x%x status 0x%x", - model->name ?: "unknown", ptdev->gpu_info.gpu_id >> 16, - major, minor, status); - - drm_info(&ptdev->base, - "Features: L2:%#x Tiler:%#x Mem:%#x MMU:%#x AS:%#x", - ptdev->gpu_info.l2_features, - ptdev->gpu_info.tiler_features, - ptdev->gpu_info.mem_features, - ptdev->gpu_info.mmu_features, - ptdev->gpu_info.as_present); - - drm_info(&ptdev->base, - "shader_present=0x%0llx l2_present=0x%0llx tiler_present=0x%0llx", - ptdev->gpu_info.shader_present, ptdev->gpu_info.l2_present, - ptdev->gpu_info.tiler_present); -} - static void panthor_gpu_irq_handler(struct panthor_device *ptdev, u32 status) { gpu_write(ptdev, GPU_INT_CLEAR, status); @@ -204,8 +113,8 @@ int panthor_gpu_init(struct panthor_device *ptdev) spin_lock_init(&gpu->reqs_lock); init_waitqueue_head(&gpu->reqs_acked); + mutex_init(&gpu->cache_flush_lock); ptdev->gpu = gpu; - panthor_gpu_init_info(ptdev); dma_set_max_seg_size(ptdev->base.dev, UINT_MAX); pa_bits = GPU_MMU_FEATURES_PA_BITS(ptdev->gpu_info.mmu_features); @@ -297,8 +206,9 @@ int panthor_gpu_block_power_on(struct panthor_device *ptdev, gpu_write64(ptdev, pwron_reg, mask); - ret = gpu_read64_relaxed_poll_timeout(ptdev, pwrtrans_reg, val, - !(mask & val), 100, timeout_us); + ret = gpu_read64_relaxed_poll_timeout(ptdev, rdy_reg, val, + (mask & val) == val, + 100, timeout_us); if (ret) { drm_err(&ptdev->base, "timeout waiting on %s:%llx readiness", blk_name, mask); @@ -352,6 +262,9 @@ int panthor_gpu_flush_caches(struct panthor_device *ptdev, bool timedout = false; unsigned long flags; + /* Serialize cache flush operations. */ + guard(mutex)(&ptdev->gpu->cache_flush_lock); + spin_lock_irqsave(&ptdev->gpu->reqs_lock, flags); if (!drm_WARN_ON(&ptdev->base, ptdev->gpu->pending_reqs & GPU_IRQ_CLEAN_CACHES_COMPLETED)) { diff --git a/drivers/gpu/drm/panthor/panthor_hw.c b/drivers/gpu/drm/panthor/panthor_hw.c new file mode 100644 index 000000000000..4f2858114e5e --- /dev/null +++ b/drivers/gpu/drm/panthor/panthor_hw.c @@ -0,0 +1,125 @@ +// SPDX-License-Identifier: GPL-2.0 or MIT +/* Copyright 2025 ARM Limited. All rights reserved. 
*/ + +#include "panthor_device.h" +#include "panthor_hw.h" +#include "panthor_regs.h" + +#define GPU_PROD_ID_MAKE(arch_major, prod_major) \ + (((arch_major) << 24) | (prod_major)) + +static char *get_gpu_model_name(struct panthor_device *ptdev) +{ + const u32 gpu_id = ptdev->gpu_info.gpu_id; + const u32 product_id = GPU_PROD_ID_MAKE(GPU_ARCH_MAJOR(gpu_id), + GPU_PROD_MAJOR(gpu_id)); + const bool ray_intersection = !!(ptdev->gpu_info.gpu_features & + GPU_FEATURES_RAY_INTERSECTION); + const u8 shader_core_count = hweight64(ptdev->gpu_info.shader_present); + + switch (product_id) { + case GPU_PROD_ID_MAKE(10, 2): + return "Mali-G710"; + case GPU_PROD_ID_MAKE(10, 3): + return "Mali-G510"; + case GPU_PROD_ID_MAKE(10, 4): + return "Mali-G310"; + case GPU_PROD_ID_MAKE(10, 7): + return "Mali-G610"; + case GPU_PROD_ID_MAKE(11, 2): + if (shader_core_count > 10 && ray_intersection) + return "Mali-G715-Immortalis"; + else if (shader_core_count >= 7) + return "Mali-G715"; + + fallthrough; + case GPU_PROD_ID_MAKE(11, 3): + return "Mali-G615"; + case GPU_PROD_ID_MAKE(12, 0): + if (shader_core_count >= 10 && ray_intersection) + return "Mali-G720-Immortalis"; + else if (shader_core_count >= 6) + return "Mali-G720"; + + fallthrough; + case GPU_PROD_ID_MAKE(12, 1): + return "Mali-G620"; + case GPU_PROD_ID_MAKE(13, 0): + if (shader_core_count >= 10 && ray_intersection) + return "Mali-G925-Immortalis"; + else if (shader_core_count >= 6) + return "Mali-G725"; + + fallthrough; + case GPU_PROD_ID_MAKE(13, 1): + return "Mali-G625"; + } + + return "(Unknown Mali GPU)"; +} + +static void panthor_gpu_info_init(struct panthor_device *ptdev) +{ + unsigned int i; + + ptdev->gpu_info.gpu_id = gpu_read(ptdev, GPU_ID); + ptdev->gpu_info.csf_id = gpu_read(ptdev, GPU_CSF_ID); + ptdev->gpu_info.gpu_rev = gpu_read(ptdev, GPU_REVID); + ptdev->gpu_info.core_features = gpu_read(ptdev, GPU_CORE_FEATURES); + ptdev->gpu_info.l2_features = gpu_read(ptdev, GPU_L2_FEATURES); + ptdev->gpu_info.tiler_features = gpu_read(ptdev, GPU_TILER_FEATURES); + ptdev->gpu_info.mem_features = gpu_read(ptdev, GPU_MEM_FEATURES); + ptdev->gpu_info.mmu_features = gpu_read(ptdev, GPU_MMU_FEATURES); + ptdev->gpu_info.thread_features = gpu_read(ptdev, GPU_THREAD_FEATURES); + ptdev->gpu_info.max_threads = gpu_read(ptdev, GPU_THREAD_MAX_THREADS); + ptdev->gpu_info.thread_max_workgroup_size = gpu_read(ptdev, GPU_THREAD_MAX_WORKGROUP_SIZE); + ptdev->gpu_info.thread_max_barrier_size = gpu_read(ptdev, GPU_THREAD_MAX_BARRIER_SIZE); + ptdev->gpu_info.coherency_features = gpu_read(ptdev, GPU_COHERENCY_FEATURES); + for (i = 0; i < 4; i++) + ptdev->gpu_info.texture_features[i] = gpu_read(ptdev, GPU_TEXTURE_FEATURES(i)); + + ptdev->gpu_info.as_present = gpu_read(ptdev, GPU_AS_PRESENT); + + ptdev->gpu_info.shader_present = gpu_read64(ptdev, GPU_SHADER_PRESENT); + ptdev->gpu_info.tiler_present = gpu_read64(ptdev, GPU_TILER_PRESENT); + ptdev->gpu_info.l2_present = gpu_read64(ptdev, GPU_L2_PRESENT); + + /* Introduced in arch 11.x */ + ptdev->gpu_info.gpu_features = gpu_read64(ptdev, GPU_FEATURES); +} + +static void panthor_hw_info_init(struct panthor_device *ptdev) +{ + u32 major, minor, status; + + panthor_gpu_info_init(ptdev); + + major = GPU_VER_MAJOR(ptdev->gpu_info.gpu_id); + minor = GPU_VER_MINOR(ptdev->gpu_info.gpu_id); + status = GPU_VER_STATUS(ptdev->gpu_info.gpu_id); + + drm_info(&ptdev->base, + "%s id 0x%x major 0x%x minor 0x%x status 0x%x", + get_gpu_model_name(ptdev), ptdev->gpu_info.gpu_id >> 16, + major, minor, status); + + drm_info(&ptdev->base, + 
"Features: L2:%#x Tiler:%#x Mem:%#x MMU:%#x AS:%#x", + ptdev->gpu_info.l2_features, + ptdev->gpu_info.tiler_features, + ptdev->gpu_info.mem_features, + ptdev->gpu_info.mmu_features, + ptdev->gpu_info.as_present); + + drm_info(&ptdev->base, + "shader_present=0x%0llx l2_present=0x%0llx tiler_present=0x%0llx", + ptdev->gpu_info.shader_present, ptdev->gpu_info.l2_present, + ptdev->gpu_info.tiler_present); +} + +int panthor_hw_init(struct panthor_device *ptdev) +{ + panthor_hw_info_init(ptdev); + + return 0; +} diff --git a/drivers/gpu/drm/panthor/panthor_hw.h b/drivers/gpu/drm/panthor/panthor_hw.h new file mode 100644 index 000000000000..0af6acc6aa6a --- /dev/null +++ b/drivers/gpu/drm/panthor/panthor_hw.h @@ -0,0 +1,11 @@ +/* SPDX-License-Identifier: GPL-2.0 or MIT */ +/* Copyright 2025 ARM Limited. All rights reserved. */ + +#ifndef __PANTHOR_HW_H__ +#define __PANTHOR_HW_H__ + +struct panthor_device; + +int panthor_hw_init(struct panthor_device *ptdev); + +#endif /* __PANTHOR_HW_H__ */ diff --git a/drivers/gpu/drm/panthor/panthor_mmu.c b/drivers/gpu/drm/panthor/panthor_mmu.c index b39ea6acc6a9..7870e7dbaa5d 100644 --- a/drivers/gpu/drm/panthor/panthor_mmu.c +++ b/drivers/gpu/drm/panthor/panthor_mmu.c @@ -29,6 +29,7 @@ #include "panthor_device.h" #include "panthor_gem.h" +#include "panthor_gpu.h" #include "panthor_heap.h" #include "panthor_mmu.h" #include "panthor_regs.h" @@ -571,8 +572,24 @@ static void lock_region(struct panthor_device *ptdev, u32 as_nr, static int mmu_hw_do_operation_locked(struct panthor_device *ptdev, int as_nr, u64 iova, u64 size, u32 op) { + const u32 l2_flush_op = CACHE_CLEAN | CACHE_INV; + u32 lsc_flush_op; + int ret; + lockdep_assert_held(&ptdev->mmu->as.slots_lock); + switch (op) { + case AS_COMMAND_FLUSH_MEM: + lsc_flush_op = CACHE_CLEAN | CACHE_INV; + break; + case AS_COMMAND_FLUSH_PT: + lsc_flush_op = 0; + break; + default: + drm_WARN(&ptdev->base, 1, "Unexpected AS_COMMAND: %d", op); + return -EINVAL; + } + if (as_nr < 0) return 0; @@ -582,13 +599,24 @@ static int mmu_hw_do_operation_locked(struct panthor_device *ptdev, int as_nr, * power it up */ - if (op != AS_COMMAND_UNLOCK) - lock_region(ptdev, as_nr, iova, size); + lock_region(ptdev, as_nr, iova, size); + + ret = wait_ready(ptdev, as_nr); + if (ret) + return ret; - /* Run the MMU operation */ - write_cmd(ptdev, as_nr, op); + ret = panthor_gpu_flush_caches(ptdev, l2_flush_op, lsc_flush_op, 0); + if (ret) + return ret; - /* Wait for the flush to complete */ + /* + * Explicitly unlock the region as the AS is not unlocked automatically + * at the end of the GPU_CONTROL cache flush command, unlike + * AS_COMMAND_FLUSH_MEM or AS_COMMAND_FLUSH_PT. + */ + write_cmd(ptdev, as_nr, AS_COMMAND_UNLOCK); + + /* Wait for the unlock command to complete */ return wait_ready(ptdev, as_nr); } @@ -885,17 +913,6 @@ static int panthor_vm_flush_range(struct panthor_vm *vm, u64 iova, u64 size) return ret; } -/** - * panthor_vm_flush_all() - Flush L2 caches for the entirety of a VM's AS - * @vm: VM whose cache to flush - * - * Return: 0 on success, a negative error code if flush failed. - */ -int panthor_vm_flush_all(struct panthor_vm *vm) -{ - return panthor_vm_flush_range(vm, vm->base.mm_start, vm->base.mm_range); -} - static int panthor_vm_unmap_pages(struct panthor_vm *vm, u64 iova, u64 size) { struct panthor_device *ptdev = vm->ptdev; @@ -1085,9 +1102,9 @@ static void panthor_vm_bo_put(struct drm_gpuvm_bo *vm_bo) * GEM vm_bo list. 
*/ dma_resv_lock(drm_gpuvm_resv(vm), NULL); - mutex_lock(&bo->gpuva_list_lock); + mutex_lock(&bo->base.base.gpuva.lock); unpin = drm_gpuvm_bo_put(vm_bo); - mutex_unlock(&bo->gpuva_list_lock); + mutex_unlock(&bo->base.base.gpuva.lock); dma_resv_unlock(drm_gpuvm_resv(vm)); /* If the vm_bo object was destroyed, release the pin reference that @@ -1158,10 +1175,14 @@ panthor_vm_op_ctx_prealloc_vmas(struct panthor_vm_op_ctx *op_ctx) break; case DRM_PANTHOR_VM_BIND_OP_TYPE_UNMAP: - /* Partial unmaps might trigger a remap with either a prev or a next VA, - * but not both. + /* Two VMAs can be needed for an unmap, as an unmap can happen + * in the middle of a drm_gpuva, requiring a remap with both + * prev & next VA. Or an unmap can span more than one drm_gpuva + * where the first and last ones are covered partially, requring + * a remap for the first with a prev VA and remap for the last + * with a next VA. */ - vma_count = 1; + vma_count = 2; break; default: @@ -1205,7 +1226,7 @@ static int panthor_vm_prepare_map_op_ctx(struct panthor_vm_op_ctx *op_ctx, (flags & DRM_PANTHOR_VM_BIND_OP_TYPE_MASK) != DRM_PANTHOR_VM_BIND_OP_TYPE_MAP) return -EINVAL; - /* Make sure the VA and size are aligned and in-bounds. */ + /* Make sure the VA and size are in-bounds. */ if (size > bo->base.base.size || offset > bo->base.base.size - size) return -EINVAL; @@ -1260,9 +1281,9 @@ static int panthor_vm_prepare_map_op_ctx(struct panthor_vm_op_ctx *op_ctx, * calling this function. */ dma_resv_lock(panthor_vm_resv(vm), NULL); - mutex_lock(&bo->gpuva_list_lock); + mutex_lock(&bo->base.base.gpuva.lock); op_ctx->map.vm_bo = drm_gpuvm_bo_obtain_prealloc(preallocated_vm_bo); - mutex_unlock(&bo->gpuva_list_lock); + mutex_unlock(&bo->base.base.gpuva.lock); dma_resv_unlock(panthor_vm_resv(vm)); /* If the a vm_bo for this <VM,BO> combination exists, it already @@ -2014,10 +2035,10 @@ static void panthor_vma_link(struct panthor_vm *vm, { struct panthor_gem_object *bo = to_panthor_bo(vma->base.gem.obj); - mutex_lock(&bo->gpuva_list_lock); + mutex_lock(&bo->base.base.gpuva.lock); drm_gpuva_link(&vma->base, vm_bo); drm_WARN_ON(&vm->ptdev->base, drm_gpuvm_bo_put(vm_bo)); - mutex_unlock(&bo->gpuva_list_lock); + mutex_unlock(&bo->base.base.gpuva.lock); } static void panthor_vma_unlink(struct panthor_vm *vm, @@ -2026,9 +2047,9 @@ static void panthor_vma_unlink(struct panthor_vm *vm, struct panthor_gem_object *bo = to_panthor_bo(vma->base.gem.obj); struct drm_gpuvm_bo *vm_bo = drm_gpuvm_bo_get(vma->base.vm_bo); - mutex_lock(&bo->gpuva_list_lock); + mutex_lock(&bo->base.base.gpuva.lock); drm_gpuva_unlink(&vma->base); - mutex_unlock(&bo->gpuva_list_lock); + mutex_unlock(&bo->base.base.gpuva.lock); /* drm_gpuva_unlink() release the vm_bo, but we manually retained it * when entering this function, so we can implement deferred VMA @@ -2180,15 +2201,22 @@ panthor_vm_exec_op(struct panthor_vm *vm, struct panthor_vm_op_ctx *op, mutex_lock(&vm->op_lock); vm->op_ctx = op; switch (op_type) { - case DRM_PANTHOR_VM_BIND_OP_TYPE_MAP: + case DRM_PANTHOR_VM_BIND_OP_TYPE_MAP: { + const struct drm_gpuvm_map_req map_req = { + .map.va.addr = op->va.addr, + .map.va.range = op->va.range, + .map.gem.obj = op->map.vm_bo->obj, + .map.gem.offset = op->map.bo_offset, + }; + if (vm->unusable) { ret = -EINVAL; break; } - ret = drm_gpuvm_sm_map(&vm->base, vm, op->va.addr, op->va.range, - op->map.vm_bo->obj, op->map.bo_offset); + ret = drm_gpuvm_sm_map(&vm->base, vm, &map_req); break; + } case DRM_PANTHOR_VM_BIND_OP_TYPE_UNMAP: ret = drm_gpuvm_sm_unmap(&vm->base, vm, 
op->va.addr, op->va.range); @@ -2270,7 +2298,7 @@ static enum drm_gpu_sched_stat panthor_vm_bind_timedout_job(struct drm_sched_job *sched_job) { WARN(1, "VM_BIND ops are synchronous for now, there should be no timeout!"); - return DRM_GPU_SCHED_STAT_NOMINAL; + return DRM_GPU_SCHED_STAT_RESET; } static const struct drm_sched_backend_ops panthor_vm_bind_ops = { @@ -2391,8 +2419,9 @@ panthor_vm_create(struct panthor_device *ptdev, bool for_mcu, * to be handled the same way user VMAs are. */ drm_gpuvm_init(&vm->base, for_mcu ? "panthor-MCU-VM" : "panthor-GPU-VM", - DRM_GPUVM_RESV_PROTECTED, &ptdev->base, dummy_gem, - min_va, va_range, 0, 0, &panthor_gpuvm_ops); + DRM_GPUVM_RESV_PROTECTED | DRM_GPUVM_IMMEDIATE_MODE, + &ptdev->base, dummy_gem, min_va, va_range, 0, 0, + &panthor_gpuvm_ops); drm_gem_object_put(dummy_gem); return vm; @@ -2422,7 +2451,7 @@ panthor_vm_bind_prepare_op_ctx(struct drm_file *file, int ret; /* Aligned on page size. */ - if (!IS_ALIGNED(op->va | op->size, vm_pgsz)) + if (!IS_ALIGNED(op->va | op->size | op->bo_offset, vm_pgsz)) return -EINVAL; switch (op->flags & DRM_PANTHOR_VM_BIND_OP_TYPE_MASK) { diff --git a/drivers/gpu/drm/panthor/panthor_mmu.h b/drivers/gpu/drm/panthor/panthor_mmu.h index fc274637114e..0e268fdfdb2f 100644 --- a/drivers/gpu/drm/panthor/panthor_mmu.h +++ b/drivers/gpu/drm/panthor/panthor_mmu.h @@ -33,7 +33,6 @@ int panthor_vm_active(struct panthor_vm *vm); void panthor_vm_idle(struct panthor_vm *vm); u32 panthor_vm_page_size(struct panthor_vm *vm); int panthor_vm_as(struct panthor_vm *vm); -int panthor_vm_flush_all(struct panthor_vm *vm); struct panthor_heap_pool * panthor_vm_get_heap_pool(struct panthor_vm *vm, bool create); diff --git a/drivers/gpu/drm/panthor/panthor_regs.h b/drivers/gpu/drm/panthor/panthor_regs.h index 48bbfd40138c..8bee76d01bf8 100644 --- a/drivers/gpu/drm/panthor/panthor_regs.h +++ b/drivers/gpu/drm/panthor/panthor_regs.h @@ -70,6 +70,9 @@ #define GPU_PWR_OVERRIDE0 0x54 #define GPU_PWR_OVERRIDE1 0x58 +#define GPU_FEATURES 0x60 +#define GPU_FEATURES_RAY_INTERSECTION BIT(2) + #define GPU_TIMESTAMP_OFFSET 0x88 #define GPU_CYCLE_COUNT 0x90 #define GPU_TIMESTAMP 0x98 diff --git a/drivers/gpu/drm/panthor/panthor_sched.c b/drivers/gpu/drm/panthor/panthor_sched.c index a2248f692a03..3d1f57e3990f 100644 --- a/drivers/gpu/drm/panthor/panthor_sched.c +++ b/drivers/gpu/drm/panthor/panthor_sched.c @@ -641,6 +641,15 @@ struct panthor_group { size_t kbo_sizes; } fdinfo; + /** @task_info: Info of current->group_leader that created the group. */ + struct { + /** @task_info.pid: pid of current->group_leader */ + pid_t pid; + + /** @task_info.comm: comm of current->group_leader */ + char comm[TASK_COMM_LEN]; + } task_info; + /** @state: Group state. 
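[Editor's note] The panthor_vm_exec_op() hunk above switches to the request-structure form of drm_gpuvm_sm_map(): the VA, range, GEM object and offset are gathered into a struct drm_gpuvm_map_req built on the stack and passed by pointer. A condensed sketch of that calling convention, using only the fields visible in the patch (foo_vm_map() and its parameters are hypothetical wrappers for whatever the caller already has at hand):

	#include <drm/drm_gpuvm.h>

	/* Sketch: split-and-merge map through the struct-based API. */
	static int foo_vm_map(struct drm_gpuvm *gpuvm, void *priv,
			      struct drm_gem_object *obj,
			      u64 addr, u64 range, u64 offset)
	{
		const struct drm_gpuvm_map_req req = {
			.map.va.addr    = addr,		/* GPU VA to map at */
			.map.va.range   = range,	/* size of the mapping */
			.map.gem.obj    = obj,		/* backing GEM object */
			.map.gem.offset = offset,	/* offset inside the GEM object */
		};

		return drm_gpuvm_sm_map(gpuvm, priv, &req);
	}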
*/ enum panthor_group_state state; @@ -886,8 +895,7 @@ static void group_free_queue(struct panthor_group *group, struct panthor_queue * if (IS_ERR_OR_NULL(queue)) return; - if (queue->entity.fence_context) - drm_sched_entity_destroy(&queue->entity); + drm_sched_entity_destroy(&queue->entity); if (queue->scheduler.ops) drm_sched_fini(&queue->scheduler); @@ -1355,8 +1363,12 @@ cs_slot_process_fatal_event_locked(struct panthor_device *ptdev, fatal = cs_iface->output->fatal; info = cs_iface->output->fatal_info; - if (group) + if (group) { + drm_warn(&ptdev->base, "CS_FATAL: pid=%d, comm=%s\n", + group->task_info.pid, group->task_info.comm); + group->fatal_queues |= BIT(cs_id); + } if (CS_EXCEPTION_TYPE(fatal) == DRM_PANTHOR_EXCEPTION_CS_UNRECOVERABLE) { /* If this exception is unrecoverable, queue a reset, and make @@ -1416,6 +1428,11 @@ cs_slot_process_fault_event_locked(struct panthor_device *ptdev, spin_unlock(&queue->fence_ctx.lock); } + if (group) { + drm_warn(&ptdev->base, "CS_FAULT: pid=%d, comm=%s\n", + group->task_info.pid, group->task_info.comm); + } + drm_warn(&ptdev->base, "CSG slot %d CS slot: %d\n" "CS_FAULT.EXCEPTION_TYPE: 0x%x (%s)\n" @@ -1632,11 +1649,15 @@ csg_slot_process_progress_timer_event_locked(struct panthor_device *ptdev, u32 c lockdep_assert_held(&sched->lock); - drm_warn(&ptdev->base, "CSG slot %d progress timeout\n", csg_id); - group = csg_slot->group; - if (!drm_WARN_ON(&ptdev->base, !group)) + if (!drm_WARN_ON(&ptdev->base, !group)) { + drm_warn(&ptdev->base, "CSG_PROGRESS_TIMER_EVENT: pid=%d, comm=%s\n", + group->task_info.pid, group->task_info.comm); + group->timedout = true; + } + + drm_warn(&ptdev->base, "CSG slot %d progress timeout\n", csg_id); sched_queue_delayed_work(sched, tick, 0); } @@ -3218,7 +3239,8 @@ queue_timedout_job(struct drm_sched_job *sched_job) struct panthor_scheduler *sched = ptdev->scheduler; struct panthor_queue *queue = group->queues[job->queue_idx]; - drm_warn(&ptdev->base, "job timeout\n"); + drm_warn(&ptdev->base, "job timeout: pid=%d, comm=%s, seqno=%llu\n", + group->task_info.pid, group->task_info.comm, job->done_fence->seqno); drm_WARN_ON(&ptdev->base, atomic_read(&sched->reset.in_progress)); @@ -3241,7 +3263,7 @@ queue_timedout_job(struct drm_sched_job *sched_job) queue_start(queue); - return DRM_GPU_SCHED_STAT_NOMINAL; + return DRM_GPU_SCHED_STAT_RESET; } static void queue_free_job(struct drm_sched_job *sched_job) @@ -3389,6 +3411,14 @@ err_free_queue: return ERR_PTR(ret); } +static void group_init_task_info(struct panthor_group *group) +{ + struct task_struct *task = current->group_leader; + + group->task_info.pid = task->pid; + get_task_comm(group->task_info.comm, task); +} + static void add_group_kbo_sizes(struct panthor_device *ptdev, struct panthor_group *group) { @@ -3540,6 +3570,8 @@ int panthor_group_create(struct panthor_file *pfile, add_group_kbo_sizes(group->ptdev, group); spin_lock_init(&group->fdinfo.lock); + group_init_task_info(group); + return gid; err_put_group: @@ -3558,11 +3590,6 @@ int panthor_group_destroy(struct panthor_file *pfile, u32 group_handle) if (!group) return -EINVAL; - for (u32 i = 0; i < group->queue_count; i++) { - if (group->queues[i]) - drm_sched_entity_destroy(&group->queues[i]->entity); - } - mutex_lock(&sched->reset.lock); mutex_lock(&sched->lock); group->destroyed = true; diff --git a/drivers/gpu/drm/qxl/qxl_display.c b/drivers/gpu/drm/qxl/qxl_display.c index 70aff64ced87..ae7e572b1b4a 100644 --- a/drivers/gpu/drm/qxl/qxl_display.c +++ b/drivers/gpu/drm/qxl/qxl_display.c @@ -1176,9 
+1176,10 @@ err_drm_connector_cleanup: static struct drm_framebuffer * qxl_user_framebuffer_create(struct drm_device *dev, struct drm_file *file_priv, + const struct drm_format_info *info, const struct drm_mode_fb_cmd2 *mode_cmd) { - return drm_gem_fb_create_with_funcs(dev, file_priv, mode_cmd, + return drm_gem_fb_create_with_funcs(dev, file_priv, info, mode_cmd, &qxl_fb_funcs); } diff --git a/drivers/gpu/drm/radeon/atombios_encoders.c b/drivers/gpu/drm/radeon/atombios_encoders.c index d1c5e471bdca..3d9f47bc807a 100644 --- a/drivers/gpu/drm/radeon/atombios_encoders.c +++ b/drivers/gpu/drm/radeon/atombios_encoders.c @@ -1832,7 +1832,7 @@ radeon_atom_encoder_dpms(struct drm_encoder *encoder, int mode) return; } - radeon_atombios_encoder_dpms_scratch_regs(encoder, (mode == DRM_MODE_DPMS_ON) ? true : false); + radeon_atombios_encoder_dpms_scratch_regs(encoder, mode == DRM_MODE_DPMS_ON); } diff --git a/drivers/gpu/drm/radeon/ci_dpm.c b/drivers/gpu/drm/radeon/ci_dpm.c index 7c3a960f486a..ba8db1d07c07 100644 --- a/drivers/gpu/drm/radeon/ci_dpm.c +++ b/drivers/gpu/drm/radeon/ci_dpm.c @@ -2457,7 +2457,7 @@ static void ci_register_patching_mc_arb(struct radeon_device *rdev, u32 tmp, tmp2; tmp = RREG32(MC_SEQ_MISC0); - patch = ((tmp & 0x0000f00) == 0x300) ? true : false; + patch = (tmp & 0x0000f00) == 0x300; if (patch && ((rdev->pdev->device == 0x67B0) || @@ -3238,7 +3238,8 @@ static int ci_populate_all_graphic_levels(struct radeon_device *rdev) u32 level_array_size = sizeof(SMU7_Discrete_GraphicsLevel) * SMU7_MAX_LEVELS_GRAPHICS; SMU7_Discrete_GraphicsLevel *levels = pi->smc_state_table.GraphicsLevel; - u32 i, ret; + int ret; + u32 i; memset(levels, 0, level_array_size); @@ -3285,7 +3286,8 @@ static int ci_populate_all_memory_levels(struct radeon_device *rdev) u32 level_array_size = sizeof(SMU7_Discrete_MemoryLevel) * SMU7_MAX_LEVELS_MEMORY; SMU7_Discrete_MemoryLevel *levels = pi->smc_state_table.MemoryLevel; - u32 i, ret; + int ret; + u32 i; memset(levels, 0, level_array_size); @@ -3436,7 +3438,7 @@ static int ci_setup_default_dpm_tables(struct radeon_device *rdev) pi->dpm_table.sclk_table.dpm_levels[pi->dpm_table.sclk_table.count].value = allowed_sclk_vddc_table->entries[i].clk; pi->dpm_table.sclk_table.dpm_levels[pi->dpm_table.sclk_table.count].enabled = - (i == 0) ? true : false; + i == 0; pi->dpm_table.sclk_table.count++; } } @@ -3449,7 +3451,7 @@ static int ci_setup_default_dpm_tables(struct radeon_device *rdev) pi->dpm_table.mclk_table.dpm_levels[pi->dpm_table.mclk_table.count].value = allowed_mclk_table->entries[i].clk; pi->dpm_table.mclk_table.dpm_levels[pi->dpm_table.mclk_table.count].enabled = - (i == 0) ? true : false; + i == 0; pi->dpm_table.mclk_table.count++; } } @@ -4487,7 +4489,7 @@ static int ci_register_patching_mc_seq(struct radeon_device *rdev, bool patch; tmp = RREG32(MC_SEQ_MISC0); - patch = ((tmp & 0x0000f00) == 0x300) ? 
true : false; + patch = (tmp & 0x0000f00) == 0x300; if (patch && ((rdev->pdev->device == 0x67B0) || diff --git a/drivers/gpu/drm/radeon/evergreen_cs.c b/drivers/gpu/drm/radeon/evergreen_cs.c index a46613283393..1162cb5d75ed 100644 --- a/drivers/gpu/drm/radeon/evergreen_cs.c +++ b/drivers/gpu/drm/radeon/evergreen_cs.c @@ -211,7 +211,7 @@ static int evergreen_surface_check_linear_aligned(struct radeon_cs_parser *p, surf->base_align = track->group_size; surf->palign = palign; surf->halign = 1; - if (surf->nbx & (palign - 1)) { + if ((surf->nbx & (palign - 1)) && !(palign == 64 && surf->nbx == 32)) { if (prefix) { dev_warn(p->dev, "%s:%d %s pitch %d invalid must be aligned with %d\n", __func__, __LINE__, prefix, surf->nbx, palign); @@ -951,13 +951,13 @@ static int evergreen_cs_track_check(struct radeon_cs_parser *p) u64 offset = (u64)track->vgt_strmout_bo_offset[i] + (u64)track->vgt_strmout_size[i]; if (offset > radeon_bo_size(track->vgt_strmout_bo[i])) { - DRM_ERROR("streamout %d bo too small: 0x%llx, 0x%lx\n", - i, offset, - radeon_bo_size(track->vgt_strmout_bo[i])); + dev_warn_once(p->dev, "streamout %d bo too small: 0x%llx, 0x%lx\n", + i, offset, + radeon_bo_size(track->vgt_strmout_bo[i])); return -EINVAL; } } else { - dev_warn(p->dev, "No buffer for streamout %d\n", i); + dev_warn_once(p->dev, "No buffer for streamout %d\n", i); return -EINVAL; } } @@ -979,8 +979,8 @@ static int evergreen_cs_track_check(struct radeon_cs_parser *p) (tmp >> (i * 4)) & 0xF) { /* at least one component is enabled */ if (track->cb_color_bo[i] == NULL) { - dev_warn(p->dev, "%s:%d mask 0x%08X | 0x%08X no cb for %d\n", - __func__, __LINE__, track->cb_target_mask, track->cb_shader_mask, i); + dev_warn_once(p->dev, "%s:%d mask 0x%08X | 0x%08X no cb for %d\n", + __func__, __LINE__, track->cb_target_mask, track->cb_shader_mask, i); return -EINVAL; } /* check cb */ @@ -1056,8 +1056,8 @@ static int evergreen_packet0_check(struct radeon_cs_parser *p, case EVERGREEN_VLINE_START_END: r = evergreen_cs_packet_parse_vline(p); if (r) { - DRM_ERROR("No reloc for ib[%d]=0x%04X\n", - idx, reg); + dev_warn_once(p->dev, "No reloc for ib[%d]=0x%04X\n", + idx, reg); return r; } break; @@ -1143,8 +1143,8 @@ static int evergreen_cs_handle_reg(struct radeon_cs_parser *p, u32 reg, u32 idx) case SQ_VSTMP_RING_BASE: r = radeon_cs_packet_next_reloc(p, &reloc, 0); if (r) { - dev_warn(p->dev, "bad SET_CONTEXT_REG " - "0x%04X\n", reg); + dev_warn_once(p->dev, "bad SET_CONTEXT_REG " + "0x%04X\n", reg); return -EINVAL; } ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff); @@ -1155,15 +1155,15 @@ static int evergreen_cs_handle_reg(struct radeon_cs_parser *p, u32 reg, u32 idx) break; case CAYMAN_DB_EQAA: if (p->rdev->family < CHIP_CAYMAN) { - dev_warn(p->dev, "bad SET_CONTEXT_REG " - "0x%04X\n", reg); + dev_warn_once(p->dev, "bad SET_CONTEXT_REG " + "0x%04X\n", reg); return -EINVAL; } break; case CAYMAN_DB_DEPTH_INFO: if (p->rdev->family < CHIP_CAYMAN) { - dev_warn(p->dev, "bad SET_CONTEXT_REG " - "0x%04X\n", reg); + dev_warn_once(p->dev, "bad SET_CONTEXT_REG " + "0x%04X\n", reg); return -EINVAL; } break; @@ -1172,8 +1172,8 @@ static int evergreen_cs_handle_reg(struct radeon_cs_parser *p, u32 reg, u32 idx) if (!(p->cs_flags & RADEON_CS_KEEP_TILING_FLAGS)) { r = radeon_cs_packet_next_reloc(p, &reloc, 0); if (r) { - dev_warn(p->dev, "bad SET_CONTEXT_REG " - "0x%04X\n", reg); + dev_warn_once(p->dev, "bad SET_CONTEXT_REG " + "0x%04X\n", reg); return -EINVAL; } ib[idx] &= ~Z_ARRAY_MODE(0xf); @@ -1214,8 +1214,8 @@ static int 
evergreen_cs_handle_reg(struct radeon_cs_parser *p, u32 reg, u32 idx) case DB_Z_READ_BASE: r = radeon_cs_packet_next_reloc(p, &reloc, 0); if (r) { - dev_warn(p->dev, "bad SET_CONTEXT_REG " - "0x%04X\n", reg); + dev_warn_once(p->dev, "bad SET_CONTEXT_REG " + "0x%04X\n", reg); return -EINVAL; } track->db_z_read_offset = radeon_get_ib_value(p, idx); @@ -1226,8 +1226,8 @@ static int evergreen_cs_handle_reg(struct radeon_cs_parser *p, u32 reg, u32 idx) case DB_Z_WRITE_BASE: r = radeon_cs_packet_next_reloc(p, &reloc, 0); if (r) { - dev_warn(p->dev, "bad SET_CONTEXT_REG " - "0x%04X\n", reg); + dev_warn_once(p->dev, "bad SET_CONTEXT_REG " + "0x%04X\n", reg); return -EINVAL; } track->db_z_write_offset = radeon_get_ib_value(p, idx); @@ -1238,8 +1238,8 @@ static int evergreen_cs_handle_reg(struct radeon_cs_parser *p, u32 reg, u32 idx) case DB_STENCIL_READ_BASE: r = radeon_cs_packet_next_reloc(p, &reloc, 0); if (r) { - dev_warn(p->dev, "bad SET_CONTEXT_REG " - "0x%04X\n", reg); + dev_warn_once(p->dev, "bad SET_CONTEXT_REG " + "0x%04X\n", reg); return -EINVAL; } track->db_s_read_offset = radeon_get_ib_value(p, idx); @@ -1250,8 +1250,8 @@ static int evergreen_cs_handle_reg(struct radeon_cs_parser *p, u32 reg, u32 idx) case DB_STENCIL_WRITE_BASE: r = radeon_cs_packet_next_reloc(p, &reloc, 0); if (r) { - dev_warn(p->dev, "bad SET_CONTEXT_REG " - "0x%04X\n", reg); + dev_warn_once(p->dev, "bad SET_CONTEXT_REG " + "0x%04X\n", reg); return -EINVAL; } track->db_s_write_offset = radeon_get_ib_value(p, idx); @@ -1273,8 +1273,8 @@ static int evergreen_cs_handle_reg(struct radeon_cs_parser *p, u32 reg, u32 idx) case VGT_STRMOUT_BUFFER_BASE_3: r = radeon_cs_packet_next_reloc(p, &reloc, 0); if (r) { - dev_warn(p->dev, "bad SET_CONTEXT_REG " - "0x%04X\n", reg); + dev_warn_once(p->dev, "bad SET_CONTEXT_REG " + "0x%04X\n", reg); return -EINVAL; } tmp = (reg - VGT_STRMOUT_BUFFER_BASE_0) / 16; @@ -1295,8 +1295,8 @@ static int evergreen_cs_handle_reg(struct radeon_cs_parser *p, u32 reg, u32 idx) case CP_COHER_BASE: r = radeon_cs_packet_next_reloc(p, &reloc, 0); if (r) { - dev_warn(p->dev, "missing reloc for CP_COHER_BASE " - "0x%04X\n", reg); + dev_warn_once(p->dev, "missing reloc for CP_COHER_BASE " + "0x%04X\n", reg); return -EINVAL; } ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff); @@ -1311,8 +1311,8 @@ static int evergreen_cs_handle_reg(struct radeon_cs_parser *p, u32 reg, u32 idx) break; case PA_SC_AA_CONFIG: if (p->rdev->family >= CHIP_CAYMAN) { - dev_warn(p->dev, "bad SET_CONTEXT_REG " - "0x%04X\n", reg); + dev_warn_once(p->dev, "bad SET_CONTEXT_REG " + "0x%04X\n", reg); return -EINVAL; } tmp = radeon_get_ib_value(p, idx) & MSAA_NUM_SAMPLES_MASK; @@ -1320,8 +1320,8 @@ static int evergreen_cs_handle_reg(struct radeon_cs_parser *p, u32 reg, u32 idx) break; case CAYMAN_PA_SC_AA_CONFIG: if (p->rdev->family < CHIP_CAYMAN) { - dev_warn(p->dev, "bad SET_CONTEXT_REG " - "0x%04X\n", reg); + dev_warn_once(p->dev, "bad SET_CONTEXT_REG " + "0x%04X\n", reg); return -EINVAL; } tmp = radeon_get_ib_value(p, idx) & CAYMAN_MSAA_NUM_SAMPLES_MASK; @@ -1360,8 +1360,8 @@ static int evergreen_cs_handle_reg(struct radeon_cs_parser *p, u32 reg, u32 idx) if (!(p->cs_flags & RADEON_CS_KEEP_TILING_FLAGS)) { r = radeon_cs_packet_next_reloc(p, &reloc, 0); if (r) { - dev_warn(p->dev, "bad SET_CONTEXT_REG " - "0x%04X\n", reg); + dev_warn_once(p->dev, "bad SET_CONTEXT_REG " + "0x%04X\n", reg); return -EINVAL; } ib[idx] |= CB_ARRAY_MODE(evergreen_cs_get_aray_mode(reloc->tiling_flags)); @@ -1378,8 +1378,8 @@ static int 
evergreen_cs_handle_reg(struct radeon_cs_parser *p, u32 reg, u32 idx) if (!(p->cs_flags & RADEON_CS_KEEP_TILING_FLAGS)) { r = radeon_cs_packet_next_reloc(p, &reloc, 0); if (r) { - dev_warn(p->dev, "bad SET_CONTEXT_REG " - "0x%04X\n", reg); + dev_warn_once(p->dev, "bad SET_CONTEXT_REG " + "0x%04X\n", reg); return -EINVAL; } ib[idx] |= CB_ARRAY_MODE(evergreen_cs_get_aray_mode(reloc->tiling_flags)); @@ -1439,8 +1439,8 @@ static int evergreen_cs_handle_reg(struct radeon_cs_parser *p, u32 reg, u32 idx) case CB_COLOR7_ATTRIB: r = radeon_cs_packet_next_reloc(p, &reloc, 0); if (r) { - dev_warn(p->dev, "bad SET_CONTEXT_REG " - "0x%04X\n", reg); + dev_warn_once(p->dev, "bad SET_CONTEXT_REG " + "0x%04X\n", reg); return -EINVAL; } if (!(p->cs_flags & RADEON_CS_KEEP_TILING_FLAGS)) { @@ -1467,8 +1467,8 @@ static int evergreen_cs_handle_reg(struct radeon_cs_parser *p, u32 reg, u32 idx) case CB_COLOR11_ATTRIB: r = radeon_cs_packet_next_reloc(p, &reloc, 0); if (r) { - dev_warn(p->dev, "bad SET_CONTEXT_REG " - "0x%04X\n", reg); + dev_warn_once(p->dev, "bad SET_CONTEXT_REG " + "0x%04X\n", reg); return -EINVAL; } if (!(p->cs_flags & RADEON_CS_KEEP_TILING_FLAGS)) { @@ -1555,8 +1555,8 @@ static int evergreen_cs_handle_reg(struct radeon_cs_parser *p, u32 reg, u32 idx) case CB_COLOR7_BASE: r = radeon_cs_packet_next_reloc(p, &reloc, 0); if (r) { - dev_warn(p->dev, "bad SET_CONTEXT_REG " - "0x%04X\n", reg); + dev_warn_once(p->dev, "bad SET_CONTEXT_REG " + "0x%04X\n", reg); return -EINVAL; } tmp = (reg - CB_COLOR0_BASE) / 0x3c; @@ -1571,8 +1571,8 @@ static int evergreen_cs_handle_reg(struct radeon_cs_parser *p, u32 reg, u32 idx) case CB_COLOR11_BASE: r = radeon_cs_packet_next_reloc(p, &reloc, 0); if (r) { - dev_warn(p->dev, "bad SET_CONTEXT_REG " - "0x%04X\n", reg); + dev_warn_once(p->dev, "bad SET_CONTEXT_REG " + "0x%04X\n", reg); return -EINVAL; } tmp = ((reg - CB_COLOR8_BASE) / 0x1c) + 8; @@ -1584,8 +1584,8 @@ static int evergreen_cs_handle_reg(struct radeon_cs_parser *p, u32 reg, u32 idx) case DB_HTILE_DATA_BASE: r = radeon_cs_packet_next_reloc(p, &reloc, 0); if (r) { - dev_warn(p->dev, "bad SET_CONTEXT_REG " - "0x%04X\n", reg); + dev_warn_once(p->dev, "bad SET_CONTEXT_REG " + "0x%04X\n", reg); return -EINVAL; } track->htile_offset = radeon_get_ib_value(p, idx); @@ -1702,36 +1702,36 @@ static int evergreen_cs_handle_reg(struct radeon_cs_parser *p, u32 reg, u32 idx) case SQ_ALU_CONST_CACHE_LS_15: r = radeon_cs_packet_next_reloc(p, &reloc, 0); if (r) { - dev_warn(p->dev, "bad SET_CONTEXT_REG " - "0x%04X\n", reg); + dev_warn_once(p->dev, "bad SET_CONTEXT_REG " + "0x%04X\n", reg); return -EINVAL; } ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff); break; case SX_MEMORY_EXPORT_BASE: if (p->rdev->family >= CHIP_CAYMAN) { - dev_warn(p->dev, "bad SET_CONFIG_REG " - "0x%04X\n", reg); + dev_warn_once(p->dev, "bad SET_CONFIG_REG " + "0x%04X\n", reg); return -EINVAL; } r = radeon_cs_packet_next_reloc(p, &reloc, 0); if (r) { - dev_warn(p->dev, "bad SET_CONFIG_REG " - "0x%04X\n", reg); + dev_warn_once(p->dev, "bad SET_CONFIG_REG " + "0x%04X\n", reg); return -EINVAL; } ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff); break; case CAYMAN_SX_SCATTER_EXPORT_BASE: if (p->rdev->family < CHIP_CAYMAN) { - dev_warn(p->dev, "bad SET_CONTEXT_REG " - "0x%04X\n", reg); + dev_warn_once(p->dev, "bad SET_CONTEXT_REG " + "0x%04X\n", reg); return -EINVAL; } r = radeon_cs_packet_next_reloc(p, &reloc, 0); if (r) { - dev_warn(p->dev, "bad SET_CONTEXT_REG " - "0x%04X\n", reg); + dev_warn_once(p->dev, "bad SET_CONTEXT_REG " + 
"0x%04X\n", reg); return -EINVAL; } ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff); @@ -1740,7 +1740,7 @@ static int evergreen_cs_handle_reg(struct radeon_cs_parser *p, u32 reg, u32 idx) track->sx_misc_kill_all_prims = (radeon_get_ib_value(p, idx) & 0x1) != 0; break; default: - dev_warn(p->dev, "forbidden register 0x%08x at %d\n", reg, idx); + dev_warn_once(p->dev, "forbidden register 0x%08x at %d\n", reg, idx); return -EINVAL; } return 0; @@ -1795,7 +1795,7 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p, uint64_t offset; if (pkt->count != 1) { - DRM_ERROR("bad SET PREDICATION\n"); + dev_warn_once(p->dev, "bad SET PREDICATION\n"); return -EINVAL; } @@ -1807,13 +1807,13 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p, return 0; if (pred_op > 2) { - DRM_ERROR("bad SET PREDICATION operation %d\n", pred_op); + dev_warn_once(p->dev, "bad SET PREDICATION operation %d\n", pred_op); return -EINVAL; } r = radeon_cs_packet_next_reloc(p, &reloc, 0); if (r) { - DRM_ERROR("bad SET PREDICATION\n"); + dev_warn_once(p->dev, "bad SET PREDICATION\n"); return -EINVAL; } @@ -1827,7 +1827,7 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p, break; case PACKET3_CONTEXT_CONTROL: if (pkt->count != 1) { - DRM_ERROR("bad CONTEXT_CONTROL\n"); + dev_warn_once(p->dev, "bad CONTEXT_CONTROL\n"); return -EINVAL; } break; @@ -1835,17 +1835,17 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p, case PACKET3_NUM_INSTANCES: case PACKET3_CLEAR_STATE: if (pkt->count) { - DRM_ERROR("bad INDEX_TYPE/NUM_INSTANCES/CLEAR_STATE\n"); + dev_warn_once(p->dev, "bad INDEX_TYPE/NUM_INSTANCES/CLEAR_STATE\n"); return -EINVAL; } break; case CAYMAN_PACKET3_DEALLOC_STATE: if (p->rdev->family < CHIP_CAYMAN) { - DRM_ERROR("bad PACKET3_DEALLOC_STATE\n"); + dev_warn_once(p->dev, "bad PACKET3_DEALLOC_STATE\n"); return -EINVAL; } if (pkt->count) { - DRM_ERROR("bad INDEX_TYPE/NUM_INSTANCES/CLEAR_STATE\n"); + dev_warn_once(p->dev, "bad INDEX_TYPE/NUM_INSTANCES/CLEAR_STATE\n"); return -EINVAL; } break; @@ -1854,12 +1854,12 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p, uint64_t offset; if (pkt->count != 1) { - DRM_ERROR("bad INDEX_BASE\n"); + dev_warn_once(p->dev, "bad INDEX_BASE\n"); return -EINVAL; } r = radeon_cs_packet_next_reloc(p, &reloc, 0); if (r) { - DRM_ERROR("bad INDEX_BASE\n"); + dev_warn_once(p->dev, "bad INDEX_BASE\n"); return -EINVAL; } @@ -1872,7 +1872,7 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p, r = evergreen_cs_track_check(p); if (r) { - dev_warn(p->dev, "%s:%d invalid cmd stream\n", __func__, __LINE__); + dev_warn_once(p->dev, "%s:%d invalid cmd stream\n", __func__, __LINE__); return r; } break; @@ -1880,7 +1880,7 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p, case PACKET3_INDEX_BUFFER_SIZE: { if (pkt->count != 0) { - DRM_ERROR("bad INDEX_BUFFER_SIZE\n"); + dev_warn_once(p->dev, "bad INDEX_BUFFER_SIZE\n"); return -EINVAL; } break; @@ -1889,12 +1889,12 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p, { uint64_t offset; if (pkt->count != 3) { - DRM_ERROR("bad DRAW_INDEX\n"); + dev_warn_once(p->dev, "bad DRAW_INDEX\n"); return -EINVAL; } r = radeon_cs_packet_next_reloc(p, &reloc, 0); if (r) { - DRM_ERROR("bad DRAW_INDEX\n"); + dev_warn_once(p->dev, "bad DRAW_INDEX\n"); return -EINVAL; } @@ -1907,7 +1907,7 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p, r = evergreen_cs_track_check(p); if (r) { - dev_warn(p->dev, "%s:%d invalid cmd stream\n", __func__, __LINE__); + 
dev_warn_once(p->dev, "%s:%d invalid cmd stream\n", __func__, __LINE__); return r; } break; @@ -1917,12 +1917,12 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p, uint64_t offset; if (pkt->count != 4) { - DRM_ERROR("bad DRAW_INDEX_2\n"); + dev_warn_once(p->dev, "bad DRAW_INDEX_2\n"); return -EINVAL; } r = radeon_cs_packet_next_reloc(p, &reloc, 0); if (r) { - DRM_ERROR("bad DRAW_INDEX_2\n"); + dev_warn_once(p->dev, "bad DRAW_INDEX_2\n"); return -EINVAL; } @@ -1935,63 +1935,63 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p, r = evergreen_cs_track_check(p); if (r) { - dev_warn(p->dev, "%s:%d invalid cmd stream\n", __func__, __LINE__); + dev_warn_once(p->dev, "%s:%d invalid cmd stream\n", __func__, __LINE__); return r; } break; } case PACKET3_DRAW_INDEX_AUTO: if (pkt->count != 1) { - DRM_ERROR("bad DRAW_INDEX_AUTO\n"); + dev_warn_once(p->dev, "bad DRAW_INDEX_AUTO\n"); return -EINVAL; } r = evergreen_cs_track_check(p); if (r) { - dev_warn(p->dev, "%s:%d invalid cmd stream %d\n", __func__, __LINE__, idx); + dev_warn_once(p->dev, "%s:%d invalid cmd stream %d\n", __func__, __LINE__, idx); return r; } break; case PACKET3_DRAW_INDEX_MULTI_AUTO: if (pkt->count != 2) { - DRM_ERROR("bad DRAW_INDEX_MULTI_AUTO\n"); + dev_warn_once(p->dev, "bad DRAW_INDEX_MULTI_AUTO\n"); return -EINVAL; } r = evergreen_cs_track_check(p); if (r) { - dev_warn(p->dev, "%s:%d invalid cmd stream %d\n", __func__, __LINE__, idx); + dev_warn_once(p->dev, "%s:%d invalid cmd stream %d\n", __func__, __LINE__, idx); return r; } break; case PACKET3_DRAW_INDEX_IMMD: if (pkt->count < 2) { - DRM_ERROR("bad DRAW_INDEX_IMMD\n"); + dev_warn_once(p->dev, "bad DRAW_INDEX_IMMD\n"); return -EINVAL; } r = evergreen_cs_track_check(p); if (r) { - dev_warn(p->dev, "%s:%d invalid cmd stream\n", __func__, __LINE__); + dev_warn_once(p->dev, "%s:%d invalid cmd stream\n", __func__, __LINE__); return r; } break; case PACKET3_DRAW_INDEX_OFFSET: if (pkt->count != 2) { - DRM_ERROR("bad DRAW_INDEX_OFFSET\n"); + dev_warn_once(p->dev, "bad DRAW_INDEX_OFFSET\n"); return -EINVAL; } r = evergreen_cs_track_check(p); if (r) { - dev_warn(p->dev, "%s:%d invalid cmd stream\n", __func__, __LINE__); + dev_warn_once(p->dev, "%s:%d invalid cmd stream\n", __func__, __LINE__); return r; } break; case PACKET3_DRAW_INDEX_OFFSET_2: if (pkt->count != 3) { - DRM_ERROR("bad DRAW_INDEX_OFFSET_2\n"); + dev_warn_once(p->dev, "bad DRAW_INDEX_OFFSET_2\n"); return -EINVAL; } r = evergreen_cs_track_check(p); if (r) { - dev_warn(p->dev, "%s:%d invalid cmd stream\n", __func__, __LINE__); + dev_warn_once(p->dev, "%s:%d invalid cmd stream\n", __func__, __LINE__); return r; } break; @@ -2005,19 +2005,19 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p, 4 ADDRESS_HI Bits [31:8] - Reserved. Bits [7:0] - Upper bits of Address [47:32] */ if (pkt->count != 2) { - DRM_ERROR("bad SET_BASE\n"); + dev_warn_once(p->dev, "bad SET_BASE\n"); return -EINVAL; } /* currently only supporting setting indirect draw buffer base address */ if (idx_value != 1) { - DRM_ERROR("bad SET_BASE\n"); + dev_warn_once(p->dev, "bad SET_BASE\n"); return -EINVAL; } r = radeon_cs_packet_next_reloc(p, &reloc, 0); if (r) { - DRM_ERROR("bad SET_BASE\n"); + dev_warn_once(p->dev, "bad SET_BASE\n"); return -EINVAL; } @@ -2039,54 +2039,54 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p, 3 DRAW_INITIATOR Draw Initiator Register. 
Written to the VGT_DRAW_INITIATOR register for the assigned context */ if (pkt->count != 1) { - DRM_ERROR("bad DRAW_INDIRECT\n"); + dev_warn_once(p->dev, "bad DRAW_INDIRECT\n"); return -EINVAL; } if (idx_value + size > track->indirect_draw_buffer_size) { - dev_warn(p->dev, "DRAW_INDIRECT buffer too small %u + %llu > %lu\n", - idx_value, size, track->indirect_draw_buffer_size); + dev_warn_once(p->dev, "DRAW_INDIRECT buffer too small %u + %llu > %lu\n", + idx_value, size, track->indirect_draw_buffer_size); return -EINVAL; } r = evergreen_cs_track_check(p); if (r) { - dev_warn(p->dev, "%s:%d invalid cmd stream\n", __func__, __LINE__); + dev_warn_once(p->dev, "%s:%d invalid cmd stream\n", __func__, __LINE__); return r; } break; } case PACKET3_DISPATCH_DIRECT: if (pkt->count != 3) { - DRM_ERROR("bad DISPATCH_DIRECT\n"); + dev_warn_once(p->dev, "bad DISPATCH_DIRECT\n"); return -EINVAL; } r = evergreen_cs_track_check(p); if (r) { - dev_warn(p->dev, "%s:%d invalid cmd stream %d\n", __func__, __LINE__, idx); + dev_warn_once(p->dev, "%s:%d invalid cmd stream %d\n", __func__, __LINE__, idx); return r; } break; case PACKET3_DISPATCH_INDIRECT: if (pkt->count != 1) { - DRM_ERROR("bad DISPATCH_INDIRECT\n"); + dev_warn_once(p->dev, "bad DISPATCH_INDIRECT\n"); return -EINVAL; } r = radeon_cs_packet_next_reloc(p, &reloc, 0); if (r) { - DRM_ERROR("bad DISPATCH_INDIRECT\n"); + dev_warn_once(p->dev, "bad DISPATCH_INDIRECT\n"); return -EINVAL; } ib[idx+0] = idx_value + (u32)(reloc->gpu_offset & 0xffffffff); r = evergreen_cs_track_check(p); if (r) { - dev_warn(p->dev, "%s:%d invalid cmd stream\n", __func__, __LINE__); + dev_warn_once(p->dev, "%s:%d invalid cmd stream\n", __func__, __LINE__); return r; } break; case PACKET3_WAIT_REG_MEM: if (pkt->count != 5) { - DRM_ERROR("bad WAIT_REG_MEM\n"); + dev_warn_once(p->dev, "bad WAIT_REG_MEM\n"); return -EINVAL; } /* bit 4 is reg (0) or mem (1) */ @@ -2095,7 +2095,7 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p, r = radeon_cs_packet_next_reloc(p, &reloc, 0); if (r) { - DRM_ERROR("bad WAIT_REG_MEM\n"); + dev_warn_once(p->dev, "bad WAIT_REG_MEM\n"); return -EINVAL; } @@ -2106,7 +2106,7 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p, ib[idx+1] = (ib[idx+1] & 0x3) | (offset & 0xfffffffc); ib[idx+2] = upper_32_bits(offset) & 0xff; } else if (idx_value & 0x100) { - DRM_ERROR("cannot use PFP on REG wait\n"); + dev_warn_once(p->dev, "cannot use PFP on REG wait\n"); return -EINVAL; } break; @@ -2115,7 +2115,7 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p, u32 command, size, info; u64 offset, tmp; if (pkt->count != 4) { - DRM_ERROR("bad CP DMA\n"); + dev_warn_once(p->dev, "bad CP DMA\n"); return -EINVAL; } command = radeon_get_ib_value(p, idx+4); @@ -2129,7 +2129,7 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p, (command & PACKET3_CP_DMA_CMD_SAS))) { /* src = register */ /* non mem to mem copies requires dw aligned count */ if (size % 4) { - DRM_ERROR("CP DMA command requires dw count alignment\n"); + dev_warn_once(p->dev, "CP DMA command requires dw count alignment\n"); return -EINVAL; } } @@ -2137,19 +2137,19 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p, /* src address space is register */ /* GDS is ok */ if (((info & 0x60000000) >> 29) != 1) { - DRM_ERROR("CP DMA SAS not supported\n"); + dev_warn_once(p->dev, "CP DMA SAS not supported\n"); return -EINVAL; } } else { if (command & PACKET3_CP_DMA_CMD_SAIC) { - DRM_ERROR("CP DMA SAIC only supported for registers\n"); + 
dev_warn_once(p->dev, "CP DMA SAIC only supported for registers\n"); return -EINVAL; } /* src address space is memory */ if (((info & 0x60000000) >> 29) == 0) { r = radeon_cs_packet_next_reloc(p, &reloc, 0); if (r) { - DRM_ERROR("bad CP DMA SRC\n"); + dev_warn_once(p->dev, "bad CP DMA SRC\n"); return -EINVAL; } @@ -2159,15 +2159,15 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p, offset = reloc->gpu_offset + tmp; if ((tmp + size) > radeon_bo_size(reloc->robj)) { - dev_warn(p->dev, "CP DMA src buffer too small (%llu %lu)\n", - tmp + size, radeon_bo_size(reloc->robj)); + dev_warn_once(p->dev, "CP DMA src buffer too small (%llu %lu)\n", + tmp + size, radeon_bo_size(reloc->robj)); return -EINVAL; } ib[idx] = offset; ib[idx+1] = (ib[idx+1] & 0xffffff00) | (upper_32_bits(offset) & 0xff); } else if (((info & 0x60000000) >> 29) != 2) { - DRM_ERROR("bad CP DMA SRC_SEL\n"); + dev_warn_once(p->dev, "bad CP DMA SRC_SEL\n"); return -EINVAL; } } @@ -2175,19 +2175,19 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p, /* dst address space is register */ /* GDS is ok */ if (((info & 0x00300000) >> 20) != 1) { - DRM_ERROR("CP DMA DAS not supported\n"); + dev_warn_once(p->dev, "CP DMA DAS not supported\n"); return -EINVAL; } } else { /* dst address space is memory */ if (command & PACKET3_CP_DMA_CMD_DAIC) { - DRM_ERROR("CP DMA DAIC only supported for registers\n"); + dev_warn_once(p->dev, "CP DMA DAIC only supported for registers\n"); return -EINVAL; } if (((info & 0x00300000) >> 20) == 0) { r = radeon_cs_packet_next_reloc(p, &reloc, 0); if (r) { - DRM_ERROR("bad CP DMA DST\n"); + dev_warn_once(p->dev, "bad CP DMA DST\n"); return -EINVAL; } @@ -2197,15 +2197,15 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p, offset = reloc->gpu_offset + tmp; if ((tmp + size) > radeon_bo_size(reloc->robj)) { - dev_warn(p->dev, "CP DMA dst buffer too small (%llu %lu)\n", - tmp + size, radeon_bo_size(reloc->robj)); + dev_warn_once(p->dev, "CP DMA dst buffer too small (%llu %lu)\n", + tmp + size, radeon_bo_size(reloc->robj)); return -EINVAL; } ib[idx+2] = offset; ib[idx+3] = upper_32_bits(offset) & 0xff; } else { - DRM_ERROR("bad CP DMA DST_SEL\n"); + dev_warn_once(p->dev, "bad CP DMA DST_SEL\n"); return -EINVAL; } } @@ -2213,13 +2213,13 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p, } case PACKET3_PFP_SYNC_ME: if (pkt->count) { - DRM_ERROR("bad PFP_SYNC_ME\n"); + dev_warn_once(p->dev, "bad PFP_SYNC_ME\n"); return -EINVAL; } break; case PACKET3_SURFACE_SYNC: if (pkt->count != 3) { - DRM_ERROR("bad SURFACE_SYNC\n"); + dev_warn_once(p->dev, "bad SURFACE_SYNC\n"); return -EINVAL; } /* 0xffffffff/0x0 is flush all cache flag */ @@ -2227,7 +2227,7 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p, radeon_get_ib_value(p, idx + 2) != 0) { r = radeon_cs_packet_next_reloc(p, &reloc, 0); if (r) { - DRM_ERROR("bad SURFACE_SYNC\n"); + dev_warn_once(p->dev, "bad SURFACE_SYNC\n"); return -EINVAL; } ib[idx+2] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff); @@ -2235,7 +2235,7 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p, break; case PACKET3_EVENT_WRITE: if (pkt->count != 2 && pkt->count != 0) { - DRM_ERROR("bad EVENT_WRITE\n"); + dev_warn_once(p->dev, "bad EVENT_WRITE\n"); return -EINVAL; } if (pkt->count) { @@ -2243,7 +2243,7 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p, r = radeon_cs_packet_next_reloc(p, &reloc, 0); if (r) { - DRM_ERROR("bad EVENT_WRITE\n"); + dev_warn_once(p->dev, "bad EVENT_WRITE\n"); return -EINVAL; } 
offset = reloc->gpu_offset + @@ -2259,12 +2259,12 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p, uint64_t offset; if (pkt->count != 4) { - DRM_ERROR("bad EVENT_WRITE_EOP\n"); + dev_warn_once(p->dev, "bad EVENT_WRITE_EOP\n"); return -EINVAL; } r = radeon_cs_packet_next_reloc(p, &reloc, 0); if (r) { - DRM_ERROR("bad EVENT_WRITE_EOP\n"); + dev_warn_once(p->dev, "bad EVENT_WRITE_EOP\n"); return -EINVAL; } @@ -2281,12 +2281,12 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p, uint64_t offset; if (pkt->count != 3) { - DRM_ERROR("bad EVENT_WRITE_EOS\n"); + dev_warn_once(p->dev, "bad EVENT_WRITE_EOS\n"); return -EINVAL; } r = radeon_cs_packet_next_reloc(p, &reloc, 0); if (r) { - DRM_ERROR("bad EVENT_WRITE_EOS\n"); + dev_warn_once(p->dev, "bad EVENT_WRITE_EOS\n"); return -EINVAL; } @@ -2304,7 +2304,7 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p, if ((start_reg < PACKET3_SET_CONFIG_REG_START) || (start_reg >= PACKET3_SET_CONFIG_REG_END) || (end_reg >= PACKET3_SET_CONFIG_REG_END)) { - DRM_ERROR("bad PACKET3_SET_CONFIG_REG\n"); + dev_warn_once(p->dev, "bad PACKET3_SET_CONFIG_REG\n"); return -EINVAL; } for (reg = start_reg, idx++; reg <= end_reg; reg += 4, idx++) { @@ -2321,7 +2321,7 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p, if ((start_reg < PACKET3_SET_CONTEXT_REG_START) || (start_reg >= PACKET3_SET_CONTEXT_REG_END) || (end_reg >= PACKET3_SET_CONTEXT_REG_END)) { - DRM_ERROR("bad PACKET3_SET_CONTEXT_REG\n"); + dev_warn_once(p->dev, "bad PACKET3_SET_CONTEXT_REG\n"); return -EINVAL; } for (reg = start_reg, idx++; reg <= end_reg; reg += 4, idx++) { @@ -2334,7 +2334,7 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p, break; case PACKET3_SET_RESOURCE: if (pkt->count % 8) { - DRM_ERROR("bad SET_RESOURCE\n"); + dev_warn_once(p->dev, "bad SET_RESOURCE\n"); return -EINVAL; } start_reg = (idx_value << 2) + PACKET3_SET_RESOURCE_START; @@ -2342,7 +2342,7 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p, if ((start_reg < PACKET3_SET_RESOURCE_START) || (start_reg >= PACKET3_SET_RESOURCE_END) || (end_reg >= PACKET3_SET_RESOURCE_END)) { - DRM_ERROR("bad SET_RESOURCE\n"); + dev_warn_once(p->dev, "bad SET_RESOURCE\n"); return -EINVAL; } for (i = 0; i < (pkt->count / 8); i++) { @@ -2355,7 +2355,7 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p, /* tex base */ r = radeon_cs_packet_next_reloc(p, &reloc, 0); if (r) { - DRM_ERROR("bad SET_RESOURCE (tex)\n"); + dev_warn_once(p->dev, "bad SET_RESOURCE (tex)\n"); return -EINVAL; } if (!(p->cs_flags & RADEON_CS_KEEP_TILING_FLAGS)) { @@ -2392,7 +2392,7 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p, } else { r = radeon_cs_packet_next_reloc(p, &reloc, 0); if (r) { - DRM_ERROR("bad SET_RESOURCE (tex)\n"); + dev_warn_once(p->dev, "bad SET_RESOURCE (tex)\n"); return -EINVAL; } moffset = (u32)((reloc->gpu_offset >> 8) & 0xffffffff); @@ -2411,14 +2411,15 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p, /* vtx base */ r = radeon_cs_packet_next_reloc(p, &reloc, 0); if (r) { - DRM_ERROR("bad SET_RESOURCE (vtx)\n"); + dev_warn_once(p->dev, "bad SET_RESOURCE (vtx)\n"); return -EINVAL; } offset = radeon_get_ib_value(p, idx+1+(i*8)+0); size = radeon_get_ib_value(p, idx+1+(i*8)+1); if (p->rdev && (size + offset) > radeon_bo_size(reloc->robj)) { /* force size to size of the buffer */ - dev_warn_ratelimited(p->dev, "vbo resource seems too big for the bo\n"); + dev_warn_once(p->dev, "vbo resource seems too big (%d) for the bo (%ld)\n", 
+ size + offset, radeon_bo_size(reloc->robj)); ib[idx+1+(i*8)+1] = radeon_bo_size(reloc->robj) - offset; } @@ -2431,7 +2432,7 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p, case SQ_TEX_VTX_INVALID_TEXTURE: case SQ_TEX_VTX_INVALID_BUFFER: default: - DRM_ERROR("bad SET_RESOURCE\n"); + dev_warn_once(p->dev, "bad SET_RESOURCE\n"); return -EINVAL; } } @@ -2445,7 +2446,7 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p, if ((start_reg < PACKET3_SET_BOOL_CONST_START) || (start_reg >= PACKET3_SET_BOOL_CONST_END) || (end_reg >= PACKET3_SET_BOOL_CONST_END)) { - DRM_ERROR("bad SET_BOOL_CONST\n"); + dev_warn_once(p->dev, "bad SET_BOOL_CONST\n"); return -EINVAL; } break; @@ -2455,7 +2456,7 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p, if ((start_reg < PACKET3_SET_LOOP_CONST_START) || (start_reg >= PACKET3_SET_LOOP_CONST_END) || (end_reg >= PACKET3_SET_LOOP_CONST_END)) { - DRM_ERROR("bad SET_LOOP_CONST\n"); + dev_warn_once(p->dev, "bad SET_LOOP_CONST\n"); return -EINVAL; } break; @@ -2465,13 +2466,13 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p, if ((start_reg < PACKET3_SET_CTL_CONST_START) || (start_reg >= PACKET3_SET_CTL_CONST_END) || (end_reg >= PACKET3_SET_CTL_CONST_END)) { - DRM_ERROR("bad SET_CTL_CONST\n"); + dev_warn_once(p->dev, "bad SET_CTL_CONST\n"); return -EINVAL; } break; case PACKET3_SET_SAMPLER: if (pkt->count % 3) { - DRM_ERROR("bad SET_SAMPLER\n"); + dev_warn_once(p->dev, "bad SET_SAMPLER\n"); return -EINVAL; } start_reg = (idx_value << 2) + PACKET3_SET_SAMPLER_START; @@ -2479,13 +2480,13 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p, if ((start_reg < PACKET3_SET_SAMPLER_START) || (start_reg >= PACKET3_SET_SAMPLER_END) || (end_reg >= PACKET3_SET_SAMPLER_END)) { - DRM_ERROR("bad SET_SAMPLER\n"); + dev_warn_once(p->dev, "bad SET_SAMPLER\n"); return -EINVAL; } break; case PACKET3_STRMOUT_BUFFER_UPDATE: if (pkt->count != 4) { - DRM_ERROR("bad STRMOUT_BUFFER_UPDATE (invalid count)\n"); + dev_warn_once(p->dev, "bad STRMOUT_BUFFER_UPDATE (invalid count)\n"); return -EINVAL; } /* Updating memory at DST_ADDRESS. 
*/ @@ -2493,14 +2494,14 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p, u64 offset; r = radeon_cs_packet_next_reloc(p, &reloc, 0); if (r) { - DRM_ERROR("bad STRMOUT_BUFFER_UPDATE (missing dst reloc)\n"); + dev_warn_once(p->dev, "bad STRMOUT_BUFFER_UPDATE (missing dst reloc)\n"); return -EINVAL; } offset = radeon_get_ib_value(p, idx+1); offset += ((u64)(radeon_get_ib_value(p, idx+2) & 0xff)) << 32; if ((offset + 4) > radeon_bo_size(reloc->robj)) { - DRM_ERROR("bad STRMOUT_BUFFER_UPDATE dst bo too small: 0x%llx, 0x%lx\n", - offset + 4, radeon_bo_size(reloc->robj)); + dev_warn_once(p->dev, "bad STRMOUT_BUFFER_UPDATE dst bo too small: 0x%llx, 0x%lx\n", + offset + 4, radeon_bo_size(reloc->robj)); return -EINVAL; } offset += reloc->gpu_offset; @@ -2512,14 +2513,14 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p, u64 offset; r = radeon_cs_packet_next_reloc(p, &reloc, 0); if (r) { - DRM_ERROR("bad STRMOUT_BUFFER_UPDATE (missing src reloc)\n"); + dev_warn_once(p->dev, "bad STRMOUT_BUFFER_UPDATE (missing src reloc)\n"); return -EINVAL; } offset = radeon_get_ib_value(p, idx+3); offset += ((u64)(radeon_get_ib_value(p, idx+4) & 0xff)) << 32; if ((offset + 4) > radeon_bo_size(reloc->robj)) { - DRM_ERROR("bad STRMOUT_BUFFER_UPDATE src bo too small: 0x%llx, 0x%lx\n", - offset + 4, radeon_bo_size(reloc->robj)); + dev_warn_once(p->dev, "bad STRMOUT_BUFFER_UPDATE src bo too small: 0x%llx, 0x%lx\n", + offset + 4, radeon_bo_size(reloc->robj)); return -EINVAL; } offset += reloc->gpu_offset; @@ -2532,23 +2533,23 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p, u64 offset; if (pkt->count != 3) { - DRM_ERROR("bad MEM_WRITE (invalid count)\n"); + dev_warn_once(p->dev, "bad MEM_WRITE (invalid count)\n"); return -EINVAL; } r = radeon_cs_packet_next_reloc(p, &reloc, 0); if (r) { - DRM_ERROR("bad MEM_WRITE (missing reloc)\n"); + dev_warn_once(p->dev, "bad MEM_WRITE (missing reloc)\n"); return -EINVAL; } offset = radeon_get_ib_value(p, idx+0); offset += ((u64)(radeon_get_ib_value(p, idx+1) & 0xff)) << 32UL; if (offset & 0x7) { - DRM_ERROR("bad MEM_WRITE (address not qwords aligned)\n"); + dev_warn_once(p->dev, "bad MEM_WRITE (address not qwords aligned)\n"); return -EINVAL; } if ((offset + 8) > radeon_bo_size(reloc->robj)) { - DRM_ERROR("bad MEM_WRITE bo too small: 0x%llx, 0x%lx\n", - offset + 8, radeon_bo_size(reloc->robj)); + dev_warn_once(p->dev, "bad MEM_WRITE bo too small: 0x%llx, 0x%lx\n", + offset + 8, radeon_bo_size(reloc->robj)); return -EINVAL; } offset += reloc->gpu_offset; @@ -2558,7 +2559,7 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p, } case PACKET3_COPY_DW: if (pkt->count != 4) { - DRM_ERROR("bad COPY_DW (invalid count)\n"); + dev_warn_once(p->dev, "bad COPY_DW (invalid count)\n"); return -EINVAL; } if (idx_value & 0x1) { @@ -2566,14 +2567,14 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p, /* SRC is memory. 
*/ r = radeon_cs_packet_next_reloc(p, &reloc, 0); if (r) { - DRM_ERROR("bad COPY_DW (missing src reloc)\n"); + dev_warn_once(p->dev, "bad COPY_DW (missing src reloc)\n"); return -EINVAL; } offset = radeon_get_ib_value(p, idx+1); offset += ((u64)(radeon_get_ib_value(p, idx+2) & 0xff)) << 32; if ((offset + 4) > radeon_bo_size(reloc->robj)) { - DRM_ERROR("bad COPY_DW src bo too small: 0x%llx, 0x%lx\n", - offset + 4, radeon_bo_size(reloc->robj)); + dev_warn_once(p->dev, "bad COPY_DW src bo too small: 0x%llx, 0x%lx\n", + offset + 4, radeon_bo_size(reloc->robj)); return -EINVAL; } offset += reloc->gpu_offset; @@ -2583,8 +2584,8 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p, /* SRC is a reg. */ reg = radeon_get_ib_value(p, idx+1) << 2; if (!evergreen_is_safe_reg(p, reg)) { - dev_warn(p->dev, "forbidden register 0x%08x at %d\n", - reg, idx + 1); + dev_warn_once(p->dev, "forbidden register 0x%08x at %d\n", + reg, idx + 1); return -EINVAL; } } @@ -2593,14 +2594,14 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p, /* DST is memory. */ r = radeon_cs_packet_next_reloc(p, &reloc, 0); if (r) { - DRM_ERROR("bad COPY_DW (missing dst reloc)\n"); + dev_warn_once(p->dev, "bad COPY_DW (missing dst reloc)\n"); return -EINVAL; } offset = radeon_get_ib_value(p, idx+3); offset += ((u64)(radeon_get_ib_value(p, idx+4) & 0xff)) << 32; if ((offset + 4) > radeon_bo_size(reloc->robj)) { - DRM_ERROR("bad COPY_DW dst bo too small: 0x%llx, 0x%lx\n", - offset + 4, radeon_bo_size(reloc->robj)); + dev_warn_once(p->dev, "bad COPY_DW dst bo too small: 0x%llx, 0x%lx\n", + offset + 4, radeon_bo_size(reloc->robj)); return -EINVAL; } offset += reloc->gpu_offset; @@ -2610,8 +2611,8 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p, /* DST is a reg. 
*/ reg = radeon_get_ib_value(p, idx+3) << 2; if (!evergreen_is_safe_reg(p, reg)) { - dev_warn(p->dev, "forbidden register 0x%08x at %d\n", - reg, idx + 3); + dev_warn_once(p->dev, "forbidden register 0x%08x at %d\n", + reg, idx + 3); return -EINVAL; } } @@ -2622,7 +2623,7 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p, uint32_t allowed_reg_base; uint32_t source_sel; if (pkt->count != 2) { - DRM_ERROR("bad SET_APPEND_CNT (invalid count)\n"); + dev_warn_once(p->dev, "bad SET_APPEND_CNT (invalid count)\n"); return -EINVAL; } @@ -2632,8 +2633,8 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p, areg = idx_value >> 16; if (areg < allowed_reg_base || areg > (allowed_reg_base + 11)) { - dev_warn(p->dev, "forbidden register for append cnt 0x%08x at %d\n", - areg, idx); + dev_warn_once(p->dev, "forbidden register for append cnt 0x%08x at %d\n", + areg, idx); return -EINVAL; } @@ -2643,7 +2644,7 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p, uint32_t swap; r = radeon_cs_packet_next_reloc(p, &reloc, 0); if (r) { - DRM_ERROR("bad SET_APPEND_CNT (missing reloc)\n"); + dev_warn_once(p->dev, "bad SET_APPEND_CNT (missing reloc)\n"); return -EINVAL; } offset = radeon_get_ib_value(p, idx + 1); @@ -2656,15 +2657,104 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p, ib[idx+1] = (offset & 0xfffffffc) | swap; ib[idx+2] = upper_32_bits(offset) & 0xff; } else { - DRM_ERROR("bad SET_APPEND_CNT (unsupported operation)\n"); + dev_warn_once(p->dev, "bad SET_APPEND_CNT (unsupported operation)\n"); return -EINVAL; } break; } + case PACKET3_COND_EXEC: + { + u64 offset; + + if (pkt->count != 2) { + dev_warn_once(p->dev, "bad COND_EXEC (invalid count)\n"); + return -EINVAL; + } + r = radeon_cs_packet_next_reloc(p, &reloc, 0); + if (r) { + dev_warn_once(p->dev, "bad COND_EXEC (missing reloc)\n"); + return -EINVAL; + } + offset = radeon_get_ib_value(p, idx + 0); + offset += ((u64)(radeon_get_ib_value(p, idx + 1) & 0xff)) << 32UL; + if (offset & 0x7) { + dev_warn_once(p->dev, "bad COND_EXEC (address not qwords aligned)\n"); + return -EINVAL; + } + if ((offset + 8) > radeon_bo_size(reloc->robj)) { + dev_warn_once(p->dev, "bad COND_EXEC bo too small: 0x%llx, 0x%lx\n", + offset + 8, radeon_bo_size(reloc->robj)); + return -EINVAL; + } + offset += reloc->gpu_offset; + ib[idx + 0] = offset; + ib[idx + 1] = upper_32_bits(offset) & 0xff; + break; + } + case PACKET3_COND_WRITE: + if (pkt->count != 7) { + dev_warn_once(p->dev, "bad COND_WRITE (invalid count)\n"); + return -EINVAL; + } + if (idx_value & 0x10) { + u64 offset; + /* POLL is memory. */ + r = radeon_cs_packet_next_reloc(p, &reloc, 0); + if (r) { + dev_warn_once(p->dev, "bad COND_WRITE (missing src reloc)\n"); + return -EINVAL; + } + offset = radeon_get_ib_value(p, idx + 1); + offset += ((u64)(radeon_get_ib_value(p, idx + 2) & 0xff)) << 32; + if ((offset + 8) > radeon_bo_size(reloc->robj)) { + dev_warn_once(p->dev, "bad COND_WRITE src bo too small: 0x%llx, 0x%lx\n", + offset + 8, radeon_bo_size(reloc->robj)); + return -EINVAL; + } + offset += reloc->gpu_offset; + ib[idx + 1] = offset; + ib[idx + 2] = upper_32_bits(offset) & 0xff; + } else { + /* POLL is a reg. */ + reg = radeon_get_ib_value(p, idx + 1) << 2; + if (!evergreen_is_safe_reg(p, reg)) { + dev_warn_once(p->dev, "forbidden register 0x%08x at %d\n", + reg, idx + 1); + return -EINVAL; + } + } + if (idx_value & 0x100) { + u64 offset; + /* WRITE is memory. 
*/ + r = radeon_cs_packet_next_reloc(p, &reloc, 0); + if (r) { + dev_warn_once(p->dev, "bad COND_WRITE (missing dst reloc)\n"); + return -EINVAL; + } + offset = radeon_get_ib_value(p, idx + 5); + offset += ((u64)(radeon_get_ib_value(p, idx + 6) & 0xff)) << 32; + if ((offset + 8) > radeon_bo_size(reloc->robj)) { + dev_warn_once(p->dev, "bad COND_WRITE dst bo too small: 0x%llx, 0x%lx\n", + offset + 8, radeon_bo_size(reloc->robj)); + return -EINVAL; + } + offset += reloc->gpu_offset; + ib[idx + 5] = offset; + ib[idx + 6] = upper_32_bits(offset) & 0xff; + } else { + /* WRITE is a reg. */ + reg = radeon_get_ib_value(p, idx + 5) << 2; + if (!evergreen_is_safe_reg(p, reg)) { + dev_warn_once(p->dev, "forbidden register 0x%08x at %d\n", + reg, idx + 5); + return -EINVAL; + } + } + break; case PACKET3_NOP: break; default: - DRM_ERROR("Packet3 opcode %x not supported\n", pkt->opcode); + dev_warn_once(p->dev, "Packet3 opcode %x not supported\n", pkt->opcode); return -EINVAL; } return 0; @@ -2764,7 +2854,7 @@ int evergreen_cs_parse(struct radeon_cs_parser *p) r = evergreen_packet3_check(p, &pkt); break; default: - DRM_ERROR("Unknown packet type %d !\n", pkt.type); + dev_warn_once(p->dev, "Unknown packet type %d !\n", pkt.type); kfree(p->track); p->track = NULL; return -EINVAL; @@ -2807,8 +2897,8 @@ int evergreen_dma_cs_parse(struct radeon_cs_parser *p) do { if (p->idx >= ib_chunk->length_dw) { - DRM_ERROR("Can not parse packet at %d after CS end %d !\n", - p->idx, ib_chunk->length_dw); + dev_warn_once(p->dev, "Can not parse packet at %d after CS end %d !\n", + p->idx, ib_chunk->length_dw); return -EINVAL; } idx = p->idx; @@ -2821,7 +2911,7 @@ int evergreen_dma_cs_parse(struct radeon_cs_parser *p) case DMA_PACKET_WRITE: r = r600_dma_cs_next_reloc(p, &dst_reloc); if (r) { - DRM_ERROR("bad DMA_PACKET_WRITE\n"); + dev_warn_once(p->dev, "bad DMA_PACKET_WRITE\n"); return -EINVAL; } switch (sub_cmd) { @@ -2843,24 +2933,24 @@ int evergreen_dma_cs_parse(struct radeon_cs_parser *p) p->idx += count + 3; break; default: - DRM_ERROR("bad DMA_PACKET_WRITE [%6d] 0x%08x sub cmd is not 0 or 8\n", idx, header); + dev_warn_once(p->dev, "bad DMA_PACKET_WRITE [%6d] 0x%08x sub cmd is not 0 or 8\n", idx, header); return -EINVAL; } if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) { - dev_warn(p->dev, "DMA write buffer too small (%llu %lu)\n", - dst_offset, radeon_bo_size(dst_reloc->robj)); + dev_warn_once(p->dev, "DMA write buffer too small (%llu %lu)\n", + dst_offset, radeon_bo_size(dst_reloc->robj)); return -EINVAL; } break; case DMA_PACKET_COPY: r = r600_dma_cs_next_reloc(p, &src_reloc); if (r) { - DRM_ERROR("bad DMA_PACKET_COPY\n"); + dev_warn_once(p->dev, "bad DMA_PACKET_COPY\n"); return -EINVAL; } r = r600_dma_cs_next_reloc(p, &dst_reloc); if (r) { - DRM_ERROR("bad DMA_PACKET_COPY\n"); + dev_warn_once(p->dev, "bad DMA_PACKET_COPY\n"); return -EINVAL; } switch (sub_cmd) { @@ -2872,13 +2962,13 @@ int evergreen_dma_cs_parse(struct radeon_cs_parser *p) dst_offset = radeon_get_ib_value(p, idx+1); dst_offset |= ((u64)(radeon_get_ib_value(p, idx+3) & 0xff)) << 32; if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) { - dev_warn(p->dev, "DMA L2L, dw src buffer too small (%llu %lu)\n", - src_offset + (count * 4), radeon_bo_size(src_reloc->robj)); + dev_warn_once(p->dev, "DMA L2L, dw src buffer too small (%llu %lu)\n", + src_offset + (count * 4), radeon_bo_size(src_reloc->robj)); return -EINVAL; } if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) { - dev_warn(p->dev, "DMA L2L, dw dst 
buffer too small (%llu %lu)\n", - dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj)); + dev_warn_once(p->dev, "DMA L2L, dw dst buffer too small (%llu %lu)\n", + dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj)); return -EINVAL; } ib[idx+1] += (u32)(dst_reloc->gpu_offset & 0xfffffffc); @@ -2912,13 +3002,13 @@ int evergreen_dma_cs_parse(struct radeon_cs_parser *p) ib[idx+1] += (u32)(dst_reloc->gpu_offset >> 8); } if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) { - dev_warn(p->dev, "DMA L2T, src buffer too small (%llu %lu)\n", - src_offset + (count * 4), radeon_bo_size(src_reloc->robj)); + dev_warn_once(p->dev, "DMA L2T, src buffer too small (%llu %lu)\n", + src_offset + (count * 4), radeon_bo_size(src_reloc->robj)); return -EINVAL; } if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) { - dev_warn(p->dev, "DMA L2T, dst buffer too small (%llu %lu)\n", - dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj)); + dev_warn_once(p->dev, "DMA L2T, dst buffer too small (%llu %lu)\n", + dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj)); return -EINVAL; } p->idx += 9; @@ -2931,13 +3021,13 @@ int evergreen_dma_cs_parse(struct radeon_cs_parser *p) dst_offset = radeon_get_ib_value(p, idx+1); dst_offset |= ((u64)(radeon_get_ib_value(p, idx+3) & 0xff)) << 32; if ((src_offset + count) > radeon_bo_size(src_reloc->robj)) { - dev_warn(p->dev, "DMA L2L, byte src buffer too small (%llu %lu)\n", - src_offset + count, radeon_bo_size(src_reloc->robj)); + dev_warn_once(p->dev, "DMA L2L, byte src buffer too small (%llu %lu)\n", + src_offset + count, radeon_bo_size(src_reloc->robj)); return -EINVAL; } if ((dst_offset + count) > radeon_bo_size(dst_reloc->robj)) { - dev_warn(p->dev, "DMA L2L, byte dst buffer too small (%llu %lu)\n", - dst_offset + count, radeon_bo_size(dst_reloc->robj)); + dev_warn_once(p->dev, "DMA L2L, byte dst buffer too small (%llu %lu)\n", + dst_offset + count, radeon_bo_size(dst_reloc->robj)); return -EINVAL; } ib[idx+1] += (u32)(dst_reloc->gpu_offset & 0xffffffff); @@ -2950,7 +3040,7 @@ int evergreen_dma_cs_parse(struct radeon_cs_parser *p) case 0x41: /* L2L, partial */ if (p->family < CHIP_CAYMAN) { - DRM_ERROR("L2L Partial is cayman only !\n"); + dev_warn_once(p->dev, "L2L Partial is cayman only !\n"); return -EINVAL; } ib[idx+1] += (u32)(src_reloc->gpu_offset & 0xffffffff); @@ -2965,7 +3055,7 @@ int evergreen_dma_cs_parse(struct radeon_cs_parser *p) /* L2L, dw, broadcast */ r = r600_dma_cs_next_reloc(p, &dst2_reloc); if (r) { - DRM_ERROR("bad L2L, dw, broadcast DMA_PACKET_COPY\n"); + dev_warn_once(p->dev, "bad L2L, dw, broadcast DMA_PACKET_COPY\n"); return -EINVAL; } dst_offset = radeon_get_ib_value(p, idx+1); @@ -2975,18 +3065,18 @@ int evergreen_dma_cs_parse(struct radeon_cs_parser *p) src_offset = radeon_get_ib_value(p, idx+3); src_offset |= ((u64)(radeon_get_ib_value(p, idx+6) & 0xff)) << 32; if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) { - dev_warn(p->dev, "DMA L2L, dw, broadcast src buffer too small (%llu %lu)\n", - src_offset + (count * 4), radeon_bo_size(src_reloc->robj)); + dev_warn_once(p->dev, "DMA L2L, dw, broadcast src buffer too small (%llu %lu)\n", + src_offset + (count * 4), radeon_bo_size(src_reloc->robj)); return -EINVAL; } if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) { - dev_warn(p->dev, "DMA L2L, dw, broadcast dst buffer too small (%llu %lu)\n", - dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj)); + dev_warn_once(p->dev, "DMA L2L, dw, broadcast dst buffer too 
small (%llu %lu)\n", + dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj)); return -EINVAL; } if ((dst2_offset + (count * 4)) > radeon_bo_size(dst2_reloc->robj)) { - dev_warn(p->dev, "DMA L2L, dw, broadcast dst2 buffer too small (%llu %lu)\n", - dst2_offset + (count * 4), radeon_bo_size(dst2_reloc->robj)); + dev_warn_once(p->dev, "DMA L2L, dw, broadcast dst2 buffer too small (%llu %lu)\n", + dst2_offset + (count * 4), radeon_bo_size(dst2_reloc->robj)); return -EINVAL; } ib[idx+1] += (u32)(dst_reloc->gpu_offset & 0xfffffffc); @@ -3000,12 +3090,12 @@ int evergreen_dma_cs_parse(struct radeon_cs_parser *p) /* Copy L2T Frame to Field */ case 0x48: if (radeon_get_ib_value(p, idx + 2) & (1 << 31)) { - DRM_ERROR("bad L2T, frame to fields DMA_PACKET_COPY\n"); + dev_warn_once(p->dev, "bad L2T, frame to fields DMA_PACKET_COPY\n"); return -EINVAL; } r = r600_dma_cs_next_reloc(p, &dst2_reloc); if (r) { - DRM_ERROR("bad L2T, frame to fields DMA_PACKET_COPY\n"); + dev_warn_once(p->dev, "bad L2T, frame to fields DMA_PACKET_COPY\n"); return -EINVAL; } dst_offset = radeon_get_ib_value(p, idx+1); @@ -3015,18 +3105,18 @@ int evergreen_dma_cs_parse(struct radeon_cs_parser *p) src_offset = radeon_get_ib_value(p, idx+8); src_offset |= ((u64)(radeon_get_ib_value(p, idx+9) & 0xff)) << 32; if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) { - dev_warn(p->dev, "DMA L2T, frame to fields src buffer too small (%llu %lu)\n", - src_offset + (count * 4), radeon_bo_size(src_reloc->robj)); + dev_warn_once(p->dev, "DMA L2T, frame to fields src buffer too small (%llu %lu)\n", + src_offset + (count * 4), radeon_bo_size(src_reloc->robj)); return -EINVAL; } if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) { - dev_warn(p->dev, "DMA L2T, frame to fields buffer too small (%llu %lu)\n", - dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj)); + dev_warn_once(p->dev, "DMA L2T, frame to fields buffer too small (%llu %lu)\n", + dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj)); return -EINVAL; } if ((dst2_offset + (count * 4)) > radeon_bo_size(dst2_reloc->robj)) { - dev_warn(p->dev, "DMA L2T, frame to fields buffer too small (%llu %lu)\n", - dst2_offset + (count * 4), radeon_bo_size(dst2_reloc->robj)); + dev_warn_once(p->dev, "DMA L2T, frame to fields buffer too small (%llu %lu)\n", + dst2_offset + (count * 4), radeon_bo_size(dst2_reloc->robj)); return -EINVAL; } ib[idx+1] += (u32)(dst_reloc->gpu_offset >> 8); @@ -3039,7 +3129,7 @@ int evergreen_dma_cs_parse(struct radeon_cs_parser *p) case 0x49: /* L2T, T2L partial */ if (p->family < CHIP_CAYMAN) { - DRM_ERROR("L2T, T2L Partial is cayman only !\n"); + dev_warn_once(p->dev, "L2T, T2L Partial is cayman only !\n"); return -EINVAL; } /* detile bit */ @@ -3062,12 +3152,12 @@ int evergreen_dma_cs_parse(struct radeon_cs_parser *p) case 0x4b: /* L2T, broadcast */ if (radeon_get_ib_value(p, idx + 2) & (1 << 31)) { - DRM_ERROR("bad L2T, broadcast DMA_PACKET_COPY\n"); + dev_warn_once(p->dev, "bad L2T, broadcast DMA_PACKET_COPY\n"); return -EINVAL; } r = r600_dma_cs_next_reloc(p, &dst2_reloc); if (r) { - DRM_ERROR("bad L2T, broadcast DMA_PACKET_COPY\n"); + dev_warn_once(p->dev, "bad L2T, broadcast DMA_PACKET_COPY\n"); return -EINVAL; } dst_offset = radeon_get_ib_value(p, idx+1); @@ -3077,18 +3167,18 @@ int evergreen_dma_cs_parse(struct radeon_cs_parser *p) src_offset = radeon_get_ib_value(p, idx+8); src_offset |= ((u64)(radeon_get_ib_value(p, idx+9) & 0xff)) << 32; if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) { - 
dev_warn(p->dev, "DMA L2T, broadcast src buffer too small (%llu %lu)\n", - src_offset + (count * 4), radeon_bo_size(src_reloc->robj)); + dev_warn_once(p->dev, "DMA L2T, broadcast src buffer too small (%llu %lu)\n", + src_offset + (count * 4), radeon_bo_size(src_reloc->robj)); return -EINVAL; } if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) { - dev_warn(p->dev, "DMA L2T, broadcast dst buffer too small (%llu %lu)\n", - dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj)); + dev_warn_once(p->dev, "DMA L2T, broadcast dst buffer too small (%llu %lu)\n", + dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj)); return -EINVAL; } if ((dst2_offset + (count * 4)) > radeon_bo_size(dst2_reloc->robj)) { - dev_warn(p->dev, "DMA L2T, broadcast dst2 buffer too small (%llu %lu)\n", - dst2_offset + (count * 4), radeon_bo_size(dst2_reloc->robj)); + dev_warn_once(p->dev, "DMA L2T, broadcast dst2 buffer too small (%llu %lu)\n", + dst2_offset + (count * 4), radeon_bo_size(dst2_reloc->robj)); return -EINVAL; } ib[idx+1] += (u32)(dst_reloc->gpu_offset >> 8); @@ -3123,13 +3213,13 @@ int evergreen_dma_cs_parse(struct radeon_cs_parser *p) ib[idx+1] += (u32)(dst_reloc->gpu_offset >> 8); } if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) { - dev_warn(p->dev, "DMA L2T, T2L src buffer too small (%llu %lu)\n", - src_offset + (count * 4), radeon_bo_size(src_reloc->robj)); + dev_warn_once(p->dev, "DMA L2T, T2L src buffer too small (%llu %lu)\n", + src_offset + (count * 4), radeon_bo_size(src_reloc->robj)); return -EINVAL; } if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) { - dev_warn(p->dev, "DMA L2T, T2L dst buffer too small (%llu %lu)\n", - dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj)); + dev_warn_once(p->dev, "DMA L2T, T2L dst buffer too small (%llu %lu)\n", + dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj)); return -EINVAL; } p->idx += 9; @@ -3138,7 +3228,7 @@ int evergreen_dma_cs_parse(struct radeon_cs_parser *p) case 0x4d: /* T2T partial */ if (p->family < CHIP_CAYMAN) { - DRM_ERROR("L2T, T2L Partial is cayman only !\n"); + dev_warn_once(p->dev, "L2T, T2L Partial is cayman only !\n"); return -EINVAL; } ib[idx+1] += (u32)(src_reloc->gpu_offset >> 8); @@ -3149,12 +3239,12 @@ int evergreen_dma_cs_parse(struct radeon_cs_parser *p) case 0x4f: /* L2T, broadcast */ if (radeon_get_ib_value(p, idx + 2) & (1 << 31)) { - DRM_ERROR("bad L2T, broadcast DMA_PACKET_COPY\n"); + dev_warn_once(p->dev, "bad L2T, broadcast DMA_PACKET_COPY\n"); return -EINVAL; } r = r600_dma_cs_next_reloc(p, &dst2_reloc); if (r) { - DRM_ERROR("bad L2T, broadcast DMA_PACKET_COPY\n"); + dev_warn_once(p->dev, "bad L2T, broadcast DMA_PACKET_COPY\n"); return -EINVAL; } dst_offset = radeon_get_ib_value(p, idx+1); @@ -3164,18 +3254,18 @@ int evergreen_dma_cs_parse(struct radeon_cs_parser *p) src_offset = radeon_get_ib_value(p, idx+8); src_offset |= ((u64)(radeon_get_ib_value(p, idx+9) & 0xff)) << 32; if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) { - dev_warn(p->dev, "DMA L2T, broadcast src buffer too small (%llu %lu)\n", - src_offset + (count * 4), radeon_bo_size(src_reloc->robj)); + dev_warn_once(p->dev, "DMA L2T, broadcast src buffer too small (%llu %lu)\n", + src_offset + (count * 4), radeon_bo_size(src_reloc->robj)); return -EINVAL; } if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) { - dev_warn(p->dev, "DMA L2T, broadcast dst buffer too small (%llu %lu)\n", - dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj)); + 
dev_warn_once(p->dev, "DMA L2T, broadcast dst buffer too small (%llu %lu)\n", + dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj)); return -EINVAL; } if ((dst2_offset + (count * 4)) > radeon_bo_size(dst2_reloc->robj)) { - dev_warn(p->dev, "DMA L2T, broadcast dst2 buffer too small (%llu %lu)\n", - dst2_offset + (count * 4), radeon_bo_size(dst2_reloc->robj)); + dev_warn_once(p->dev, "DMA L2T, broadcast dst2 buffer too small (%llu %lu)\n", + dst2_offset + (count * 4), radeon_bo_size(dst2_reloc->robj)); return -EINVAL; } ib[idx+1] += (u32)(dst_reloc->gpu_offset >> 8); @@ -3185,21 +3275,21 @@ int evergreen_dma_cs_parse(struct radeon_cs_parser *p) p->idx += 10; break; default: - DRM_ERROR("bad DMA_PACKET_COPY [%6d] 0x%08x invalid sub cmd\n", idx, header); + dev_warn_once(p->dev, "bad DMA_PACKET_COPY [%6d] 0x%08x invalid sub cmd\n", idx, header); return -EINVAL; } break; case DMA_PACKET_CONSTANT_FILL: r = r600_dma_cs_next_reloc(p, &dst_reloc); if (r) { - DRM_ERROR("bad DMA_PACKET_CONSTANT_FILL\n"); + dev_warn_once(p->dev, "bad DMA_PACKET_CONSTANT_FILL\n"); return -EINVAL; } dst_offset = radeon_get_ib_value(p, idx+1); dst_offset |= ((u64)(radeon_get_ib_value(p, idx+3) & 0x00ff0000)) << 16; if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) { - dev_warn(p->dev, "DMA constant fill buffer too small (%llu %lu)\n", - dst_offset, radeon_bo_size(dst_reloc->robj)); + dev_warn_once(p->dev, "DMA constant fill buffer too small (%llu %lu)\n", + dst_offset, radeon_bo_size(dst_reloc->robj)); return -EINVAL; } ib[idx+1] += (u32)(dst_reloc->gpu_offset & 0xfffffffc); @@ -3210,7 +3300,7 @@ int evergreen_dma_cs_parse(struct radeon_cs_parser *p) p->idx += 1; break; default: - DRM_ERROR("Unknown packet type %d at %d !\n", cmd, idx); + dev_warn_once(p->dev, "Unknown packet type %d at %d !\n", cmd, idx); return -EINVAL; } } while (p->idx < p->chunk_ib->length_dw); @@ -3341,7 +3431,7 @@ static bool evergreen_vm_reg_valid(u32 reg) case CAYMAN_SQ_EX_ALLOC_TABLE_SLOTS: return true; default: - DRM_ERROR("Invalid register 0x%x in CS\n", reg); + DRM_DEBUG("Invalid register 0x%x in CS\n", reg); return false; } } @@ -3359,7 +3449,7 @@ static int evergreen_vm_packet3_check(struct radeon_device *rdev, break; case PACKET3_SET_BASE: if (idx_value != 1) { - DRM_ERROR("bad SET_BASE"); + dev_warn_once(rdev->dev, "bad SET_BASE"); return -EINVAL; } break; @@ -3406,7 +3496,12 @@ static int evergreen_vm_packet3_check(struct radeon_device *rdev, case CAYMAN_PACKET3_DEALLOC_STATE: break; case PACKET3_COND_WRITE: - if (idx_value & 0x100) { + if (!(idx_value & 0x10)) { + reg = ib[idx + 1] * 4; + if (!evergreen_vm_reg_valid(reg)) + return -EINVAL; + } + if (!(idx_value & 0x100)) { reg = ib[idx + 5] * 4; if (!evergreen_vm_reg_valid(reg)) return -EINVAL; @@ -3425,7 +3520,7 @@ static int evergreen_vm_packet3_check(struct radeon_device *rdev, if ((start_reg < PACKET3_SET_CONFIG_REG_START) || (start_reg >= PACKET3_SET_CONFIG_REG_END) || (end_reg >= PACKET3_SET_CONFIG_REG_END)) { - DRM_ERROR("bad PACKET3_SET_CONFIG_REG\n"); + dev_warn_once(rdev->dev, "bad PACKET3_SET_CONFIG_REG\n"); return -EINVAL; } for (i = 0; i < pkt->count; i++) { @@ -3445,7 +3540,7 @@ static int evergreen_vm_packet3_check(struct radeon_device *rdev, (command & PACKET3_CP_DMA_CMD_SAS))) { /* src = register */ /* non mem to mem copies requires dw aligned count */ if ((command & 0x1fffff) % 4) { - DRM_ERROR("CP DMA command requires dw count alignment\n"); + dev_warn_once(rdev->dev, "CP DMA command requires dw count alignment\n"); return -EINVAL; } } @@ 
-3456,14 +3551,14 @@ static int evergreen_vm_packet3_check(struct radeon_device *rdev, if (command & PACKET3_CP_DMA_CMD_SAIC) { reg = start_reg; if (!evergreen_vm_reg_valid(reg)) { - DRM_ERROR("CP DMA Bad SRC register\n"); + dev_warn_once(rdev->dev, "CP DMA Bad SRC register\n"); return -EINVAL; } } else { for (i = 0; i < (command & 0x1fffff); i++) { reg = start_reg + (4 * i); if (!evergreen_vm_reg_valid(reg)) { - DRM_ERROR("CP DMA Bad SRC register\n"); + dev_warn_once(rdev->dev, "CP DMA Bad SRC register\n"); return -EINVAL; } } @@ -3477,14 +3572,14 @@ static int evergreen_vm_packet3_check(struct radeon_device *rdev, if (command & PACKET3_CP_DMA_CMD_DAIC) { reg = start_reg; if (!evergreen_vm_reg_valid(reg)) { - DRM_ERROR("CP DMA Bad DST register\n"); + dev_warn_once(rdev->dev, "CP DMA Bad DST register\n"); return -EINVAL; } } else { for (i = 0; i < (command & 0x1fffff); i++) { reg = start_reg + (4 * i); if (!evergreen_vm_reg_valid(reg)) { - DRM_ERROR("CP DMA Bad DST register\n"); + dev_warn_once(rdev->dev, "CP DMA Bad DST register\n"); return -EINVAL; } } @@ -3497,7 +3592,7 @@ static int evergreen_vm_packet3_check(struct radeon_device *rdev, uint32_t allowed_reg_base; if (pkt->count != 2) { - DRM_ERROR("bad SET_APPEND_CNT (invalid count)\n"); + dev_warn_once(rdev->dev, "bad SET_APPEND_CNT (invalid count)\n"); return -EINVAL; } @@ -3507,8 +3602,8 @@ static int evergreen_vm_packet3_check(struct radeon_device *rdev, areg = idx_value >> 16; if (areg < allowed_reg_base || areg > (allowed_reg_base + 11)) { - DRM_ERROR("forbidden register for append cnt 0x%08x at %d\n", - areg, idx); + dev_warn_once(rdev->dev, "forbidden register for append cnt 0x%08x at %d\n", + areg, idx); return -EINVAL; } break; @@ -3587,7 +3682,9 @@ int evergreen_dma_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib) idx += count + 3; break; default: - DRM_ERROR("bad DMA_PACKET_WRITE [%6d] 0x%08x sub cmd is not 0 or 8\n", idx, ib->ptr[idx]); + dev_warn_once(rdev->dev, + "bad DMA_PACKET_WRITE [%6d] 0x%08x sub cmd is not 0 or 8\n", + idx, ib->ptr[idx]); return -EINVAL; } break; @@ -3638,7 +3735,9 @@ int evergreen_dma_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib) idx += 10; break; default: - DRM_ERROR("bad DMA_PACKET_COPY [%6d] 0x%08x invalid sub cmd\n", idx, ib->ptr[idx]); + dev_warn_once(rdev->dev, + "bad DMA_PACKET_COPY [%6d] 0x%08x invalid sub cmd\n", + idx, ib->ptr[idx]); return -EINVAL; } break; @@ -3649,7 +3748,7 @@ int evergreen_dma_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib) idx += 1; break; default: - DRM_ERROR("Unknown packet type %d at %d !\n", cmd, idx); + dev_warn_once(rdev->dev, "Unknown packet type %d at %d !\n", cmd, idx); return -EINVAL; } } while (idx < ib->length_dw); diff --git a/drivers/gpu/drm/radeon/ni_dpm.c b/drivers/gpu/drm/radeon/ni_dpm.c index e08559c44a5c..82edbfb259bf 100644 --- a/drivers/gpu/drm/radeon/ni_dpm.c +++ b/drivers/gpu/drm/radeon/ni_dpm.c @@ -3397,7 +3397,7 @@ static int ni_enable_smc_cac(struct radeon_device *rdev, if (PPSMC_Result_OK != smc_result) ret = -EINVAL; - ni_pi->cac_enabled = (PPSMC_Result_OK == smc_result) ? 
true : false; + ni_pi->cac_enabled = PPSMC_Result_OK == smc_result; } } else if (ni_pi->cac_enabled) { smc_result = rv770_send_msg_to_smc(rdev, PPSMC_MSG_DisableCac); diff --git a/drivers/gpu/drm/radeon/r100.c b/drivers/gpu/drm/radeon/r100.c index 80703417d8a1..07a9c523a17a 100644 --- a/drivers/gpu/drm/radeon/r100.c +++ b/drivers/gpu/drm/radeon/r100.c @@ -1298,8 +1298,8 @@ int r100_reloc_pitch_offset(struct radeon_cs_parser *p, r = radeon_cs_packet_next_reloc(p, &reloc, 0); if (r) { - DRM_ERROR("No reloc for ib[%d]=0x%04X\n", - idx, reg); + dev_warn_once(p->dev, "No reloc for ib[%d]=0x%04X\n", + idx, reg); radeon_cs_dump_packet(p, pkt); return r; } @@ -1313,7 +1313,7 @@ int r100_reloc_pitch_offset(struct radeon_cs_parser *p, tile_flags |= RADEON_DST_TILE_MACRO; if (reloc->tiling_flags & RADEON_TILING_MICRO) { if (reg == RADEON_SRC_PITCH_OFFSET) { - DRM_ERROR("Cannot src blit from microtiled surface\n"); + dev_warn_once(p->dev, "Cannot src blit from microtiled surface\n"); radeon_cs_dump_packet(p, pkt); return -EINVAL; } @@ -1342,8 +1342,8 @@ int r100_packet3_load_vbpntr(struct radeon_cs_parser *p, track = (struct r100_cs_track *)p->track; c = radeon_get_ib_value(p, idx++) & 0x1F; if (c > 16) { - DRM_ERROR("Only 16 vertex buffers are allowed %d\n", - pkt->opcode); + dev_warn_once(p->dev, "Only 16 vertex buffers are allowed %d\n", + pkt->opcode); radeon_cs_dump_packet(p, pkt); return -EINVAL; } @@ -1351,8 +1351,8 @@ int r100_packet3_load_vbpntr(struct radeon_cs_parser *p, for (i = 0; i < (c - 1); i += 2, idx += 3) { r = radeon_cs_packet_next_reloc(p, &reloc, 0); if (r) { - DRM_ERROR("No reloc for packet3 %d\n", - pkt->opcode); + dev_warn_once(p->dev, "No reloc for packet3 %d\n", + pkt->opcode); radeon_cs_dump_packet(p, pkt); return r; } @@ -1364,8 +1364,8 @@ int r100_packet3_load_vbpntr(struct radeon_cs_parser *p, track->arrays[i + 0].esize &= 0x7F; r = radeon_cs_packet_next_reloc(p, &reloc, 0); if (r) { - DRM_ERROR("No reloc for packet3 %d\n", - pkt->opcode); + dev_warn_once(p->dev, "No reloc for packet3 %d\n", + pkt->opcode); radeon_cs_dump_packet(p, pkt); return r; } @@ -1377,8 +1377,8 @@ int r100_packet3_load_vbpntr(struct radeon_cs_parser *p, if (c & 1) { r = radeon_cs_packet_next_reloc(p, &reloc, 0); if (r) { - DRM_ERROR("No reloc for packet3 %d\n", - pkt->opcode); + dev_warn_once(p->dev, "No reloc for packet3 %d\n", + pkt->opcode); radeon_cs_dump_packet(p, pkt); return r; } @@ -1470,12 +1470,12 @@ int r100_cs_packet_parse_vline(struct radeon_cs_parser *p) /* check its a wait until and only 1 count */ if (waitreloc.reg != RADEON_WAIT_UNTIL || waitreloc.count != 0) { - DRM_ERROR("vline wait had illegal wait until segment\n"); + dev_warn_once(p->dev, "vline wait had illegal wait until segment\n"); return -EINVAL; } if (radeon_get_ib_value(p, waitreloc.idx + 1) != RADEON_WAIT_CRTC_VLINE) { - DRM_ERROR("vline wait had illegal wait until\n"); + dev_warn_once(p->dev, "vline wait had illegal wait until\n"); return -EINVAL; } @@ -1493,7 +1493,7 @@ int r100_cs_packet_parse_vline(struct radeon_cs_parser *p) reg = R100_CP_PACKET0_GET_REG(header); crtc = drm_crtc_find(rdev_to_drm(p->rdev), p->filp, crtc_id); if (!crtc) { - DRM_ERROR("cannot find crtc %d\n", crtc_id); + dev_warn_once(p->dev, "cannot find crtc %d\n", crtc_id); return -ENOENT; } radeon_crtc = to_radeon_crtc(crtc); @@ -1514,7 +1514,7 @@ int r100_cs_packet_parse_vline(struct radeon_cs_parser *p) header |= RADEON_CRTC2_GUI_TRIG_VLINE >> 2; break; default: - DRM_ERROR("unknown crtc reloc\n"); + dev_warn_once(p->dev, "unknown crtc 
reloc\n"); return -EINVAL; } ib[h_idx] = header; @@ -1599,7 +1599,7 @@ static int r100_packet0_check(struct radeon_cs_parser *p, case RADEON_CRTC_GUI_TRIG_VLINE: r = r100_cs_packet_parse_vline(p); if (r) { - DRM_ERROR("No reloc for ib[%d]=0x%04X\n", + dev_warn_once(p->dev, "No reloc for ib[%d]=0x%04X\n", idx, reg); radeon_cs_dump_packet(p, pkt); return r; @@ -1616,8 +1616,8 @@ static int r100_packet0_check(struct radeon_cs_parser *p, case RADEON_RB3D_DEPTHOFFSET: r = radeon_cs_packet_next_reloc(p, &reloc, 0); if (r) { - DRM_ERROR("No reloc for ib[%d]=0x%04X\n", - idx, reg); + dev_warn_once(p->dev, "No reloc for ib[%d]=0x%04X\n", + idx, reg); radeon_cs_dump_packet(p, pkt); return r; } @@ -1629,8 +1629,8 @@ static int r100_packet0_check(struct radeon_cs_parser *p, case RADEON_RB3D_COLOROFFSET: r = radeon_cs_packet_next_reloc(p, &reloc, 0); if (r) { - DRM_ERROR("No reloc for ib[%d]=0x%04X\n", - idx, reg); + dev_warn_once(p->dev, "No reloc for ib[%d]=0x%04X\n", + idx, reg); radeon_cs_dump_packet(p, pkt); return r; } @@ -1645,8 +1645,8 @@ static int r100_packet0_check(struct radeon_cs_parser *p, i = (reg - RADEON_PP_TXOFFSET_0) / 24; r = radeon_cs_packet_next_reloc(p, &reloc, 0); if (r) { - DRM_ERROR("No reloc for ib[%d]=0x%04X\n", - idx, reg); + dev_warn_once(p->dev, "No reloc for ib[%d]=0x%04X\n", + idx, reg); radeon_cs_dump_packet(p, pkt); return r; } @@ -1672,8 +1672,8 @@ static int r100_packet0_check(struct radeon_cs_parser *p, i = (reg - RADEON_PP_CUBIC_OFFSET_T0_0) / 4; r = radeon_cs_packet_next_reloc(p, &reloc, 0); if (r) { - DRM_ERROR("No reloc for ib[%d]=0x%04X\n", - idx, reg); + dev_warn_once(p->dev, "No reloc for ib[%d]=0x%04X\n", + idx, reg); radeon_cs_dump_packet(p, pkt); return r; } @@ -1690,8 +1690,8 @@ static int r100_packet0_check(struct radeon_cs_parser *p, i = (reg - RADEON_PP_CUBIC_OFFSET_T1_0) / 4; r = radeon_cs_packet_next_reloc(p, &reloc, 0); if (r) { - DRM_ERROR("No reloc for ib[%d]=0x%04X\n", - idx, reg); + dev_warn_once(p->dev, "No reloc for ib[%d]=0x%04X\n", + idx, reg); radeon_cs_dump_packet(p, pkt); return r; } @@ -1708,8 +1708,8 @@ static int r100_packet0_check(struct radeon_cs_parser *p, i = (reg - RADEON_PP_CUBIC_OFFSET_T2_0) / 4; r = radeon_cs_packet_next_reloc(p, &reloc, 0); if (r) { - DRM_ERROR("No reloc for ib[%d]=0x%04X\n", - idx, reg); + dev_warn_once(p->dev, "No reloc for ib[%d]=0x%04X\n", + idx, reg); radeon_cs_dump_packet(p, pkt); return r; } @@ -1726,8 +1726,8 @@ static int r100_packet0_check(struct radeon_cs_parser *p, case RADEON_RB3D_COLORPITCH: r = radeon_cs_packet_next_reloc(p, &reloc, 0); if (r) { - DRM_ERROR("No reloc for ib[%d]=0x%04X\n", - idx, reg); + dev_warn_once(p->dev, "No reloc for ib[%d]=0x%04X\n", + idx, reg); radeon_cs_dump_packet(p, pkt); return r; } @@ -1768,8 +1768,8 @@ static int r100_packet0_check(struct radeon_cs_parser *p, track->cb[0].cpp = 4; break; default: - DRM_ERROR("Invalid color buffer format (%d) !\n", - ((idx_value >> RADEON_RB3D_COLOR_FORMAT_SHIFT) & 0x1f)); + dev_warn_once(p->dev, "Invalid color buffer format (%d) !\n", + ((idx_value >> RADEON_RB3D_COLOR_FORMAT_SHIFT) & 0x1f)); return -EINVAL; } track->z_enabled = !!(idx_value & RADEON_Z_ENABLE); @@ -1797,8 +1797,8 @@ static int r100_packet0_check(struct radeon_cs_parser *p, case RADEON_RB3D_ZPASS_ADDR: r = radeon_cs_packet_next_reloc(p, &reloc, 0); if (r) { - DRM_ERROR("No reloc for ib[%d]=0x%04X\n", - idx, reg); + dev_warn_once(p->dev, "No reloc for ib[%d]=0x%04X\n", + idx, reg); radeon_cs_dump_packet(p, pkt); return r; } @@ -1927,10 +1927,10 @@ int 
r100_cs_track_check_pkt3_indx_buffer(struct radeon_cs_parser *p, idx = pkt->idx + 1; value = radeon_get_ib_value(p, idx + 2); if ((value + 1) > radeon_bo_size(robj)) { - DRM_ERROR("[drm] Buffer too small for PACKET3 INDX_BUFFER " - "(need %u have %lu) !\n", - value + 1, - radeon_bo_size(robj)); + dev_warn_once(p->dev, "[drm] Buffer too small for PACKET3 INDX_BUFFER " + "(need %u have %lu) !\n", + value + 1, + radeon_bo_size(robj)); return -EINVAL; } return 0; @@ -1957,7 +1957,7 @@ static int r100_packet3_check(struct radeon_cs_parser *p, case PACKET3_INDX_BUFFER: r = radeon_cs_packet_next_reloc(p, &reloc, 0); if (r) { - DRM_ERROR("No reloc for packet3 %d\n", pkt->opcode); + dev_warn_once(p->dev, "No reloc for packet3 %d\n", pkt->opcode); radeon_cs_dump_packet(p, pkt); return r; } @@ -1971,7 +1971,7 @@ static int r100_packet3_check(struct radeon_cs_parser *p, /* 3D_RNDR_GEN_INDX_PRIM on r100/r200 */ r = radeon_cs_packet_next_reloc(p, &reloc, 0); if (r) { - DRM_ERROR("No reloc for packet3 %d\n", pkt->opcode); + dev_warn_once(p->dev, "No reloc for packet3 %d\n", pkt->opcode); radeon_cs_dump_packet(p, pkt); return r; } @@ -1992,7 +1992,7 @@ static int r100_packet3_check(struct radeon_cs_parser *p, break; case PACKET3_3D_DRAW_IMMD: if (((radeon_get_ib_value(p, idx + 1) >> 4) & 0x3) != 3) { - DRM_ERROR("PRIM_WALK must be 3 for IMMD draw\n"); + dev_warn_once(p->dev, "PRIM_WALK must be 3 for IMMD draw\n"); return -EINVAL; } track->vtx_size = r100_get_vtx_size(radeon_get_ib_value(p, idx + 0)); @@ -2005,7 +2005,7 @@ static int r100_packet3_check(struct radeon_cs_parser *p, /* triggers drawing using in-packet vertex data */ case PACKET3_3D_DRAW_IMMD_2: if (((radeon_get_ib_value(p, idx) >> 4) & 0x3) != 3) { - DRM_ERROR("PRIM_WALK must be 3 for IMMD draw\n"); + dev_warn_once(p->dev, "PRIM_WALK must be 3 for IMMD draw\n"); return -EINVAL; } track->vap_vf_cntl = radeon_get_ib_value(p, idx); @@ -2051,7 +2051,7 @@ static int r100_packet3_check(struct radeon_cs_parser *p, case PACKET3_NOP: break; default: - DRM_ERROR("Packet3 opcode %x not supported\n", pkt->opcode); + dev_warn_once(p->dev, "Packet3 opcode %x not supported\n", pkt->opcode); return -EINVAL; } return 0; @@ -2093,8 +2093,8 @@ int r100_cs_parse(struct radeon_cs_parser *p) r = r100_packet3_check(p, &pkt); break; default: - DRM_ERROR("Unknown packet type %d !\n", - pkt.type); + dev_warn_once(p->dev, "Unknown packet type %d !\n", + pkt.type); return -EINVAL; } if (r) @@ -2105,19 +2105,19 @@ int r100_cs_parse(struct radeon_cs_parser *p) static void r100_cs_track_texture_print(struct r100_cs_track_texture *t) { - DRM_ERROR("pitch %d\n", t->pitch); - DRM_ERROR("use_pitch %d\n", t->use_pitch); - DRM_ERROR("width %d\n", t->width); - DRM_ERROR("width_11 %d\n", t->width_11); - DRM_ERROR("height %d\n", t->height); - DRM_ERROR("height_11 %d\n", t->height_11); - DRM_ERROR("num levels %d\n", t->num_levels); - DRM_ERROR("depth %d\n", t->txdepth); - DRM_ERROR("bpp %d\n", t->cpp); - DRM_ERROR("coordinate type %d\n", t->tex_coord_type); - DRM_ERROR("width round to power of 2 %d\n", t->roundup_w); - DRM_ERROR("height round to power of 2 %d\n", t->roundup_h); - DRM_ERROR("compress format %d\n", t->compress_format); + DRM_DEBUG("pitch %d\n", t->pitch); + DRM_DEBUG("use_pitch %d\n", t->use_pitch); + DRM_DEBUG("width %d\n", t->width); + DRM_DEBUG("width_11 %d\n", t->width_11); + DRM_DEBUG("height %d\n", t->height); + DRM_DEBUG("height_11 %d\n", t->height_11); + DRM_DEBUG("num levels %d\n", t->num_levels); + DRM_DEBUG("depth %d\n", t->txdepth); + DRM_DEBUG("bpp %d\n", 
t->cpp); + DRM_DEBUG("coordinate type %d\n", t->tex_coord_type); + DRM_DEBUG("width round to power of 2 %d\n", t->roundup_w); + DRM_DEBUG("height round to power of 2 %d\n", t->roundup_h); + DRM_DEBUG("compress format %d\n", t->compress_format); } static int r100_track_compress_size(int compress_format, int w, int h) @@ -2172,8 +2172,9 @@ static int r100_cs_track_cube(struct radeon_device *rdev, size += track->textures[idx].cube_info[face].offset; if (size > radeon_bo_size(cube_robj)) { - DRM_ERROR("Cube texture offset greater than object size %lu %lu\n", - size, radeon_bo_size(cube_robj)); + dev_warn_once(rdev->dev, + "Cube texture offset greater than object size %lu %lu\n", + size, radeon_bo_size(cube_robj)); r100_cs_track_texture_print(&track->textures[idx]); return -1; } @@ -2196,7 +2197,7 @@ static int r100_cs_track_texture_check(struct radeon_device *rdev, continue; robj = track->textures[u].robj; if (robj == NULL) { - DRM_ERROR("No texture bound to unit %u\n", u); + dev_warn_once(rdev->dev, "No texture bound to unit %u\n", u); return -EINVAL; } size = 0; @@ -2249,13 +2250,13 @@ static int r100_cs_track_texture_check(struct radeon_device *rdev, size *= 6; break; default: - DRM_ERROR("Invalid texture coordinate type %u for unit " - "%u\n", track->textures[u].tex_coord_type, u); + dev_warn_once(rdev->dev, "Invalid texture coordinate type %u for unit " + "%u\n", track->textures[u].tex_coord_type, u); return -EINVAL; } if (size > radeon_bo_size(robj)) { - DRM_ERROR("Texture of unit %u needs %lu bytes but is " - "%lu\n", u, size, radeon_bo_size(robj)); + dev_warn_once(rdev->dev, "Texture of unit %u needs %lu bytes but is " + "%lu\n", u, size, radeon_bo_size(robj)); r100_cs_track_texture_print(&track->textures[u]); return -EINVAL; } @@ -2277,18 +2278,18 @@ int r100_cs_track_check(struct radeon_device *rdev, struct r100_cs_track *track) for (i = 0; i < num_cb; i++) { if (track->cb[i].robj == NULL) { - DRM_ERROR("[drm] No buffer for color buffer %d !\n", i); + dev_warn_once(rdev->dev, "[drm] No buffer for color buffer %d !\n", i); return -EINVAL; } size = track->cb[i].pitch * track->cb[i].cpp * track->maxy; size += track->cb[i].offset; if (size > radeon_bo_size(track->cb[i].robj)) { - DRM_ERROR("[drm] Buffer too small for color buffer %d " - "(need %lu have %lu) !\n", i, size, - radeon_bo_size(track->cb[i].robj)); - DRM_ERROR("[drm] color buffer %d (%u %u %u %u)\n", - i, track->cb[i].pitch, track->cb[i].cpp, - track->cb[i].offset, track->maxy); + dev_warn_once(rdev->dev, "[drm] Buffer too small for color buffer %d " + "(need %lu have %lu) !\n", i, size, + radeon_bo_size(track->cb[i].robj)); + dev_warn_once(rdev->dev, "[drm] color buffer %d (%u %u %u %u)\n", + i, track->cb[i].pitch, track->cb[i].cpp, + track->cb[i].offset, track->maxy); return -EINVAL; } } @@ -2296,18 +2297,18 @@ int r100_cs_track_check(struct radeon_device *rdev, struct r100_cs_track *track) if (track->zb_dirty && track->z_enabled) { if (track->zb.robj == NULL) { - DRM_ERROR("[drm] No buffer for z buffer !\n"); + dev_warn_once(rdev->dev, "[drm] No buffer for z buffer !\n"); return -EINVAL; } size = track->zb.pitch * track->zb.cpp * track->maxy; size += track->zb.offset; if (size > radeon_bo_size(track->zb.robj)) { - DRM_ERROR("[drm] Buffer too small for z buffer " - "(need %lu have %lu) !\n", size, - radeon_bo_size(track->zb.robj)); - DRM_ERROR("[drm] zbuffer (%u %u %u %u)\n", - track->zb.pitch, track->zb.cpp, - track->zb.offset, track->maxy); + dev_warn_once(rdev->dev, "[drm] Buffer too small for z buffer " + "(need %lu have 
%lu) !\n", size, + radeon_bo_size(track->zb.robj)); + dev_warn_once(rdev->dev, "[drm] zbuffer (%u %u %u %u)\n", + track->zb.pitch, track->zb.cpp, + track->zb.offset, track->maxy); return -EINVAL; } } @@ -2315,19 +2316,19 @@ int r100_cs_track_check(struct radeon_device *rdev, struct r100_cs_track *track) if (track->aa_dirty && track->aaresolve) { if (track->aa.robj == NULL) { - DRM_ERROR("[drm] No buffer for AA resolve buffer %d !\n", i); + dev_warn_once(rdev->dev, "[drm] No buffer for AA resolve buffer %d !\n", i); return -EINVAL; } /* I believe the format comes from colorbuffer0. */ size = track->aa.pitch * track->cb[0].cpp * track->maxy; size += track->aa.offset; if (size > radeon_bo_size(track->aa.robj)) { - DRM_ERROR("[drm] Buffer too small for AA resolve buffer %d " - "(need %lu have %lu) !\n", i, size, - radeon_bo_size(track->aa.robj)); - DRM_ERROR("[drm] AA resolve buffer %d (%u %u %u %u)\n", - i, track->aa.pitch, track->cb[0].cpp, - track->aa.offset, track->maxy); + dev_warn_once(rdev->dev, "[drm] Buffer too small for AA resolve buffer %d " + "(need %lu have %lu) !\n", i, size, + radeon_bo_size(track->aa.robj)); + dev_warn_once(rdev->dev, "[drm] AA resolve buffer %d (%u %u %u %u)\n", + i, track->aa.pitch, track->cb[0].cpp, + track->aa.offset, track->maxy); return -EINVAL; } } @@ -2344,17 +2345,17 @@ int r100_cs_track_check(struct radeon_device *rdev, struct r100_cs_track *track) for (i = 0; i < track->num_arrays; i++) { size = track->arrays[i].esize * track->max_indx * 4UL; if (track->arrays[i].robj == NULL) { - DRM_ERROR("(PW %u) Vertex array %u no buffer " - "bound\n", prim_walk, i); + dev_warn_once(rdev->dev, "(PW %u) Vertex array %u no buffer " + "bound\n", prim_walk, i); return -EINVAL; } if (size > radeon_bo_size(track->arrays[i].robj)) { - dev_err(rdev->dev, "(PW %u) Vertex array %u " - "need %lu dwords have %lu dwords\n", - prim_walk, i, size >> 2, - radeon_bo_size(track->arrays[i].robj) - >> 2); - DRM_ERROR("Max indices %u\n", track->max_indx); + dev_warn_once(rdev->dev, "(PW %u) Vertex array %u " + "need %lu dwords have %lu dwords\n", + prim_walk, i, size >> 2, + radeon_bo_size(track->arrays[i].robj) + >> 2); + dev_warn_once(rdev->dev, "Max indices %u\n", track->max_indx); return -EINVAL; } } @@ -2363,16 +2364,16 @@ int r100_cs_track_check(struct radeon_device *rdev, struct r100_cs_track *track) for (i = 0; i < track->num_arrays; i++) { size = track->arrays[i].esize * (nverts - 1) * 4UL; if (track->arrays[i].robj == NULL) { - DRM_ERROR("(PW %u) Vertex array %u no buffer " - "bound\n", prim_walk, i); + dev_warn_once(rdev->dev, "(PW %u) Vertex array %u no buffer " + "bound\n", prim_walk, i); return -EINVAL; } if (size > radeon_bo_size(track->arrays[i].robj)) { - dev_err(rdev->dev, "(PW %u) Vertex array %u " - "need %lu dwords have %lu dwords\n", - prim_walk, i, size >> 2, - radeon_bo_size(track->arrays[i].robj) - >> 2); + dev_warn_once(rdev->dev, "(PW %u) Vertex array %u " + "need %lu dwords have %lu dwords\n", + prim_walk, i, size >> 2, + radeon_bo_size(track->arrays[i].robj) + >> 2); return -EINVAL; } } @@ -2380,16 +2381,16 @@ int r100_cs_track_check(struct radeon_device *rdev, struct r100_cs_track *track) case 3: size = track->vtx_size * nverts; if (size != track->immd_dwords) { - DRM_ERROR("IMMD draw %u dwors but needs %lu dwords\n", - track->immd_dwords, size); - DRM_ERROR("VAP_VF_CNTL.NUM_VERTICES %u, VTX_SIZE %u\n", - nverts, track->vtx_size); + dev_warn_once(rdev->dev, "IMMD draw %u dwors but needs %lu dwords\n", + track->immd_dwords, size); + 
dev_warn_once(rdev->dev, "VAP_VF_CNTL.NUM_VERTICES %u, VTX_SIZE %u\n", + nverts, track->vtx_size); return -EINVAL; } break; default: - DRM_ERROR("[drm] Invalid primitive walk %d for VAP_VF_CNTL\n", - prim_walk); + dev_warn_once(rdev->dev, "[drm] Invalid primitive walk %d for VAP_VF_CNTL\n", + prim_walk); return -EINVAL; } diff --git a/drivers/gpu/drm/radeon/r200.c b/drivers/gpu/drm/radeon/r200.c index f5f2ffea5ab2..10a65a71de31 100644 --- a/drivers/gpu/drm/radeon/r200.c +++ b/drivers/gpu/drm/radeon/r200.c @@ -163,8 +163,8 @@ int r200_packet0_check(struct radeon_cs_parser *p, case RADEON_CRTC_GUI_TRIG_VLINE: r = r100_cs_packet_parse_vline(p); if (r) { - DRM_ERROR("No reloc for ib[%d]=0x%04X\n", - idx, reg); + dev_warn_once(p->dev, "No reloc for ib[%d]=0x%04X\n", + idx, reg); radeon_cs_dump_packet(p, pkt); return r; } @@ -180,8 +180,8 @@ int r200_packet0_check(struct radeon_cs_parser *p, case RADEON_RB3D_DEPTHOFFSET: r = radeon_cs_packet_next_reloc(p, &reloc, 0); if (r) { - DRM_ERROR("No reloc for ib[%d]=0x%04X\n", - idx, reg); + dev_warn_once(p->dev, "No reloc for ib[%d]=0x%04X\n", + idx, reg); radeon_cs_dump_packet(p, pkt); return r; } @@ -193,8 +193,8 @@ int r200_packet0_check(struct radeon_cs_parser *p, case RADEON_RB3D_COLOROFFSET: r = radeon_cs_packet_next_reloc(p, &reloc, 0); if (r) { - DRM_ERROR("No reloc for ib[%d]=0x%04X\n", - idx, reg); + dev_warn_once(p->dev, "No reloc for ib[%d]=0x%04X\n", + idx, reg); radeon_cs_dump_packet(p, pkt); return r; } @@ -212,8 +212,8 @@ int r200_packet0_check(struct radeon_cs_parser *p, i = (reg - R200_PP_TXOFFSET_0) / 24; r = radeon_cs_packet_next_reloc(p, &reloc, 0); if (r) { - DRM_ERROR("No reloc for ib[%d]=0x%04X\n", - idx, reg); + dev_warn_once(p->dev, "No reloc for ib[%d]=0x%04X\n", + idx, reg); radeon_cs_dump_packet(p, pkt); return r; } @@ -265,8 +265,8 @@ int r200_packet0_check(struct radeon_cs_parser *p, face = (reg - ((i * 24) + R200_PP_TXOFFSET_0)) / 4; r = radeon_cs_packet_next_reloc(p, &reloc, 0); if (r) { - DRM_ERROR("No reloc for ib[%d]=0x%04X\n", - idx, reg); + dev_warn_once(p->dev, "No reloc for ib[%d]=0x%04X\n", + idx, reg); radeon_cs_dump_packet(p, pkt); return r; } @@ -283,8 +283,8 @@ int r200_packet0_check(struct radeon_cs_parser *p, case RADEON_RB3D_COLORPITCH: r = radeon_cs_packet_next_reloc(p, &reloc, 0); if (r) { - DRM_ERROR("No reloc for ib[%d]=0x%04X\n", - idx, reg); + dev_warn_once(p->dev, "No reloc for ib[%d]=0x%04X\n", + idx, reg); radeon_cs_dump_packet(p, pkt); return r; } @@ -326,12 +326,12 @@ int r200_packet0_check(struct radeon_cs_parser *p, track->cb[0].cpp = 4; break; default: - DRM_ERROR("Invalid color buffer format (%d) !\n", - ((idx_value >> RADEON_RB3D_COLOR_FORMAT_SHIFT) & 0x1f)); + dev_warn_once(p->dev, "Invalid color buffer format (%d) !\n", + ((idx_value >> RADEON_RB3D_COLOR_FORMAT_SHIFT) & 0x1f)); return -EINVAL; } if (idx_value & RADEON_DEPTHXY_OFFSET_ENABLE) { - DRM_ERROR("No support for depth xy offset in kms\n"); + dev_warn_once(p->dev, "No support for depth xy offset in kms\n"); return -EINVAL; } @@ -360,8 +360,8 @@ int r200_packet0_check(struct radeon_cs_parser *p, case RADEON_RB3D_ZPASS_ADDR: r = radeon_cs_packet_next_reloc(p, &reloc, 0); if (r) { - DRM_ERROR("No reloc for ib[%d]=0x%04X\n", - idx, reg); + dev_warn_once(p->dev, "No reloc for ib[%d]=0x%04X\n", + idx, reg); radeon_cs_dump_packet(p, pkt); return r; } diff --git a/drivers/gpu/drm/radeon/r300.c b/drivers/gpu/drm/radeon/r300.c index d22889fbfa9c..d2ee6deec039 100644 --- a/drivers/gpu/drm/radeon/r300.c +++ b/drivers/gpu/drm/radeon/r300.c @@ 
-645,8 +645,8 @@ static int r300_packet0_check(struct radeon_cs_parser *p, case RADEON_CRTC_GUI_TRIG_VLINE: r = r100_cs_packet_parse_vline(p); if (r) { - DRM_ERROR("No reloc for ib[%d]=0x%04X\n", - idx, reg); + dev_warn_once(p->dev, "No reloc for ib[%d]=0x%04X\n", + idx, reg); radeon_cs_dump_packet(p, pkt); return r; } @@ -664,8 +664,8 @@ static int r300_packet0_check(struct radeon_cs_parser *p, i = (reg - R300_RB3D_COLOROFFSET0) >> 2; r = radeon_cs_packet_next_reloc(p, &reloc, 0); if (r) { - DRM_ERROR("No reloc for ib[%d]=0x%04X\n", - idx, reg); + dev_warn_once(p->dev, "No reloc for ib[%d]=0x%04X\n", + idx, reg); radeon_cs_dump_packet(p, pkt); return r; } @@ -677,8 +677,8 @@ static int r300_packet0_check(struct radeon_cs_parser *p, case R300_ZB_DEPTHOFFSET: r = radeon_cs_packet_next_reloc(p, &reloc, 0); if (r) { - DRM_ERROR("No reloc for ib[%d]=0x%04X\n", - idx, reg); + dev_warn_once(p->dev, "No reloc for ib[%d]=0x%04X\n", + idx, reg); radeon_cs_dump_packet(p, pkt); return r; } @@ -706,8 +706,8 @@ static int r300_packet0_check(struct radeon_cs_parser *p, i = (reg - R300_TX_OFFSET_0) >> 2; r = radeon_cs_packet_next_reloc(p, &reloc, 0); if (r) { - DRM_ERROR("No reloc for ib[%d]=0x%04X\n", - idx, reg); + dev_warn_once(p->dev, "No reloc for ib[%d]=0x%04X\n", + idx, reg); radeon_cs_dump_packet(p, pkt); return r; } @@ -762,7 +762,7 @@ static int r300_packet0_check(struct radeon_cs_parser *p, /* RB3D_CCTL */ if ((idx_value & (1 << 10)) && /* CMASK_ENABLE */ p->rdev->cmask_filp != p->filp) { - DRM_ERROR("Invalid RB3D_CCTL: Cannot enable CMASK.\n"); + dev_warn_once(p->dev, "Invalid RB3D_CCTL: Cannot enable CMASK.\n"); return -EINVAL; } track->num_cb = ((idx_value >> 5) & 0x3) + 1; @@ -779,8 +779,8 @@ static int r300_packet0_check(struct radeon_cs_parser *p, if (!(p->cs_flags & RADEON_CS_KEEP_TILING_FLAGS)) { r = radeon_cs_packet_next_reloc(p, &reloc, 0); if (r) { - DRM_ERROR("No reloc for ib[%d]=0x%04X\n", - idx, reg); + dev_warn_once(p->dev, "No reloc for ib[%d]=0x%04X\n", + idx, reg); radeon_cs_dump_packet(p, pkt); return r; } @@ -812,8 +812,8 @@ static int r300_packet0_check(struct radeon_cs_parser *p, break; case 5: if (p->rdev->family < CHIP_RV515) { - DRM_ERROR("Invalid color buffer format (%d)!\n", - ((idx_value >> 21) & 0xF)); + dev_warn_once(p->dev, "Invalid color buffer format (%d)!\n", + ((idx_value >> 21) & 0xF)); return -EINVAL; } fallthrough; @@ -827,8 +827,8 @@ static int r300_packet0_check(struct radeon_cs_parser *p, track->cb[i].cpp = 16; break; default: - DRM_ERROR("Invalid color buffer format (%d) !\n", - ((idx_value >> 21) & 0xF)); + dev_warn_once(p->dev, "Invalid color buffer format (%d) !\n", + ((idx_value >> 21) & 0xF)); return -EINVAL; } track->cb_dirty = true; @@ -853,8 +853,8 @@ static int r300_packet0_check(struct radeon_cs_parser *p, track->zb.cpp = 4; break; default: - DRM_ERROR("Invalid z buffer format (%d) !\n", - (idx_value & 0xF)); + dev_warn_once(p->dev, "Invalid z buffer format (%d) !\n", + (idx_value & 0xF)); return -EINVAL; } track->zb_dirty = true; @@ -864,8 +864,8 @@ static int r300_packet0_check(struct radeon_cs_parser *p, if (!(p->cs_flags & RADEON_CS_KEEP_TILING_FLAGS)) { r = radeon_cs_packet_next_reloc(p, &reloc, 0); if (r) { - DRM_ERROR("No reloc for ib[%d]=0x%04X\n", - idx, reg); + dev_warn_once(p->dev, "No reloc for ib[%d]=0x%04X\n", + idx, reg); radeon_cs_dump_packet(p, pkt); return r; } @@ -962,8 +962,8 @@ static int r300_packet0_check(struct radeon_cs_parser *p, break; case R300_TX_FORMAT_ATI2N: if (p->rdev->family < CHIP_R420) { - 
DRM_ERROR("Invalid texture format %u\n", - (idx_value & 0x1F)); + dev_warn_once(p->dev, "Invalid texture format %u\n", + (idx_value & 0x1F)); return -EINVAL; } /* The same rules apply as for DXT3/5. */ @@ -974,8 +974,8 @@ static int r300_packet0_check(struct radeon_cs_parser *p, track->textures[i].compress_format = R100_TRACK_COMP_DXT35; break; default: - DRM_ERROR("Invalid texture format %u\n", - (idx_value & 0x1F)); + dev_warn_once(p->dev, "Invalid texture format %u\n", + (idx_value & 0x1F)); return -EINVAL; } track->tex_dirty = true; @@ -1041,7 +1041,7 @@ static int r300_packet0_check(struct radeon_cs_parser *p, R100_TRACK_COMP_DXT1; } } else if (idx_value & (1 << 14)) { - DRM_ERROR("Forbidden bit TXFORMAT_MSB\n"); + dev_warn_once(p->dev, "Forbidden bit TXFORMAT_MSB\n"); return -EINVAL; } track->tex_dirty = true; @@ -1079,8 +1079,8 @@ static int r300_packet0_check(struct radeon_cs_parser *p, case R300_ZB_ZPASS_ADDR: r = radeon_cs_packet_next_reloc(p, &reloc, 0); if (r) { - DRM_ERROR("No reloc for ib[%d]=0x%04X\n", - idx, reg); + dev_warn_once(p->dev, "No reloc for ib[%d]=0x%04X\n", + idx, reg); radeon_cs_dump_packet(p, pkt); return r; } @@ -1121,8 +1121,8 @@ static int r300_packet0_check(struct radeon_cs_parser *p, case R300_RB3D_AARESOLVE_OFFSET: r = radeon_cs_packet_next_reloc(p, &reloc, 0); if (r) { - DRM_ERROR("No reloc for ib[%d]=0x%04X\n", - idx, reg); + dev_warn_once(p->dev, "No reloc for ib[%d]=0x%04X\n", + idx, reg); radeon_cs_dump_packet(p, pkt); return r; } @@ -1191,7 +1191,7 @@ static int r300_packet3_check(struct radeon_cs_parser *p, case PACKET3_INDX_BUFFER: r = radeon_cs_packet_next_reloc(p, &reloc, 0); if (r) { - DRM_ERROR("No reloc for packet3 %d\n", pkt->opcode); + dev_warn_once(p->dev, "No reloc for packet3 %d\n", pkt->opcode); radeon_cs_dump_packet(p, pkt); return r; } @@ -1207,7 +1207,7 @@ static int r300_packet3_check(struct radeon_cs_parser *p, * PRIM_WALK must be equal to 3 vertex data in embedded * in cmd stream */ if (((radeon_get_ib_value(p, idx + 1) >> 4) & 0x3) != 3) { - DRM_ERROR("PRIM_WALK must be 3 for IMMD draw\n"); + dev_warn_once(p->dev, "PRIM_WALK must be 3 for IMMD draw\n"); return -EINVAL; } track->vap_vf_cntl = radeon_get_ib_value(p, idx + 1); @@ -1222,7 +1222,7 @@ static int r300_packet3_check(struct radeon_cs_parser *p, * PRIM_WALK must be equal to 3 vertex data in embedded * in cmd stream */ if (((radeon_get_ib_value(p, idx) >> 4) & 0x3) != 3) { - DRM_ERROR("PRIM_WALK must be 3 for IMMD draw\n"); + dev_warn_once(p->dev, "PRIM_WALK must be 3 for IMMD draw\n"); return -EINVAL; } track->vap_vf_cntl = radeon_get_ib_value(p, idx); @@ -1272,7 +1272,7 @@ static int r300_packet3_check(struct radeon_cs_parser *p, case PACKET3_NOP: break; default: - DRM_ERROR("Packet3 opcode %x not supported\n", pkt->opcode); + dev_warn_once(p->dev, "Packet3 opcode %x not supported\n", pkt->opcode); return -EINVAL; } return 0; @@ -1308,7 +1308,7 @@ int r300_cs_parse(struct radeon_cs_parser *p) r = r300_packet3_check(p, &pkt); break; default: - DRM_ERROR("Unknown packet type %d !\n", pkt.type); + dev_warn_once(p->dev, "Unknown packet type %d !\n", pkt.type); return -EINVAL; } if (r) { diff --git a/drivers/gpu/drm/radeon/r600_cs.c b/drivers/gpu/drm/radeon/r600_cs.c index ac77d1246b94..8eeceeeca362 100644 --- a/drivers/gpu/drm/radeon/r600_cs.c +++ b/drivers/gpu/drm/radeon/r600_cs.c @@ -361,9 +361,9 @@ static int r600_cs_track_validate_cb(struct radeon_cs_parser *p, int i) format = G_0280A0_FORMAT(track->cb_color_info[i]); if (!r600_fmt_is_valid_color(format)) { - 
dev_warn(p->dev, "%s:%d cb invalid format %d for %d (0x%08X)\n", - __func__, __LINE__, format, - i, track->cb_color_info[i]); + dev_warn_once(p->dev, "%s:%d cb invalid format %d for %d (0x%08X)\n", + __func__, __LINE__, format, + i, track->cb_color_info[i]); return -EINVAL; } /* pitch in pixels */ @@ -384,9 +384,9 @@ static int r600_cs_track_validate_cb(struct radeon_cs_parser *p, int i) array_check.blocksize = r600_fmt_get_blocksize(format); if (r600_get_array_mode_alignment(&array_check, &pitch_align, &height_align, &depth_align, &base_align)) { - dev_warn(p->dev, "%s invalid tiling %d for %d (0x%08X)\n", __func__, - G_0280A0_ARRAY_MODE(track->cb_color_info[i]), i, - track->cb_color_info[i]); + dev_warn_once(p->dev, "%s invalid tiling %d for %d (0x%08X)\n", __func__, + G_0280A0_ARRAY_MODE(track->cb_color_info[i]), i, + track->cb_color_info[i]); return -EINVAL; } switch (array_mode) { @@ -402,25 +402,26 @@ static int r600_cs_track_validate_cb(struct radeon_cs_parser *p, int i) case V_0280A0_ARRAY_2D_TILED_THIN1: break; default: - dev_warn(p->dev, "%s invalid tiling %d for %d (0x%08X)\n", __func__, - G_0280A0_ARRAY_MODE(track->cb_color_info[i]), i, - track->cb_color_info[i]); + dev_warn_once(p->dev, "%s invalid tiling %d for %d (0x%08X)\n", __func__, + G_0280A0_ARRAY_MODE(track->cb_color_info[i]), i, + track->cb_color_info[i]); return -EINVAL; } if (!IS_ALIGNED(pitch, pitch_align)) { - dev_warn(p->dev, "%s:%d cb pitch (%d, 0x%x, %d) invalid\n", - __func__, __LINE__, pitch, pitch_align, array_mode); + dev_warn_once(p->dev, "%s:%d cb pitch (%d, 0x%x, %d) invalid\n", + __func__, __LINE__, pitch, pitch_align, array_mode); return -EINVAL; } if (!IS_ALIGNED(height, height_align)) { - dev_warn(p->dev, "%s:%d cb height (%d, 0x%x, %d) invalid\n", - __func__, __LINE__, height, height_align, array_mode); + dev_warn_once(p->dev, "%s:%d cb height (%d, 0x%x, %d) invalid\n", + __func__, __LINE__, height, height_align, array_mode); return -EINVAL; } if (!IS_ALIGNED(base_offset, base_align)) { - dev_warn(p->dev, "%s offset[%d] 0x%llx 0x%llx, %d not aligned\n", __func__, i, - base_offset, base_align, array_mode); + dev_warn_once(p->dev, + "%s offset[%d] 0x%llx 0x%llx, %d not aligned\n", __func__, i, + base_offset, base_align, array_mode); return -EINVAL; } @@ -447,13 +448,14 @@ static int r600_cs_track_validate_cb(struct radeon_cs_parser *p, int i) * broken userspace. 
*/ } else { - dev_warn(p->dev, "%s offset[%d] %d %llu %d %lu too big (%d %d) (%d %d %d)\n", - __func__, i, array_mode, - track->cb_color_bo_offset[i], tmp, - radeon_bo_size(track->cb_color_bo[i]), - pitch, height, r600_fmt_get_nblocksx(format, pitch), - r600_fmt_get_nblocksy(format, height), - r600_fmt_get_blocksize(format)); + dev_warn_once(p->dev, + "%s offset[%d] %d %llu %d %lu too big (%d %d) (%d %d %d)\n", + __func__, i, array_mode, + track->cb_color_bo_offset[i], tmp, + radeon_bo_size(track->cb_color_bo[i]), + pitch, height, r600_fmt_get_nblocksx(format, pitch), + r600_fmt_get_nblocksy(format, height), + r600_fmt_get_blocksize(format)); return -EINVAL; } } @@ -478,11 +480,11 @@ static int r600_cs_track_validate_cb(struct radeon_cs_parser *p, int i) if (bytes + track->cb_color_frag_offset[i] > radeon_bo_size(track->cb_color_frag_bo[i])) { - dev_warn(p->dev, "%s FMASK_TILE_MAX too large " - "(tile_max=%u, bytes=%u, offset=%llu, bo_size=%lu)\n", - __func__, tile_max, bytes, - track->cb_color_frag_offset[i], - radeon_bo_size(track->cb_color_frag_bo[i])); + dev_warn_once(p->dev, "%s FMASK_TILE_MAX too large " + "(tile_max=%u, bytes=%u, offset=%llu, bo_size=%lu)\n", + __func__, tile_max, bytes, + track->cb_color_frag_offset[i], + radeon_bo_size(track->cb_color_frag_bo[i])); return -EINVAL; } } @@ -496,17 +498,17 @@ static int r600_cs_track_validate_cb(struct radeon_cs_parser *p, int i) if (bytes + track->cb_color_tile_offset[i] > radeon_bo_size(track->cb_color_tile_bo[i])) { - dev_warn(p->dev, "%s CMASK_BLOCK_MAX too large " - "(block_max=%u, bytes=%u, offset=%llu, bo_size=%lu)\n", - __func__, block_max, bytes, - track->cb_color_tile_offset[i], - radeon_bo_size(track->cb_color_tile_bo[i])); + dev_warn_once(p->dev, "%s CMASK_BLOCK_MAX too large " + "(block_max=%u, bytes=%u, offset=%llu, bo_size=%lu)\n", + __func__, block_max, bytes, + track->cb_color_tile_offset[i], + radeon_bo_size(track->cb_color_tile_bo[i])); return -EINVAL; } break; } default: - dev_warn(p->dev, "%s invalid tile mode\n", __func__); + dev_warn_once(p->dev, "%s invalid tile mode\n", __func__); return -EINVAL; } return 0; @@ -526,7 +528,7 @@ static int r600_cs_track_validate_db(struct radeon_cs_parser *p) if (track->db_bo == NULL) { - dev_warn(p->dev, "z/stencil with no depth buffer\n"); + dev_warn_once(p->dev, "z/stencil with no depth buffer\n"); return -EINVAL; } switch (G_028010_FORMAT(track->db_depth_info)) { @@ -544,20 +546,22 @@ static int r600_cs_track_validate_db(struct radeon_cs_parser *p) bpe = 8; break; default: - dev_warn(p->dev, "z/stencil with invalid format %d\n", G_028010_FORMAT(track->db_depth_info)); + dev_warn_once(p->dev, + "z/stencil with invalid format %d\n", + G_028010_FORMAT(track->db_depth_info)); return -EINVAL; } if ((track->db_depth_size & 0xFFFFFC00) == 0xFFFFFC00) { if (!track->db_depth_size_idx) { - dev_warn(p->dev, "z/stencil buffer size not set\n"); + dev_warn_once(p->dev, "z/stencil buffer size not set\n"); return -EINVAL; } tmp = radeon_bo_size(track->db_bo) - track->db_offset; tmp = (tmp / bpe) >> 6; if (!tmp) { - dev_warn(p->dev, "z/stencil buffer too small (0x%08X %d %d %ld)\n", - track->db_depth_size, bpe, track->db_offset, - radeon_bo_size(track->db_bo)); + dev_warn_once(p->dev, "z/stencil buffer too small (0x%08X %d %d %ld)\n", + track->db_depth_size, bpe, track->db_offset, + radeon_bo_size(track->db_bo)); return -EINVAL; } ib[track->db_depth_size_idx] = S_028000_SLICE_TILE_MAX(tmp - 1) | (track->db_depth_size & 0x3FF); @@ -579,9 +583,9 @@ static int r600_cs_track_validate_db(struct 
radeon_cs_parser *p) array_check.blocksize = bpe; if (r600_get_array_mode_alignment(&array_check, &pitch_align, &height_align, &depth_align, &base_align)) { - dev_warn(p->dev, "%s invalid tiling %d (0x%08X)\n", __func__, - G_028010_ARRAY_MODE(track->db_depth_info), - track->db_depth_info); + dev_warn_once(p->dev, "%s invalid tiling %d (0x%08X)\n", __func__, + G_028010_ARRAY_MODE(track->db_depth_info), + track->db_depth_info); return -EINVAL; } switch (array_mode) { @@ -592,24 +596,24 @@ static int r600_cs_track_validate_db(struct radeon_cs_parser *p) case V_028010_ARRAY_2D_TILED_THIN1: break; default: - dev_warn(p->dev, "%s invalid tiling %d (0x%08X)\n", __func__, - G_028010_ARRAY_MODE(track->db_depth_info), - track->db_depth_info); + dev_warn_once(p->dev, "%s invalid tiling %d (0x%08X)\n", __func__, + G_028010_ARRAY_MODE(track->db_depth_info), + track->db_depth_info); return -EINVAL; } if (!IS_ALIGNED(pitch, pitch_align)) { - dev_warn(p->dev, "%s:%d db pitch (%d, 0x%x, %d) invalid\n", + dev_warn_once(p->dev, "%s:%d db pitch (%d, 0x%x, %d) invalid\n", __func__, __LINE__, pitch, pitch_align, array_mode); return -EINVAL; } if (!IS_ALIGNED(height, height_align)) { - dev_warn(p->dev, "%s:%d db height (%d, 0x%x, %d) invalid\n", + dev_warn_once(p->dev, "%s:%d db height (%d, 0x%x, %d) invalid\n", __func__, __LINE__, height, height_align, array_mode); return -EINVAL; } if (!IS_ALIGNED(base_offset, base_align)) { - dev_warn(p->dev, "%s offset 0x%llx, 0x%llx, %d not aligned\n", __func__, + dev_warn_once(p->dev, "%s offset 0x%llx, 0x%llx, %d not aligned\n", __func__, base_offset, base_align, array_mode); return -EINVAL; } @@ -618,10 +622,11 @@ static int r600_cs_track_validate_db(struct radeon_cs_parser *p) nviews = G_028004_SLICE_MAX(track->db_depth_view) + 1; tmp = ntiles * bpe * 64 * nviews * track->nsamples; if ((tmp + track->db_offset) > radeon_bo_size(track->db_bo)) { - dev_warn(p->dev, "z/stencil buffer (%d) too small (0x%08X %d %d %d -> %u have %lu)\n", - array_mode, - track->db_depth_size, ntiles, nviews, bpe, tmp + track->db_offset, - radeon_bo_size(track->db_bo)); + dev_warn_once(p->dev, + "z/stencil buffer (%d) too small (0x%08X %d %d %d -> %u have %lu)\n", + array_mode, + track->db_depth_size, ntiles, nviews, bpe, tmp + track->db_offset, + radeon_bo_size(track->db_bo)); return -EINVAL; } } @@ -632,13 +637,13 @@ static int r600_cs_track_validate_db(struct radeon_cs_parser *p) unsigned nbx, nby; if (track->htile_bo == NULL) { - dev_warn(p->dev, "%s:%d htile enabled without htile surface 0x%08x\n", - __func__, __LINE__, track->db_depth_info); + dev_warn_once(p->dev, "%s:%d htile enabled without htile surface 0x%08x\n", + __func__, __LINE__, track->db_depth_info); return -EINVAL; } if ((track->db_depth_size & 0xFFFFFC00) == 0xFFFFFC00) { - dev_warn(p->dev, "%s:%d htile can't be enabled with bogus db_depth_size 0x%08x\n", - __func__, __LINE__, track->db_depth_size); + dev_warn_once(p->dev, "%s:%d htile can't be enabled with bogus db_depth_size 0x%08x\n", + __func__, __LINE__, track->db_depth_size); return -EINVAL; } @@ -676,8 +681,8 @@ static int r600_cs_track_validate_db(struct radeon_cs_parser *p) nby = round_up(nby, 16 * 8); break; default: - dev_warn(p->dev, "%s:%d invalid num pipes %d\n", - __func__, __LINE__, track->npipes); + dev_warn_once(p->dev, "%s:%d invalid num pipes %d\n", + __func__, __LINE__, track->npipes); return -EINVAL; } } @@ -689,9 +694,9 @@ static int r600_cs_track_validate_db(struct radeon_cs_parser *p) size += track->htile_offset; if (size > 
radeon_bo_size(track->htile_bo)) { - dev_warn(p->dev, "%s:%d htile surface too small %ld for %ld (%d %d)\n", - __func__, __LINE__, radeon_bo_size(track->htile_bo), - size, nbx, nby); + dev_warn_once(p->dev, "%s:%d htile surface too small %ld for %ld (%d %d)\n", + __func__, __LINE__, radeon_bo_size(track->htile_bo), + size, nbx, nby); return -EINVAL; } } @@ -718,13 +723,13 @@ static int r600_cs_track_check(struct radeon_cs_parser *p) u64 offset = (u64)track->vgt_strmout_bo_offset[i] + (u64)track->vgt_strmout_size[i]; if (offset > radeon_bo_size(track->vgt_strmout_bo[i])) { - DRM_ERROR("streamout %d bo too small: 0x%llx, 0x%lx\n", - i, offset, - radeon_bo_size(track->vgt_strmout_bo[i])); + dev_warn_once(p->dev, "streamout %d bo too small: 0x%llx, 0x%lx\n", + i, offset, + radeon_bo_size(track->vgt_strmout_bo[i])); return -EINVAL; } } else { - dev_warn(p->dev, "No buffer for streamout %d\n", i); + dev_warn_once(p->dev, "No buffer for streamout %d\n", i); return -EINVAL; } } @@ -753,8 +758,8 @@ static int r600_cs_track_check(struct radeon_cs_parser *p) (tmp >> (i * 4)) & 0xF) { /* at least one component is enabled */ if (track->cb_color_bo[i] == NULL) { - dev_warn(p->dev, "%s:%d mask 0x%08X | 0x%08X no cb for %d\n", - __func__, __LINE__, track->cb_target_mask, track->cb_shader_mask, i); + dev_warn_once(p->dev, "%s:%d mask 0x%08X | 0x%08X no cb for %d\n", + __func__, __LINE__, track->cb_target_mask, track->cb_shader_mask, i); return -EINVAL; } /* perform rewrite of CB_COLOR[0-7]_SIZE */ @@ -841,33 +846,33 @@ int r600_cs_common_vline_parse(struct radeon_cs_parser *p, /* check its a WAIT_REG_MEM */ if (wait_reg_mem.type != RADEON_PACKET_TYPE3 || wait_reg_mem.opcode != PACKET3_WAIT_REG_MEM) { - DRM_ERROR("vline wait missing WAIT_REG_MEM segment\n"); + dev_warn_once(p->dev, "vline wait missing WAIT_REG_MEM segment\n"); return -EINVAL; } wait_reg_mem_info = radeon_get_ib_value(p, wait_reg_mem.idx + 1); /* bit 4 is reg (0) or mem (1) */ if (wait_reg_mem_info & 0x10) { - DRM_ERROR("vline WAIT_REG_MEM waiting on MEM instead of REG\n"); + dev_warn_once(p->dev, "vline WAIT_REG_MEM waiting on MEM instead of REG\n"); return -EINVAL; } /* bit 8 is me (0) or pfp (1) */ if (wait_reg_mem_info & 0x100) { - DRM_ERROR("vline WAIT_REG_MEM waiting on PFP instead of ME\n"); + dev_warn_once(p->dev, "vline WAIT_REG_MEM waiting on PFP instead of ME\n"); return -EINVAL; } /* waiting for value to be equal */ if ((wait_reg_mem_info & 0x7) != 0x3) { - DRM_ERROR("vline WAIT_REG_MEM function not equal\n"); + dev_warn_once(p->dev, "vline WAIT_REG_MEM function not equal\n"); return -EINVAL; } if ((radeon_get_ib_value(p, wait_reg_mem.idx + 2) << 2) != vline_status[0]) { - DRM_ERROR("vline WAIT_REG_MEM bad reg\n"); + dev_warn_once(p->dev, "vline WAIT_REG_MEM bad reg\n"); return -EINVAL; } if (radeon_get_ib_value(p, wait_reg_mem.idx + 5) != RADEON_VLINE_STAT) { - DRM_ERROR("vline WAIT_REG_MEM bad bit mask\n"); + dev_warn_once(p->dev, "vline WAIT_REG_MEM bad bit mask\n"); return -EINVAL; } @@ -886,7 +891,7 @@ int r600_cs_common_vline_parse(struct radeon_cs_parser *p, crtc = drm_crtc_find(rdev_to_drm(p->rdev), p->filp, crtc_id); if (!crtc) { - DRM_ERROR("cannot find crtc %d\n", crtc_id); + dev_warn_once(p->dev, "cannot find crtc %d\n", crtc_id); return -ENOENT; } radeon_crtc = to_radeon_crtc(crtc); @@ -907,7 +912,7 @@ int r600_cs_common_vline_parse(struct radeon_cs_parser *p, ib[h_idx] = header; ib[h_idx + 4] = vline_status[crtc_id] >> 2; } else { - DRM_ERROR("unknown crtc reloc\n"); + dev_warn_once(p->dev, "unknown crtc reloc\n"); 
return -EINVAL; } return 0; @@ -923,8 +928,8 @@ static int r600_packet0_check(struct radeon_cs_parser *p, case AVIVO_D1MODE_VLINE_START_END: r = r600_cs_packet_parse_vline(p); if (r) { - DRM_ERROR("No reloc for ib[%d]=0x%04X\n", - idx, reg); + dev_warn_once(p->dev, "No reloc for ib[%d]=0x%04X\n", + idx, reg); return r; } break; @@ -972,7 +977,7 @@ static int r600_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx) i = (reg >> 7); if (i >= ARRAY_SIZE(r600_reg_safe_bm)) { - dev_warn(p->dev, "forbidden register 0x%08x at %d\n", reg, idx); + dev_warn_once(p->dev, "forbidden register 0x%08x at %d\n", reg, idx); return -EINVAL; } m = 1 << ((reg >> 2) & 31); @@ -1013,8 +1018,8 @@ static int r600_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx) case SQ_VSTMP_RING_BASE: r = radeon_cs_packet_next_reloc(p, &reloc, 0); if (r) { - dev_warn(p->dev, "bad SET_CONTEXT_REG " - "0x%04X\n", reg); + dev_warn_once(p->dev, "bad SET_CONTEXT_REG " + "0x%04X\n", reg); return -EINVAL; } ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff); @@ -1031,8 +1036,8 @@ static int r600_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx) radeon_cs_packet_next_is_pkt3_nop(p)) { r = radeon_cs_packet_next_reloc(p, &reloc, r600_nomm); if (r) { - dev_warn(p->dev, "bad SET_CONTEXT_REG " - "0x%04X\n", reg); + dev_warn_once(p->dev, "bad SET_CONTEXT_REG " + "0x%04X\n", reg); return -EINVAL; } track->db_depth_info = radeon_get_ib_value(p, idx); @@ -1073,8 +1078,8 @@ static int r600_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx) case VGT_STRMOUT_BUFFER_BASE_3: r = radeon_cs_packet_next_reloc(p, &reloc, r600_nomm); if (r) { - dev_warn(p->dev, "bad SET_CONTEXT_REG " - "0x%04X\n", reg); + dev_warn_once(p->dev, "bad SET_CONTEXT_REG " + "0x%04X\n", reg); return -EINVAL; } tmp = (reg - VGT_STRMOUT_BUFFER_BASE_0) / 16; @@ -1096,8 +1101,8 @@ static int r600_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx) case CP_COHER_BASE: r = radeon_cs_packet_next_reloc(p, &reloc, r600_nomm); if (r) { - dev_warn(p->dev, "missing reloc for CP_COHER_BASE " - "0x%04X\n", reg); + dev_warn_once(p->dev, "missing reloc for CP_COHER_BASE " + "0x%04X\n", reg); return -EINVAL; } ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff); @@ -1270,8 +1275,8 @@ static int r600_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx) case CB_COLOR7_BASE: r = radeon_cs_packet_next_reloc(p, &reloc, r600_nomm); if (r) { - dev_warn(p->dev, "bad SET_CONTEXT_REG " - "0x%04X\n", reg); + dev_warn_once(p->dev, "bad SET_CONTEXT_REG " + "0x%04X\n", reg); return -EINVAL; } tmp = (reg - CB_COLOR0_BASE) / 4; @@ -1285,8 +1290,8 @@ static int r600_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx) case DB_DEPTH_BASE: r = radeon_cs_packet_next_reloc(p, &reloc, r600_nomm); if (r) { - dev_warn(p->dev, "bad SET_CONTEXT_REG " - "0x%04X\n", reg); + dev_warn_once(p->dev, "bad SET_CONTEXT_REG " + "0x%04X\n", reg); return -EINVAL; } track->db_offset = radeon_get_ib_value(p, idx) << 8; @@ -1298,8 +1303,8 @@ static int r600_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx) case DB_HTILE_DATA_BASE: r = radeon_cs_packet_next_reloc(p, &reloc, r600_nomm); if (r) { - dev_warn(p->dev, "bad SET_CONTEXT_REG " - "0x%04X\n", reg); + dev_warn_once(p->dev, "bad SET_CONTEXT_REG " + "0x%04X\n", reg); return -EINVAL; } track->htile_offset = (u64)radeon_get_ib_value(p, idx) << 8; @@ -1368,8 +1373,8 @@ static int r600_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx) case SQ_ALU_CONST_CACHE_VS_15: r = radeon_cs_packet_next_reloc(p, &reloc, 
r600_nomm); if (r) { - dev_warn(p->dev, "bad SET_CONTEXT_REG " - "0x%04X\n", reg); + dev_warn_once(p->dev, "bad SET_CONTEXT_REG " + "0x%04X\n", reg); return -EINVAL; } ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff); @@ -1377,8 +1382,8 @@ static int r600_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx) case SX_MEMORY_EXPORT_BASE: r = radeon_cs_packet_next_reloc(p, &reloc, r600_nomm); if (r) { - dev_warn(p->dev, "bad SET_CONFIG_REG " - "0x%04X\n", reg); + dev_warn_once(p->dev, "bad SET_CONFIG_REG " + "0x%04X\n", reg); return -EINVAL; } ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff); @@ -1387,7 +1392,7 @@ static int r600_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx) track->sx_misc_kill_all_prims = (radeon_get_ib_value(p, idx) & 0x1) != 0; break; default: - dev_warn(p->dev, "forbidden register 0x%08x at %d\n", reg, idx); + dev_warn_once(p->dev, "forbidden register 0x%08x at %d\n", reg, idx); return -EINVAL; } return 0; @@ -1408,7 +1413,7 @@ static void r600_texture_size(unsigned nfaces, unsigned blevel, unsigned llevel, unsigned block_align, unsigned height_align, unsigned base_align, unsigned *l0_size, unsigned *mipmap_size) { - unsigned offset, i, level; + unsigned offset, i; unsigned width, height, depth, size; unsigned blocksize; unsigned nbx, nby; @@ -1420,7 +1425,7 @@ static void r600_texture_size(unsigned nfaces, unsigned blevel, unsigned llevel, w0 = r600_mip_minify(w0, 0); h0 = r600_mip_minify(h0, 0); d0 = r600_mip_minify(d0, 0); - for(i = 0, offset = 0, level = blevel; i < nlevels; i++, level++) { + for (i = 0, offset = 0; i < nlevels; i++) { width = r600_mip_minify(w0, i); nbx = r600_fmt_get_nblocksx(format, width); @@ -1543,43 +1548,43 @@ static int r600_check_texture_resource(struct radeon_cs_parser *p, u32 idx, llevel = 0; break; default: - dev_warn(p->dev, "this kernel doesn't support %d texture dim\n", G_038000_DIM(word0)); + dev_warn_once(p->dev, "this kernel doesn't support %d texture dim\n", G_038000_DIM(word0)); return -EINVAL; } if (!r600_fmt_is_valid_texture(format, p->family)) { - dev_warn(p->dev, "%s:%d texture invalid format %d\n", - __func__, __LINE__, format); + dev_warn_once(p->dev, "%s:%d texture invalid format %d\n", + __func__, __LINE__, format); return -EINVAL; } if (r600_get_array_mode_alignment(&array_check, &pitch_align, &height_align, &depth_align, &base_align)) { - dev_warn(p->dev, "%s:%d tex array mode (%d) invalid\n", - __func__, __LINE__, G_038000_TILE_MODE(word0)); + dev_warn_once(p->dev, "%s:%d tex array mode (%d) invalid\n", + __func__, __LINE__, G_038000_TILE_MODE(word0)); return -EINVAL; } /* XXX check height as well... 
*/ if (!IS_ALIGNED(pitch, pitch_align)) { - dev_warn(p->dev, "%s:%d tex pitch (%d, 0x%x, %d) invalid\n", - __func__, __LINE__, pitch, pitch_align, G_038000_TILE_MODE(word0)); + dev_warn_once(p->dev, "%s:%d tex pitch (%d, 0x%x, %d) invalid\n", + __func__, __LINE__, pitch, pitch_align, G_038000_TILE_MODE(word0)); return -EINVAL; } if (!IS_ALIGNED(base_offset, base_align)) { - dev_warn(p->dev, "%s:%d tex base offset (0x%llx, 0x%llx, %d) invalid\n", - __func__, __LINE__, base_offset, base_align, G_038000_TILE_MODE(word0)); + dev_warn_once(p->dev, "%s:%d tex base offset (0x%llx, 0x%llx, %d) invalid\n", + __func__, __LINE__, base_offset, base_align, G_038000_TILE_MODE(word0)); return -EINVAL; } if (!IS_ALIGNED(mip_offset, base_align)) { - dev_warn(p->dev, "%s:%d tex mip offset (0x%llx, 0x%llx, %d) invalid\n", - __func__, __LINE__, mip_offset, base_align, G_038000_TILE_MODE(word0)); + dev_warn_once(p->dev, "%s:%d tex mip offset (0x%llx, 0x%llx, %d) invalid\n", + __func__, __LINE__, mip_offset, base_align, G_038000_TILE_MODE(word0)); return -EINVAL; } if (blevel > llevel) { - dev_warn(p->dev, "texture blevel %d > llevel %d\n", - blevel, llevel); + dev_warn_once(p->dev, "texture blevel %d > llevel %d\n", + blevel, llevel); } if (is_array) { barray = G_038014_BASE_ARRAY(word5); @@ -1592,16 +1597,16 @@ static int r600_check_texture_resource(struct radeon_cs_parser *p, u32 idx, &l0_size, &mipmap_size); /* using get ib will give us the offset into the texture bo */ if ((l0_size + word2) > radeon_bo_size(texture)) { - dev_warn(p->dev, "texture bo too small ((%d %d) (%d %d) %d %d %d -> %d have %ld)\n", - w0, h0, pitch_align, height_align, - array_check.array_mode, format, word2, - l0_size, radeon_bo_size(texture)); - dev_warn(p->dev, "alignments %d %d %d %lld\n", pitch, pitch_align, height_align, base_align); + dev_warn_once(p->dev, "texture bo too small ((%d %d) (%d %d) %d %d %d -> %d have %ld)\n", + w0, h0, pitch_align, height_align, + array_check.array_mode, format, word2, + l0_size, radeon_bo_size(texture)); + dev_warn_once(p->dev, "alignments %d %d %d %lld\n", pitch, pitch_align, height_align, base_align); return -EINVAL; } /* using get ib will give us the offset into the mipmap bo */ if ((mipmap_size + word3) > radeon_bo_size(mipmap)) { - /*dev_warn(p->dev, "mipmap bo too small (%d %d %d %d %d %d -> %d have %ld)\n", + /*dev_warn_once(p->dev, "mipmap bo too small (%d %d %d %d %d %d -> %d have %ld)\n", w0, h0, format, blevel, nlevels, word3, mipmap_size, radeon_bo_size(texture));*/ } return 0; @@ -1613,13 +1618,13 @@ static bool r600_is_safe_reg(struct radeon_cs_parser *p, u32 reg, u32 idx) i = (reg >> 7); if (i >= ARRAY_SIZE(r600_reg_safe_bm)) { - dev_warn(p->dev, "forbidden register 0x%08x at %d\n", reg, idx); + dev_warn_once(p->dev, "forbidden register 0x%08x at %d\n", reg, idx); return false; } m = 1 << ((reg >> 2) & 31); if (!(r600_reg_safe_bm[i] & m)) return true; - dev_warn(p->dev, "forbidden register 0x%08x at %d\n", reg, idx); + dev_warn_once(p->dev, "forbidden register 0x%08x at %d\n", reg, idx); return false; } @@ -1648,7 +1653,7 @@ static int r600_packet3_check(struct radeon_cs_parser *p, uint64_t offset; if (pkt->count != 1) { - DRM_ERROR("bad SET PREDICATION\n"); + dev_warn_once(p->dev, "bad SET PREDICATION\n"); return -EINVAL; } @@ -1660,13 +1665,13 @@ static int r600_packet3_check(struct radeon_cs_parser *p, return 0; if (pred_op > 2) { - DRM_ERROR("bad SET PREDICATION operation %d\n", pred_op); + dev_warn_once(p->dev, "bad SET PREDICATION operation %d\n", pred_op); return -EINVAL; } 
r = radeon_cs_packet_next_reloc(p, &reloc, r600_nomm); if (r) { - DRM_ERROR("bad SET PREDICATION\n"); + dev_warn_once(p->dev, "bad SET PREDICATION\n"); return -EINVAL; } @@ -1681,20 +1686,20 @@ static int r600_packet3_check(struct radeon_cs_parser *p, case PACKET3_START_3D_CMDBUF: if (p->family >= CHIP_RV770 || pkt->count) { - DRM_ERROR("bad START_3D\n"); + dev_warn_once(p->dev, "bad START_3D\n"); return -EINVAL; } break; case PACKET3_CONTEXT_CONTROL: if (pkt->count != 1) { - DRM_ERROR("bad CONTEXT_CONTROL\n"); + dev_warn_once(p->dev, "bad CONTEXT_CONTROL\n"); return -EINVAL; } break; case PACKET3_INDEX_TYPE: case PACKET3_NUM_INSTANCES: if (pkt->count) { - DRM_ERROR("bad INDEX_TYPE/NUM_INSTANCES\n"); + dev_warn_once(p->dev, "bad INDEX_TYPE/NUM_INSTANCES\n"); return -EINVAL; } break; @@ -1702,12 +1707,12 @@ static int r600_packet3_check(struct radeon_cs_parser *p, { uint64_t offset; if (pkt->count != 3) { - DRM_ERROR("bad DRAW_INDEX\n"); + dev_warn_once(p->dev, "bad DRAW_INDEX\n"); return -EINVAL; } r = radeon_cs_packet_next_reloc(p, &reloc, r600_nomm); if (r) { - DRM_ERROR("bad DRAW_INDEX\n"); + dev_warn_once(p->dev, "bad DRAW_INDEX\n"); return -EINVAL; } @@ -1720,37 +1725,37 @@ static int r600_packet3_check(struct radeon_cs_parser *p, r = r600_cs_track_check(p); if (r) { - dev_warn(p->dev, "%s:%d invalid cmd stream\n", __func__, __LINE__); + dev_warn_once(p->dev, "%s:%d invalid cmd stream\n", __func__, __LINE__); return r; } break; } case PACKET3_DRAW_INDEX_AUTO: if (pkt->count != 1) { - DRM_ERROR("bad DRAW_INDEX_AUTO\n"); + dev_warn_once(p->dev, "bad DRAW_INDEX_AUTO\n"); return -EINVAL; } r = r600_cs_track_check(p); if (r) { - dev_warn(p->dev, "%s:%d invalid cmd stream %d\n", __func__, __LINE__, idx); + dev_warn_once(p->dev, "%s:%d invalid cmd stream %d\n", __func__, __LINE__, idx); return r; } break; case PACKET3_DRAW_INDEX_IMMD_BE: case PACKET3_DRAW_INDEX_IMMD: if (pkt->count < 2) { - DRM_ERROR("bad DRAW_INDEX_IMMD\n"); + dev_warn_once(p->dev, "bad DRAW_INDEX_IMMD\n"); return -EINVAL; } r = r600_cs_track_check(p); if (r) { - dev_warn(p->dev, "%s:%d invalid cmd stream\n", __func__, __LINE__); + dev_warn_once(p->dev, "%s:%d invalid cmd stream\n", __func__, __LINE__); return r; } break; case PACKET3_WAIT_REG_MEM: if (pkt->count != 5) { - DRM_ERROR("bad WAIT_REG_MEM\n"); + dev_warn_once(p->dev, "bad WAIT_REG_MEM\n"); return -EINVAL; } /* bit 4 is reg (0) or mem (1) */ @@ -1759,7 +1764,7 @@ static int r600_packet3_check(struct radeon_cs_parser *p, r = radeon_cs_packet_next_reloc(p, &reloc, r600_nomm); if (r) { - DRM_ERROR("bad WAIT_REG_MEM\n"); + dev_warn_once(p->dev, "bad WAIT_REG_MEM\n"); return -EINVAL; } @@ -1770,7 +1775,7 @@ static int r600_packet3_check(struct radeon_cs_parser *p, ib[idx+1] = (ib[idx+1] & 0x3) | (offset & 0xfffffff0); ib[idx+2] = upper_32_bits(offset) & 0xff; } else if (idx_value & 0x100) { - DRM_ERROR("cannot use PFP on REG wait\n"); + dev_warn_once(p->dev, "cannot use PFP on REG wait\n"); return -EINVAL; } break; @@ -1779,24 +1784,24 @@ static int r600_packet3_check(struct radeon_cs_parser *p, u32 command, size; u64 offset, tmp; if (pkt->count != 4) { - DRM_ERROR("bad CP DMA\n"); + dev_warn_once(p->dev, "bad CP DMA\n"); return -EINVAL; } command = radeon_get_ib_value(p, idx+4); size = command & 0x1fffff; if (command & PACKET3_CP_DMA_CMD_SAS) { /* src address space is register */ - DRM_ERROR("CP DMA SAS not supported\n"); + dev_warn_once(p->dev, "CP DMA SAS not supported\n"); return -EINVAL; } else { if (command & PACKET3_CP_DMA_CMD_SAIC) { - DRM_ERROR("CP DMA SAIC 
only supported for registers\n"); + dev_warn_once(p->dev, "CP DMA SAIC only supported for registers\n"); return -EINVAL; } /* src address space is memory */ r = radeon_cs_packet_next_reloc(p, &reloc, r600_nomm); if (r) { - DRM_ERROR("bad CP DMA SRC\n"); + dev_warn_once(p->dev, "bad CP DMA SRC\n"); return -EINVAL; } @@ -1806,8 +1811,8 @@ static int r600_packet3_check(struct radeon_cs_parser *p, offset = reloc->gpu_offset + tmp; if ((tmp + size) > radeon_bo_size(reloc->robj)) { - dev_warn(p->dev, "CP DMA src buffer too small (%llu %lu)\n", - tmp + size, radeon_bo_size(reloc->robj)); + dev_warn_once(p->dev, "CP DMA src buffer too small (%llu %lu)\n", + tmp + size, radeon_bo_size(reloc->robj)); return -EINVAL; } @@ -1816,17 +1821,17 @@ static int r600_packet3_check(struct radeon_cs_parser *p, } if (command & PACKET3_CP_DMA_CMD_DAS) { /* dst address space is register */ - DRM_ERROR("CP DMA DAS not supported\n"); + dev_warn_once(p->dev, "CP DMA DAS not supported\n"); return -EINVAL; } else { /* dst address space is memory */ if (command & PACKET3_CP_DMA_CMD_DAIC) { - DRM_ERROR("CP DMA DAIC only supported for registers\n"); + dev_warn_once(p->dev, "CP DMA DAIC only supported for registers\n"); return -EINVAL; } r = radeon_cs_packet_next_reloc(p, &reloc, r600_nomm); if (r) { - DRM_ERROR("bad CP DMA DST\n"); + dev_warn_once(p->dev, "bad CP DMA DST\n"); return -EINVAL; } @@ -1836,8 +1841,8 @@ static int r600_packet3_check(struct radeon_cs_parser *p, offset = reloc->gpu_offset + tmp; if ((tmp + size) > radeon_bo_size(reloc->robj)) { - dev_warn(p->dev, "CP DMA dst buffer too small (%llu %lu)\n", - tmp + size, radeon_bo_size(reloc->robj)); + dev_warn_once(p->dev, "CP DMA dst buffer too small (%llu %lu)\n", + tmp + size, radeon_bo_size(reloc->robj)); return -EINVAL; } @@ -1848,7 +1853,7 @@ static int r600_packet3_check(struct radeon_cs_parser *p, } case PACKET3_SURFACE_SYNC: if (pkt->count != 3) { - DRM_ERROR("bad SURFACE_SYNC\n"); + dev_warn_once(p->dev, "bad SURFACE_SYNC\n"); return -EINVAL; } /* 0xffffffff/0x0 is flush all cache flag */ @@ -1856,7 +1861,7 @@ static int r600_packet3_check(struct radeon_cs_parser *p, radeon_get_ib_value(p, idx + 2) != 0) { r = radeon_cs_packet_next_reloc(p, &reloc, r600_nomm); if (r) { - DRM_ERROR("bad SURFACE_SYNC\n"); + dev_warn_once(p->dev, "bad SURFACE_SYNC\n"); return -EINVAL; } ib[idx+2] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff); @@ -1864,7 +1869,7 @@ static int r600_packet3_check(struct radeon_cs_parser *p, break; case PACKET3_EVENT_WRITE: if (pkt->count != 2 && pkt->count != 0) { - DRM_ERROR("bad EVENT_WRITE\n"); + dev_warn_once(p->dev, "bad EVENT_WRITE\n"); return -EINVAL; } if (pkt->count) { @@ -1872,7 +1877,7 @@ static int r600_packet3_check(struct radeon_cs_parser *p, r = radeon_cs_packet_next_reloc(p, &reloc, r600_nomm); if (r) { - DRM_ERROR("bad EVENT_WRITE\n"); + dev_warn_once(p->dev, "bad EVENT_WRITE\n"); return -EINVAL; } offset = reloc->gpu_offset + @@ -1888,12 +1893,12 @@ static int r600_packet3_check(struct radeon_cs_parser *p, uint64_t offset; if (pkt->count != 4) { - DRM_ERROR("bad EVENT_WRITE_EOP\n"); + dev_warn_once(p->dev, "bad EVENT_WRITE_EOP\n"); return -EINVAL; } r = radeon_cs_packet_next_reloc(p, &reloc, r600_nomm); if (r) { - DRM_ERROR("bad EVENT_WRITE\n"); + dev_warn_once(p->dev, "bad EVENT_WRITE\n"); return -EINVAL; } @@ -1911,7 +1916,7 @@ static int r600_packet3_check(struct radeon_cs_parser *p, if ((start_reg < PACKET3_SET_CONFIG_REG_OFFSET) || (start_reg >= PACKET3_SET_CONFIG_REG_END) || (end_reg >= PACKET3_SET_CONFIG_REG_END)) 
{ - DRM_ERROR("bad PACKET3_SET_CONFIG_REG\n"); + dev_warn_once(p->dev, "bad PACKET3_SET_CONFIG_REG\n"); return -EINVAL; } for (i = 0; i < pkt->count; i++) { @@ -1927,7 +1932,7 @@ static int r600_packet3_check(struct radeon_cs_parser *p, if ((start_reg < PACKET3_SET_CONTEXT_REG_OFFSET) || (start_reg >= PACKET3_SET_CONTEXT_REG_END) || (end_reg >= PACKET3_SET_CONTEXT_REG_END)) { - DRM_ERROR("bad PACKET3_SET_CONTEXT_REG\n"); + dev_warn_once(p->dev, "bad PACKET3_SET_CONTEXT_REG\n"); return -EINVAL; } for (i = 0; i < pkt->count; i++) { @@ -1939,7 +1944,7 @@ static int r600_packet3_check(struct radeon_cs_parser *p, break; case PACKET3_SET_RESOURCE: if (pkt->count % 7) { - DRM_ERROR("bad SET_RESOURCE\n"); + dev_warn_once(p->dev, "bad SET_RESOURCE\n"); return -EINVAL; } start_reg = (idx_value << 2) + PACKET3_SET_RESOURCE_OFFSET; @@ -1947,7 +1952,7 @@ static int r600_packet3_check(struct radeon_cs_parser *p, if ((start_reg < PACKET3_SET_RESOURCE_OFFSET) || (start_reg >= PACKET3_SET_RESOURCE_END) || (end_reg >= PACKET3_SET_RESOURCE_END)) { - DRM_ERROR("bad SET_RESOURCE\n"); + dev_warn_once(p->dev, "bad SET_RESOURCE\n"); return -EINVAL; } for (i = 0; i < (pkt->count / 7); i++) { @@ -1959,7 +1964,7 @@ static int r600_packet3_check(struct radeon_cs_parser *p, /* tex base */ r = radeon_cs_packet_next_reloc(p, &reloc, r600_nomm); if (r) { - DRM_ERROR("bad SET_RESOURCE\n"); + dev_warn_once(p->dev, "bad SET_RESOURCE\n"); return -EINVAL; } base_offset = (u32)((reloc->gpu_offset >> 8) & 0xffffffff); @@ -1973,7 +1978,7 @@ static int r600_packet3_check(struct radeon_cs_parser *p, /* tex mip base */ r = radeon_cs_packet_next_reloc(p, &reloc, r600_nomm); if (r) { - DRM_ERROR("bad SET_RESOURCE\n"); + dev_warn_once(p->dev, "bad SET_RESOURCE\n"); return -EINVAL; } mip_offset = (u32)((reloc->gpu_offset >> 8) & 0xffffffff); @@ -1994,15 +1999,15 @@ static int r600_packet3_check(struct radeon_cs_parser *p, /* vtx base */ r = radeon_cs_packet_next_reloc(p, &reloc, r600_nomm); if (r) { - DRM_ERROR("bad SET_RESOURCE\n"); + dev_warn_once(p->dev, "bad SET_RESOURCE\n"); return -EINVAL; } offset = radeon_get_ib_value(p, idx+1+(i*7)+0); size = radeon_get_ib_value(p, idx+1+(i*7)+1) + 1; if (p->rdev && (size + offset) > radeon_bo_size(reloc->robj)) { /* force size to size of the buffer */ - dev_warn(p->dev, "vbo resource seems too big (%d) for the bo (%ld)\n", - size + offset, radeon_bo_size(reloc->robj)); + dev_warn_once(p->dev, "vbo resource seems too big (%d) for the bo (%ld)\n", + size + offset, radeon_bo_size(reloc->robj)); ib[idx+1+(i*7)+1] = radeon_bo_size(reloc->robj) - offset; } @@ -2015,7 +2020,7 @@ static int r600_packet3_check(struct radeon_cs_parser *p, case SQ_TEX_VTX_INVALID_TEXTURE: case SQ_TEX_VTX_INVALID_BUFFER: default: - DRM_ERROR("bad SET_RESOURCE\n"); + dev_warn_once(p->dev, "bad SET_RESOURCE\n"); return -EINVAL; } } @@ -2027,7 +2032,7 @@ static int r600_packet3_check(struct radeon_cs_parser *p, if ((start_reg < PACKET3_SET_ALU_CONST_OFFSET) || (start_reg >= PACKET3_SET_ALU_CONST_END) || (end_reg >= PACKET3_SET_ALU_CONST_END)) { - DRM_ERROR("bad SET_ALU_CONST\n"); + dev_warn_once(p->dev, "bad SET_ALU_CONST\n"); return -EINVAL; } } @@ -2038,7 +2043,7 @@ static int r600_packet3_check(struct radeon_cs_parser *p, if ((start_reg < PACKET3_SET_BOOL_CONST_OFFSET) || (start_reg >= PACKET3_SET_BOOL_CONST_END) || (end_reg >= PACKET3_SET_BOOL_CONST_END)) { - DRM_ERROR("bad SET_BOOL_CONST\n"); + dev_warn_once(p->dev, "bad SET_BOOL_CONST\n"); return -EINVAL; } break; @@ -2048,7 +2053,7 @@ static int 
r600_packet3_check(struct radeon_cs_parser *p, if ((start_reg < PACKET3_SET_LOOP_CONST_OFFSET) || (start_reg >= PACKET3_SET_LOOP_CONST_END) || (end_reg >= PACKET3_SET_LOOP_CONST_END)) { - DRM_ERROR("bad SET_LOOP_CONST\n"); + dev_warn_once(p->dev, "bad SET_LOOP_CONST\n"); return -EINVAL; } break; @@ -2058,13 +2063,13 @@ static int r600_packet3_check(struct radeon_cs_parser *p, if ((start_reg < PACKET3_SET_CTL_CONST_OFFSET) || (start_reg >= PACKET3_SET_CTL_CONST_END) || (end_reg >= PACKET3_SET_CTL_CONST_END)) { - DRM_ERROR("bad SET_CTL_CONST\n"); + dev_warn_once(p->dev, "bad SET_CTL_CONST\n"); return -EINVAL; } break; case PACKET3_SET_SAMPLER: if (pkt->count % 3) { - DRM_ERROR("bad SET_SAMPLER\n"); + dev_warn_once(p->dev, "bad SET_SAMPLER\n"); return -EINVAL; } start_reg = (idx_value << 2) + PACKET3_SET_SAMPLER_OFFSET; @@ -2072,22 +2077,22 @@ static int r600_packet3_check(struct radeon_cs_parser *p, if ((start_reg < PACKET3_SET_SAMPLER_OFFSET) || (start_reg >= PACKET3_SET_SAMPLER_END) || (end_reg >= PACKET3_SET_SAMPLER_END)) { - DRM_ERROR("bad SET_SAMPLER\n"); + dev_warn_once(p->dev, "bad SET_SAMPLER\n"); return -EINVAL; } break; case PACKET3_STRMOUT_BASE_UPDATE: /* RS780 and RS880 also need this */ if (p->family < CHIP_RS780) { - DRM_ERROR("STRMOUT_BASE_UPDATE only supported on 7xx\n"); + dev_warn_once(p->dev, "STRMOUT_BASE_UPDATE only supported on 7xx\n"); return -EINVAL; } if (pkt->count != 1) { - DRM_ERROR("bad STRMOUT_BASE_UPDATE packet count\n"); + dev_warn_once(p->dev, "bad STRMOUT_BASE_UPDATE packet count\n"); return -EINVAL; } if (idx_value > 3) { - DRM_ERROR("bad STRMOUT_BASE_UPDATE index\n"); + dev_warn_once(p->dev, "bad STRMOUT_BASE_UPDATE index\n"); return -EINVAL; } { @@ -2095,25 +2100,27 @@ static int r600_packet3_check(struct radeon_cs_parser *p, r = radeon_cs_packet_next_reloc(p, &reloc, r600_nomm); if (r) { - DRM_ERROR("bad STRMOUT_BASE_UPDATE reloc\n"); + dev_warn_once(p->dev, "bad STRMOUT_BASE_UPDATE reloc\n"); return -EINVAL; } if (reloc->robj != track->vgt_strmout_bo[idx_value]) { - DRM_ERROR("bad STRMOUT_BASE_UPDATE, bo does not match\n"); + dev_warn_once(p->dev, "bad STRMOUT_BASE_UPDATE, bo does not match\n"); return -EINVAL; } offset = (u64)radeon_get_ib_value(p, idx+1) << 8; if (offset != track->vgt_strmout_bo_offset[idx_value]) { - DRM_ERROR("bad STRMOUT_BASE_UPDATE, bo offset does not match: 0x%llx, 0x%x\n", - offset, track->vgt_strmout_bo_offset[idx_value]); + dev_warn_once(p->dev, + "bad STRMOUT_BASE_UPDATE, bo offset does not match: 0x%llx, 0x%x\n", + offset, track->vgt_strmout_bo_offset[idx_value]); return -EINVAL; } if ((offset + 4) > radeon_bo_size(reloc->robj)) { - DRM_ERROR("bad STRMOUT_BASE_UPDATE bo too small: 0x%llx, 0x%lx\n", - offset + 4, radeon_bo_size(reloc->robj)); + dev_warn_once(p->dev, + "bad STRMOUT_BASE_UPDATE bo too small: 0x%llx, 0x%lx\n", + offset + 4, radeon_bo_size(reloc->robj)); return -EINVAL; } ib[idx+1] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff); @@ -2121,17 +2128,17 @@ static int r600_packet3_check(struct radeon_cs_parser *p, break; case PACKET3_SURFACE_BASE_UPDATE: if (p->family >= CHIP_RV770 || p->family == CHIP_R600) { - DRM_ERROR("bad SURFACE_BASE_UPDATE\n"); + dev_warn_once(p->dev, "bad SURFACE_BASE_UPDATE\n"); return -EINVAL; } if (pkt->count) { - DRM_ERROR("bad SURFACE_BASE_UPDATE\n"); + dev_warn_once(p->dev, "bad SURFACE_BASE_UPDATE\n"); return -EINVAL; } break; case PACKET3_STRMOUT_BUFFER_UPDATE: if (pkt->count != 4) { - DRM_ERROR("bad STRMOUT_BUFFER_UPDATE (invalid count)\n"); + dev_warn_once(p->dev, "bad 
STRMOUT_BUFFER_UPDATE (invalid count)\n"); return -EINVAL; } /* Updating memory at DST_ADDRESS. */ @@ -2139,14 +2146,15 @@ static int r600_packet3_check(struct radeon_cs_parser *p, u64 offset; r = radeon_cs_packet_next_reloc(p, &reloc, r600_nomm); if (r) { - DRM_ERROR("bad STRMOUT_BUFFER_UPDATE (missing dst reloc)\n"); + dev_warn_once(p->dev, "bad STRMOUT_BUFFER_UPDATE (missing dst reloc)\n"); return -EINVAL; } offset = radeon_get_ib_value(p, idx+1); offset += ((u64)(radeon_get_ib_value(p, idx+2) & 0xff)) << 32; if ((offset + 4) > radeon_bo_size(reloc->robj)) { - DRM_ERROR("bad STRMOUT_BUFFER_UPDATE dst bo too small: 0x%llx, 0x%lx\n", - offset + 4, radeon_bo_size(reloc->robj)); + dev_warn_once(p->dev, + "bad STRMOUT_BUFFER_UPDATE dst bo too small: 0x%llx, 0x%lx\n", + offset + 4, radeon_bo_size(reloc->robj)); return -EINVAL; } offset += reloc->gpu_offset; @@ -2158,14 +2166,15 @@ static int r600_packet3_check(struct radeon_cs_parser *p, u64 offset; r = radeon_cs_packet_next_reloc(p, &reloc, r600_nomm); if (r) { - DRM_ERROR("bad STRMOUT_BUFFER_UPDATE (missing src reloc)\n"); + dev_warn_once(p->dev, "bad STRMOUT_BUFFER_UPDATE (missing src reloc)\n"); return -EINVAL; } offset = radeon_get_ib_value(p, idx+3); offset += ((u64)(radeon_get_ib_value(p, idx+4) & 0xff)) << 32; if ((offset + 4) > radeon_bo_size(reloc->robj)) { - DRM_ERROR("bad STRMOUT_BUFFER_UPDATE src bo too small: 0x%llx, 0x%lx\n", - offset + 4, radeon_bo_size(reloc->robj)); + dev_warn_once(p->dev, + "bad STRMOUT_BUFFER_UPDATE src bo too small: 0x%llx, 0x%lx\n", + offset + 4, radeon_bo_size(reloc->robj)); return -EINVAL; } offset += reloc->gpu_offset; @@ -2178,23 +2187,23 @@ static int r600_packet3_check(struct radeon_cs_parser *p, u64 offset; if (pkt->count != 3) { - DRM_ERROR("bad MEM_WRITE (invalid count)\n"); + dev_warn_once(p->dev, "bad MEM_WRITE (invalid count)\n"); return -EINVAL; } r = radeon_cs_packet_next_reloc(p, &reloc, r600_nomm); if (r) { - DRM_ERROR("bad MEM_WRITE (missing reloc)\n"); + dev_warn_once(p->dev, "bad MEM_WRITE (missing reloc)\n"); return -EINVAL; } offset = radeon_get_ib_value(p, idx+0); offset += ((u64)(radeon_get_ib_value(p, idx+1) & 0xff)) << 32UL; if (offset & 0x7) { - DRM_ERROR("bad MEM_WRITE (address not qwords aligned)\n"); + dev_warn_once(p->dev, "bad MEM_WRITE (address not qwords aligned)\n"); return -EINVAL; } if ((offset + 8) > radeon_bo_size(reloc->robj)) { - DRM_ERROR("bad MEM_WRITE bo too small: 0x%llx, 0x%lx\n", - offset + 8, radeon_bo_size(reloc->robj)); + dev_warn_once(p->dev, "bad MEM_WRITE bo too small: 0x%llx, 0x%lx\n", + offset + 8, radeon_bo_size(reloc->robj)); return -EINVAL; } offset += reloc->gpu_offset; @@ -2204,7 +2213,7 @@ static int r600_packet3_check(struct radeon_cs_parser *p, } case PACKET3_COPY_DW: if (pkt->count != 4) { - DRM_ERROR("bad COPY_DW (invalid count)\n"); + dev_warn_once(p->dev, "bad COPY_DW (invalid count)\n"); return -EINVAL; } if (idx_value & 0x1) { @@ -2212,14 +2221,14 @@ static int r600_packet3_check(struct radeon_cs_parser *p, /* SRC is memory. 
*/ r = radeon_cs_packet_next_reloc(p, &reloc, r600_nomm); if (r) { - DRM_ERROR("bad COPY_DW (missing src reloc)\n"); + dev_warn_once(p->dev, "bad COPY_DW (missing src reloc)\n"); return -EINVAL; } offset = radeon_get_ib_value(p, idx+1); offset += ((u64)(radeon_get_ib_value(p, idx+2) & 0xff)) << 32; if ((offset + 4) > radeon_bo_size(reloc->robj)) { - DRM_ERROR("bad COPY_DW src bo too small: 0x%llx, 0x%lx\n", - offset + 4, radeon_bo_size(reloc->robj)); + dev_warn_once(p->dev, "bad COPY_DW src bo too small: 0x%llx, 0x%lx\n", + offset + 4, radeon_bo_size(reloc->robj)); return -EINVAL; } offset += reloc->gpu_offset; @@ -2236,14 +2245,14 @@ static int r600_packet3_check(struct radeon_cs_parser *p, /* DST is memory. */ r = radeon_cs_packet_next_reloc(p, &reloc, r600_nomm); if (r) { - DRM_ERROR("bad COPY_DW (missing dst reloc)\n"); + dev_warn_once(p->dev, "bad COPY_DW (missing dst reloc)\n"); return -EINVAL; } offset = radeon_get_ib_value(p, idx+3); offset += ((u64)(radeon_get_ib_value(p, idx+4) & 0xff)) << 32; if ((offset + 4) > radeon_bo_size(reloc->robj)) { - DRM_ERROR("bad COPY_DW dst bo too small: 0x%llx, 0x%lx\n", - offset + 4, radeon_bo_size(reloc->robj)); + dev_warn_once(p->dev, "bad COPY_DW dst bo too small: 0x%llx, 0x%lx\n", + offset + 4, radeon_bo_size(reloc->robj)); return -EINVAL; } offset += reloc->gpu_offset; @@ -2259,7 +2268,7 @@ static int r600_packet3_check(struct radeon_cs_parser *p, case PACKET3_NOP: break; default: - DRM_ERROR("Packet3 opcode %x not supported\n", pkt->opcode); + dev_warn_once(p->dev, "Packet3 opcode %x not supported\n", pkt->opcode); return -EINVAL; } return 0; @@ -2306,7 +2315,7 @@ int r600_cs_parse(struct radeon_cs_parser *p) r = r600_packet3_check(p, &pkt); break; default: - DRM_ERROR("Unknown packet type %d !\n", pkt.type); + dev_warn_once(p->dev, "Unknown packet type %d !\n", pkt.type); kfree(p->track); p->track = NULL; return -EINVAL; @@ -2346,13 +2355,13 @@ int r600_dma_cs_next_reloc(struct radeon_cs_parser *p, *cs_reloc = NULL; if (p->chunk_relocs == NULL) { - DRM_ERROR("No relocation chunk !\n"); + dev_warn_once(p->dev, "No relocation chunk !\n"); return -EINVAL; } idx = p->dma_reloc_idx; if (idx >= p->nrelocs) { - DRM_ERROR("Relocs at %d after relocations chunk end %d !\n", - idx, p->nrelocs); + dev_warn_once(p->dev, "Relocs at %d after relocations chunk end %d !\n", + idx, p->nrelocs); return -EINVAL; } *cs_reloc = &p->relocs[idx]; @@ -2385,8 +2394,8 @@ int r600_dma_cs_parse(struct radeon_cs_parser *p) do { if (p->idx >= ib_chunk->length_dw) { - DRM_ERROR("Can not parse packet at %d after CS end %d !\n", - p->idx, ib_chunk->length_dw); + dev_warn_once(p->dev, "Can not parse packet at %d after CS end %d !\n", + p->idx, ib_chunk->length_dw); return -EINVAL; } idx = p->idx; @@ -2399,7 +2408,7 @@ int r600_dma_cs_parse(struct radeon_cs_parser *p) case DMA_PACKET_WRITE: r = r600_dma_cs_next_reloc(p, &dst_reloc); if (r) { - DRM_ERROR("bad DMA_PACKET_WRITE\n"); + dev_warn_once(p->dev, "bad DMA_PACKET_WRITE\n"); return -EINVAL; } if (tiled) { @@ -2417,20 +2426,20 @@ int r600_dma_cs_parse(struct radeon_cs_parser *p) p->idx += count + 3; } if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) { - dev_warn(p->dev, "DMA write buffer too small (%llu %lu)\n", - dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj)); + dev_warn_once(p->dev, "DMA write buffer too small (%llu %lu)\n", + dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj)); return -EINVAL; } break; case DMA_PACKET_COPY: r = r600_dma_cs_next_reloc(p, &src_reloc); if (r) { - 
DRM_ERROR("bad DMA_PACKET_COPY\n"); + dev_warn_once(p->dev, "bad DMA_PACKET_COPY\n"); return -EINVAL; } r = r600_dma_cs_next_reloc(p, &dst_reloc); if (r) { - DRM_ERROR("bad DMA_PACKET_COPY\n"); + dev_warn_once(p->dev, "bad DMA_PACKET_COPY\n"); return -EINVAL; } if (tiled) { @@ -2484,31 +2493,31 @@ int r600_dma_cs_parse(struct radeon_cs_parser *p) } } if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) { - dev_warn(p->dev, "DMA copy src buffer too small (%llu %lu)\n", - src_offset + (count * 4), radeon_bo_size(src_reloc->robj)); + dev_warn_once(p->dev, "DMA copy src buffer too small (%llu %lu)\n", + src_offset + (count * 4), radeon_bo_size(src_reloc->robj)); return -EINVAL; } if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) { - dev_warn(p->dev, "DMA write dst buffer too small (%llu %lu)\n", - dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj)); + dev_warn_once(p->dev, "DMA write dst buffer too small (%llu %lu)\n", + dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj)); return -EINVAL; } break; case DMA_PACKET_CONSTANT_FILL: if (p->family < CHIP_RV770) { - DRM_ERROR("Constant Fill is 7xx only !\n"); + dev_warn_once(p->dev, "Constant Fill is 7xx only !\n"); return -EINVAL; } r = r600_dma_cs_next_reloc(p, &dst_reloc); if (r) { - DRM_ERROR("bad DMA_PACKET_WRITE\n"); + dev_warn_once(p->dev, "bad DMA_PACKET_WRITE\n"); return -EINVAL; } dst_offset = radeon_get_ib_value(p, idx+1); dst_offset |= ((u64)(radeon_get_ib_value(p, idx+3) & 0x00ff0000)) << 16; if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) { - dev_warn(p->dev, "DMA constant fill buffer too small (%llu %lu)\n", - dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj)); + dev_warn_once(p->dev, "DMA constant fill buffer too small (%llu %lu)\n", + dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj)); return -EINVAL; } ib[idx+1] += (u32)(dst_reloc->gpu_offset & 0xfffffffc); @@ -2519,7 +2528,7 @@ int r600_dma_cs_parse(struct radeon_cs_parser *p) p->idx += 1; break; default: - DRM_ERROR("Unknown packet type %d at %d !\n", cmd, idx); + dev_warn_once(p->dev, "Unknown packet type %d at %d !\n", cmd, idx); return -EINVAL; } } while (p->idx < p->chunk_ib->length_dw); diff --git a/drivers/gpu/drm/radeon/radeon_cs.c b/drivers/gpu/drm/radeon/radeon_cs.c index b8e6202f1d5b..3f9c0011244f 100644 --- a/drivers/gpu/drm/radeon/radeon_cs.c +++ b/drivers/gpu/drm/radeon/radeon_cs.c @@ -834,7 +834,7 @@ void radeon_cs_dump_packet(struct radeon_cs_parser *p, ib = p->ib.ptr; idx = pkt->idx; for (i = 0; i <= (pkt->count + 1); i++, idx++) - DRM_INFO("ib[%d]=0x%08X\n", idx, ib[idx]); + dev_dbg(p->dev, "ib[%d]=0x%08X\n", idx, ib[idx]); } /** diff --git a/drivers/gpu/drm/radeon/radeon_device.c b/drivers/gpu/drm/radeon/radeon_device.c index bbd39348a7ab..9e35b14e2bf0 100644 --- a/drivers/gpu/drm/radeon/radeon_device.c +++ b/drivers/gpu/drm/radeon/radeon_device.c @@ -26,7 +26,6 @@ * Jerome Glisse */ -#include <linux/console.h> #include <linux/efi.h> #include <linux/pci.h> #include <linux/pm_runtime.h> @@ -555,7 +554,7 @@ int radeon_wb_init(struct radeon_device *rdev) * cover the whole aperture even if VRAM size is inferior to aperture size * Novell bug 204882 + along with lots of ubuntu ones * - * Note 3: when limiting vram it's safe to overwritte real_vram_size because + * Note 3: when limiting vram it's safe to overwrite real_vram_size because * we are not in case where real_vram_size is inferior to mc_vram_size (ie * not affected by bogus hw of Novell bug 204882 + along with lots of ubuntu * ones) @@ 
-563,7 +562,7 @@ int radeon_wb_init(struct radeon_device *rdev) * Note 4: IGP TOM addr should be the same as the aperture addr, we don't * explicitly check for that thought. * - * FIXME: when reducing VRAM size align new size on power of 2. + * FIXME: when reducing VRAM size, align new size on power of 2. */ void radeon_vram_location(struct radeon_device *rdev, struct radeon_mc *mc, u64 base) { @@ -1635,11 +1634,9 @@ int radeon_suspend_kms(struct drm_device *dev, bool suspend, pci_set_power_state(pdev, PCI_D3hot); } - if (notify_clients) { - console_lock(); - drm_client_dev_suspend(dev, true); - console_unlock(); - } + if (notify_clients) + drm_client_dev_suspend(dev, false); + return 0; } @@ -1661,17 +1658,11 @@ int radeon_resume_kms(struct drm_device *dev, bool resume, bool notify_clients) if (dev->switch_power_state == DRM_SWITCH_POWER_OFF) return 0; - if (notify_clients) { - console_lock(); - } if (resume) { pci_set_power_state(pdev, PCI_D0); pci_restore_state(pdev); - if (pci_enable_device(pdev)) { - if (notify_clients) - console_unlock(); + if (pci_enable_device(pdev)) return -1; - } } /* resume AGP if in use */ radeon_agp_resume(rdev); @@ -1747,10 +1738,8 @@ int radeon_resume_kms(struct drm_device *dev, bool resume, bool notify_clients) if ((rdev->pm.pm_method == PM_METHOD_DPM) && rdev->pm.dpm_enabled) radeon_pm_compute_clocks(rdev); - if (notify_clients) { - drm_client_dev_resume(dev, true); - console_unlock(); - } + if (notify_clients) + drm_client_dev_resume(dev, false); return 0; } diff --git a/drivers/gpu/drm/radeon/radeon_display.c b/drivers/gpu/drm/radeon/radeon_display.c index 8f5f8abcb1b4..351b9dfcdad8 100644 --- a/drivers/gpu/drm/radeon/radeon_display.c +++ b/drivers/gpu/drm/radeon/radeon_display.c @@ -926,10 +926,10 @@ static void avivo_get_fb_ref_div(unsigned nom, unsigned den, unsigned post_div, unsigned *fb_div, unsigned *ref_div) { /* limit reference * post divider to a maximum */ - ref_div_max = max(min(100 / post_div, ref_div_max), 1u); + ref_div_max = clamp(100 / post_div, 1u, ref_div_max); /* get matching reference and feedback divider */ - *ref_div = min(max(den/post_div, 1u), ref_div_max); + *ref_div = clamp(den / post_div, 1u, ref_div_max); *fb_div = DIV_ROUND_CLOSEST(nom * *ref_div * post_div, den); /* limit fb divider to its maximum */ @@ -1297,12 +1297,13 @@ static const struct drm_framebuffer_funcs radeon_fb_funcs = { int radeon_framebuffer_init(struct drm_device *dev, struct drm_framebuffer *fb, + const struct drm_format_info *info, const struct drm_mode_fb_cmd2 *mode_cmd, struct drm_gem_object *obj) { int ret; fb->obj[0] = obj; - drm_helper_mode_fill_fb_struct(dev, fb, mode_cmd); + drm_helper_mode_fill_fb_struct(dev, fb, info, mode_cmd); ret = drm_framebuffer_init(dev, fb, &radeon_fb_funcs); if (ret) { fb->obj[0] = NULL; @@ -1314,6 +1315,7 @@ radeon_framebuffer_init(struct drm_device *dev, static struct drm_framebuffer * radeon_user_framebuffer_create(struct drm_device *dev, struct drm_file *file_priv, + const struct drm_format_info *info, const struct drm_mode_fb_cmd2 *mode_cmd) { struct drm_gem_object *obj; @@ -1340,7 +1342,7 @@ radeon_user_framebuffer_create(struct drm_device *dev, return ERR_PTR(-ENOMEM); } - ret = radeon_framebuffer_init(dev, fb, mode_cmd, obj); + ret = radeon_framebuffer_init(dev, fb, info, mode_cmd, obj); if (ret) { kfree(fb); drm_gem_object_put(obj); diff --git a/drivers/gpu/drm/radeon/radeon_drv.c b/drivers/gpu/drm/radeon/radeon_drv.c index 267f082bc430..9c8907bc61d9 100644 --- a/drivers/gpu/drm/radeon/radeon_drv.c +++ 
b/drivers/gpu/drm/radeon/radeon_drv.c @@ -110,9 +110,10 @@ * 2.48.0 - TA_CS_BC_BASE_ADDR allowed on SI * 2.49.0 - DRM_RADEON_GEM_INFO ioctl returns correct vram_size/visible values * 2.50.0 - Allows unaligned shader loads on CIK. (needed by OpenGL) + * 2.51.0 - Add evergreen/cayman OpenGL 4.6 compatibility */ #define KMS_DRIVER_MAJOR 2 -#define KMS_DRIVER_MINOR 50 +#define KMS_DRIVER_MINOR 51 #define KMS_DRIVER_PATCHLEVEL 0 int radeon_no_wb; @@ -313,17 +314,17 @@ static int radeon_pci_probe(struct pci_dev *pdev, ret = pci_enable_device(pdev); if (ret) - goto err_free; + return ret; pci_set_drvdata(pdev, ddev); ret = radeon_driver_load_kms(ddev, flags); if (ret) - goto err_agp; + goto err; ret = drm_dev_register(ddev, flags); if (ret) - goto err_agp; + goto err; if (rdev->mc.real_vram_size <= (8 * 1024 * 1024)) format = drm_format_info(DRM_FORMAT_C8); @@ -336,30 +337,14 @@ static int radeon_pci_probe(struct pci_dev *pdev, return 0; -err_agp: +err: pci_disable_device(pdev); -err_free: - drm_dev_put(ddev); return ret; } static void -radeon_pci_remove(struct pci_dev *pdev) -{ - struct drm_device *dev = pci_get_drvdata(pdev); - - drm_put_dev(dev); -} - -static void radeon_pci_shutdown(struct pci_dev *pdev) { - /* if we are running in a VM, make sure the device - * torn down properly on reboot/shutdown - */ - if (radeon_device_is_virtual()) - radeon_pci_remove(pdev); - #if defined(CONFIG_PPC64) || defined(CONFIG_MACH_LOONGSON64) /* * Some adapters need to be suspended before a @@ -612,7 +597,6 @@ static struct pci_driver radeon_kms_pci_driver = { .name = DRIVER_NAME, .id_table = pciidlist, .probe = radeon_pci_probe, - .remove = radeon_pci_remove, .shutdown = radeon_pci_shutdown, .driver.pm = &radeon_pm_ops, }; diff --git a/drivers/gpu/drm/radeon/radeon_fbdev.c b/drivers/gpu/drm/radeon/radeon_fbdev.c index d4a58bd679db..dc81b0c2dbff 100644 --- a/drivers/gpu/drm/radeon/radeon_fbdev.c +++ b/drivers/gpu/drm/radeon/radeon_fbdev.c @@ -53,10 +53,10 @@ static void radeon_fbdev_destroy_pinned_object(struct drm_gem_object *gobj) } static int radeon_fbdev_create_pinned_object(struct drm_fb_helper *fb_helper, + const struct drm_format_info *info, struct drm_mode_fb_cmd2 *mode_cmd, struct drm_gem_object **gobj_p) { - const struct drm_format_info *info; struct radeon_device *rdev = fb_helper->dev->dev_private; struct drm_gem_object *gobj = NULL; struct radeon_bo *rbo = NULL; @@ -67,7 +67,6 @@ static int radeon_fbdev_create_pinned_object(struct drm_fb_helper *fb_helper, int height = mode_cmd->height; u32 cpp; - info = drm_get_format_info(rdev_to_drm(rdev), mode_cmd); cpp = info->cpp[0]; /* need to align pitch with crtc limits */ @@ -205,6 +204,7 @@ int radeon_fbdev_driver_fbdev_probe(struct drm_fb_helper *fb_helper, struct drm_fb_helper_surface_size *sizes) { struct radeon_device *rdev = fb_helper->dev->dev_private; + const struct drm_format_info *format_info; struct drm_mode_fb_cmd2 mode_cmd = { }; struct fb_info *info; struct drm_gem_object *gobj; @@ -223,7 +223,9 @@ int radeon_fbdev_driver_fbdev_probe(struct drm_fb_helper *fb_helper, mode_cmd.pixel_format = drm_mode_legacy_fb_format(sizes->surface_bpp, sizes->surface_depth); - ret = radeon_fbdev_create_pinned_object(fb_helper, &mode_cmd, &gobj); + format_info = drm_get_format_info(rdev_to_drm(rdev), mode_cmd.pixel_format, + mode_cmd.modifier[0]); + ret = radeon_fbdev_create_pinned_object(fb_helper, format_info, &mode_cmd, &gobj); if (ret) { DRM_ERROR("failed to create fbcon object %d\n", ret); return ret; @@ -235,7 +237,7 @@ int 
radeon_fbdev_driver_fbdev_probe(struct drm_fb_helper *fb_helper, ret = -ENOMEM; goto err_radeon_fbdev_destroy_pinned_object; } - ret = radeon_framebuffer_init(rdev_to_drm(rdev), fb, &mode_cmd, gobj); + ret = radeon_framebuffer_init(rdev_to_drm(rdev), fb, format_info, &mode_cmd, gobj); if (ret) { DRM_ERROR("failed to initialize framebuffer %d\n", ret); goto err_kfree; diff --git a/drivers/gpu/drm/radeon/radeon_gart.c b/drivers/gpu/drm/radeon/radeon_gart.c index 4bb242437ff6..acd89a20f272 100644 --- a/drivers/gpu/drm/radeon/radeon_gart.c +++ b/drivers/gpu/drm/radeon/radeon_gart.c @@ -346,14 +346,14 @@ int radeon_gart_init(struct radeon_device *rdev) DRM_INFO("GART: num cpu pages %u, num gpu pages %u\n", rdev->gart.num_cpu_pages, rdev->gart.num_gpu_pages); /* Allocate pages table */ - rdev->gart.pages = vzalloc(array_size(sizeof(void *), - rdev->gart.num_cpu_pages)); + rdev->gart.pages = vcalloc(rdev->gart.num_cpu_pages, + sizeof(void *)); if (rdev->gart.pages == NULL) { radeon_gart_fini(rdev); return -ENOMEM; } - rdev->gart.pages_entry = vmalloc(array_size(sizeof(uint64_t), - rdev->gart.num_gpu_pages)); + rdev->gart.pages_entry = vmalloc_array(rdev->gart.num_gpu_pages, + sizeof(uint64_t)); if (rdev->gart.pages_entry == NULL) { radeon_gart_fini(rdev); return -ENOMEM; diff --git a/drivers/gpu/drm/radeon/radeon_kms.c b/drivers/gpu/drm/radeon/radeon_kms.c index 645e33bf7947..ba1446acd703 100644 --- a/drivers/gpu/drm/radeon/radeon_kms.c +++ b/drivers/gpu/drm/radeon/radeon_kms.c @@ -84,7 +84,6 @@ void radeon_driver_unload_kms(struct drm_device *dev) rdev->agp = NULL; done_free: - kfree(rdev); dev->dev_private = NULL; } diff --git a/drivers/gpu/drm/radeon/radeon_legacy_encoders.c b/drivers/gpu/drm/radeon/radeon_legacy_encoders.c index d6aa1a3012a8..d1e8b9757a65 100644 --- a/drivers/gpu/drm/radeon/radeon_legacy_encoders.c +++ b/drivers/gpu/drm/radeon/radeon_legacy_encoders.c @@ -136,9 +136,9 @@ static void radeon_legacy_lvds_update(struct drm_encoder *encoder, int mode) } if (rdev->is_atom_bios) - radeon_atombios_encoder_dpms_scratch_regs(encoder, (mode == DRM_MODE_DPMS_ON) ? true : false); + radeon_atombios_encoder_dpms_scratch_regs(encoder, mode == DRM_MODE_DPMS_ON); else - radeon_combios_encoder_dpms_scratch_regs(encoder, (mode == DRM_MODE_DPMS_ON) ? true : false); + radeon_combios_encoder_dpms_scratch_regs(encoder, mode == DRM_MODE_DPMS_ON); } @@ -545,9 +545,9 @@ static void radeon_legacy_primary_dac_dpms(struct drm_encoder *encoder, int mode WREG32(RADEON_DAC_MACRO_CNTL, dac_macro_cntl); if (rdev->is_atom_bios) - radeon_atombios_encoder_dpms_scratch_regs(encoder, (mode == DRM_MODE_DPMS_ON) ? true : false); + radeon_atombios_encoder_dpms_scratch_regs(encoder, mode == DRM_MODE_DPMS_ON); else - radeon_combios_encoder_dpms_scratch_regs(encoder, (mode == DRM_MODE_DPMS_ON) ? true : false); + radeon_combios_encoder_dpms_scratch_regs(encoder, mode == DRM_MODE_DPMS_ON); } @@ -742,9 +742,9 @@ static void radeon_legacy_tmds_int_dpms(struct drm_encoder *encoder, int mode) WREG32(RADEON_FP_GEN_CNTL, fp_gen_cntl); if (rdev->is_atom_bios) - radeon_atombios_encoder_dpms_scratch_regs(encoder, (mode == DRM_MODE_DPMS_ON) ? true : false); + radeon_atombios_encoder_dpms_scratch_regs(encoder, mode == DRM_MODE_DPMS_ON); else - radeon_combios_encoder_dpms_scratch_regs(encoder, (mode == DRM_MODE_DPMS_ON) ? 
true : false); + radeon_combios_encoder_dpms_scratch_regs(encoder, mode == DRM_MODE_DPMS_ON); } @@ -908,9 +908,9 @@ static void radeon_legacy_tmds_ext_dpms(struct drm_encoder *encoder, int mode) WREG32(RADEON_FP2_GEN_CNTL, fp2_gen_cntl); if (rdev->is_atom_bios) - radeon_atombios_encoder_dpms_scratch_regs(encoder, (mode == DRM_MODE_DPMS_ON) ? true : false); + radeon_atombios_encoder_dpms_scratch_regs(encoder, mode == DRM_MODE_DPMS_ON); else - radeon_combios_encoder_dpms_scratch_regs(encoder, (mode == DRM_MODE_DPMS_ON) ? true : false); + radeon_combios_encoder_dpms_scratch_regs(encoder, mode == DRM_MODE_DPMS_ON); } @@ -1113,9 +1113,9 @@ static void radeon_legacy_tv_dac_dpms(struct drm_encoder *encoder, int mode) } if (rdev->is_atom_bios) - radeon_atombios_encoder_dpms_scratch_regs(encoder, (mode == DRM_MODE_DPMS_ON) ? true : false); + radeon_atombios_encoder_dpms_scratch_regs(encoder, mode == DRM_MODE_DPMS_ON); else - radeon_combios_encoder_dpms_scratch_regs(encoder, (mode == DRM_MODE_DPMS_ON) ? true : false); + radeon_combios_encoder_dpms_scratch_regs(encoder, mode == DRM_MODE_DPMS_ON); } diff --git a/drivers/gpu/drm/radeon/radeon_mode.h b/drivers/gpu/drm/radeon/radeon_mode.h index 3102f6c2d055..9e34da2cacef 100644 --- a/drivers/gpu/drm/radeon/radeon_mode.h +++ b/drivers/gpu/drm/radeon/radeon_mode.h @@ -40,6 +40,7 @@ struct drm_fb_helper; struct drm_fb_helper_surface_size; +struct drm_format_info; struct edid; struct drm_edid; @@ -890,6 +891,7 @@ extern void radeon_combios_encoder_dpms_scratch_regs(struct drm_encoder *encoder, bool on); int radeon_framebuffer_init(struct drm_device *dev, struct drm_framebuffer *rfb, + const struct drm_format_info *info, const struct drm_mode_fb_cmd2 *mode_cmd, struct drm_gem_object *obj); diff --git a/drivers/gpu/drm/radeon/radeon_pm.c b/drivers/gpu/drm/radeon/radeon_pm.c index b4fb7e70320b..a855a96dd2ea 100644 --- a/drivers/gpu/drm/radeon/radeon_pm.c +++ b/drivers/gpu/drm/radeon/radeon_pm.c @@ -907,8 +907,7 @@ static void radeon_dpm_thermal_work_handler(struct work_struct *work) static bool radeon_dpm_single_display(struct radeon_device *rdev) { - bool single_display = (rdev->pm.dpm.new_active_crtc_count < 2) ? 
- true : false; + bool single_display = rdev->pm.dpm.new_active_crtc_count < 2; /* check if the vblank period is too short to adjust the mclk */ if (single_display && rdev->asic->dpm.vblank_too_short) { diff --git a/drivers/gpu/drm/radeon/radeon_test.c b/drivers/gpu/drm/radeon/radeon_test.c index c9fef9b61ced..818554e60537 100644 --- a/drivers/gpu/drm/radeon/radeon_test.c +++ b/drivers/gpu/drm/radeon/radeon_test.c @@ -455,7 +455,7 @@ static void radeon_test_ring_sync2(struct radeon_device *rdev, r = radeon_ring_lock(rdev, ringC, 64); if (r) { - DRM_ERROR("Failed to lock ring B %p\n", ringC); + DRM_ERROR("Failed to lock ring C %p\n", ringC); goto out_cleanup; } radeon_semaphore_emit_signal(rdev, ringC->idx, semaphore); @@ -481,7 +481,7 @@ static void radeon_test_ring_sync2(struct radeon_device *rdev, r = radeon_ring_lock(rdev, ringC, 64); if (r) { - DRM_ERROR("Failed to lock ring B %p\n", ringC); + DRM_ERROR("Failed to lock ring C %p\n", ringC); goto out_cleanup; } radeon_semaphore_emit_signal(rdev, ringC->idx, semaphore); diff --git a/drivers/gpu/drm/radeon/radeon_vce.c b/drivers/gpu/drm/radeon/radeon_vce.c index 2355a78e1b69..bdbc1bbe8a9b 100644 --- a/drivers/gpu/drm/radeon/radeon_vce.c +++ b/drivers/gpu/drm/radeon/radeon_vce.c @@ -86,7 +86,7 @@ int radeon_vce_init(struct radeon_device *rdev) r = request_firmware(&rdev->vce_fw, fw_name, rdev->dev); if (r) { - dev_err(rdev->dev, "radeon_vce: Can't load firmware \"%s\"\n", + dev_err(rdev->dev, "radeon_vce: can't load firmware \"%s\"\n", fw_name); return r; } @@ -126,7 +126,7 @@ int radeon_vce_init(struct radeon_device *rdev) rdev->vce.fw_version = (start << 24) | (mid << 16) | (end << 8); - /* we can only work with this fw version for now */ + /* we can only work with these fw versions for now */ if ((rdev->vce.fw_version != ((40 << 24) | (2 << 16) | (2 << 8))) && (rdev->vce.fw_version != ((50 << 24) | (0 << 16) | (1 << 8))) && (rdev->vce.fw_version != ((50 << 24) | (1 << 16) | (2 << 8)))) @@ -281,7 +281,7 @@ static void radeon_vce_idle_work_handler(struct work_struct *work) * * @rdev: radeon_device pointer * - * Make sure VCE is powerd up when we want to use it + * Make sure VCE is powered up when we want to use it */ void radeon_vce_note_usage(struct radeon_device *rdev) { diff --git a/drivers/gpu/drm/renesas/rcar-du/rcar_du_kms.c b/drivers/gpu/drm/renesas/rcar-du/rcar_du_kms.c index 4c8fe83dd610..216219accfd9 100644 --- a/drivers/gpu/drm/renesas/rcar-du/rcar_du_kms.c +++ b/drivers/gpu/drm/renesas/rcar-du/rcar_du_kms.c @@ -426,6 +426,7 @@ int rcar_du_dumb_create(struct drm_file *file, struct drm_device *dev, static struct drm_framebuffer * rcar_du_fb_create(struct drm_device *dev, struct drm_file *file_priv, + const struct drm_format_info *info, const struct drm_mode_fb_cmd2 *mode_cmd) { struct rcar_du_device *rcdu = to_rcar_du_device(dev); @@ -490,7 +491,7 @@ rcar_du_fb_create(struct drm_device *dev, struct drm_file *file_priv, } } - return drm_gem_fb_create(dev, file_priv, mode_cmd); + return drm_gem_fb_create(dev, file_priv, info, mode_cmd); } /* ----------------------------------------------------------------------------- diff --git a/drivers/gpu/drm/renesas/rcar-du/rcar_lvds.c b/drivers/gpu/drm/renesas/rcar-du/rcar_lvds.c index af58b814e588..001b3543924a 100644 --- a/drivers/gpu/drm/renesas/rcar-du/rcar_lvds.c +++ b/drivers/gpu/drm/renesas/rcar-du/rcar_lvds.c @@ -1013,7 +1013,7 @@ err_reset_assert: } static const struct dev_pm_ops rcar_lvds_pm_ops = { - SET_RUNTIME_PM_OPS(rcar_lvds_runtime_suspend, rcar_lvds_runtime_resume, NULL) + 
RUNTIME_PM_OPS(rcar_lvds_runtime_suspend, rcar_lvds_runtime_resume, NULL) }; static struct platform_driver rcar_lvds_platform_driver = { @@ -1021,7 +1021,7 @@ static struct platform_driver rcar_lvds_platform_driver = { .remove = rcar_lvds_remove, .driver = { .name = "rcar-lvds", - .pm = &rcar_lvds_pm_ops, + .pm = pm_ptr(&rcar_lvds_pm_ops), .of_match_table = rcar_lvds_of_table, }, }; diff --git a/drivers/gpu/drm/renesas/rcar-du/rcar_mipi_dsi.c b/drivers/gpu/drm/renesas/rcar-du/rcar_mipi_dsi.c index 1af4c73f7a88..5c73a513f678 100644 --- a/drivers/gpu/drm/renesas/rcar-du/rcar_mipi_dsi.c +++ b/drivers/gpu/drm/renesas/rcar-du/rcar_mipi_dsi.c @@ -576,7 +576,10 @@ static int rcar_mipi_dsi_startup(struct rcar_mipi_dsi *dsi, udelay(10); rcar_mipi_dsi_clr(dsi, CLOCKSET1, CLOCKSET1_UPDATEPLL); - ppisetr = PPISETR_DLEN_3 | PPISETR_CLEN; + rcar_mipi_dsi_clr(dsi, TXSETR, TXSETR_LANECNT_MASK); + rcar_mipi_dsi_set(dsi, TXSETR, dsi->lanes - 1); + + ppisetr = ((BIT(dsi->lanes) - 1) & PPISETR_DLEN_MASK) | PPISETR_CLEN; rcar_mipi_dsi_write(dsi, PPISETR, ppisetr); rcar_mipi_dsi_set(dsi, PHYSETUP, PHYSETUP_SHUTDOWNZ); @@ -934,9 +937,234 @@ static int rcar_mipi_dsi_host_detach(struct mipi_dsi_host *host, return 0; } +static ssize_t rcar_mipi_dsi_host_tx_transfer(struct mipi_dsi_host *host, + const struct mipi_dsi_msg *msg, + bool is_rx_xfer) +{ + const bool is_tx_long = mipi_dsi_packet_format_is_long(msg->type); + struct rcar_mipi_dsi *dsi = host_to_rcar_mipi_dsi(host); + struct mipi_dsi_packet packet; + u8 payload[16] = { 0 }; + u32 status; + int ret; + + ret = mipi_dsi_create_packet(&packet, msg); + if (ret) + return ret; + + /* Configure LP or HS command transfer. */ + rcar_mipi_dsi_write(dsi, TXCMSETR, (msg->flags & MIPI_DSI_MSG_USE_LPM) ? + TXCMSETR_SPDTYP : 0); + + /* Register access mode for RX transfer. */ + if (is_rx_xfer) + rcar_mipi_dsi_write(dsi, RXPSETR, 0); + + /* Do not use IRQ, poll for completion, the completion is quick. */ + rcar_mipi_dsi_write(dsi, TXCMIER, 0); + + /* + * Send the header: + * header[0] = Virtual Channel + Data Type + * header[1] = Word Count LSB (LP) or first param (SP) + * header[2] = Word Count MSB (LP) or second param (SP) + */ + rcar_mipi_dsi_write(dsi, TXCMPHDR, + (is_tx_long ? TXCMPHDR_FMT : 0) | + TXCMPHDR_VC(msg->channel) | + TXCMPHDR_DT(msg->type) | + TXCMPHDR_DATA1(packet.header[2]) | + TXCMPHDR_DATA0(packet.header[1])); + + if (is_tx_long) { + memcpy(payload, packet.payload, + min(msg->tx_len, sizeof(payload))); + + rcar_mipi_dsi_write(dsi, TXCMPPD0R, + (payload[3] << 24) | (payload[2] << 16) | + (payload[1] << 8) | payload[0]); + rcar_mipi_dsi_write(dsi, TXCMPPD1R, + (payload[7] << 24) | (payload[6] << 16) | + (payload[5] << 8) | payload[4]); + rcar_mipi_dsi_write(dsi, TXCMPPD2R, + (payload[11] << 24) | (payload[10] << 16) | + (payload[9] << 8) | payload[8]); + rcar_mipi_dsi_write(dsi, TXCMPPD3R, + (payload[15] << 24) | (payload[14] << 16) | + (payload[13] << 8) | payload[12]); + } + + /* Start the transfer, RX with BTA, TX without BTA. */ + if (is_rx_xfer) { + rcar_mipi_dsi_write(dsi, TXCMCR, TXCMCR_BTAREQ); + + /* Wait until the transmission, BTA, reception completed. */ + ret = read_poll_timeout(rcar_mipi_dsi_read, status, + (status & RXPSR_BTAREQEND), + 2000, 50000, false, dsi, RXPSR); + } else { + rcar_mipi_dsi_write(dsi, TXCMCR, TXCMCR_TXREQ); + + /* Wait until the transmission completed. 
*/ + ret = read_poll_timeout(rcar_mipi_dsi_read, status, + (status & TXCMSR_TXREQEND), + 2000, 50000, false, dsi, TXCMSR); + } + + if (ret < 0) { + dev_err(dsi->dev, "Command transfer timeout (0x%08x)\n", + status); + return ret; + } + + return packet.size; +} + +static ssize_t rcar_mipi_dsi_host_rx_transfer(struct mipi_dsi_host *host, + const struct mipi_dsi_msg *msg) +{ + struct rcar_mipi_dsi *dsi = host_to_rcar_mipi_dsi(host); + u8 *rx_buf = (u8 *)(msg->rx_buf); + u32 reg, data, status, wc; + int i, ret; + + /* RX transfer received data validation and parsing starts here. */ + reg = rcar_mipi_dsi_read(dsi, TOSR); + if (reg & TOSR_TATO) { /* Turn-Around TimeOut. */ + /* Clear TATO Turn-Around TimeOut bit. */ + rcar_mipi_dsi_write(dsi, TOSR, TOSR_TATO); + return -ETIMEDOUT; + } + + reg = rcar_mipi_dsi_read(dsi, RXPSR); + + if (msg->flags & MIPI_DSI_MSG_REQ_ACK) { + /* Transfer with zero-length RX. */ + if (!(reg & RXPSR_RCVACK)) { + /* No ACK on RX response received. */ + return -EINVAL; + } + } else { + /* Transfer with non-zero-length RX. */ + if (!(reg & RXPSR_RCVRESP)) { + /* No packet header of RX response received. */ + return -EINVAL; + } + + if (reg & (RXPSR_CRCERR | RXPSR_WCERR | RXPSR_AXIERR | RXPSR_OVRERR)) { + /* Incorrect response payload. */ + return -ENODATA; + } + + data = rcar_mipi_dsi_read(dsi, RXPHDR); + if (data & RXPHDR_FMT) { /* Long Packet Response. */ + /* Read Long Packet Response length from packet header. */ + wc = data & 0xffff; + if (wc > msg->rx_len) { + dev_warn(dsi->dev, + "Long Packet Response longer than RX buffer (%d), limited to %zu Bytes\n", + wc, msg->rx_len); + wc = msg->rx_len; + } + + if (wc > 16) { + dev_warn(dsi->dev, + "Long Packet Response too long (%d), limited to 16 Bytes\n", + wc); + wc = 16; + } + + for (i = 0; i < msg->rx_len; i++) { + if (!(i % 4)) + data = rcar_mipi_dsi_read(dsi, RXPPD0R + i); + + rx_buf[i] = data & 0xff; + data >>= 8; + } + } else { /* Short Packet Response. */ + if (msg->rx_len >= 1) + rx_buf[0] = data & 0xff; + if (msg->rx_len >= 2) + rx_buf[1] = (data >> 8) & 0xff; + if (msg->rx_len >= 3) { + dev_warn(dsi->dev, + "Expected Short Packet Response too long (%zu), limited to 2 Bytes\n", + msg->rx_len); + } + } + } + + if (reg & RXPSR_RCVAKE) { + /* Acknowledge and Error report received. */ + return -EFAULT; + } + + /* Wait until the bus handover to host processor completed. */ + ret = read_poll_timeout(rcar_mipi_dsi_read, status, + !(status & PPIDL0SR_DIR), + 2000, 50000, false, dsi, PPIDL0SR); + if (ret < 0) { + dev_err(dsi->dev, "Command RX DIR timeout (0x%08x)\n", status); + return ret; + } + + /* Wait until the data lane is in LP11 stop state. */ + ret = read_poll_timeout(rcar_mipi_dsi_read, status, + status & PPIDL0SR_STPST, + 2000, 50000, false, dsi, PPIDL0SR); + if (ret < 0) { + dev_err(dsi->dev, "Command RX STPST timeout (0x%08x)\n", status); + return ret; + } + + return 0; +} + +static ssize_t rcar_mipi_dsi_host_transfer(struct mipi_dsi_host *host, + const struct mipi_dsi_msg *msg) +{ + const bool is_rx_xfer = (msg->flags & MIPI_DSI_MSG_REQ_ACK) || msg->rx_len; + struct rcar_mipi_dsi *dsi = host_to_rcar_mipi_dsi(host); + int ret; + + if (msg->tx_len > 16 || msg->rx_len > 16) { + /* ToDo: Implement Memory on AXI bus command mode. */ + dev_warn(dsi->dev, + "Register-based command mode supports only up to 16 Bytes long payload\n"); + return -EOPNOTSUPP; + } + + ret = rcar_mipi_dsi_host_tx_transfer(host, msg, is_rx_xfer); + + /* If TX transfer succeeded and this transfer has RX part. 
*/ + if (ret >= 0 && is_rx_xfer) { + ret = rcar_mipi_dsi_host_rx_transfer(host, msg); + if (ret) + return ret; + + ret = msg->rx_len; + } + + /* + * Wait a bit between commands, otherwise panels based on ILI9881C + * TCON may fail to correctly receive all commands sent to them. + * Until we can actually test with another DSI device, keep the + * delay here, but eventually this delay might have to be moved + * into the ILI9881C panel driver. + */ + usleep_range(1000, 2000); + + /* Clear the completion interrupt. */ + if (!msg->rx_len) + rcar_mipi_dsi_write(dsi, TXCMSR, TXCMSR_TXREQEND); + + return ret; +} + static const struct mipi_dsi_host_ops rcar_mipi_dsi_host_ops = { .attach = rcar_mipi_dsi_host_attach, .detach = rcar_mipi_dsi_host_detach, + .transfer = rcar_mipi_dsi_host_transfer }; /* ----------------------------------------------------------------------------- diff --git a/drivers/gpu/drm/renesas/rcar-du/rcar_mipi_dsi_regs.h b/drivers/gpu/drm/renesas/rcar-du/rcar_mipi_dsi_regs.h index a6b276f1d6ee..76521276e2af 100644 --- a/drivers/gpu/drm/renesas/rcar-du/rcar_mipi_dsi_regs.h +++ b/drivers/gpu/drm/renesas/rcar-du/rcar_mipi_dsi_regs.h @@ -12,6 +12,130 @@ #define LINKSR_LPBUSY (1 << 1) #define LINKSR_HSBUSY (1 << 0) +#define TXSETR 0x100 +#define TXSETR_LANECNT_MASK (0x3 << 0) + +/* + * DSI Command Transfer Registers + */ +#define TXCMSETR 0x110 +#define TXCMSETR_SPDTYP (1 << 8) /* 0:HS 1:LP */ +#define TXCMSETR_LPPDACC (1 << 0) +#define TXCMCR 0x120 +#define TXCMCR_BTATYP (1 << 2) +#define TXCMCR_BTAREQ (1 << 1) +#define TXCMCR_TXREQ (1 << 0) +#define TXCMSR 0x130 +#define TXCMSR_CLSNERR (1 << 18) +#define TXCMSR_AXIERR (1 << 16) +#define TXCMSR_TXREQEND (1 << 0) +#define TXCMSCR 0x134 +#define TXCMSCR_CLSNERR (1 << 18) +#define TXCMSCR_AXIERR (1 << 16) +#define TXCMSCR_TXREQEND (1 << 0) +#define TXCMIER 0x138 +#define TXCMIER_CLSNERR (1 << 18) +#define TXCMIER_AXIERR (1 << 16) +#define TXCMIER_TXREQEND (1 << 0) +#define TXCMADDRSET0R 0x140 +#define TXCMPHDR 0x150 +#define TXCMPHDR_FMT (1 << 24) /* 0:SP 1:LP */ +#define TXCMPHDR_VC(n) (((n) & 0x3) << 22) +#define TXCMPHDR_DT(n) (((n) & 0x3f) << 16) +#define TXCMPHDR_DATA1(n) (((n) & 0xff) << 8) +#define TXCMPHDR_DATA0(n) (((n) & 0xff) << 0) +#define TXCMPPD0R 0x160 +#define TXCMPPD1R 0x164 +#define TXCMPPD2R 0x168 +#define TXCMPPD3R 0x16c + +#define RXSETR 0x200 +#define RXSETR_CRCEN (((n) & 0xf) << 24) +#define RXSETR_ECCEN (((n) & 0xf) << 16) +#define RXPSETR 0x210 +#define RXPSETR_LPPDACC (1 << 0) +#define RXPSR 0x220 +#define RXPSR_ECCERR1B (1 << 28) +#define RXPSR_UEXTRGERR (1 << 25) +#define RXPSR_RESPTOERR (1 << 24) +#define RXPSR_OVRERR (1 << 23) +#define RXPSR_AXIERR (1 << 22) +#define RXPSR_CRCERR (1 << 21) +#define RXPSR_WCERR (1 << 20) +#define RXPSR_UEXDTERR (1 << 19) +#define RXPSR_UEXPKTERR (1 << 18) +#define RXPSR_ECCERR (1 << 17) +#define RXPSR_MLFERR (1 << 16) +#define RXPSR_RCVACK (1 << 14) +#define RXPSR_RCVEOT (1 << 10) +#define RXPSR_RCVAKE (1 << 9) +#define RXPSR_RCVRESP (1 << 8) +#define RXPSR_BTAREQEND (1 << 0) +#define RXPSCR 0x224 +#define RXPSCR_ECCERR1B (1 << 28) +#define RXPSCR_UEXTRGERR (1 << 25) +#define RXPSCR_RESPTOERR (1 << 24) +#define RXPSCR_OVRERR (1 << 23) +#define RXPSCR_AXIERR (1 << 22) +#define RXPSCR_CRCERR (1 << 21) +#define RXPSCR_WCERR (1 << 20) +#define RXPSCR_UEXDTERR (1 << 19) +#define RXPSCR_UEXPKTERR (1 << 18) +#define RXPSCR_ECCERR (1 << 17) +#define RXPSCR_MLFERR (1 << 16) +#define RXPSCR_RCVACK (1 << 14) +#define RXPSCR_RCVEOT (1 << 10) +#define RXPSCR_RCVAKE (1 << 9) +#define 
RXPSCR_RCVRESP (1 << 8) +#define RXPSCR_BTAREQEND (1 << 0) +#define RXPIER 0x228 +#define RXPIER_ECCERR1B (1 << 28) +#define RXPIER_UEXTRGERR (1 << 25) +#define RXPIER_RESPTOERR (1 << 24) +#define RXPIER_OVRERR (1 << 23) +#define RXPIER_AXIERR (1 << 22) +#define RXPIER_CRCERR (1 << 21) +#define RXPIER_WCERR (1 << 20) +#define RXPIER_UEXDTERR (1 << 19) +#define RXPIER_UEXPKTERR (1 << 18) +#define RXPIER_ECCERR (1 << 17) +#define RXPIER_MLFERR (1 << 16) +#define RXPIER_RCVACK (1 << 14) +#define RXPIER_RCVEOT (1 << 10) +#define RXPIER_RCVAKE (1 << 9) +#define RXPIER_RCVRESP (1 << 8) +#define RXPIER_BTAREQEND (1 << 0) +#define RXPADDRSET0R 0x230 +#define RXPSIZESETR 0x238 +#define RXPSIZESETR_SIZE(n) (((n) & 0xf) << 3) +#define RXPHDR 0x240 +#define RXPHDR_FMT (1 << 24) /* 0:SP 1:LP */ +#define RXPHDR_VC(n) (((n) & 0x3) << 22) +#define RXPHDR_DT(n) (((n) & 0x3f) << 16) +#define RXPHDR_DATA1(n) (((n) & 0xff) << 8) +#define RXPHDR_DATA0(n) (((n) & 0xff) << 0) +#define RXPPD0R 0x250 +#define RXPPD1R 0x254 +#define RXPPD2R 0x258 +#define RXPPD3R 0x25c +#define AKEPR 0x300 +#define AKEPR_VC(n) (((n) & 0x3) << 22) +#define AKEPR_DT(n) (((n) & 0x3f) << 16) +#define AKEPR_ERRRPT(n) (((n) & 0xffff) << 0) +#define RXRESPTOSETR 0x400 +#define TACR 0x500 +#define TASR 0x510 +#define TASCR 0x514 +#define TAIER 0x518 +#define TOSR 0x610 +#define TOSR_TATO (1 << 2) +#define TOSR_LRXHTO (1 << 1) +#define TOSR_HRXTO (1 << 0) +#define TOSCR 0x614 +#define TOSCR_TATO (1 << 2) +#define TOSCR_LRXHTO (1 << 1) +#define TOSCR_HRXTO (1 << 0) + /* * Video Mode Register */ @@ -80,10 +204,7 @@ * PHY-Protocol Interface (PPI) Registers */ #define PPISETR 0x700 -#define PPISETR_DLEN_0 (0x1 << 0) -#define PPISETR_DLEN_1 (0x3 << 0) -#define PPISETR_DLEN_2 (0x7 << 0) -#define PPISETR_DLEN_3 (0xf << 0) +#define PPISETR_DLEN_MASK (0xf << 0) #define PPISETR_CLEN (1 << 8) #define PPICLCR 0x710 @@ -100,6 +221,10 @@ #define PPICLSCR_HSTOLP (1 << 27) #define PPICLSCR_TOHS (1 << 26) +#define PPIDL0SR 0x740 +#define PPIDL0SR_DIR (1 << 10) +#define PPIDL0SR_STPST (1 << 6) + #define PPIDLSR 0x760 #define PPIDLSR_STPST (0xf << 0) diff --git a/drivers/gpu/drm/renesas/rz-du/rzg2l_du_kms.c b/drivers/gpu/drm/renesas/rz-du/rzg2l_du_kms.c index 55a97691e9b2..87f171145a23 100644 --- a/drivers/gpu/drm/renesas/rz-du/rzg2l_du_kms.c +++ b/drivers/gpu/drm/renesas/rz-du/rzg2l_du_kms.c @@ -191,6 +191,7 @@ int rzg2l_du_dumb_create(struct drm_file *file, struct drm_device *dev, static struct drm_framebuffer * rzg2l_du_fb_create(struct drm_device *dev, struct drm_file *file_priv, + const struct drm_format_info *info, const struct drm_mode_fb_cmd2 *mode_cmd) { const struct rzg2l_du_format_info *format; @@ -214,7 +215,7 @@ rzg2l_du_fb_create(struct drm_device *dev, struct drm_file *file_priv, return ERR_PTR(-EINVAL); } - return drm_gem_fb_create(dev, file_priv, mode_cmd); + return drm_gem_fb_create(dev, file_priv, info, mode_cmd); } /* ----------------------------------------------------------------------------- diff --git a/drivers/gpu/drm/renesas/rz-du/rzg2l_mipi_dsi.c b/drivers/gpu/drm/renesas/rz-du/rzg2l_mipi_dsi.c index f87337c3cbb5..3b52dfc0ea1e 100644 --- a/drivers/gpu/drm/renesas/rz-du/rzg2l_mipi_dsi.c +++ b/drivers/gpu/drm/renesas/rz-du/rzg2l_mipi_dsi.c @@ -913,7 +913,7 @@ static const struct mipi_dsi_host_ops rzg2l_mipi_dsi_host_ops = { * Power Management */ -static int __maybe_unused rzg2l_mipi_pm_runtime_suspend(struct device *dev) +static int rzg2l_mipi_pm_runtime_suspend(struct device *dev) { struct rzg2l_mipi_dsi *dsi = dev_get_drvdata(dev); 
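The rcar-lvds and rzg2l-mipi-dsi hunks above and below drop the __maybe_unused annotations and move from SET_RUNTIME_PM_OPS() to RUNTIME_PM_OPS() with pm_ptr(). As a rough sketch of that pattern only (a hypothetical "foo" platform driver, not code from this series):

#include <linux/module.h>
#include <linux/platform_device.h>
#include <linux/pm.h>
#include <linux/pm_runtime.h>

static int foo_runtime_suspend(struct device *dev)
{
	/* Disable clocks, assert resets, etc. for the hypothetical device. */
	return 0;
}

static int foo_runtime_resume(struct device *dev)
{
	/* Re-enable clocks and bring the hypothetical device back up. */
	return 0;
}

/*
 * RUNTIME_PM_OPS() always references the callbacks, so they no longer
 * need __maybe_unused when CONFIG_PM is disabled.
 */
static const struct dev_pm_ops foo_pm_ops = {
	RUNTIME_PM_OPS(foo_runtime_suspend, foo_runtime_resume, NULL)
};

static struct platform_driver foo_driver = {
	.driver = {
		.name = "foo",
		/* pm_ptr() lets the whole ops table drop out when CONFIG_PM=n. */
		.pm = pm_ptr(&foo_pm_ops),
	},
};
module_platform_driver(foo_driver);
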
@@ -923,7 +923,7 @@ static int __maybe_unused rzg2l_mipi_pm_runtime_suspend(struct device *dev) return 0; } -static int __maybe_unused rzg2l_mipi_pm_runtime_resume(struct device *dev) +static int rzg2l_mipi_pm_runtime_resume(struct device *dev) { struct rzg2l_mipi_dsi *dsi = dev_get_drvdata(dev); int ret; @@ -940,7 +940,7 @@ static int __maybe_unused rzg2l_mipi_pm_runtime_resume(struct device *dev) } static const struct dev_pm_ops rzg2l_mipi_pm_ops = { - SET_RUNTIME_PM_OPS(rzg2l_mipi_pm_runtime_suspend, rzg2l_mipi_pm_runtime_resume, NULL) + RUNTIME_PM_OPS(rzg2l_mipi_pm_runtime_suspend, rzg2l_mipi_pm_runtime_resume, NULL) }; /* ----------------------------------------------------------------------------- @@ -1072,7 +1072,7 @@ static struct platform_driver rzg2l_mipi_dsi_platform_driver = { .remove = rzg2l_mipi_dsi_remove, .driver = { .name = "rzg2l-mipi-dsi", - .pm = &rzg2l_mipi_pm_ops, + .pm = pm_ptr(&rzg2l_mipi_pm_ops), .of_match_table = rzg2l_mipi_dsi_of_table, }, }; diff --git a/drivers/gpu/drm/renesas/shmobile/shmob_drm_kms.c b/drivers/gpu/drm/renesas/shmobile/shmob_drm_kms.c index 4202ab00fb0c..fd9460da1789 100644 --- a/drivers/gpu/drm/renesas/shmobile/shmob_drm_kms.c +++ b/drivers/gpu/drm/renesas/shmobile/shmob_drm_kms.c @@ -117,6 +117,7 @@ const struct shmob_drm_format_info *shmob_drm_format_info(u32 fourcc) static struct drm_framebuffer * shmob_drm_fb_create(struct drm_device *dev, struct drm_file *file_priv, + const struct drm_format_info *info, const struct drm_mode_fb_cmd2 *mode_cmd) { const struct shmob_drm_format_info *format; @@ -144,7 +145,7 @@ shmob_drm_fb_create(struct drm_device *dev, struct drm_file *file_priv, } } - return drm_gem_fb_create(dev, file_priv, mode_cmd); + return drm_gem_fb_create(dev, file_priv, info, mode_cmd); } static const struct drm_mode_config_funcs shmob_drm_mode_config_funcs = { diff --git a/drivers/gpu/drm/rockchip/Kconfig b/drivers/gpu/drm/rockchip/Kconfig index ab525668939a..b7b025814e72 100644 --- a/drivers/gpu/drm/rockchip/Kconfig +++ b/drivers/gpu/drm/rockchip/Kconfig @@ -10,6 +10,7 @@ config DRM_ROCKCHIP select VIDEOMODE_HELPERS select DRM_ANALOGIX_DP if ROCKCHIP_ANALOGIX_DP select DRM_DISPLAY_DP_AUX_BUS if ROCKCHIP_ANALOGIX_DP + select DRM_DW_DP if ROCKCHIP_DW_DP select DRM_DW_HDMI if ROCKCHIP_DW_HDMI select DRM_DW_HDMI_QP if ROCKCHIP_DW_HDMI_QP select DRM_DW_MIPI_DSI if ROCKCHIP_DW_MIPI_DSI @@ -53,6 +54,7 @@ config ROCKCHIP_CDN_DP bool "Rockchip cdn DP" depends on EXTCON=y || (EXTCON=m && DRM_ROCKCHIP=m) select DRM_DISPLAY_HELPER + select DRM_BRIDGE_CONNECTOR select DRM_DISPLAY_DP_HELPER help This selects support for Rockchip SoC specific extensions @@ -60,6 +62,14 @@ config ROCKCHIP_CDN_DP RK3399 based SoC, you should select this option. +config ROCKCHIP_DW_DP + bool "Rockchip specific extensions for Synopsys DW DP" + help + This selects support for Rockchip SoC specific extensions + to enable Synopsys DesignWare Cores based DisplayPort transmit + controller support on Rockchip SoC, If you want to enable DP on + rk3588 based SoC, you should select this option. 
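Several drivers touched above (radeon, rcar-du, rz-du, shmobile) gain a const struct drm_format_info * parameter in their .fb_create and framebuffer-init paths, because the DRM core now resolves the format once and hands it down instead of each driver calling drm_get_format_info() itself. A minimal sketch of the updated callback shape, using hypothetical foo_* names rather than any driver in this series, might look like:

#include <linux/err.h>
#include <drm/drm_device.h>
#include <drm/drm_fourcc.h>
#include <drm/drm_framebuffer.h>
#include <drm/drm_gem_framebuffer_helper.h>
#include <drm/drm_mode_config.h>

static struct drm_framebuffer *
foo_fb_create(struct drm_device *dev, struct drm_file *file_priv,
	      const struct drm_format_info *info,
	      const struct drm_mode_fb_cmd2 *mode_cmd)
{
	/* Validate against the format info handed down by the core. */
	if (info->num_planes != 1)
		return ERR_PTR(-EINVAL);

	/* Forward the already-resolved info to the GEM framebuffer helper. */
	return drm_gem_fb_create(dev, file_priv, info, mode_cmd);
}

static const struct drm_mode_config_funcs foo_mode_config_funcs = {
	.fb_create = foo_fb_create,
};
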
+ config ROCKCHIP_DW_HDMI bool "Rockchip specific extensions for Synopsys DW HDMI" help diff --git a/drivers/gpu/drm/rockchip/Makefile b/drivers/gpu/drm/rockchip/Makefile index 2b867cebbc12..097f062399c7 100644 --- a/drivers/gpu/drm/rockchip/Makefile +++ b/drivers/gpu/drm/rockchip/Makefile @@ -14,6 +14,7 @@ rockchipdrm-$(CONFIG_ROCKCHIP_DW_HDMI) += dw_hdmi-rockchip.o rockchipdrm-$(CONFIG_ROCKCHIP_DW_HDMI_QP) += dw_hdmi_qp-rockchip.o rockchipdrm-$(CONFIG_ROCKCHIP_DW_MIPI_DSI) += dw-mipi-dsi-rockchip.o rockchipdrm-$(CONFIG_ROCKCHIP_DW_MIPI_DSI2) += dw-mipi-dsi2-rockchip.o +rockchipdrm-$(CONFIG_ROCKCHIP_DW_DP) += dw_dp-rockchip.o rockchipdrm-$(CONFIG_ROCKCHIP_INNO_HDMI) += inno_hdmi.o rockchipdrm-$(CONFIG_ROCKCHIP_LVDS) += rockchip_lvds.o rockchipdrm-$(CONFIG_ROCKCHIP_RGB) += rockchip_rgb.o diff --git a/drivers/gpu/drm/rockchip/cdn-dp-core.c b/drivers/gpu/drm/rockchip/cdn-dp-core.c index 292c31de18f1..b7e3f5dcf8d5 100644 --- a/drivers/gpu/drm/rockchip/cdn-dp-core.c +++ b/drivers/gpu/drm/rockchip/cdn-dp-core.c @@ -16,7 +16,9 @@ #include <sound/hdmi-codec.h> #include <drm/display/drm_dp_helper.h> +#include <drm/display/drm_hdmi_audio_helper.h> #include <drm/drm_atomic_helper.h> +#include <drm/drm_bridge_connector.h> #include <drm/drm_edid.h> #include <drm/drm_of.h> #include <drm/drm_probe_helper.h> @@ -25,9 +27,9 @@ #include "cdn-dp-core.h" #include "cdn-dp-reg.h" -static inline struct cdn_dp_device *connector_to_dp(struct drm_connector *connector) +static inline struct cdn_dp_device *bridge_to_dp(struct drm_bridge *bridge) { - return container_of(connector, struct cdn_dp_device, connector); + return container_of(bridge, struct cdn_dp_device, bridge); } static inline struct cdn_dp_device *encoder_to_dp(struct drm_encoder *encoder) @@ -231,9 +233,9 @@ static bool cdn_dp_check_sink_connection(struct cdn_dp_device *dp) } static enum drm_connector_status -cdn_dp_connector_detect(struct drm_connector *connector, bool force) +cdn_dp_bridge_detect(struct drm_bridge *bridge, struct drm_connector *connector) { - struct cdn_dp_device *dp = connector_to_dp(connector); + struct cdn_dp_device *dp = bridge_to_dp(bridge); enum drm_connector_status status = connector_status_disconnected; mutex_lock(&dp->lock); @@ -244,41 +246,25 @@ cdn_dp_connector_detect(struct drm_connector *connector, bool force) return status; } -static void cdn_dp_connector_destroy(struct drm_connector *connector) +static const struct drm_edid * +cdn_dp_bridge_edid_read(struct drm_bridge *bridge, struct drm_connector *connector) { - drm_connector_unregister(connector); - drm_connector_cleanup(connector); -} - -static const struct drm_connector_funcs cdn_dp_atomic_connector_funcs = { - .detect = cdn_dp_connector_detect, - .destroy = cdn_dp_connector_destroy, - .fill_modes = drm_helper_probe_single_connector_modes, - .reset = drm_atomic_helper_connector_reset, - .atomic_duplicate_state = drm_atomic_helper_connector_duplicate_state, - .atomic_destroy_state = drm_atomic_helper_connector_destroy_state, -}; - -static int cdn_dp_connector_get_modes(struct drm_connector *connector) -{ - struct cdn_dp_device *dp = connector_to_dp(connector); - int ret = 0; + struct cdn_dp_device *dp = bridge_to_dp(bridge); + const struct drm_edid *drm_edid; mutex_lock(&dp->lock); - - ret = drm_edid_connector_add_modes(connector); - + drm_edid = drm_edid_read_custom(connector, cdn_dp_get_edid_block, dp); mutex_unlock(&dp->lock); - return ret; + return drm_edid; } static enum drm_mode_status -cdn_dp_connector_mode_valid(struct drm_connector *connector, - const 
struct drm_display_mode *mode) +cdn_dp_bridge_mode_valid(struct drm_bridge *bridge, + const struct drm_display_info *display_info, + const struct drm_display_mode *mode) { - struct cdn_dp_device *dp = connector_to_dp(connector); - struct drm_display_info *display_info = &dp->connector.display_info; + struct cdn_dp_device *dp = bridge_to_dp(bridge); u32 requested, actual, rate, sink_max, source_max = 0; u8 lanes, bpc; @@ -323,11 +309,6 @@ cdn_dp_connector_mode_valid(struct drm_connector *connector, return MODE_OK; } -static struct drm_connector_helper_funcs cdn_dp_connector_helper_funcs = { - .get_modes = cdn_dp_connector_get_modes, - .mode_valid = cdn_dp_connector_mode_valid, -}; - static int cdn_dp_firmware_init(struct cdn_dp_device *dp) { int ret; @@ -360,7 +341,6 @@ static int cdn_dp_firmware_init(struct cdn_dp_device *dp) static int cdn_dp_get_sink_capability(struct cdn_dp_device *dp) { - const struct drm_display_info *info = &dp->connector.display_info; int ret; if (!cdn_dp_check_sink_connection(dp)) @@ -373,17 +353,6 @@ static int cdn_dp_get_sink_capability(struct cdn_dp_device *dp) return ret; } - drm_edid_free(dp->drm_edid); - dp->drm_edid = drm_edid_read_custom(&dp->connector, - cdn_dp_get_edid_block, dp); - drm_edid_connector_update(&dp->connector, dp->drm_edid); - - dp->sink_has_audio = info->has_audio; - - if (dp->drm_edid) - DRM_DEV_DEBUG_KMS(dp->dev, "got edid: width[%d] x height[%d]\n", - info->width_mm / 10, info->height_mm / 10); - return 0; } @@ -488,10 +457,6 @@ static int cdn_dp_disable(struct cdn_dp_device *dp) dp->active = false; dp->max_lanes = 0; dp->max_rate = 0; - if (!dp->connected) { - drm_edid_free(dp->drm_edid); - dp->drm_edid = NULL; - } return 0; } @@ -546,26 +511,13 @@ err_clk_disable: return ret; } -static void cdn_dp_encoder_mode_set(struct drm_encoder *encoder, - struct drm_display_mode *mode, - struct drm_display_mode *adjusted) +static void cdn_dp_bridge_mode_set(struct drm_bridge *bridge, + const struct drm_display_mode *mode, + const struct drm_display_mode *adjusted) { - struct cdn_dp_device *dp = encoder_to_dp(encoder); - struct drm_display_info *display_info = &dp->connector.display_info; + struct cdn_dp_device *dp = bridge_to_dp(bridge); struct video_info *video = &dp->video_info; - switch (display_info->bpc) { - case 10: - video->color_depth = 10; - break; - case 6: - video->color_depth = 6; - break; - default: - video->color_depth = 8; - break; - } - video->color_fmt = PXL_RGB; video->v_sync_polarity = !!(mode->flags & DRM_MODE_FLAG_NVSYNC); video->h_sync_polarity = !!(mode->flags & DRM_MODE_FLAG_NHSYNC); @@ -592,19 +544,37 @@ static bool cdn_dp_check_link_status(struct cdn_dp_device *dp) return drm_dp_channel_eq_ok(link_status, min(port->lanes, sink_lanes)); } -static void cdn_dp_audio_handle_plugged_change(struct cdn_dp_device *dp, - bool plugged) +static void cdn_dp_display_info_update(struct cdn_dp_device *dp, + struct drm_display_info *display_info) { - if (dp->codec_dev) - dp->plugged_cb(dp->codec_dev, plugged); + struct video_info *video = &dp->video_info; + + switch (display_info->bpc) { + case 10: + video->color_depth = 10; + break; + case 6: + video->color_depth = 6; + break; + default: + video->color_depth = 8; + break; + } } -static void cdn_dp_encoder_enable(struct drm_encoder *encoder) +static void cdn_dp_bridge_atomic_enable(struct drm_bridge *bridge, struct drm_atomic_state *state) { - struct cdn_dp_device *dp = encoder_to_dp(encoder); + struct cdn_dp_device *dp = bridge_to_dp(bridge); + struct drm_connector *connector; int ret, 
val; - ret = drm_of_encoder_active_endpoint_id(dp->dev->of_node, encoder); + connector = drm_atomic_get_new_connector_for_encoder(state, bridge->encoder); + if (!connector) + return; + + cdn_dp_display_info_update(dp, &connector->display_info); + + ret = drm_of_encoder_active_endpoint_id(dp->dev->of_node, &dp->encoder.encoder); if (ret < 0) { DRM_DEV_ERROR(dp->dev, "Could not get vop id, %d", ret); return; @@ -625,7 +595,7 @@ static void cdn_dp_encoder_enable(struct drm_encoder *encoder) ret = cdn_dp_enable(dp); if (ret) { - DRM_DEV_ERROR(dp->dev, "Failed to enable encoder %d\n", + DRM_DEV_ERROR(dp->dev, "Failed to enable bridge %d\n", ret); goto out; } @@ -655,24 +625,21 @@ static void cdn_dp_encoder_enable(struct drm_encoder *encoder) goto out; } - cdn_dp_audio_handle_plugged_change(dp, true); - out: mutex_unlock(&dp->lock); } -static void cdn_dp_encoder_disable(struct drm_encoder *encoder) +static void cdn_dp_bridge_atomic_disable(struct drm_bridge *bridge, struct drm_atomic_state *state) { - struct cdn_dp_device *dp = encoder_to_dp(encoder); + struct cdn_dp_device *dp = bridge_to_dp(bridge); int ret; mutex_lock(&dp->lock); - cdn_dp_audio_handle_plugged_change(dp, false); if (dp->active) { ret = cdn_dp_disable(dp); if (ret) { - DRM_DEV_ERROR(dp->dev, "Failed to disable encoder %d\n", + DRM_DEV_ERROR(dp->dev, "Failed to disable bridge %d\n", ret); } } @@ -704,9 +671,6 @@ static int cdn_dp_encoder_atomic_check(struct drm_encoder *encoder, } static const struct drm_encoder_helper_funcs cdn_dp_encoder_helper_funcs = { - .mode_set = cdn_dp_encoder_mode_set, - .enable = cdn_dp_encoder_enable, - .disable = cdn_dp_encoder_disable, .atomic_check = cdn_dp_encoder_atomic_check, }; @@ -779,11 +743,12 @@ static int cdn_dp_parse_dt(struct cdn_dp_device *dp) return 0; } -static int cdn_dp_audio_hw_params(struct device *dev, void *data, - struct hdmi_codec_daifmt *daifmt, - struct hdmi_codec_params *params) +static int cdn_dp_audio_prepare(struct drm_bridge *bridge, + struct drm_connector *connector, + struct hdmi_codec_daifmt *daifmt, + struct hdmi_codec_params *params) { - struct cdn_dp_device *dp = dev_get_drvdata(dev); + struct cdn_dp_device *dp = bridge_to_dp(bridge); struct audio_info audio = { .sample_width = params->sample_width, .sample_rate = params->sample_rate, @@ -805,7 +770,7 @@ static int cdn_dp_audio_hw_params(struct device *dev, void *data, audio.format = AFMT_SPDIF; break; default: - DRM_DEV_ERROR(dev, "Invalid format %d\n", daifmt->fmt); + drm_err(bridge->dev, "Invalid format %d\n", daifmt->fmt); ret = -EINVAL; goto out; } @@ -819,9 +784,10 @@ out: return ret; } -static void cdn_dp_audio_shutdown(struct device *dev, void *data) +static void cdn_dp_audio_shutdown(struct drm_bridge *bridge, + struct drm_connector *connector) { - struct cdn_dp_device *dp = dev_get_drvdata(dev); + struct cdn_dp_device *dp = bridge_to_dp(bridge); int ret; mutex_lock(&dp->lock); @@ -835,10 +801,11 @@ out: mutex_unlock(&dp->lock); } -static int cdn_dp_audio_mute_stream(struct device *dev, void *data, +static int cdn_dp_audio_mute_stream(struct drm_bridge *bridge, + struct drm_connector *connector, bool enable, int direction) { - struct cdn_dp_device *dp = dev_get_drvdata(dev); + struct cdn_dp_device *dp = bridge_to_dp(bridge); int ret; mutex_lock(&dp->lock); @@ -854,57 +821,22 @@ out: return ret; } -static int cdn_dp_audio_get_eld(struct device *dev, void *data, - u8 *buf, size_t len) -{ - struct cdn_dp_device *dp = dev_get_drvdata(dev); - - memcpy(buf, dp->connector.eld, min(sizeof(dp->connector.eld), 
len)); - - return 0; -} - -static int cdn_dp_audio_hook_plugged_cb(struct device *dev, void *data, - hdmi_codec_plugged_cb fn, - struct device *codec_dev) -{ - struct cdn_dp_device *dp = dev_get_drvdata(dev); - - mutex_lock(&dp->lock); - dp->plugged_cb = fn; - dp->codec_dev = codec_dev; - cdn_dp_audio_handle_plugged_change(dp, dp->connected); - mutex_unlock(&dp->lock); - - return 0; -} - -static const struct hdmi_codec_ops audio_codec_ops = { - .hw_params = cdn_dp_audio_hw_params, - .audio_shutdown = cdn_dp_audio_shutdown, - .mute_stream = cdn_dp_audio_mute_stream, - .get_eld = cdn_dp_audio_get_eld, - .hook_plugged_cb = cdn_dp_audio_hook_plugged_cb, +static const struct drm_bridge_funcs cdn_dp_bridge_funcs = { + .atomic_duplicate_state = drm_atomic_helper_bridge_duplicate_state, + .atomic_destroy_state = drm_atomic_helper_bridge_destroy_state, + .atomic_reset = drm_atomic_helper_bridge_reset, + .detect = cdn_dp_bridge_detect, + .edid_read = cdn_dp_bridge_edid_read, + .atomic_enable = cdn_dp_bridge_atomic_enable, + .atomic_disable = cdn_dp_bridge_atomic_disable, + .mode_valid = cdn_dp_bridge_mode_valid, + .mode_set = cdn_dp_bridge_mode_set, + + .dp_audio_prepare = cdn_dp_audio_prepare, + .dp_audio_mute_stream = cdn_dp_audio_mute_stream, + .dp_audio_shutdown = cdn_dp_audio_shutdown, }; -static int cdn_dp_audio_codec_init(struct cdn_dp_device *dp, - struct device *dev) -{ - struct hdmi_codec_pdata codec_data = { - .i2s = 1, - .spdif = 1, - .ops = &audio_codec_ops, - .max_i2s_channels = 8, - .no_capture_mute = 1, - }; - - dp->audio_pdev = platform_device_register_data( - dev, HDMI_CODEC_DRV_NAME, PLATFORM_DEVID_AUTO, - &codec_data, sizeof(codec_data)); - - return PTR_ERR_OR_ZERO(dp->audio_pdev); -} - static int cdn_dp_request_firmware(struct cdn_dp_device *dp) { int ret; @@ -1006,7 +938,9 @@ static void cdn_dp_pd_event_work(struct work_struct *work) out: mutex_unlock(&dp->lock); - drm_connector_helper_hpd_irq_event(&dp->connector); + drm_bridge_hpd_notify(&dp->bridge, + dp->connected ? 
connector_status_connected + : connector_status_disconnected); } static int cdn_dp_pd_event(struct notifier_block *nb, @@ -1062,26 +996,35 @@ static int cdn_dp_bind(struct device *dev, struct device *master, void *data) drm_encoder_helper_add(encoder, &cdn_dp_encoder_helper_funcs); - connector = &dp->connector; - connector->polled = DRM_CONNECTOR_POLL_HPD; - connector->dpms = DRM_MODE_DPMS_OFF; - - ret = drm_connector_init(drm_dev, connector, - &cdn_dp_atomic_connector_funcs, - DRM_MODE_CONNECTOR_DisplayPort); - if (ret) { - DRM_ERROR("failed to initialize connector with drm\n"); - goto err_free_encoder; - } + dp->bridge.ops = + DRM_BRIDGE_OP_DETECT | + DRM_BRIDGE_OP_EDID | + DRM_BRIDGE_OP_HPD | + DRM_BRIDGE_OP_DP_AUDIO; + dp->bridge.of_node = dp->dev->of_node; + dp->bridge.type = DRM_MODE_CONNECTOR_DisplayPort; + dp->bridge.hdmi_audio_dev = dp->dev; + dp->bridge.hdmi_audio_max_i2s_playback_channels = 8; + dp->bridge.hdmi_audio_spdif_playback = 1; + dp->bridge.hdmi_audio_dai_port = -1; + + ret = devm_drm_bridge_add(dev, &dp->bridge); + if (ret) + return ret; - drm_connector_helper_add(connector, &cdn_dp_connector_helper_funcs); + ret = drm_bridge_attach(encoder, &dp->bridge, NULL, DRM_BRIDGE_ATTACH_NO_CONNECTOR); + if (ret) + return ret; - ret = drm_connector_attach_encoder(connector, encoder); - if (ret) { - DRM_ERROR("failed to attach connector and encoder\n"); - goto err_free_connector; + connector = drm_bridge_connector_init(drm_dev, encoder); + if (IS_ERR(connector)) { + ret = PTR_ERR(connector); + dev_err(dp->dev, "failed to init bridge connector: %d\n", ret); + return ret; } + drm_connector_attach_encoder(connector, encoder); + for (i = 0; i < dp->ports; i++) { port = dp->port[i]; @@ -1092,7 +1035,7 @@ static int cdn_dp_bind(struct device *dev, struct device *master, void *data) if (ret) { DRM_DEV_ERROR(dev, "register EXTCON_DISP_DP notifier err\n"); - goto err_free_connector; + return ret; } } @@ -1101,30 +1044,19 @@ static int cdn_dp_bind(struct device *dev, struct device *master, void *data) schedule_work(&dp->event_work); return 0; - -err_free_connector: - drm_connector_cleanup(connector); -err_free_encoder: - drm_encoder_cleanup(encoder); - return ret; } static void cdn_dp_unbind(struct device *dev, struct device *master, void *data) { struct cdn_dp_device *dp = dev_get_drvdata(dev); struct drm_encoder *encoder = &dp->encoder.encoder; - struct drm_connector *connector = &dp->connector; cancel_work_sync(&dp->event_work); - cdn_dp_encoder_disable(encoder); encoder->funcs->destroy(encoder); - connector->funcs->destroy(connector); pm_runtime_disable(dev); if (dp->fw_loaded) release_firmware(dp->fw); - drm_edid_free(dp->drm_edid); - dp->drm_edid = NULL; } static const struct component_ops cdn_dp_component_ops = { @@ -1171,9 +1103,10 @@ static int cdn_dp_probe(struct platform_device *pdev) int ret; int i; - dp = devm_kzalloc(dev, sizeof(*dp), GFP_KERNEL); - if (!dp) - return -ENOMEM; + dp = devm_drm_bridge_alloc(dev, struct cdn_dp_device, bridge, + &cdn_dp_bridge_funcs); + if (IS_ERR(dp)) + return PTR_ERR(dp); dp->dev = dev; match = of_match_node(cdn_dp_dt_ids, pdev->dev.of_node); @@ -1209,19 +1142,11 @@ static int cdn_dp_probe(struct platform_device *pdev) mutex_init(&dp->lock); dev_set_drvdata(dev, dp); - ret = cdn_dp_audio_codec_init(dp, dev); - if (ret) - return ret; - ret = component_add(dev, &cdn_dp_component_ops); if (ret) - goto err_audio_deinit; + return ret; return 0; - -err_audio_deinit: - platform_device_unregister(dp->audio_pdev); - return ret; } static void 
cdn_dp_remove(struct platform_device *pdev) diff --git a/drivers/gpu/drm/rockchip/cdn-dp-core.h b/drivers/gpu/drm/rockchip/cdn-dp-core.h index 17498f576ce7..e9c30b9fd543 100644 --- a/drivers/gpu/drm/rockchip/cdn-dp-core.h +++ b/drivers/gpu/drm/rockchip/cdn-dp-core.h @@ -8,6 +8,7 @@ #define _CDN_DP_CORE_H #include <drm/display/drm_dp_helper.h> +#include <drm/drm_bridge.h> #include <drm/drm_panel.h> #include <drm/drm_probe_helper.h> #include <sound/hdmi-codec.h> @@ -65,12 +66,11 @@ struct cdn_dp_port { struct cdn_dp_device { struct device *dev; struct drm_device *drm_dev; - struct drm_connector connector; + struct drm_bridge bridge; struct rockchip_encoder encoder; struct drm_display_mode mode; struct platform_device *audio_pdev; struct work_struct event_work; - const struct drm_edid *drm_edid; struct mutex lock; bool connected; @@ -101,9 +101,5 @@ struct cdn_dp_device { int active_port; u8 dpcd[DP_RECEIVER_CAP_SIZE]; - bool sink_has_audio; - - hdmi_codec_plugged_cb plugged_cb; - struct device *codec_dev; }; #endif /* _CDN_DP_CORE_H */ diff --git a/drivers/gpu/drm/rockchip/dw-mipi-dsi-rockchip.c b/drivers/gpu/drm/rockchip/dw-mipi-dsi-rockchip.c index 3398160ad75e..5523911b990d 100644 --- a/drivers/gpu/drm/rockchip/dw-mipi-dsi-rockchip.c +++ b/drivers/gpu/drm/rockchip/dw-mipi-dsi-rockchip.c @@ -7,6 +7,7 @@ */ #include <linux/clk.h> +#include <linux/hw_bitfield.h> #include <linux/iopoll.h> #include <linux/math64.h> #include <linux/mfd/syscon.h> @@ -148,7 +149,7 @@ #define DW_MIPI_NEEDS_GRF_CLK BIT(1) #define PX30_GRF_PD_VO_CON1 0x0438 -#define PX30_DSI_FORCETXSTOPMODE (0xf << 7) +#define PX30_DSI_FORCETXSTOPMODE (0xfUL << 7) #define PX30_DSI_FORCERXMODE BIT(6) #define PX30_DSI_TURNDISABLE BIT(5) #define PX30_DSI_LCDC_SEL BIT(0) @@ -167,16 +168,16 @@ #define RK3399_DSI1_LCDC_SEL BIT(4) #define RK3399_GRF_SOC_CON22 0x6258 -#define RK3399_DSI0_TURNREQUEST (0xf << 12) -#define RK3399_DSI0_TURNDISABLE (0xf << 8) -#define RK3399_DSI0_FORCETXSTOPMODE (0xf << 4) -#define RK3399_DSI0_FORCERXMODE (0xf << 0) +#define RK3399_DSI0_TURNREQUEST (0xfUL << 12) +#define RK3399_DSI0_TURNDISABLE (0xfUL << 8) +#define RK3399_DSI0_FORCETXSTOPMODE (0xfUL << 4) +#define RK3399_DSI0_FORCERXMODE (0xfUL << 0) #define RK3399_GRF_SOC_CON23 0x625c -#define RK3399_DSI1_TURNDISABLE (0xf << 12) -#define RK3399_DSI1_FORCETXSTOPMODE (0xf << 8) -#define RK3399_DSI1_FORCERXMODE (0xf << 4) -#define RK3399_DSI1_ENABLE (0xf << 0) +#define RK3399_DSI1_TURNDISABLE (0xfUL << 12) +#define RK3399_DSI1_FORCETXSTOPMODE (0xfUL << 8) +#define RK3399_DSI1_FORCERXMODE (0xfUL << 4) +#define RK3399_DSI1_ENABLE (0xfUL << 0) #define RK3399_GRF_SOC_CON24 0x6260 #define RK3399_TXRX_MASTERSLAVEZ BIT(7) @@ -186,8 +187,8 @@ #define RK3399_TXRX_TURNREQUEST GENMASK(3, 0) #define RK3568_GRF_VO_CON2 0x0368 -#define RK3568_DSI0_SKEWCALHS (0x1f << 11) -#define RK3568_DSI0_FORCETXSTOPMODE (0xf << 4) +#define RK3568_DSI0_SKEWCALHS (0x1fUL << 11) +#define RK3568_DSI0_FORCETXSTOPMODE (0xfUL << 4) #define RK3568_DSI0_TURNDISABLE BIT(2) #define RK3568_DSI0_FORCERXMODE BIT(0) @@ -197,18 +198,16 @@ * come from. Name GRF_VO_CON3 is assumed. 
*/ #define RK3568_GRF_VO_CON3 0x36c -#define RK3568_DSI1_SKEWCALHS (0x1f << 11) -#define RK3568_DSI1_FORCETXSTOPMODE (0xf << 4) +#define RK3568_DSI1_SKEWCALHS (0x1fUL << 11) +#define RK3568_DSI1_FORCETXSTOPMODE (0xfUL << 4) #define RK3568_DSI1_TURNDISABLE BIT(2) #define RK3568_DSI1_FORCERXMODE BIT(0) #define RV1126_GRF_DSIPHY_CON 0x10220 -#define RV1126_DSI_FORCETXSTOPMODE (0xf << 4) +#define RV1126_DSI_FORCETXSTOPMODE (0xfUL << 4) #define RV1126_DSI_TURNDISABLE BIT(2) #define RV1126_DSI_FORCERXMODE BIT(0) -#define HIWORD_UPDATE(val, mask) (val | (mask) << 16) - enum { DW_DSI_USAGE_IDLE, DW_DSI_USAGE_DSI, @@ -1484,14 +1483,13 @@ static const struct rockchip_dw_dsi_chip_data px30_chip_data[] = { { .reg = 0xff450000, .lcdsel_grf_reg = PX30_GRF_PD_VO_CON1, - .lcdsel_big = HIWORD_UPDATE(0, PX30_DSI_LCDC_SEL), - .lcdsel_lit = HIWORD_UPDATE(PX30_DSI_LCDC_SEL, - PX30_DSI_LCDC_SEL), + .lcdsel_big = FIELD_PREP_WM16_CONST(PX30_DSI_LCDC_SEL, 0), + .lcdsel_lit = FIELD_PREP_WM16_CONST(PX30_DSI_LCDC_SEL, 1), .lanecfg1_grf_reg = PX30_GRF_PD_VO_CON1, - .lanecfg1 = HIWORD_UPDATE(0, PX30_DSI_TURNDISABLE | - PX30_DSI_FORCERXMODE | - PX30_DSI_FORCETXSTOPMODE), + .lanecfg1 = FIELD_PREP_WM16_CONST((PX30_DSI_TURNDISABLE | + PX30_DSI_FORCERXMODE | + PX30_DSI_FORCETXSTOPMODE), 0), .max_data_lanes = 4, }, @@ -1502,9 +1500,9 @@ static const struct rockchip_dw_dsi_chip_data rk3128_chip_data[] = { { .reg = 0x10110000, .lanecfg1_grf_reg = RK3128_GRF_LVDS_CON0, - .lanecfg1 = HIWORD_UPDATE(0, RK3128_DSI_TURNDISABLE | - RK3128_DSI_FORCERXMODE | - RK3128_DSI_FORCETXSTOPMODE), + .lanecfg1 = FIELD_PREP_WM16_CONST((RK3128_DSI_TURNDISABLE | + RK3128_DSI_FORCERXMODE | + RK3128_DSI_FORCETXSTOPMODE), 0), .max_data_lanes = 4, }, { /* sentinel */ } @@ -1514,16 +1512,16 @@ static const struct rockchip_dw_dsi_chip_data rk3288_chip_data[] = { { .reg = 0xff960000, .lcdsel_grf_reg = RK3288_GRF_SOC_CON6, - .lcdsel_big = HIWORD_UPDATE(0, RK3288_DSI0_LCDC_SEL), - .lcdsel_lit = HIWORD_UPDATE(RK3288_DSI0_LCDC_SEL, RK3288_DSI0_LCDC_SEL), + .lcdsel_big = FIELD_PREP_WM16_CONST(RK3288_DSI0_LCDC_SEL, 0), + .lcdsel_lit = FIELD_PREP_WM16_CONST(RK3288_DSI0_LCDC_SEL, 1), .max_data_lanes = 4, }, { .reg = 0xff964000, .lcdsel_grf_reg = RK3288_GRF_SOC_CON6, - .lcdsel_big = HIWORD_UPDATE(0, RK3288_DSI1_LCDC_SEL), - .lcdsel_lit = HIWORD_UPDATE(RK3288_DSI1_LCDC_SEL, RK3288_DSI1_LCDC_SEL), + .lcdsel_big = FIELD_PREP_WM16_CONST(RK3288_DSI1_LCDC_SEL, 0), + .lcdsel_lit = FIELD_PREP_WM16_CONST(RK3288_DSI1_LCDC_SEL, 1), .max_data_lanes = 4, }, @@ -1539,13 +1537,13 @@ static int rk3399_dphy_tx1rx1_init(struct phy *phy) * Assume ISP0 is supplied by the RX0 dphy. 
*/ regmap_write(dsi->grf_regmap, RK3399_GRF_SOC_CON24, - HIWORD_UPDATE(0, RK3399_TXRX_SRC_SEL_ISP0)); + FIELD_PREP_WM16(RK3399_TXRX_SRC_SEL_ISP0, 0)); regmap_write(dsi->grf_regmap, RK3399_GRF_SOC_CON24, - HIWORD_UPDATE(0, RK3399_TXRX_MASTERSLAVEZ)); + FIELD_PREP_WM16(RK3399_TXRX_MASTERSLAVEZ, 0)); regmap_write(dsi->grf_regmap, RK3399_GRF_SOC_CON24, - HIWORD_UPDATE(0, RK3399_TXRX_BASEDIR)); + FIELD_PREP_WM16(RK3399_TXRX_BASEDIR, 0)); regmap_write(dsi->grf_regmap, RK3399_GRF_SOC_CON23, - HIWORD_UPDATE(0, RK3399_DSI1_ENABLE)); + FIELD_PREP_WM16(RK3399_DSI1_ENABLE, 0)); return 0; } @@ -1559,21 +1557,20 @@ static int rk3399_dphy_tx1rx1_power_on(struct phy *phy) usleep_range(100, 150); regmap_write(dsi->grf_regmap, RK3399_GRF_SOC_CON24, - HIWORD_UPDATE(0, RK3399_TXRX_MASTERSLAVEZ)); + FIELD_PREP_WM16(RK3399_TXRX_MASTERSLAVEZ, 0)); regmap_write(dsi->grf_regmap, RK3399_GRF_SOC_CON24, - HIWORD_UPDATE(RK3399_TXRX_BASEDIR, RK3399_TXRX_BASEDIR)); + FIELD_PREP_WM16(RK3399_TXRX_BASEDIR, 1)); regmap_write(dsi->grf_regmap, RK3399_GRF_SOC_CON23, - HIWORD_UPDATE(0, RK3399_DSI1_FORCERXMODE)); + FIELD_PREP_WM16(RK3399_DSI1_FORCERXMODE, 0)); regmap_write(dsi->grf_regmap, RK3399_GRF_SOC_CON23, - HIWORD_UPDATE(0, RK3399_DSI1_FORCETXSTOPMODE)); + FIELD_PREP_WM16(RK3399_DSI1_FORCETXSTOPMODE, 0)); /* Disable lane turn around, which is ignored in receive mode */ regmap_write(dsi->grf_regmap, RK3399_GRF_SOC_CON24, - HIWORD_UPDATE(0, RK3399_TXRX_TURNREQUEST)); + FIELD_PREP_WM16(RK3399_TXRX_TURNREQUEST, 0)); regmap_write(dsi->grf_regmap, RK3399_GRF_SOC_CON23, - HIWORD_UPDATE(RK3399_DSI1_TURNDISABLE, - RK3399_DSI1_TURNDISABLE)); + FIELD_PREP_WM16(RK3399_DSI1_TURNDISABLE, 0xf)); usleep_range(100, 150); dsi_write(dsi, DSI_PHY_TST_CTRL0, PHY_TESTCLK | PHY_UNTESTCLR); @@ -1581,8 +1578,8 @@ static int rk3399_dphy_tx1rx1_power_on(struct phy *phy) /* Enable dphy lanes */ regmap_write(dsi->grf_regmap, RK3399_GRF_SOC_CON23, - HIWORD_UPDATE(GENMASK(dsi->dphy_config.lanes - 1, 0), - RK3399_DSI1_ENABLE)); + FIELD_PREP_WM16(RK3399_DSI1_ENABLE, + GENMASK(dsi->dphy_config.lanes - 1, 0))); usleep_range(100, 150); @@ -1594,7 +1591,7 @@ static int rk3399_dphy_tx1rx1_power_off(struct phy *phy) struct dw_mipi_dsi_rockchip *dsi = phy_get_drvdata(phy); regmap_write(dsi->grf_regmap, RK3399_GRF_SOC_CON23, - HIWORD_UPDATE(0, RK3399_DSI1_ENABLE)); + FIELD_PREP_WM16(RK3399_DSI1_ENABLE, 0)); return 0; } @@ -1603,15 +1600,14 @@ static const struct rockchip_dw_dsi_chip_data rk3399_chip_data[] = { { .reg = 0xff960000, .lcdsel_grf_reg = RK3399_GRF_SOC_CON20, - .lcdsel_big = HIWORD_UPDATE(0, RK3399_DSI0_LCDC_SEL), - .lcdsel_lit = HIWORD_UPDATE(RK3399_DSI0_LCDC_SEL, - RK3399_DSI0_LCDC_SEL), + .lcdsel_big = FIELD_PREP_WM16_CONST(RK3399_DSI0_LCDC_SEL, 0), + .lcdsel_lit = FIELD_PREP_WM16_CONST(RK3399_DSI0_LCDC_SEL, 1), .lanecfg1_grf_reg = RK3399_GRF_SOC_CON22, - .lanecfg1 = HIWORD_UPDATE(0, RK3399_DSI0_TURNREQUEST | - RK3399_DSI0_TURNDISABLE | - RK3399_DSI0_FORCETXSTOPMODE | - RK3399_DSI0_FORCERXMODE), + .lanecfg1 = FIELD_PREP_WM16_CONST((RK3399_DSI0_TURNREQUEST | + RK3399_DSI0_TURNDISABLE | + RK3399_DSI0_FORCETXSTOPMODE | + RK3399_DSI0_FORCERXMODE), 0), .flags = DW_MIPI_NEEDS_PHY_CFG_CLK | DW_MIPI_NEEDS_GRF_CLK, .max_data_lanes = 4, @@ -1619,25 +1615,23 @@ static const struct rockchip_dw_dsi_chip_data rk3399_chip_data[] = { { .reg = 0xff968000, .lcdsel_grf_reg = RK3399_GRF_SOC_CON20, - .lcdsel_big = HIWORD_UPDATE(0, RK3399_DSI1_LCDC_SEL), - .lcdsel_lit = HIWORD_UPDATE(RK3399_DSI1_LCDC_SEL, - RK3399_DSI1_LCDC_SEL), + .lcdsel_big = 
FIELD_PREP_WM16_CONST(RK3399_DSI1_LCDC_SEL, 0), + .lcdsel_lit = FIELD_PREP_WM16_CONST(RK3399_DSI1_LCDC_SEL, 1), + .lanecfg1_grf_reg = RK3399_GRF_SOC_CON23, - .lanecfg1 = HIWORD_UPDATE(0, RK3399_DSI1_TURNDISABLE | - RK3399_DSI1_FORCETXSTOPMODE | - RK3399_DSI1_FORCERXMODE | - RK3399_DSI1_ENABLE), + .lanecfg1 = FIELD_PREP_WM16_CONST((RK3399_DSI1_TURNDISABLE | + RK3399_DSI1_FORCETXSTOPMODE | + RK3399_DSI1_FORCERXMODE | + RK3399_DSI1_ENABLE), 0), .lanecfg2_grf_reg = RK3399_GRF_SOC_CON24, - .lanecfg2 = HIWORD_UPDATE(RK3399_TXRX_MASTERSLAVEZ | - RK3399_TXRX_ENABLECLK, - RK3399_TXRX_MASTERSLAVEZ | - RK3399_TXRX_ENABLECLK | - RK3399_TXRX_BASEDIR), + .lanecfg2 = (FIELD_PREP_WM16_CONST(RK3399_TXRX_MASTERSLAVEZ, 1) | + FIELD_PREP_WM16_CONST(RK3399_TXRX_ENABLECLK, 1) | + FIELD_PREP_WM16_CONST(RK3399_TXRX_BASEDIR, 0)), .enable_grf_reg = RK3399_GRF_SOC_CON23, - .enable = HIWORD_UPDATE(RK3399_DSI1_ENABLE, RK3399_DSI1_ENABLE), + .enable = FIELD_PREP_WM16_CONST(RK3399_DSI1_ENABLE, RK3399_DSI1_ENABLE), .flags = DW_MIPI_NEEDS_PHY_CFG_CLK | DW_MIPI_NEEDS_GRF_CLK, .max_data_lanes = 4, @@ -1653,19 +1647,19 @@ static const struct rockchip_dw_dsi_chip_data rk3568_chip_data[] = { { .reg = 0xfe060000, .lanecfg1_grf_reg = RK3568_GRF_VO_CON2, - .lanecfg1 = HIWORD_UPDATE(0, RK3568_DSI0_SKEWCALHS | - RK3568_DSI0_FORCETXSTOPMODE | - RK3568_DSI0_TURNDISABLE | - RK3568_DSI0_FORCERXMODE), + .lanecfg1 = (FIELD_PREP_WM16_CONST(RK3568_DSI0_SKEWCALHS, 0) | + FIELD_PREP_WM16_CONST(RK3568_DSI0_FORCETXSTOPMODE, 0) | + FIELD_PREP_WM16_CONST(RK3568_DSI0_TURNDISABLE, 0) | + FIELD_PREP_WM16_CONST(RK3568_DSI0_FORCERXMODE, 0)), .max_data_lanes = 4, }, { .reg = 0xfe070000, .lanecfg1_grf_reg = RK3568_GRF_VO_CON3, - .lanecfg1 = HIWORD_UPDATE(0, RK3568_DSI1_SKEWCALHS | - RK3568_DSI1_FORCETXSTOPMODE | - RK3568_DSI1_TURNDISABLE | - RK3568_DSI1_FORCERXMODE), + .lanecfg1 = (FIELD_PREP_WM16_CONST(RK3568_DSI1_SKEWCALHS, 0) | + FIELD_PREP_WM16_CONST(RK3568_DSI1_FORCETXSTOPMODE, 0) | + FIELD_PREP_WM16_CONST(RK3568_DSI1_TURNDISABLE, 0) | + FIELD_PREP_WM16_CONST(RK3568_DSI1_FORCERXMODE, 0)), .max_data_lanes = 4, }, { /* sentinel */ } @@ -1675,9 +1669,9 @@ static const struct rockchip_dw_dsi_chip_data rv1126_chip_data[] = { { .reg = 0xffb30000, .lanecfg1_grf_reg = RV1126_GRF_DSIPHY_CON, - .lanecfg1 = HIWORD_UPDATE(0, RV1126_DSI_TURNDISABLE | - RV1126_DSI_FORCERXMODE | - RV1126_DSI_FORCETXSTOPMODE), + .lanecfg1 = (FIELD_PREP_WM16_CONST(RV1126_DSI_TURNDISABLE, 0) | + FIELD_PREP_WM16_CONST(RV1126_DSI_FORCERXMODE, 0) | + FIELD_PREP_WM16_CONST(RV1126_DSI_FORCETXSTOPMODE, 0)), .max_data_lanes = 4, }, { /* sentinel */ } diff --git a/drivers/gpu/drm/rockchip/dw-mipi-dsi2-rockchip.c b/drivers/gpu/drm/rockchip/dw-mipi-dsi2-rockchip.c index cdd490778756..0aea764e29b2 100644 --- a/drivers/gpu/drm/rockchip/dw-mipi-dsi2-rockchip.c +++ b/drivers/gpu/drm/rockchip/dw-mipi-dsi2-rockchip.c @@ -437,6 +437,15 @@ static void dw_mipi_dsi2_rockchip_remove(struct platform_device *pdev) dw_mipi_dsi2_remove(dsi2->dmd); } +static const struct dsigrf_reg rk3576_dsi_grf_reg_fields[MAX_FIELDS] = { + [TXREQCLKHS_EN] = { 0x0028, 1, 1 }, + [GATING_EN] = { 0x0028, 0, 0 }, + [IPI_SHUTDN] = { 0x0028, 3, 3 }, + [IPI_COLORM] = { 0x0028, 2, 2 }, + [IPI_COLOR_DEPTH] = { 0x0028, 8, 11 }, + [IPI_FORMAT] = { 0x0028, 4, 7 }, +}; + static const struct dsigrf_reg rk3588_dsi0_grf_reg_fields[MAX_FIELDS] = { [TXREQCLKHS_EN] = { 0x0000, 11, 11 }, [GATING_EN] = { 0x0000, 10, 10 }, @@ -455,6 +464,15 @@ static const struct dsigrf_reg rk3588_dsi1_grf_reg_fields[MAX_FIELDS] = { [IPI_FORMAT] = { 0x0004, 0, 
3 }, }; +static const struct rockchip_dw_dsi2_chip_data rk3576_chip_data[] = { + { + .reg = 0x27d80000, + .grf_regs = rk3576_dsi_grf_reg_fields, + .max_bit_rate_per_lane = 2500000ULL, + }, + { /* sentinel */ } +}; + static const struct rockchip_dw_dsi2_chip_data rk3588_chip_data[] = { { .reg = 0xfde20000, @@ -470,6 +488,9 @@ static const struct rockchip_dw_dsi2_chip_data rk3588_chip_data[] = { static const struct of_device_id dw_mipi_dsi2_rockchip_dt_ids[] = { { + .compatible = "rockchip,rk3576-mipi-dsi2", + .data = &rk3576_chip_data, + }, { .compatible = "rockchip,rk3588-mipi-dsi2", .data = &rk3588_chip_data, }, diff --git a/drivers/gpu/drm/rockchip/dw_dp-rockchip.c b/drivers/gpu/drm/rockchip/dw_dp-rockchip.c new file mode 100644 index 000000000000..25ab4e46301e --- /dev/null +++ b/drivers/gpu/drm/rockchip/dw_dp-rockchip.c @@ -0,0 +1,150 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (c) 2020 Rockchip Electronics Co., Ltd. + * + * Author: Zhang Yubing <yubing.zhang@rock-chips.com> + * Author: Andy Yan <andy.yan@rock-chips.com> + */ + +#include <linux/component.h> +#include <linux/of_device.h> +#include <linux/platform_device.h> +#include <drm/bridge/dw_dp.h> +#include <drm/drm_atomic_helper.h> +#include <drm/drm_bridge.h> +#include <drm/drm_bridge_connector.h> +#include <drm/drm_of.h> +#include <drm/drm_print.h> +#include <drm/drm_probe_helper.h> +#include <drm/drm_simple_kms_helper.h> + +#include <linux/media-bus-format.h> +#include <linux/videodev2.h> + +#include "rockchip_drm_drv.h" +#include "rockchip_drm_vop.h" + +struct rockchip_dw_dp { + struct dw_dp *base; + struct device *dev; + struct rockchip_encoder encoder; +}; + +static int dw_dp_encoder_atomic_check(struct drm_encoder *encoder, + struct drm_crtc_state *crtc_state, + struct drm_connector_state *conn_state) +{ + struct rockchip_crtc_state *s = to_rockchip_crtc_state(crtc_state); + struct drm_atomic_state *state = conn_state->state; + struct drm_display_info *di = &conn_state->connector->display_info; + struct drm_bridge *bridge = drm_bridge_chain_get_first_bridge(encoder); + struct drm_bridge_state *bridge_state = drm_atomic_get_new_bridge_state(state, bridge); + u32 bus_format = bridge_state->input_bus_cfg.format; + + switch (bus_format) { + case MEDIA_BUS_FMT_UYYVYY10_0_5X30: + case MEDIA_BUS_FMT_UYYVYY8_0_5X24: + s->output_mode = ROCKCHIP_OUT_MODE_YUV420; + break; + case MEDIA_BUS_FMT_YUYV10_1X20: + case MEDIA_BUS_FMT_YUYV8_1X16: + s->output_mode = ROCKCHIP_OUT_MODE_S888_DUMMY; + break; + case MEDIA_BUS_FMT_RGB101010_1X30: + case MEDIA_BUS_FMT_RGB888_1X24: + case MEDIA_BUS_FMT_RGB666_1X24_CPADHI: + case MEDIA_BUS_FMT_YUV10_1X30: + case MEDIA_BUS_FMT_YUV8_1X24: + default: + s->output_mode = ROCKCHIP_OUT_MODE_AAAA; + break; + } + + s->output_type = DRM_MODE_CONNECTOR_DisplayPort; + s->bus_format = bus_format; + s->bus_flags = di->bus_flags; + s->color_space = V4L2_COLORSPACE_DEFAULT; + + return 0; +} + +static const struct drm_encoder_helper_funcs dw_dp_encoder_helper_funcs = { + .atomic_check = dw_dp_encoder_atomic_check, +}; + +static int dw_dp_rockchip_bind(struct device *dev, struct device *master, void *data) +{ + struct platform_device *pdev = to_platform_device(dev); + struct dw_dp_plat_data plat_data; + struct drm_device *drm_dev = data; + struct rockchip_dw_dp *dp; + struct drm_encoder *encoder; + struct drm_connector *connector; + int ret; + + dp = devm_kzalloc(dev, sizeof(*dp), GFP_KERNEL); + if (!dp) + return -ENOMEM; + + dp->dev = dev; + platform_set_drvdata(pdev, dp); + + plat_data.max_link_rate = 
810000; + encoder = &dp->encoder.encoder; + encoder->possible_crtcs = drm_of_find_possible_crtcs(drm_dev, dev->of_node); + rockchip_drm_encoder_set_crtc_endpoint_id(&dp->encoder, dev->of_node, 0, 0); + + ret = drmm_encoder_init(drm_dev, encoder, NULL, DRM_MODE_ENCODER_TMDS, NULL); + if (ret) + return ret; + drm_encoder_helper_add(encoder, &dw_dp_encoder_helper_funcs); + + dp->base = dw_dp_bind(dev, encoder, &plat_data); + if (IS_ERR(dp->base)) { + ret = PTR_ERR(dp->base); + return ret; + } + + connector = drm_bridge_connector_init(drm_dev, encoder); + if (IS_ERR(connector)) { + ret = PTR_ERR(connector); + return dev_err_probe(dev, ret, "Failed to init bridge connector"); + } + + drm_connector_attach_encoder(connector, encoder); + + return 0; +} + +static const struct component_ops dw_dp_rockchip_component_ops = { + .bind = dw_dp_rockchip_bind, +}; + +static int dw_dp_probe(struct platform_device *pdev) +{ + struct device *dev = &pdev->dev; + + return component_add(dev, &dw_dp_rockchip_component_ops); +} + +static void dw_dp_remove(struct platform_device *pdev) +{ + struct rockchip_dw_dp *dp = platform_get_drvdata(pdev); + + component_del(dp->dev, &dw_dp_rockchip_component_ops); +} + +static const struct of_device_id dw_dp_of_match[] = { + { .compatible = "rockchip,rk3588-dp", }, + {} +}; +MODULE_DEVICE_TABLE(of, dw_dp_of_match); + +struct platform_driver dw_dp_driver = { + .probe = dw_dp_probe, + .remove = dw_dp_remove, + .driver = { + .name = "dw-dp", + .of_match_table = dw_dp_of_match, + }, +}; diff --git a/drivers/gpu/drm/rockchip/dw_hdmi-rockchip.c b/drivers/gpu/drm/rockchip/dw_hdmi-rockchip.c index f737e7d46e66..727cdf768161 100644 --- a/drivers/gpu/drm/rockchip/dw_hdmi-rockchip.c +++ b/drivers/gpu/drm/rockchip/dw_hdmi-rockchip.c @@ -4,6 +4,7 @@ */ #include <linux/clk.h> +#include <linux/hw_bitfield.h> #include <linux/mfd/syscon.h> #include <linux/module.h> #include <linux/platform_device.h> @@ -54,8 +55,6 @@ #define RK3568_HDMI_SDAIN_MSK BIT(15) #define RK3568_HDMI_SCLIN_MSK BIT(14) -#define HIWORD_UPDATE(val, mask) (val | (mask) << 16) - /** * struct rockchip_hdmi_chip_data - splite the grf setting of kind of chips * @lcdsel_grf_reg: grf register offset of lcdc select @@ -213,17 +212,13 @@ static int rockchip_hdmi_parse_dt(struct rockchip_hdmi *hdmi) if (IS_ERR(hdmi->ref_clk)) { ret = PTR_ERR(hdmi->ref_clk); - if (ret != -EPROBE_DEFER) - dev_err(hdmi->dev, "failed to get reference clock\n"); - return ret; + return dev_err_probe(hdmi->dev, ret, "failed to get reference clock\n"); } hdmi->grf_clk = devm_clk_get_optional(hdmi->dev, "grf"); if (IS_ERR(hdmi->grf_clk)) { ret = PTR_ERR(hdmi->grf_clk); - if (ret != -EPROBE_DEFER) - dev_err(hdmi->dev, "failed to get grf clock\n"); - return ret; + return dev_err_probe(hdmi->dev, ret, "failed to get grf clock\n"); } ret = devm_regulator_get_enable(hdmi->dev, "avdd-0v9"); @@ -359,17 +354,14 @@ static void dw_hdmi_rk3228_setup_hpd(struct dw_hdmi *dw_hdmi, void *data) dw_hdmi_phy_setup_hpd(dw_hdmi, data); - regmap_write(hdmi->regmap, - RK3228_GRF_SOC_CON6, - HIWORD_UPDATE(RK3228_HDMI_HPD_VSEL | RK3228_HDMI_SDA_VSEL | - RK3228_HDMI_SCL_VSEL, - RK3228_HDMI_HPD_VSEL | RK3228_HDMI_SDA_VSEL | - RK3228_HDMI_SCL_VSEL)); - - regmap_write(hdmi->regmap, - RK3228_GRF_SOC_CON2, - HIWORD_UPDATE(RK3228_HDMI_SDAIN_MSK | RK3228_HDMI_SCLIN_MSK, - RK3228_HDMI_SDAIN_MSK | RK3228_HDMI_SCLIN_MSK)); + regmap_write(hdmi->regmap, RK3228_GRF_SOC_CON6, + FIELD_PREP_WM16(RK3228_HDMI_HPD_VSEL, 1) | + FIELD_PREP_WM16(RK3228_HDMI_SDA_VSEL, 1) | + 
FIELD_PREP_WM16(RK3228_HDMI_SCL_VSEL, 1)); + + regmap_write(hdmi->regmap, RK3228_GRF_SOC_CON2, + FIELD_PREP_WM16(RK3228_HDMI_SDAIN_MSK, 1) | + FIELD_PREP_WM16(RK3228_HDMI_SCLIN_MSK, 1)); } static enum drm_connector_status @@ -381,15 +373,13 @@ dw_hdmi_rk3328_read_hpd(struct dw_hdmi *dw_hdmi, void *data) status = dw_hdmi_phy_read_hpd(dw_hdmi, data); if (status == connector_status_connected) - regmap_write(hdmi->regmap, - RK3328_GRF_SOC_CON4, - HIWORD_UPDATE(RK3328_HDMI_SDA_5V | RK3328_HDMI_SCL_5V, - RK3328_HDMI_SDA_5V | RK3328_HDMI_SCL_5V)); + regmap_write(hdmi->regmap, RK3328_GRF_SOC_CON4, + FIELD_PREP_WM16(RK3328_HDMI_SDA_5V, 1) | + FIELD_PREP_WM16(RK3328_HDMI_SCL_5V, 1)); else - regmap_write(hdmi->regmap, - RK3328_GRF_SOC_CON4, - HIWORD_UPDATE(0, RK3328_HDMI_SDA_5V | - RK3328_HDMI_SCL_5V)); + regmap_write(hdmi->regmap, RK3328_GRF_SOC_CON4, + FIELD_PREP_WM16(RK3328_HDMI_SDA_5V, 0) | + FIELD_PREP_WM16(RK3328_HDMI_SCL_5V, 0)); return status; } @@ -400,21 +390,21 @@ static void dw_hdmi_rk3328_setup_hpd(struct dw_hdmi *dw_hdmi, void *data) dw_hdmi_phy_setup_hpd(dw_hdmi, data); /* Enable and map pins to 3V grf-controlled io-voltage */ - regmap_write(hdmi->regmap, - RK3328_GRF_SOC_CON4, - HIWORD_UPDATE(0, RK3328_HDMI_HPD_SARADC | RK3328_HDMI_CEC_5V | - RK3328_HDMI_SDA_5V | RK3328_HDMI_SCL_5V | - RK3328_HDMI_HPD_5V)); - regmap_write(hdmi->regmap, - RK3328_GRF_SOC_CON3, - HIWORD_UPDATE(0, RK3328_HDMI_SDA5V_GRF | RK3328_HDMI_SCL5V_GRF | - RK3328_HDMI_HPD5V_GRF | - RK3328_HDMI_CEC5V_GRF)); - regmap_write(hdmi->regmap, - RK3328_GRF_SOC_CON2, - HIWORD_UPDATE(RK3328_HDMI_SDAIN_MSK | RK3328_HDMI_SCLIN_MSK, - RK3328_HDMI_SDAIN_MSK | RK3328_HDMI_SCLIN_MSK | - RK3328_HDMI_HPD_IOE)); + regmap_write(hdmi->regmap, RK3328_GRF_SOC_CON4, + FIELD_PREP_WM16(RK3328_HDMI_HPD_SARADC, 0) | + FIELD_PREP_WM16(RK3328_HDMI_CEC_5V, 0) | + FIELD_PREP_WM16(RK3328_HDMI_SDA_5V, 0) | + FIELD_PREP_WM16(RK3328_HDMI_SCL_5V, 0) | + FIELD_PREP_WM16(RK3328_HDMI_HPD_5V, 0)); + regmap_write(hdmi->regmap, RK3328_GRF_SOC_CON3, + FIELD_PREP_WM16(RK3328_HDMI_SDA5V_GRF, 0) | + FIELD_PREP_WM16(RK3328_HDMI_SCL5V_GRF, 0) | + FIELD_PREP_WM16(RK3328_HDMI_HPD5V_GRF, 0) | + FIELD_PREP_WM16(RK3328_HDMI_CEC5V_GRF, 0)); + regmap_write(hdmi->regmap, RK3328_GRF_SOC_CON2, + FIELD_PREP_WM16(RK3328_HDMI_SDAIN_MSK, 1) | + FIELD_PREP_WM16(RK3328_HDMI_SCLIN_MSK, 1) | + FIELD_PREP_WM16(RK3328_HDMI_HPD_IOE, 0)); dw_hdmi_rk3328_read_hpd(dw_hdmi, data); } @@ -442,8 +432,8 @@ static const struct dw_hdmi_plat_data rk3228_hdmi_drv_data = { static struct rockchip_hdmi_chip_data rk3288_chip_data = { .lcdsel_grf_reg = RK3288_GRF_SOC_CON6, - .lcdsel_big = HIWORD_UPDATE(0, RK3288_HDMI_LCDC_SEL), - .lcdsel_lit = HIWORD_UPDATE(RK3288_HDMI_LCDC_SEL, RK3288_HDMI_LCDC_SEL), + .lcdsel_big = FIELD_PREP_WM16_CONST(RK3288_HDMI_LCDC_SEL, 0), + .lcdsel_lit = FIELD_PREP_WM16_CONST(RK3288_HDMI_LCDC_SEL, 1), .max_tmds_clock = 340000, }; @@ -479,8 +469,8 @@ static const struct dw_hdmi_plat_data rk3328_hdmi_drv_data = { static struct rockchip_hdmi_chip_data rk3399_chip_data = { .lcdsel_grf_reg = RK3399_GRF_SOC_CON20, - .lcdsel_big = HIWORD_UPDATE(0, RK3399_HDMI_LCDC_SEL), - .lcdsel_lit = HIWORD_UPDATE(RK3399_HDMI_LCDC_SEL, RK3399_HDMI_LCDC_SEL), + .lcdsel_big = FIELD_PREP_WM16_CONST(RK3399_HDMI_LCDC_SEL, 0), + .lcdsel_lit = FIELD_PREP_WM16_CONST(RK3399_HDMI_LCDC_SEL, 1), .max_tmds_clock = 594000, }; @@ -573,17 +563,13 @@ static int dw_hdmi_rockchip_bind(struct device *dev, struct device *master, ret = rockchip_hdmi_parse_dt(hdmi); if (ret) { - if (ret != -EPROBE_DEFER) - 
dev_err(hdmi->dev, "Unable to parse OF data\n"); - return ret; + return dev_err_probe(hdmi->dev, ret, "Unable to parse OF data\n"); } hdmi->phy = devm_phy_optional_get(dev, "hdmi"); if (IS_ERR(hdmi->phy)) { ret = PTR_ERR(hdmi->phy); - if (ret != -EPROBE_DEFER) - dev_err(hdmi->dev, "failed to get phy\n"); - return ret; + return dev_err_probe(hdmi->dev, ret, "failed to get phy\n"); } if (hdmi->phy) { @@ -597,10 +583,8 @@ static int dw_hdmi_rockchip_bind(struct device *dev, struct device *master, if (hdmi->chip_data == &rk3568_chip_data) { regmap_write(hdmi->regmap, RK3568_GRF_VO_CON1, - HIWORD_UPDATE(RK3568_HDMI_SDAIN_MSK | - RK3568_HDMI_SCLIN_MSK, - RK3568_HDMI_SDAIN_MSK | - RK3568_HDMI_SCLIN_MSK)); + FIELD_PREP_WM16(RK3568_HDMI_SDAIN_MSK, 1) | + FIELD_PREP_WM16(RK3568_HDMI_SCLIN_MSK, 1)); } drm_encoder_helper_add(encoder, &dw_hdmi_rockchip_encoder_helper_funcs); diff --git a/drivers/gpu/drm/rockchip/dw_hdmi_qp-rockchip.c b/drivers/gpu/drm/rockchip/dw_hdmi_qp-rockchip.c index 7d531b6f4c09..ed6e8f036f4b 100644 --- a/drivers/gpu/drm/rockchip/dw_hdmi_qp-rockchip.c +++ b/drivers/gpu/drm/rockchip/dw_hdmi_qp-rockchip.c @@ -9,6 +9,7 @@ #include <linux/clk.h> #include <linux/gpio/consumer.h> +#include <linux/hw_bitfield.h> #include <linux/mfd/syscon.h> #include <linux/module.h> #include <linux/platform_device.h> @@ -66,7 +67,8 @@ #define RK3588_HDMI1_HPD_INT_MSK BIT(15) #define RK3588_HDMI1_HPD_INT_CLR BIT(14) #define RK3588_GRF_SOC_CON7 0x031c -#define RK3588_SET_HPD_PATH_MASK GENMASK(13, 12) +#define RK3588_HPD_HDMI0_IO_EN_MASK BIT(12) +#define RK3588_HPD_HDMI1_IO_EN_MASK BIT(13) #define RK3588_GRF_SOC_STATUS1 0x0384 #define RK3588_HDMI0_LEVEL_INT BIT(16) #define RK3588_HDMI1_LEVEL_INT BIT(24) @@ -80,7 +82,6 @@ #define RK3588_HDMI0_GRANT_SEL BIT(10) #define RK3588_HDMI1_GRANT_SEL BIT(12) -#define HIWORD_UPDATE(val, mask) ((val) | (mask) << 16) #define HOTPLUG_DEBOUNCE_MS 150 #define MAX_HDMI_PORT_NUM 2 @@ -185,11 +186,11 @@ static void dw_hdmi_qp_rk3588_setup_hpd(struct dw_hdmi_qp *dw_hdmi, void *data) u32 val; if (hdmi->port_id) - val = HIWORD_UPDATE(RK3588_HDMI1_HPD_INT_CLR, - RK3588_HDMI1_HPD_INT_CLR | RK3588_HDMI1_HPD_INT_MSK); + val = (FIELD_PREP_WM16(RK3588_HDMI1_HPD_INT_CLR, 1) | + FIELD_PREP_WM16(RK3588_HDMI1_HPD_INT_MSK, 0)); else - val = HIWORD_UPDATE(RK3588_HDMI0_HPD_INT_CLR, - RK3588_HDMI0_HPD_INT_CLR | RK3588_HDMI0_HPD_INT_MSK); + val = (FIELD_PREP_WM16(RK3588_HDMI0_HPD_INT_CLR, 1) | + FIELD_PREP_WM16(RK3588_HDMI0_HPD_INT_MSK, 0)); regmap_write(hdmi->regmap, RK3588_GRF_SOC_CON2, val); } @@ -218,8 +219,8 @@ static void dw_hdmi_qp_rk3576_setup_hpd(struct dw_hdmi_qp *dw_hdmi, void *data) struct rockchip_hdmi_qp *hdmi = (struct rockchip_hdmi_qp *)data; u32 val; - val = HIWORD_UPDATE(RK3576_HDMI_HPD_INT_CLR, - RK3576_HDMI_HPD_INT_CLR | RK3576_HDMI_HPD_INT_MSK); + val = (FIELD_PREP_WM16(RK3576_HDMI_HPD_INT_CLR, 1) | + FIELD_PREP_WM16(RK3576_HDMI_HPD_INT_MSK, 0)); regmap_write(hdmi->regmap, RK3576_IOC_MISC_CON0, val); regmap_write(hdmi->regmap, 0xa404, 0xffff0102); @@ -254,7 +255,7 @@ static irqreturn_t dw_hdmi_qp_rk3576_hardirq(int irq, void *dev_id) regmap_read(hdmi->regmap, RK3576_IOC_HDMI_HPD_STATUS, &intr_stat); if (intr_stat) { - val = HIWORD_UPDATE(RK3576_HDMI_HPD_INT_MSK, RK3576_HDMI_HPD_INT_MSK); + val = FIELD_PREP_WM16(RK3576_HDMI_HPD_INT_MSK, 1); regmap_write(hdmi->regmap, RK3576_IOC_MISC_CON0, val); return IRQ_WAKE_THREAD; @@ -273,12 +274,12 @@ static irqreturn_t dw_hdmi_qp_rk3576_irq(int irq, void *dev_id) if (!intr_stat) return IRQ_NONE; - val = 
HIWORD_UPDATE(RK3576_HDMI_HPD_INT_CLR, RK3576_HDMI_HPD_INT_CLR); + val = FIELD_PREP_WM16(RK3576_HDMI_HPD_INT_CLR, 1); regmap_write(hdmi->regmap, RK3576_IOC_MISC_CON0, val); mod_delayed_work(system_wq, &hdmi->hpd_work, msecs_to_jiffies(HOTPLUG_DEBOUNCE_MS)); - val = HIWORD_UPDATE(0, RK3576_HDMI_HPD_INT_MSK); + val = FIELD_PREP_WM16(RK3576_HDMI_HPD_INT_MSK, 0); regmap_write(hdmi->regmap, RK3576_IOC_MISC_CON0, val); return IRQ_HANDLED; @@ -293,11 +294,9 @@ static irqreturn_t dw_hdmi_qp_rk3588_hardirq(int irq, void *dev_id) if (intr_stat) { if (hdmi->port_id) - val = HIWORD_UPDATE(RK3588_HDMI1_HPD_INT_MSK, - RK3588_HDMI1_HPD_INT_MSK); + val = FIELD_PREP_WM16(RK3588_HDMI1_HPD_INT_MSK, 1); else - val = HIWORD_UPDATE(RK3588_HDMI0_HPD_INT_MSK, - RK3588_HDMI0_HPD_INT_MSK); + val = FIELD_PREP_WM16(RK3588_HDMI0_HPD_INT_MSK, 1); regmap_write(hdmi->regmap, RK3588_GRF_SOC_CON2, val); return IRQ_WAKE_THREAD; } @@ -315,20 +314,18 @@ static irqreturn_t dw_hdmi_qp_rk3588_irq(int irq, void *dev_id) return IRQ_NONE; if (hdmi->port_id) - val = HIWORD_UPDATE(RK3588_HDMI1_HPD_INT_CLR, - RK3588_HDMI1_HPD_INT_CLR); + val = FIELD_PREP_WM16(RK3588_HDMI1_HPD_INT_CLR, 1); else - val = HIWORD_UPDATE(RK3588_HDMI0_HPD_INT_CLR, - RK3588_HDMI0_HPD_INT_CLR); + val = FIELD_PREP_WM16(RK3588_HDMI0_HPD_INT_CLR, 1); regmap_write(hdmi->regmap, RK3588_GRF_SOC_CON2, val); mod_delayed_work(system_wq, &hdmi->hpd_work, msecs_to_jiffies(HOTPLUG_DEBOUNCE_MS)); if (hdmi->port_id) - val |= HIWORD_UPDATE(0, RK3588_HDMI1_HPD_INT_MSK); + val |= FIELD_PREP_WM16(RK3588_HDMI1_HPD_INT_MSK, 0); else - val |= HIWORD_UPDATE(0, RK3588_HDMI0_HPD_INT_MSK); + val |= FIELD_PREP_WM16(RK3588_HDMI0_HPD_INT_MSK, 0); regmap_write(hdmi->regmap, RK3588_GRF_SOC_CON2, val); return IRQ_HANDLED; @@ -338,14 +335,14 @@ static void dw_hdmi_qp_rk3576_io_init(struct rockchip_hdmi_qp *hdmi) { u32 val; - val = HIWORD_UPDATE(RK3576_SCLIN_MASK, RK3576_SCLIN_MASK) | - HIWORD_UPDATE(RK3576_SDAIN_MASK, RK3576_SDAIN_MASK) | - HIWORD_UPDATE(RK3576_HDMI_GRANT_SEL, RK3576_HDMI_GRANT_SEL) | - HIWORD_UPDATE(RK3576_I2S_SEL_MASK, RK3576_I2S_SEL_MASK); + val = FIELD_PREP_WM16(RK3576_SCLIN_MASK, 1) | + FIELD_PREP_WM16(RK3576_SDAIN_MASK, 1) | + FIELD_PREP_WM16(RK3576_HDMI_GRANT_SEL, 1) | + FIELD_PREP_WM16(RK3576_I2S_SEL_MASK, 1); regmap_write(hdmi->vo_regmap, RK3576_VO0_GRF_SOC_CON14, val); - val = HIWORD_UPDATE(0, RK3576_HDMI_HPD_INT_MSK); + val = FIELD_PREP_WM16(RK3576_HDMI_HPD_INT_MSK, 0); regmap_write(hdmi->regmap, RK3576_IOC_MISC_CON0, val); } @@ -353,27 +350,28 @@ static void dw_hdmi_qp_rk3588_io_init(struct rockchip_hdmi_qp *hdmi) { u32 val; - val = HIWORD_UPDATE(RK3588_SCLIN_MASK, RK3588_SCLIN_MASK) | - HIWORD_UPDATE(RK3588_SDAIN_MASK, RK3588_SDAIN_MASK) | - HIWORD_UPDATE(RK3588_MODE_MASK, RK3588_MODE_MASK) | - HIWORD_UPDATE(RK3588_I2S_SEL_MASK, RK3588_I2S_SEL_MASK); + val = FIELD_PREP_WM16(RK3588_SCLIN_MASK, 1) | + FIELD_PREP_WM16(RK3588_SDAIN_MASK, 1) | + FIELD_PREP_WM16(RK3588_MODE_MASK, 1) | + FIELD_PREP_WM16(RK3588_I2S_SEL_MASK, 1); regmap_write(hdmi->vo_regmap, hdmi->port_id ? 
RK3588_GRF_VO1_CON6 : RK3588_GRF_VO1_CON3, val); - val = HIWORD_UPDATE(RK3588_SET_HPD_PATH_MASK, RK3588_SET_HPD_PATH_MASK); + val = FIELD_PREP_WM16(RK3588_HPD_HDMI0_IO_EN_MASK, 1) | + FIELD_PREP_WM16(RK3588_HPD_HDMI1_IO_EN_MASK, 1); regmap_write(hdmi->regmap, RK3588_GRF_SOC_CON7, val); if (hdmi->port_id) - val = HIWORD_UPDATE(RK3588_HDMI1_GRANT_SEL, RK3588_HDMI1_GRANT_SEL); + val = FIELD_PREP_WM16(RK3588_HDMI1_GRANT_SEL, 1); else - val = HIWORD_UPDATE(RK3588_HDMI0_GRANT_SEL, RK3588_HDMI0_GRANT_SEL); + val = FIELD_PREP_WM16(RK3588_HDMI0_GRANT_SEL, 1); regmap_write(hdmi->vo_regmap, RK3588_GRF_VO1_CON9, val); if (hdmi->port_id) - val = HIWORD_UPDATE(RK3588_HDMI1_HPD_INT_MSK, RK3588_HDMI1_HPD_INT_MSK); + val = FIELD_PREP_WM16(RK3588_HDMI1_HPD_INT_MSK, 1); else - val = HIWORD_UPDATE(RK3588_HDMI0_HPD_INT_MSK, RK3588_HDMI0_HPD_INT_MSK); + val = FIELD_PREP_WM16(RK3588_HDMI0_HPD_INT_MSK, 1); regmap_write(hdmi->regmap, RK3588_GRF_SOC_CON2, val); } diff --git a/drivers/gpu/drm/rockchip/inno_hdmi.c b/drivers/gpu/drm/rockchip/inno_hdmi.c index db4b4038e51d..f24827dc1421 100644 --- a/drivers/gpu/drm/rockchip/inno_hdmi.c +++ b/drivers/gpu/drm/rockchip/inno_hdmi.c @@ -10,6 +10,7 @@ #include <linux/delay.h> #include <linux/err.h> #include <linux/hdmi.h> +#include <linux/hw_bitfield.h> #include <linux/mfd/syscon.h> #include <linux/mod_devicetable.h> #include <linux/module.h> @@ -29,11 +30,358 @@ #include "rockchip_drm_drv.h" -#include "inno_hdmi.h" +#define INNO_HDMI_MIN_TMDS_CLOCK 25000000U -#define HIWORD_UPDATE(val, mask) ((val) | (mask) << 16) +#define DDC_SEGMENT_ADDR 0x30 + +#define HDMI_SCL_RATE (100 * 1000) + +#define DDC_BUS_FREQ_L 0x4b +#define DDC_BUS_FREQ_H 0x4c + +#define HDMI_SYS_CTRL 0x00 +#define m_RST_ANALOG BIT(6) +#define v_RST_ANALOG (0 << 6) +#define v_NOT_RST_ANALOG BIT(6) +#define m_RST_DIGITAL BIT(5) +#define v_RST_DIGITAL (0 << 5) +#define v_NOT_RST_DIGITAL BIT(5) +#define m_REG_CLK_INV BIT(4) +#define v_REG_CLK_NOT_INV (0 << 4) +#define v_REG_CLK_INV BIT(4) +#define m_VCLK_INV BIT(3) +#define v_VCLK_NOT_INV (0 << 3) +#define v_VCLK_INV BIT(3) +#define m_REG_CLK_SOURCE BIT(2) +#define v_REG_CLK_SOURCE_TMDS (0 << 2) +#define v_REG_CLK_SOURCE_SYS BIT(2) +#define m_POWER BIT(1) +#define v_PWR_ON (0 << 1) +#define v_PWR_OFF BIT(1) +#define m_INT_POL BIT(0) +#define v_INT_POL_HIGH 1 +#define v_INT_POL_LOW 0 + +#define HDMI_VIDEO_CONTRL1 0x01 +#define m_VIDEO_INPUT_FORMAT (7 << 1) +#define m_DE_SOURCE BIT(0) +#define v_VIDEO_INPUT_FORMAT(n) ((n) << 1) +#define v_DE_EXTERNAL 1 +#define v_DE_INTERNAL 0 +enum { + VIDEO_INPUT_SDR_RGB444 = 0, + VIDEO_INPUT_DDR_RGB444 = 5, + VIDEO_INPUT_DDR_YCBCR422 = 6 +}; -#define INNO_HDMI_MIN_TMDS_CLOCK 25000000U +#define HDMI_VIDEO_CONTRL2 0x02 +#define m_VIDEO_OUTPUT_COLOR (3 << 6) +#define m_VIDEO_INPUT_BITS (3 << 4) +#define m_VIDEO_INPUT_CSP BIT(0) +#define v_VIDEO_OUTPUT_COLOR(n) (((n) & 0x3) << 6) +#define v_VIDEO_INPUT_BITS(n) ((n) << 4) +#define v_VIDEO_INPUT_CSP(n) ((n) << 0) +enum { + VIDEO_INPUT_12BITS = 0, + VIDEO_INPUT_10BITS = 1, + VIDEO_INPUT_REVERT = 2, + VIDEO_INPUT_8BITS = 3, +}; + +#define HDMI_VIDEO_CONTRL 0x03 +#define m_VIDEO_AUTO_CSC BIT(7) +#define v_VIDEO_AUTO_CSC(n) ((n) << 7) +#define m_VIDEO_C0_C2_SWAP BIT(0) +#define v_VIDEO_C0_C2_SWAP(n) ((n) << 0) +enum { + C0_C2_CHANGE_ENABLE = 0, + C0_C2_CHANGE_DISABLE = 1, + AUTO_CSC_DISABLE = 0, + AUTO_CSC_ENABLE = 1, +}; + +#define HDMI_VIDEO_CONTRL3 0x04 +#define m_COLOR_DEPTH_NOT_INDICATED BIT(4) +#define m_SOF BIT(3) +#define m_COLOR_RANGE BIT(2) +#define m_CSC BIT(0) +#define 
v_COLOR_DEPTH_NOT_INDICATED(n) ((n) << 4) +#define v_SOF_ENABLE (0 << 3) +#define v_SOF_DISABLE BIT(3) +#define v_COLOR_RANGE_FULL BIT(2) +#define v_COLOR_RANGE_LIMITED (0 << 2) +#define v_CSC_ENABLE 1 +#define v_CSC_DISABLE 0 + +#define HDMI_AV_MUTE 0x05 +#define m_AVMUTE_CLEAR BIT(7) +#define m_AVMUTE_ENABLE BIT(6) +#define m_AUDIO_MUTE BIT(1) +#define m_VIDEO_BLACK BIT(0) +#define v_AVMUTE_CLEAR(n) ((n) << 7) +#define v_AVMUTE_ENABLE(n) ((n) << 6) +#define v_AUDIO_MUTE(n) ((n) << 1) +#define v_VIDEO_MUTE(n) ((n) << 0) + +#define HDMI_VIDEO_TIMING_CTL 0x08 +#define v_HSYNC_POLARITY(n) ((n) << 3) +#define v_VSYNC_POLARITY(n) ((n) << 2) +#define v_INETLACE(n) ((n) << 1) +#define v_EXTERANL_VIDEO(n) ((n) << 0) + +#define HDMI_VIDEO_EXT_HTOTAL_L 0x09 +#define HDMI_VIDEO_EXT_HTOTAL_H 0x0a +#define HDMI_VIDEO_EXT_HBLANK_L 0x0b +#define HDMI_VIDEO_EXT_HBLANK_H 0x0c +#define HDMI_VIDEO_EXT_HDELAY_L 0x0d +#define HDMI_VIDEO_EXT_HDELAY_H 0x0e +#define HDMI_VIDEO_EXT_HDURATION_L 0x0f +#define HDMI_VIDEO_EXT_HDURATION_H 0x10 +#define HDMI_VIDEO_EXT_VTOTAL_L 0x11 +#define HDMI_VIDEO_EXT_VTOTAL_H 0x12 +#define HDMI_VIDEO_EXT_VBLANK 0x13 +#define HDMI_VIDEO_EXT_VDELAY 0x14 +#define HDMI_VIDEO_EXT_VDURATION 0x15 + +#define HDMI_VIDEO_CSC_COEF 0x18 + +#define HDMI_AUDIO_CTRL1 0x35 +enum { + CTS_SOURCE_INTERNAL = 0, + CTS_SOURCE_EXTERNAL = 1, +}; + +#define v_CTS_SOURCE(n) ((n) << 7) + +enum { + DOWNSAMPLE_DISABLE = 0, + DOWNSAMPLE_1_2 = 1, + DOWNSAMPLE_1_4 = 2, +}; + +#define v_DOWN_SAMPLE(n) ((n) << 5) + +enum { + AUDIO_SOURCE_IIS = 0, + AUDIO_SOURCE_SPDIF = 1, +}; + +#define v_AUDIO_SOURCE(n) ((n) << 3) + +#define v_MCLK_ENABLE(n) ((n) << 2) + +enum { + MCLK_128FS = 0, + MCLK_256FS = 1, + MCLK_384FS = 2, + MCLK_512FS = 3, +}; + +#define v_MCLK_RATIO(n) (n) + +#define AUDIO_SAMPLE_RATE 0x37 + +enum { + AUDIO_32K = 0x3, + AUDIO_441K = 0x0, + AUDIO_48K = 0x2, + AUDIO_882K = 0x8, + AUDIO_96K = 0xa, + AUDIO_1764K = 0xc, + AUDIO_192K = 0xe, +}; + +#define AUDIO_I2S_MODE 0x38 + +enum { + I2S_CHANNEL_1_2 = 1, + I2S_CHANNEL_3_4 = 3, + I2S_CHANNEL_5_6 = 7, + I2S_CHANNEL_7_8 = 0xf +}; + +#define v_I2S_CHANNEL(n) ((n) << 2) + +enum { + I2S_STANDARD = 0, + I2S_LEFT_JUSTIFIED = 1, + I2S_RIGHT_JUSTIFIED = 2, +}; + +#define v_I2S_MODE(n) (n) + +#define AUDIO_I2S_MAP 0x39 +#define AUDIO_I2S_SWAPS_SPDIF 0x3a +#define v_SPIDF_FREQ(n) (n) + +#define N_32K 0x1000 +#define N_441K 0x1880 +#define N_882K 0x3100 +#define N_1764K 0x6200 +#define N_48K 0x1800 +#define N_96K 0x3000 +#define N_192K 0x6000 + +#define HDMI_AUDIO_CHANNEL_STATUS 0x3e +#define m_AUDIO_STATUS_NLPCM BIT(7) +#define m_AUDIO_STATUS_USE BIT(6) +#define m_AUDIO_STATUS_COPYRIGHT BIT(5) +#define m_AUDIO_STATUS_ADDITION (3 << 2) +#define m_AUDIO_STATUS_CLK_ACCURACY (2 << 0) +#define v_AUDIO_STATUS_NLPCM(n) (((n) & 1) << 7) +#define AUDIO_N_H 0x3f +#define AUDIO_N_M 0x40 +#define AUDIO_N_L 0x41 + +#define HDMI_AUDIO_CTS_H 0x45 +#define HDMI_AUDIO_CTS_M 0x46 +#define HDMI_AUDIO_CTS_L 0x47 + +#define HDMI_DDC_CLK_L 0x4b +#define HDMI_DDC_CLK_H 0x4c + +#define HDMI_EDID_SEGMENT_POINTER 0x4d +#define HDMI_EDID_WORD_ADDR 0x4e +#define HDMI_EDID_FIFO_OFFSET 0x4f +#define HDMI_EDID_FIFO_ADDR 0x50 + +#define HDMI_PACKET_SEND_MANUAL 0x9c +#define HDMI_PACKET_SEND_AUTO 0x9d +#define m_PACKET_GCP_EN BIT(7) +#define m_PACKET_MSI_EN BIT(6) +#define m_PACKET_SDI_EN BIT(5) +#define m_PACKET_VSI_EN BIT(4) +#define v_PACKET_GCP_EN(n) (((n) & 1) << 7) +#define v_PACKET_MSI_EN(n) (((n) & 1) << 6) +#define v_PACKET_SDI_EN(n) (((n) & 1) << 5) +#define v_PACKET_VSI_EN(n) (((n) & 1) 
<< 4) + +#define HDMI_CONTROL_PACKET_BUF_INDEX 0x9f + +enum { + INFOFRAME_VSI = 0x05, + INFOFRAME_AVI = 0x06, + INFOFRAME_AAI = 0x08, +}; + +#define HDMI_CONTROL_PACKET_ADDR 0xa0 +#define HDMI_MAXIMUM_INFO_FRAME_SIZE 0x11 + +enum { + AVI_COLOR_MODE_RGB = 0, + AVI_COLOR_MODE_YCBCR422 = 1, + AVI_COLOR_MODE_YCBCR444 = 2, + AVI_COLORIMETRY_NO_DATA = 0, + + AVI_COLORIMETRY_SMPTE_170M = 1, + AVI_COLORIMETRY_ITU709 = 2, + AVI_COLORIMETRY_EXTENDED = 3, + + AVI_CODED_FRAME_ASPECT_NO_DATA = 0, + AVI_CODED_FRAME_ASPECT_4_3 = 1, + AVI_CODED_FRAME_ASPECT_16_9 = 2, + + ACTIVE_ASPECT_RATE_SAME_AS_CODED_FRAME = 0x08, + ACTIVE_ASPECT_RATE_4_3 = 0x09, + ACTIVE_ASPECT_RATE_16_9 = 0x0A, + ACTIVE_ASPECT_RATE_14_9 = 0x0B, +}; + +#define HDMI_HDCP_CTRL 0x52 +#define m_HDMI_DVI BIT(1) +#define v_HDMI_DVI(n) ((n) << 1) + +#define HDMI_INTERRUPT_MASK1 0xc0 +#define HDMI_INTERRUPT_STATUS1 0xc1 +#define m_INT_ACTIVE_VSYNC BIT(5) +#define m_INT_EDID_READY BIT(2) + +#define HDMI_INTERRUPT_MASK2 0xc2 +#define HDMI_INTERRUPT_STATUS2 0xc3 +#define m_INT_HDCP_ERR BIT(7) +#define m_INT_BKSV_FLAG BIT(6) +#define m_INT_HDCP_OK BIT(4) + +#define HDMI_STATUS 0xc8 +#define m_HOTPLUG BIT(7) +#define m_MASK_INT_HOTPLUG BIT(5) +#define m_INT_HOTPLUG BIT(1) +#define v_MASK_INT_HOTPLUG(n) (((n) & 0x1) << 5) + +#define HDMI_COLORBAR 0xc9 + +#define HDMI_PHY_SYNC 0xce +#define HDMI_PHY_SYS_CTL 0xe0 +#define m_TMDS_CLK_SOURCE BIT(5) +#define v_TMDS_FROM_PLL (0 << 5) +#define v_TMDS_FROM_GEN BIT(5) +#define m_PHASE_CLK BIT(4) +#define v_DEFAULT_PHASE (0 << 4) +#define v_SYNC_PHASE BIT(4) +#define m_TMDS_CURRENT_PWR BIT(3) +#define v_TURN_ON_CURRENT (0 << 3) +#define v_CAT_OFF_CURRENT BIT(3) +#define m_BANDGAP_PWR BIT(2) +#define v_BANDGAP_PWR_UP (0 << 2) +#define v_BANDGAP_PWR_DOWN BIT(2) +#define m_PLL_PWR BIT(1) +#define v_PLL_PWR_UP (0 << 1) +#define v_PLL_PWR_DOWN BIT(1) +#define m_TMDS_CHG_PWR BIT(0) +#define v_TMDS_CHG_PWR_UP (0 << 0) +#define v_TMDS_CHG_PWR_DOWN BIT(0) + +#define HDMI_PHY_CHG_PWR 0xe1 +#define v_CLK_CHG_PWR(n) (((n) & 1) << 3) +#define v_DATA_CHG_PWR(n) (((n) & 7) << 0) + +#define HDMI_PHY_DRIVER 0xe2 +#define v_CLK_MAIN_DRIVER(n) ((n) << 4) +#define v_DATA_MAIN_DRIVER(n) ((n) << 0) + +#define HDMI_PHY_PRE_EMPHASIS 0xe3 +#define v_PRE_EMPHASIS(n) (((n) & 7) << 4) +#define v_CLK_PRE_DRIVER(n) (((n) & 3) << 2) +#define v_DATA_PRE_DRIVER(n) (((n) & 3) << 0) + +#define HDMI_PHY_FEEDBACK_DIV_RATIO_LOW 0xe7 +#define v_FEEDBACK_DIV_LOW(n) ((n) & 0xff) +#define HDMI_PHY_FEEDBACK_DIV_RATIO_HIGH 0xe8 +#define v_FEEDBACK_DIV_HIGH(n) ((n) & 1) + +#define HDMI_PHY_PRE_DIV_RATIO 0xed +#define v_PRE_DIV_RATIO(n) ((n) & 0x1f) + +#define HDMI_CEC_CTRL 0xd0 +#define m_ADJUST_FOR_HISENSE BIT(6) +#define m_REJECT_RX_BROADCAST BIT(5) +#define m_BUSFREETIME_ENABLE BIT(2) +#define m_REJECT_RX BIT(1) +#define m_START_TX BIT(0) + +#define HDMI_CEC_DATA 0xd1 +#define HDMI_CEC_TX_OFFSET 0xd2 +#define HDMI_CEC_RX_OFFSET 0xd3 +#define HDMI_CEC_CLK_H 0xd4 +#define HDMI_CEC_CLK_L 0xd5 +#define HDMI_CEC_TX_LENGTH 0xd6 +#define HDMI_CEC_RX_LENGTH 0xd7 +#define HDMI_CEC_TX_INT_MASK 0xd8 +#define m_TX_DONE BIT(3) +#define m_TX_NOACK BIT(2) +#define m_TX_BROADCAST_REJ BIT(1) +#define m_TX_BUSNOTFREE BIT(0) + +#define HDMI_CEC_RX_INT_MASK 0xd9 +#define m_RX_LA_ERR BIT(4) +#define m_RX_GLITCH BIT(3) +#define m_RX_DONE BIT(0) + +#define HDMI_CEC_TX_INT 0xda +#define HDMI_CEC_RX_INT 0xdb +#define HDMI_CEC_BUSFREETIME_L 0xdc +#define HDMI_CEC_BUSFREETIME_H 0xdd +#define HDMI_CEC_LOGICADDR 0xde #define RK3036_GRF_SOC_CON2 0x148 #define RK3036_HDMI_PHSYNC 
BIT(4) @@ -255,22 +603,37 @@ static void inno_hdmi_power_up(struct inno_hdmi *hdmi, inno_hdmi_sys_power(hdmi, true); }; -static void inno_hdmi_reset(struct inno_hdmi *hdmi) +static void inno_hdmi_init_hw(struct inno_hdmi *hdmi) { u32 val; u32 msk; hdmi_modb(hdmi, HDMI_SYS_CTRL, m_RST_DIGITAL, v_NOT_RST_DIGITAL); - udelay(100); + usleep_range(100, 150); hdmi_modb(hdmi, HDMI_SYS_CTRL, m_RST_ANALOG, v_NOT_RST_ANALOG); - udelay(100); + usleep_range(100, 150); msk = m_REG_CLK_INV | m_REG_CLK_SOURCE | m_POWER | m_INT_POL; val = v_REG_CLK_INV | v_REG_CLK_SOURCE_SYS | v_PWR_ON | v_INT_POL_HIGH; hdmi_modb(hdmi, HDMI_SYS_CTRL, msk, val); inno_hdmi_standby(hdmi); + + /* + * When the controller isn't configured to an accurate + * video timing and there is no reference clock available, + * then the TMDS clock source would be switched to PCLK_HDMI, + * so we need to init the TMDS rate to PCLK rate, and + * reconfigure the DDC clock. + */ + if (hdmi->refclk) + inno_hdmi_i2c_init(hdmi, clk_get_rate(hdmi->refclk)); + else + inno_hdmi_i2c_init(hdmi, clk_get_rate(hdmi->pclk)); + + /* Unmute hotplug interrupt */ + hdmi_modb(hdmi, HDMI_STATUS, m_MASK_INT_HOTPLUG, v_MASK_INT_HOTPLUG(1)); } static int inno_hdmi_disable_frame(struct drm_connector *connector, @@ -392,10 +755,10 @@ static int inno_hdmi_config_video_timing(struct inno_hdmi *hdmi, int value, psync; if (hdmi->variant->dev_type == RK3036_HDMI) { - psync = mode->flags & DRM_MODE_FLAG_PHSYNC ? RK3036_HDMI_PHSYNC : 0; - value = HIWORD_UPDATE(psync, RK3036_HDMI_PHSYNC); - psync = mode->flags & DRM_MODE_FLAG_PVSYNC ? RK3036_HDMI_PVSYNC : 0; - value |= HIWORD_UPDATE(psync, RK3036_HDMI_PVSYNC); + psync = mode->flags & DRM_MODE_FLAG_PHSYNC ? 1 : 0; + value = FIELD_PREP_WM16(RK3036_HDMI_PHSYNC, psync); + psync = mode->flags & DRM_MODE_FLAG_PVSYNC ? 1 : 0; + value |= FIELD_PREP_WM16(RK3036_HDMI_PVSYNC, psync); regmap_write(hdmi->grf, RK3036_GRF_SOC_CON2, value); } @@ -775,8 +1138,7 @@ static int inno_hdmi_i2c_write(struct inno_hdmi *hdmi, struct i2c_msg *msgs) * we assume that each word write to this i2c adapter * should be the offset of EDID word address. 
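[Editor's note] The comment kept above documents that the Inno DDC adapter treats every write as an EDID word pointer (and, at address 0x30, as the segment pointer). For reference, this is how a caller reaches an EDID extension block through such an adapter; a hedged sketch of standard DDC addressing with a hypothetical helper name, not driver code:

#include <linux/i2c.h>
#include <drm/drm_edid.h>	/* DDC_ADDR (0x50), EDID_LENGTH (128) */

/*
 * The segment pointer at I2C address 0x30 selects a 256-byte page, the
 * word address written to DDC_ADDR picks the offset inside that page,
 * and the final message reads the 128-byte block back.
 */
static int edid_read_block_sketch(struct i2c_adapter *adap, u8 block, u8 *buf)
{
	u8 segment = block >> 1;		/* two 128-byte blocks per segment */
	u8 offset = (block & 1) * EDID_LENGTH;	/* word address inside the segment */
	struct i2c_msg msgs[] = {
		{ .addr = 0x30,     .flags = 0,        .len = 1,           .buf = &segment },
		{ .addr = DDC_ADDR, .flags = 0,        .len = 1,           .buf = &offset },
		{ .addr = DDC_ADDR, .flags = I2C_M_RD, .len = EDID_LENGTH, .buf = buf },
	};

	return i2c_transfer(adap, msgs, ARRAY_SIZE(msgs)) == ARRAY_SIZE(msgs) ? 0 : -EIO;
}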
*/ - if ((msgs->len != 1) || - ((msgs->addr != DDC_ADDR) && (msgs->addr != DDC_SEGMENT_ADDR))) + if (msgs->len != 1 || (msgs->addr != DDC_ADDR && msgs->addr != DDC_SEGMENT_ADDR)) return -EINVAL; reinit_completion(&hdmi->i2c->cmp); @@ -867,10 +1229,9 @@ static struct i2c_adapter *inno_hdmi_i2c_adapter(struct inno_hdmi *hdmi) strscpy(adap->name, "Inno HDMI", sizeof(adap->name)); i2c_set_adapdata(adap, hdmi); - ret = i2c_add_adapter(adap); + ret = devm_i2c_add_adapter(hdmi->dev, adap); if (ret) { dev_warn(hdmi->dev, "cannot add %s I2C adapter\n", adap->name); - devm_kfree(hdmi->dev, i2c); return ERR_PTR(ret); } @@ -907,71 +1268,37 @@ static int inno_hdmi_bind(struct device *dev, struct device *master, if (IS_ERR(hdmi->regs)) return PTR_ERR(hdmi->regs); - hdmi->pclk = devm_clk_get(hdmi->dev, "pclk"); + hdmi->pclk = devm_clk_get_enabled(hdmi->dev, "pclk"); if (IS_ERR(hdmi->pclk)) return dev_err_probe(dev, PTR_ERR(hdmi->pclk), "Unable to get HDMI pclk\n"); - ret = clk_prepare_enable(hdmi->pclk); - if (ret) - return dev_err_probe(dev, ret, "Cannot enable HDMI pclk: %d\n", ret); - - hdmi->refclk = devm_clk_get_optional(hdmi->dev, "ref"); - if (IS_ERR(hdmi->refclk)) { - ret = dev_err_probe(dev, PTR_ERR(hdmi->refclk), "Unable to get HDMI refclk\n"); - goto err_disable_pclk; - } - - ret = clk_prepare_enable(hdmi->refclk); - if (ret) { - ret = dev_err_probe(dev, ret, "Cannot enable HDMI refclk: %d\n", ret); - goto err_disable_pclk; - } + hdmi->refclk = devm_clk_get_optional_enabled(hdmi->dev, "ref"); + if (IS_ERR(hdmi->refclk)) + return dev_err_probe(dev, PTR_ERR(hdmi->refclk), "Unable to get HDMI refclk\n"); if (hdmi->variant->dev_type == RK3036_HDMI) { hdmi->grf = syscon_regmap_lookup_by_phandle(dev->of_node, "rockchip,grf"); - if (IS_ERR(hdmi->grf)) { - ret = dev_err_probe(dev, PTR_ERR(hdmi->grf), - "Unable to get rockchip,grf\n"); - goto err_disable_clk; - } + if (IS_ERR(hdmi->grf)) + return dev_err_probe(dev, + PTR_ERR(hdmi->grf), "Unable to get rockchip,grf\n"); } irq = platform_get_irq(pdev, 0); - if (irq < 0) { - ret = irq; - goto err_disable_clk; - } + if (irq < 0) + return irq; - inno_hdmi_reset(hdmi); + inno_hdmi_init_hw(hdmi); hdmi->ddc = inno_hdmi_i2c_adapter(hdmi); - if (IS_ERR(hdmi->ddc)) { - ret = PTR_ERR(hdmi->ddc); - hdmi->ddc = NULL; - goto err_disable_clk; - } - - /* - * When the controller isn't configured to an accurate - * video timing and there is no reference clock available, - * then the TMDS clock source would be switched to PCLK_HDMI, - * so we need to init the TMDS rate to PCLK rate, and - * reconfigure the DDC clock. 
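[Editor's note] The clock handling in inno_hdmi_bind() above moves to devm_clk_get_enabled()/devm_clk_get_optional_enabled(): the clock is acquired, prepared and enabled in one call and torn down automatically when the device unbinds, which is what lets the err_disable_pclk/err_disable_clk unwind labels and the unbind-time clk_disable_unprepare() calls disappear. A minimal probe-shaped sketch of the pattern, with hypothetical names:

#include <linux/clk.h>
#include <linux/dev_printk.h>
#include <linux/err.h>
#include <linux/platform_device.h>

static int sketch_probe(struct platform_device *pdev)
{
	struct clk *pclk, *refclk;

	/* Acquired and enabled now, disabled and put automatically on unbind. */
	pclk = devm_clk_get_enabled(&pdev->dev, "pclk");
	if (IS_ERR(pclk))
		return dev_err_probe(&pdev->dev, PTR_ERR(pclk),
				     "Unable to get and enable pclk\n");

	/* Optional clock: returns NULL (not an error) when it is absent. */
	refclk = devm_clk_get_optional_enabled(&pdev->dev, "ref");
	if (IS_ERR(refclk))
		return dev_err_probe(&pdev->dev, PTR_ERR(refclk),
				     "Unable to get and enable refclk\n");

	/* ... rest of probe; no clk_disable_unprepare() anywhere ... */
	return 0;
}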
- */ - if (hdmi->refclk) - inno_hdmi_i2c_init(hdmi, clk_get_rate(hdmi->refclk)); - else - inno_hdmi_i2c_init(hdmi, clk_get_rate(hdmi->pclk)); + if (IS_ERR(hdmi->ddc)) + return PTR_ERR(hdmi->ddc); ret = inno_hdmi_register(drm, hdmi); if (ret) - goto err_put_adapter; + return ret; dev_set_drvdata(dev, hdmi); - /* Unmute hotplug interrupt */ - hdmi_modb(hdmi, HDMI_STATUS, m_MASK_INT_HOTPLUG, v_MASK_INT_HOTPLUG(1)); - ret = devm_request_threaded_irq(dev, irq, inno_hdmi_hardirq, inno_hdmi_irq, IRQF_SHARED, dev_name(dev), hdmi); @@ -982,12 +1309,6 @@ static int inno_hdmi_bind(struct device *dev, struct device *master, err_cleanup_hdmi: hdmi->connector.funcs->destroy(&hdmi->connector); hdmi->encoder.encoder.funcs->destroy(&hdmi->encoder.encoder); -err_put_adapter: - i2c_put_adapter(hdmi->ddc); -err_disable_clk: - clk_disable_unprepare(hdmi->refclk); -err_disable_pclk: - clk_disable_unprepare(hdmi->pclk); return ret; } @@ -998,10 +1319,6 @@ static void inno_hdmi_unbind(struct device *dev, struct device *master, hdmi->connector.funcs->destroy(&hdmi->connector); hdmi->encoder.encoder.funcs->destroy(&hdmi->encoder.encoder); - - i2c_put_adapter(hdmi->ddc); - clk_disable_unprepare(hdmi->refclk); - clk_disable_unprepare(hdmi->pclk); } static const struct component_ops inno_hdmi_ops = { diff --git a/drivers/gpu/drm/rockchip/inno_hdmi.h b/drivers/gpu/drm/rockchip/inno_hdmi.h deleted file mode 100644 index 8b7ef3fac485..000000000000 --- a/drivers/gpu/drm/rockchip/inno_hdmi.h +++ /dev/null @@ -1,349 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copyright (C) Rockchip Electronics Co., Ltd. - * Zheng Yang <zhengyang@rock-chips.com> - * Yakir Yang <ykk@rock-chips.com> - */ - -#ifndef __INNO_HDMI_H__ -#define __INNO_HDMI_H__ - -#define DDC_SEGMENT_ADDR 0x30 - -#define HDMI_SCL_RATE (100*1000) -#define DDC_BUS_FREQ_L 0x4b -#define DDC_BUS_FREQ_H 0x4c - -#define HDMI_SYS_CTRL 0x00 -#define m_RST_ANALOG (1 << 6) -#define v_RST_ANALOG (0 << 6) -#define v_NOT_RST_ANALOG (1 << 6) -#define m_RST_DIGITAL (1 << 5) -#define v_RST_DIGITAL (0 << 5) -#define v_NOT_RST_DIGITAL (1 << 5) -#define m_REG_CLK_INV (1 << 4) -#define v_REG_CLK_NOT_INV (0 << 4) -#define v_REG_CLK_INV (1 << 4) -#define m_VCLK_INV (1 << 3) -#define v_VCLK_NOT_INV (0 << 3) -#define v_VCLK_INV (1 << 3) -#define m_REG_CLK_SOURCE (1 << 2) -#define v_REG_CLK_SOURCE_TMDS (0 << 2) -#define v_REG_CLK_SOURCE_SYS (1 << 2) -#define m_POWER (1 << 1) -#define v_PWR_ON (0 << 1) -#define v_PWR_OFF (1 << 1) -#define m_INT_POL (1 << 0) -#define v_INT_POL_HIGH 1 -#define v_INT_POL_LOW 0 - -#define HDMI_VIDEO_CONTRL1 0x01 -#define m_VIDEO_INPUT_FORMAT (7 << 1) -#define m_DE_SOURCE (1 << 0) -#define v_VIDEO_INPUT_FORMAT(n) (n << 1) -#define v_DE_EXTERNAL 1 -#define v_DE_INTERNAL 0 -enum { - VIDEO_INPUT_SDR_RGB444 = 0, - VIDEO_INPUT_DDR_RGB444 = 5, - VIDEO_INPUT_DDR_YCBCR422 = 6 -}; - -#define HDMI_VIDEO_CONTRL2 0x02 -#define m_VIDEO_OUTPUT_COLOR (3 << 6) -#define m_VIDEO_INPUT_BITS (3 << 4) -#define m_VIDEO_INPUT_CSP (1 << 0) -#define v_VIDEO_OUTPUT_COLOR(n) (((n) & 0x3) << 6) -#define v_VIDEO_INPUT_BITS(n) (n << 4) -#define v_VIDEO_INPUT_CSP(n) (n << 0) -enum { - VIDEO_INPUT_12BITS = 0, - VIDEO_INPUT_10BITS = 1, - VIDEO_INPUT_REVERT = 2, - VIDEO_INPUT_8BITS = 3, -}; - -#define HDMI_VIDEO_CONTRL 0x03 -#define m_VIDEO_AUTO_CSC (1 << 7) -#define v_VIDEO_AUTO_CSC(n) (n << 7) -#define m_VIDEO_C0_C2_SWAP (1 << 0) -#define v_VIDEO_C0_C2_SWAP(n) (n << 0) -enum { - C0_C2_CHANGE_ENABLE = 0, - C0_C2_CHANGE_DISABLE = 1, - AUTO_CSC_DISABLE = 0, - AUTO_CSC_ENABLE = 1, 
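[Editor's note] Several hunks in this patch also drop the open-coded "if (ret != -EPROBE_DEFER) dev_err(...)" dance in favour of dev_err_probe(), which logs through dev_err() for real errors, records the message as the deferral reason for -EPROBE_DEFER instead of spamming the log, and returns the error code. A hedged one-liner version of that conversion, mirroring the phy lookup earlier in the patch (wrapper name hypothetical):

#include <linux/dev_printk.h>
#include <linux/err.h>
#include <linux/phy/phy.h>

/* Illustration only: optional phy lookup with unified error reporting. */
static struct phy *get_hdmi_phy_sketch(struct device *dev)
{
	struct phy *phy = devm_phy_optional_get(dev, "hdmi");

	if (IS_ERR(phy))
		return ERR_PTR(dev_err_probe(dev, PTR_ERR(phy),
					     "failed to get phy\n"));

	return phy;
}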
-}; - -#define HDMI_VIDEO_CONTRL3 0x04 -#define m_COLOR_DEPTH_NOT_INDICATED (1 << 4) -#define m_SOF (1 << 3) -#define m_COLOR_RANGE (1 << 2) -#define m_CSC (1 << 0) -#define v_COLOR_DEPTH_NOT_INDICATED(n) ((n) << 4) -#define v_SOF_ENABLE (0 << 3) -#define v_SOF_DISABLE (1 << 3) -#define v_COLOR_RANGE_FULL (1 << 2) -#define v_COLOR_RANGE_LIMITED (0 << 2) -#define v_CSC_ENABLE 1 -#define v_CSC_DISABLE 0 - -#define HDMI_AV_MUTE 0x05 -#define m_AVMUTE_CLEAR (1 << 7) -#define m_AVMUTE_ENABLE (1 << 6) -#define m_AUDIO_MUTE (1 << 1) -#define m_VIDEO_BLACK (1 << 0) -#define v_AVMUTE_CLEAR(n) (n << 7) -#define v_AVMUTE_ENABLE(n) (n << 6) -#define v_AUDIO_MUTE(n) (n << 1) -#define v_VIDEO_MUTE(n) (n << 0) - -#define HDMI_VIDEO_TIMING_CTL 0x08 -#define v_HSYNC_POLARITY(n) (n << 3) -#define v_VSYNC_POLARITY(n) (n << 2) -#define v_INETLACE(n) (n << 1) -#define v_EXTERANL_VIDEO(n) (n << 0) - -#define HDMI_VIDEO_EXT_HTOTAL_L 0x09 -#define HDMI_VIDEO_EXT_HTOTAL_H 0x0a -#define HDMI_VIDEO_EXT_HBLANK_L 0x0b -#define HDMI_VIDEO_EXT_HBLANK_H 0x0c -#define HDMI_VIDEO_EXT_HDELAY_L 0x0d -#define HDMI_VIDEO_EXT_HDELAY_H 0x0e -#define HDMI_VIDEO_EXT_HDURATION_L 0x0f -#define HDMI_VIDEO_EXT_HDURATION_H 0x10 -#define HDMI_VIDEO_EXT_VTOTAL_L 0x11 -#define HDMI_VIDEO_EXT_VTOTAL_H 0x12 -#define HDMI_VIDEO_EXT_VBLANK 0x13 -#define HDMI_VIDEO_EXT_VDELAY 0x14 -#define HDMI_VIDEO_EXT_VDURATION 0x15 - -#define HDMI_VIDEO_CSC_COEF 0x18 - -#define HDMI_AUDIO_CTRL1 0x35 -enum { - CTS_SOURCE_INTERNAL = 0, - CTS_SOURCE_EXTERNAL = 1, -}; -#define v_CTS_SOURCE(n) (n << 7) - -enum { - DOWNSAMPLE_DISABLE = 0, - DOWNSAMPLE_1_2 = 1, - DOWNSAMPLE_1_4 = 2, -}; -#define v_DOWN_SAMPLE(n) (n << 5) - -enum { - AUDIO_SOURCE_IIS = 0, - AUDIO_SOURCE_SPDIF = 1, -}; -#define v_AUDIO_SOURCE(n) (n << 3) - -#define v_MCLK_ENABLE(n) (n << 2) -enum { - MCLK_128FS = 0, - MCLK_256FS = 1, - MCLK_384FS = 2, - MCLK_512FS = 3, -}; -#define v_MCLK_RATIO(n) (n) - -#define AUDIO_SAMPLE_RATE 0x37 -enum { - AUDIO_32K = 0x3, - AUDIO_441K = 0x0, - AUDIO_48K = 0x2, - AUDIO_882K = 0x8, - AUDIO_96K = 0xa, - AUDIO_1764K = 0xc, - AUDIO_192K = 0xe, -}; - -#define AUDIO_I2S_MODE 0x38 -enum { - I2S_CHANNEL_1_2 = 1, - I2S_CHANNEL_3_4 = 3, - I2S_CHANNEL_5_6 = 7, - I2S_CHANNEL_7_8 = 0xf -}; -#define v_I2S_CHANNEL(n) ((n) << 2) -enum { - I2S_STANDARD = 0, - I2S_LEFT_JUSTIFIED = 1, - I2S_RIGHT_JUSTIFIED = 2, -}; -#define v_I2S_MODE(n) (n) - -#define AUDIO_I2S_MAP 0x39 -#define AUDIO_I2S_SWAPS_SPDIF 0x3a -#define v_SPIDF_FREQ(n) (n) - -#define N_32K 0x1000 -#define N_441K 0x1880 -#define N_882K 0x3100 -#define N_1764K 0x6200 -#define N_48K 0x1800 -#define N_96K 0x3000 -#define N_192K 0x6000 - -#define HDMI_AUDIO_CHANNEL_STATUS 0x3e -#define m_AUDIO_STATUS_NLPCM (1 << 7) -#define m_AUDIO_STATUS_USE (1 << 6) -#define m_AUDIO_STATUS_COPYRIGHT (1 << 5) -#define m_AUDIO_STATUS_ADDITION (3 << 2) -#define m_AUDIO_STATUS_CLK_ACCURACY (2 << 0) -#define v_AUDIO_STATUS_NLPCM(n) ((n & 1) << 7) -#define AUDIO_N_H 0x3f -#define AUDIO_N_M 0x40 -#define AUDIO_N_L 0x41 - -#define HDMI_AUDIO_CTS_H 0x45 -#define HDMI_AUDIO_CTS_M 0x46 -#define HDMI_AUDIO_CTS_L 0x47 - -#define HDMI_DDC_CLK_L 0x4b -#define HDMI_DDC_CLK_H 0x4c - -#define HDMI_EDID_SEGMENT_POINTER 0x4d -#define HDMI_EDID_WORD_ADDR 0x4e -#define HDMI_EDID_FIFO_OFFSET 0x4f -#define HDMI_EDID_FIFO_ADDR 0x50 - -#define HDMI_PACKET_SEND_MANUAL 0x9c -#define HDMI_PACKET_SEND_AUTO 0x9d -#define m_PACKET_GCP_EN (1 << 7) -#define m_PACKET_MSI_EN (1 << 6) -#define m_PACKET_SDI_EN (1 << 5) -#define m_PACKET_VSI_EN (1 << 4) -#define 
v_PACKET_GCP_EN(n) ((n & 1) << 7) -#define v_PACKET_MSI_EN(n) ((n & 1) << 6) -#define v_PACKET_SDI_EN(n) ((n & 1) << 5) -#define v_PACKET_VSI_EN(n) ((n & 1) << 4) - -#define HDMI_CONTROL_PACKET_BUF_INDEX 0x9f -enum { - INFOFRAME_VSI = 0x05, - INFOFRAME_AVI = 0x06, - INFOFRAME_AAI = 0x08, -}; - -#define HDMI_CONTROL_PACKET_ADDR 0xa0 -#define HDMI_MAXIMUM_INFO_FRAME_SIZE 0x11 -enum { - AVI_COLOR_MODE_RGB = 0, - AVI_COLOR_MODE_YCBCR422 = 1, - AVI_COLOR_MODE_YCBCR444 = 2, - AVI_COLORIMETRY_NO_DATA = 0, - - AVI_COLORIMETRY_SMPTE_170M = 1, - AVI_COLORIMETRY_ITU709 = 2, - AVI_COLORIMETRY_EXTENDED = 3, - - AVI_CODED_FRAME_ASPECT_NO_DATA = 0, - AVI_CODED_FRAME_ASPECT_4_3 = 1, - AVI_CODED_FRAME_ASPECT_16_9 = 2, - - ACTIVE_ASPECT_RATE_SAME_AS_CODED_FRAME = 0x08, - ACTIVE_ASPECT_RATE_4_3 = 0x09, - ACTIVE_ASPECT_RATE_16_9 = 0x0A, - ACTIVE_ASPECT_RATE_14_9 = 0x0B, -}; - -#define HDMI_HDCP_CTRL 0x52 -#define m_HDMI_DVI (1 << 1) -#define v_HDMI_DVI(n) (n << 1) - -#define HDMI_INTERRUPT_MASK1 0xc0 -#define HDMI_INTERRUPT_STATUS1 0xc1 -#define m_INT_ACTIVE_VSYNC (1 << 5) -#define m_INT_EDID_READY (1 << 2) - -#define HDMI_INTERRUPT_MASK2 0xc2 -#define HDMI_INTERRUPT_STATUS2 0xc3 -#define m_INT_HDCP_ERR (1 << 7) -#define m_INT_BKSV_FLAG (1 << 6) -#define m_INT_HDCP_OK (1 << 4) - -#define HDMI_STATUS 0xc8 -#define m_HOTPLUG (1 << 7) -#define m_MASK_INT_HOTPLUG (1 << 5) -#define m_INT_HOTPLUG (1 << 1) -#define v_MASK_INT_HOTPLUG(n) ((n & 0x1) << 5) - -#define HDMI_COLORBAR 0xc9 - -#define HDMI_PHY_SYNC 0xce -#define HDMI_PHY_SYS_CTL 0xe0 -#define m_TMDS_CLK_SOURCE (1 << 5) -#define v_TMDS_FROM_PLL (0 << 5) -#define v_TMDS_FROM_GEN (1 << 5) -#define m_PHASE_CLK (1 << 4) -#define v_DEFAULT_PHASE (0 << 4) -#define v_SYNC_PHASE (1 << 4) -#define m_TMDS_CURRENT_PWR (1 << 3) -#define v_TURN_ON_CURRENT (0 << 3) -#define v_CAT_OFF_CURRENT (1 << 3) -#define m_BANDGAP_PWR (1 << 2) -#define v_BANDGAP_PWR_UP (0 << 2) -#define v_BANDGAP_PWR_DOWN (1 << 2) -#define m_PLL_PWR (1 << 1) -#define v_PLL_PWR_UP (0 << 1) -#define v_PLL_PWR_DOWN (1 << 1) -#define m_TMDS_CHG_PWR (1 << 0) -#define v_TMDS_CHG_PWR_UP (0 << 0) -#define v_TMDS_CHG_PWR_DOWN (1 << 0) - -#define HDMI_PHY_CHG_PWR 0xe1 -#define v_CLK_CHG_PWR(n) ((n & 1) << 3) -#define v_DATA_CHG_PWR(n) ((n & 7) << 0) - -#define HDMI_PHY_DRIVER 0xe2 -#define v_CLK_MAIN_DRIVER(n) (n << 4) -#define v_DATA_MAIN_DRIVER(n) (n << 0) - -#define HDMI_PHY_PRE_EMPHASIS 0xe3 -#define v_PRE_EMPHASIS(n) ((n & 7) << 4) -#define v_CLK_PRE_DRIVER(n) ((n & 3) << 2) -#define v_DATA_PRE_DRIVER(n) ((n & 3) << 0) - -#define HDMI_PHY_FEEDBACK_DIV_RATIO_LOW 0xe7 -#define v_FEEDBACK_DIV_LOW(n) (n & 0xff) -#define HDMI_PHY_FEEDBACK_DIV_RATIO_HIGH 0xe8 -#define v_FEEDBACK_DIV_HIGH(n) (n & 1) - -#define HDMI_PHY_PRE_DIV_RATIO 0xed -#define v_PRE_DIV_RATIO(n) (n & 0x1f) - -#define HDMI_CEC_CTRL 0xd0 -#define m_ADJUST_FOR_HISENSE (1 << 6) -#define m_REJECT_RX_BROADCAST (1 << 5) -#define m_BUSFREETIME_ENABLE (1 << 2) -#define m_REJECT_RX (1 << 1) -#define m_START_TX (1 << 0) - -#define HDMI_CEC_DATA 0xd1 -#define HDMI_CEC_TX_OFFSET 0xd2 -#define HDMI_CEC_RX_OFFSET 0xd3 -#define HDMI_CEC_CLK_H 0xd4 -#define HDMI_CEC_CLK_L 0xd5 -#define HDMI_CEC_TX_LENGTH 0xd6 -#define HDMI_CEC_RX_LENGTH 0xd7 -#define HDMI_CEC_TX_INT_MASK 0xd8 -#define m_TX_DONE (1 << 3) -#define m_TX_NOACK (1 << 2) -#define m_TX_BROADCAST_REJ (1 << 1) -#define m_TX_BUSNOTFREE (1 << 0) - -#define HDMI_CEC_RX_INT_MASK 0xd9 -#define m_RX_LA_ERR (1 << 4) -#define m_RX_GLITCH (1 << 3) -#define m_RX_DONE (1 << 0) - -#define HDMI_CEC_TX_INT 0xda 
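[Editor's note] Besides moving into inno_hdmi.c, the register defines re-added above modernise the style of the deleted header shown here: (1 << n) becomes BIT(n) and macro parameters gain parentheses. The latter is not cosmetic; with the old form a composite argument binds to the shift, as this toy example with hypothetical macros shows:

#include <linux/bits.h>

#define V_FIELD_OLD(n)	(n << 4)	/* old style: unparenthesised argument */
#define V_FIELD_NEW(n)	((n) << 4)	/* new style: argument parenthesised   */

/*
 * With a composite argument the two expand differently:
 *   V_FIELD_OLD(2 | 1) -> (2 | 1 << 4)   == 2 | 16 == 18
 *   V_FIELD_NEW(2 | 1) -> ((2 | 1) << 4) == 3 << 4 == 48
 * Only the parenthesised form matches the caller's intent.
 */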
-#define HDMI_CEC_RX_INT 0xdb -#define HDMI_CEC_BUSFREETIME_L 0xdc -#define HDMI_CEC_BUSFREETIME_H 0xdd -#define HDMI_CEC_LOGICADDR 0xde - -#endif /* __INNO_HDMI_H__ */ diff --git a/drivers/gpu/drm/rockchip/rk3066_hdmi.c b/drivers/gpu/drm/rockchip/rk3066_hdmi.c index e7875b52f298..ae4a5ac2299a 100644 --- a/drivers/gpu/drm/rockchip/rk3066_hdmi.c +++ b/drivers/gpu/drm/rockchip/rk3066_hdmi.c @@ -450,7 +450,7 @@ struct drm_encoder_helper_funcs rk3066_hdmi_encoder_helper_funcs = { }; static enum drm_connector_status -rk3066_hdmi_bridge_detect(struct drm_bridge *bridge) +rk3066_hdmi_bridge_detect(struct drm_bridge *bridge, struct drm_connector *connector) { struct rk3066_hdmi *hdmi = bridge_to_rk3066_hdmi(bridge); diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_drv.c b/drivers/gpu/drm/rockchip/rockchip_drm_drv.c index 180fad5d49ad..eb77bde9f628 100644 --- a/drivers/gpu/drm/rockchip/rockchip_drm_drv.c +++ b/drivers/gpu/drm/rockchip/rockchip_drm_drv.c @@ -529,6 +529,7 @@ static int __init rockchip_drm_init(void) ADD_ROCKCHIP_SUB_DRIVER(rockchip_dp_driver, CONFIG_ROCKCHIP_ANALOGIX_DP); ADD_ROCKCHIP_SUB_DRIVER(cdn_dp_driver, CONFIG_ROCKCHIP_CDN_DP); + ADD_ROCKCHIP_SUB_DRIVER(dw_dp_driver, CONFIG_ROCKCHIP_DW_DP); ADD_ROCKCHIP_SUB_DRIVER(dw_hdmi_rockchip_pltfm_driver, CONFIG_ROCKCHIP_DW_HDMI); ADD_ROCKCHIP_SUB_DRIVER(dw_hdmi_qp_rockchip_pltfm_driver, diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_drv.h b/drivers/gpu/drm/rockchip/rockchip_drm_drv.h index c183e82a42a5..2e86ad00979c 100644 --- a/drivers/gpu/drm/rockchip/rockchip_drm_drv.h +++ b/drivers/gpu/drm/rockchip/rockchip_drm_drv.h @@ -87,6 +87,7 @@ int rockchip_drm_encoder_set_crtc_endpoint_id(struct rockchip_encoder *rencoder, struct device_node *np, int port, int reg); int rockchip_drm_endpoint_is_subdriver(struct device_node *ep); extern struct platform_driver cdn_dp_driver; +extern struct platform_driver dw_dp_driver; extern struct platform_driver dw_hdmi_rockchip_pltfm_driver; extern struct platform_driver dw_hdmi_qp_rockchip_pltfm_driver; extern struct platform_driver dw_mipi_dsi_rockchip_driver; diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_fb.c b/drivers/gpu/drm/rockchip/rockchip_drm_fb.c index 5829ee061c61..2f469d370021 100644 --- a/drivers/gpu/drm/rockchip/rockchip_drm_fb.c +++ b/drivers/gpu/drm/rockchip/rockchip_drm_fb.c @@ -30,21 +30,18 @@ static const struct drm_mode_config_helper_funcs rockchip_mode_config_helpers = static struct drm_framebuffer * rockchip_fb_create(struct drm_device *dev, struct drm_file *file, + const struct drm_format_info *info, const struct drm_mode_fb_cmd2 *mode_cmd) { struct drm_afbc_framebuffer *afbc_fb; - const struct drm_format_info *info; int ret; - info = drm_get_format_info(dev, mode_cmd); - if (!info) - return ERR_PTR(-ENOMEM); - afbc_fb = kzalloc(sizeof(*afbc_fb), GFP_KERNEL); if (!afbc_fb) return ERR_PTR(-ENOMEM); - ret = drm_gem_fb_init_with_funcs(dev, &afbc_fb->base, file, mode_cmd, + ret = drm_gem_fb_init_with_funcs(dev, &afbc_fb->base, + file, info, mode_cmd, &rockchip_drm_fb_funcs); if (ret) { kfree(afbc_fb); @@ -52,7 +49,7 @@ rockchip_fb_create(struct drm_device *dev, struct drm_file *file, } if (drm_is_afbc(mode_cmd->modifier[0])) { - ret = drm_gem_fb_afbc_init(dev, mode_cmd, afbc_fb); + ret = drm_gem_fb_afbc_init(dev, info, mode_cmd, afbc_fb); if (ret) { drm_framebuffer_put(&afbc_fb->base); return ERR_PTR(ret); diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c b/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c index d0f5fea15e21..7ec7bea5e38e 100644 --- 
a/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c +++ b/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c @@ -146,25 +146,6 @@ static void vop2_unlock(struct vop2 *vop2) mutex_unlock(&vop2->vop2_lock); } -/* - * Note: - * The write mask function is documented but missing on rk3566/8, writes - * to these bits have no effect. For newer soc(rk3588 and following) the - * write mask is needed for register writes. - * - * GLB_CFG_DONE_EN has no write mask bit. - * - */ -static void vop2_cfg_done(struct vop2_video_port *vp) -{ - struct vop2 *vop2 = vp->vop2; - u32 val = RK3568_REG_CFG_DONE__GLB_CFG_DONE_EN; - - val |= BIT(vp->id) | (BIT(vp->id) << 16); - - regmap_set_bits(vop2->map, RK3568_REG_CFG_DONE, val); -} - static void vop2_win_disable(struct vop2_win *win) { vop2_win_write(win, VOP2_WIN_ENABLE, 0); @@ -854,6 +835,11 @@ static void vop2_enable(struct vop2 *vop2) if (vop2->version == VOP_VERSION_RK3588) rk3588_vop2_power_domain_enable_all(vop2); + if (vop2->version <= VOP_VERSION_RK3588) { + vop2->old_layer_sel = vop2_readl(vop2, RK3568_OVL_LAYER_SEL); + vop2->old_port_sel = vop2_readl(vop2, RK3568_OVL_PORT_SEL); + } + vop2_writel(vop2, RK3568_REG_CFG_DONE, RK3568_REG_CFG_DONE__GLB_CFG_DONE_EN); /* @@ -1045,7 +1031,7 @@ static int vop2_plane_atomic_check(struct drm_plane *plane, return format; if (drm_rect_width(src) >> 16 < 4 || drm_rect_height(src) >> 16 < 4 || - drm_rect_width(dest) < 4 || drm_rect_width(dest) < 4) { + drm_rect_width(dest) < 4 || drm_rect_height(dest) < 4) { drm_err(vop2->drm, "Invalid size: %dx%d->%dx%d, min size is 4x4\n", drm_rect_width(src) >> 16, drm_rect_height(src) >> 16, drm_rect_width(dest), drm_rect_height(dest)); @@ -2422,6 +2408,10 @@ static int vop2_create_crtcs(struct vop2 *vop2) break; } } + + if (!vp->primary_plane) + return dev_err_probe(drm->dev, -ENOENT, + "no primary plane for vp %d\n", i); } /* Register all unused window as overlay plane */ @@ -2589,12 +2579,13 @@ static int vop2_win_init(struct vop2 *vop2) } /* - * The window registers are only updated when config done is written. - * Until that they read back the old value. As we read-modify-write - * these registers mark them as non-volatile. This makes sure we read - * the new values from the regmap register cache. + * The window and video port registers are only updated when config + * done is written. Until that they read back the old value. As we + * read-modify-write these registers mark them as non-volatile. This + * makes sure we read the new values from the regmap register cache. */ static const struct regmap_range vop2_nonvolatile_range[] = { + regmap_reg_range(RK3568_VP0_CTRL_BASE, RK3588_VP3_CTRL_BASE + 255), regmap_reg_range(0x1000, 0x23ff), }; @@ -2724,6 +2715,7 @@ static int vop2_bind(struct device *dev, struct device *master, void *data) return dev_err_probe(drm->dev, vop2->irq, "cannot find irq for vop2\n"); mutex_init(&vop2->vop2_lock); + mutex_init(&vop2->ovl_lock); ret = devm_request_irq(dev, vop2->irq, vop2_isr, IRQF_SHARED, dev_name(dev), vop2); if (ret) diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_vop2.h b/drivers/gpu/drm/rockchip/rockchip_drm_vop2.h index fc3ecb9fcd95..9124191899ba 100644 --- a/drivers/gpu/drm/rockchip/rockchip_drm_vop2.h +++ b/drivers/gpu/drm/rockchip/rockchip_drm_vop2.h @@ -33,7 +33,6 @@ #define WIN_FEATURE_AFBDC BIT(0) #define WIN_FEATURE_CLUSTER BIT(1) -#define HIWORD_UPDATE(v, h, l) ((GENMASK(h, l) << 16) | ((v) << (l))) /* * the delay number of a window in different mode. 
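[Editor's note] The extended comment above explains why the VOP2 window and, now, video-port control ranges are marked non-volatile: their hardware values only latch when config-done is written, so read-modify-write sequences must be served from the regmap cache rather than from the stale registers. Roughly how such a setup looks, with names and cache type assumed rather than copied from the driver:

#include <linux/regmap.h>

static const struct regmap_range sketch_nonvolatile_ranges[] = {
	regmap_reg_range(0x1000, 0x23ff),	/* window registers, served from cache */
};

static const struct regmap_access_table sketch_volatile_table = {
	/* everything NOT listed here is treated as volatile (read from hardware) */
	.no_ranges = sketch_nonvolatile_ranges,
	.n_no_ranges = ARRAY_SIZE(sketch_nonvolatile_ranges),
};

static const struct regmap_config sketch_regmap_config = {
	.reg_bits = 32,
	.val_bits = 32,
	.reg_stride = 4,
	.max_register = 0x3000,
	.cache_type = REGCACHE_MAPLE,
	.volatile_table = &sketch_volatile_table,
};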
*/ @@ -334,6 +333,19 @@ struct vop2 { /* optional internal rgb encoder */ struct rockchip_rgb *rgb; + /* + * Used to record layer selection configuration on rk356x/rk3588 + * as register RK3568_OVL_LAYER_SEL and RK3568_OVL_PORT_SEL are + * shared for all the Video Ports. + */ + u32 old_layer_sel; + u32 old_port_sel; + /* + * Ensure that the updates to these two registers(RKK3568_OVL_LAYER_SEL/RK3568_OVL_PORT_SEL) + * take effect in sequence. + */ + struct mutex ovl_lock; + /* must be put at the end of the struct */ struct vop2_win win[]; }; @@ -727,6 +739,7 @@ enum dst_factor_mode { #define RK3588_OVL_PORT_SEL__CLUSTER2 GENMASK(21, 20) #define RK3568_OVL_PORT_SEL__CLUSTER1 GENMASK(19, 18) #define RK3568_OVL_PORT_SEL__CLUSTER0 GENMASK(17, 16) +#define RK3588_OVL_PORT_SET__PORT3_MUX GENMASK(15, 12) #define RK3568_OVL_PORT_SET__PORT2_MUX GENMASK(11, 8) #define RK3568_OVL_PORT_SET__PORT1_MUX GENMASK(7, 4) #define RK3568_OVL_PORT_SET__PORT0_MUX GENMASK(3, 0) @@ -831,4 +844,23 @@ static inline struct vop2_win *to_vop2_win(struct drm_plane *p) return container_of(p, struct vop2_win, base); } +/* + * Note: + * The write mask function is documented but missing on rk3566/8, writes + * to these bits have no effect. For newer soc(rk3588 and following) the + * write mask is needed for register writes. + * + * GLB_CFG_DONE_EN has no write mask bit. + * + */ +static inline void vop2_cfg_done(struct vop2_video_port *vp) +{ + struct vop2 *vop2 = vp->vop2; + u32 val = RK3568_REG_CFG_DONE__GLB_CFG_DONE_EN; + + val |= BIT(vp->id) | (BIT(vp->id) << 16); + + regmap_set_bits(vop2->map, RK3568_REG_CFG_DONE, val); +} + #endif /* _ROCKCHIP_DRM_VOP2_H */ diff --git a/drivers/gpu/drm/rockchip/rockchip_lvds.c b/drivers/gpu/drm/rockchip/rockchip_lvds.c index a673779de3d2..2411260db51d 100644 --- a/drivers/gpu/drm/rockchip/rockchip_lvds.c +++ b/drivers/gpu/drm/rockchip/rockchip_lvds.c @@ -56,14 +56,13 @@ struct rockchip_lvds { struct drm_device *drm_dev; struct drm_panel *panel; struct drm_bridge *bridge; - struct drm_connector connector; struct rockchip_encoder encoder; struct dev_pin_info *pins; }; -static inline struct rockchip_lvds *connector_to_lvds(struct drm_connector *connector) +static inline struct rockchip_lvds *brige_to_lvds(struct drm_bridge *bridge) { - return container_of(connector, struct rockchip_lvds, connector); + return (struct rockchip_lvds *)bridge->driver_private; } static inline struct rockchip_lvds *encoder_to_lvds(struct drm_encoder *encoder) @@ -106,25 +105,21 @@ static inline int rockchip_lvds_name_to_output(const char *s) return -EINVAL; } -static const struct drm_connector_funcs rockchip_lvds_connector_funcs = { - .fill_modes = drm_helper_probe_single_connector_modes, - .destroy = drm_connector_cleanup, - .reset = drm_atomic_helper_connector_reset, - .atomic_duplicate_state = drm_atomic_helper_connector_duplicate_state, - .atomic_destroy_state = drm_atomic_helper_connector_destroy_state, -}; - -static int rockchip_lvds_connector_get_modes(struct drm_connector *connector) +static int +rockchip_lvds_bridge_get_modes(struct drm_bridge *bridge, struct drm_connector *connector) { - struct rockchip_lvds *lvds = connector_to_lvds(connector); + struct rockchip_lvds *lvds = brige_to_lvds(bridge); struct drm_panel *panel = lvds->panel; return drm_panel_get_modes(panel, connector); } static const -struct drm_connector_helper_funcs rockchip_lvds_connector_helper_funcs = { - .get_modes = rockchip_lvds_connector_get_modes, +struct drm_bridge_funcs rockchip_lvds_bridge_funcs = { + .atomic_duplicate_state 
= drm_atomic_helper_bridge_duplicate_state, + .atomic_destroy_state = drm_atomic_helper_bridge_destroy_state, + .atomic_reset = drm_atomic_helper_bridge_reset, + .get_modes = rockchip_lvds_bridge_get_modes, }; static int @@ -606,26 +601,23 @@ static int rockchip_lvds_bind(struct device *dev, struct device *master, } drm_encoder_helper_add(encoder, lvds->soc_data->helper_funcs); - connector = &lvds->connector; if (lvds->panel) { - connector->dpms = DRM_MODE_DPMS_OFF; - ret = drm_connector_init(drm_dev, connector, - &rockchip_lvds_connector_funcs, - DRM_MODE_CONNECTOR_LVDS); - if (ret < 0) { - drm_err(drm_dev, - "failed to initialize connector: %d\n", ret); + lvds->bridge = drm_panel_bridge_add_typed(lvds->panel, DRM_MODE_CONNECTOR_LVDS); + if (IS_ERR(lvds->bridge)) { + ret = PTR_ERR(lvds->bridge); goto err_free_encoder; } + } - drm_connector_helper_add(connector, - &rockchip_lvds_connector_helper_funcs); - } else { - ret = drm_bridge_attach(encoder, lvds->bridge, NULL, - DRM_BRIDGE_ATTACH_NO_CONNECTOR); + if (lvds->bridge) { + lvds->bridge->driver_private = lvds; + lvds->bridge->ops = DRM_BRIDGE_OP_MODES; + lvds->bridge->funcs = &rockchip_lvds_bridge_funcs; + + ret = drm_bridge_attach(encoder, lvds->bridge, NULL, DRM_BRIDGE_ATTACH_NO_CONNECTOR); if (ret) - goto err_free_encoder; + goto err_free_bridge; connector = drm_bridge_connector_init(lvds->drm_dev, encoder); if (IS_ERR(connector)) { @@ -633,14 +625,14 @@ static int rockchip_lvds_bind(struct device *dev, struct device *master, "failed to initialize bridge connector: %pe\n", connector); ret = PTR_ERR(connector); - goto err_free_encoder; + goto err_free_bridge; } - } - ret = drm_connector_attach_encoder(connector, encoder); - if (ret < 0) { - drm_err(drm_dev, "failed to attach encoder: %d\n", ret); - goto err_free_connector; + ret = drm_connector_attach_encoder(connector, encoder); + if (ret < 0) { + drm_err(drm_dev, "failed to attach encoder: %d\n", ret); + goto err_free_bridge; + } } pm_runtime_enable(dev); @@ -649,8 +641,8 @@ static int rockchip_lvds_bind(struct device *dev, struct device *master, return 0; -err_free_connector: - drm_connector_cleanup(connector); +err_free_bridge: + drm_panel_bridge_remove(lvds->bridge); err_free_encoder: drm_encoder_cleanup(encoder); err_put_remote: @@ -670,8 +662,6 @@ static void rockchip_lvds_unbind(struct device *dev, struct device *master, encoder_funcs = lvds->soc_data->helper_funcs; encoder_funcs->disable(&lvds->encoder.encoder); pm_runtime_disable(dev); - drm_connector_cleanup(&lvds->connector); - drm_encoder_cleanup(&lvds->encoder.encoder); } static const struct component_ops rockchip_lvds_component_ops = { diff --git a/drivers/gpu/drm/rockchip/rockchip_lvds.h b/drivers/gpu/drm/rockchip/rockchip_lvds.h index ca83d7b6bea7..2d92447d819b 100644 --- a/drivers/gpu/drm/rockchip/rockchip_lvds.h +++ b/drivers/gpu/drm/rockchip/rockchip_lvds.h @@ -9,6 +9,9 @@ #ifndef _ROCKCHIP_LVDS_ #define _ROCKCHIP_LVDS_ +#include <linux/bits.h> +#include <linux/hw_bitfield.h> + #define RK3288_LVDS_CH0_REG0 0x00 #define RK3288_LVDS_CH0_REG0_LVDS_EN BIT(7) #define RK3288_LVDS_CH0_REG0_TTL_EN BIT(6) @@ -106,18 +109,16 @@ #define LVDS_VESA_18 2 #define LVDS_JEIDA_18 3 -#define HIWORD_UPDATE(v, h, l) ((GENMASK(h, l) << 16) | ((v) << (l))) - #define PX30_LVDS_GRF_PD_VO_CON0 0x434 -#define PX30_LVDS_TIE_CLKS(val) HIWORD_UPDATE(val, 8, 8) -#define PX30_LVDS_INVERT_CLKS(val) HIWORD_UPDATE(val, 9, 9) -#define PX30_LVDS_INVERT_DCLK(val) HIWORD_UPDATE(val, 5, 5) +#define PX30_LVDS_TIE_CLKS(val) FIELD_PREP_WM16(BIT(8), (val)) 
+#define PX30_LVDS_INVERT_CLKS(val) FIELD_PREP_WM16(BIT(9), (val)) +#define PX30_LVDS_INVERT_DCLK(val) FIELD_PREP_WM16(BIT(5), (val)) #define PX30_LVDS_GRF_PD_VO_CON1 0x438 -#define PX30_LVDS_FORMAT(val) HIWORD_UPDATE(val, 14, 13) -#define PX30_LVDS_MODE_EN(val) HIWORD_UPDATE(val, 12, 12) -#define PX30_LVDS_MSBSEL(val) HIWORD_UPDATE(val, 11, 11) -#define PX30_LVDS_P2S_EN(val) HIWORD_UPDATE(val, 6, 6) -#define PX30_LVDS_VOP_SEL(val) HIWORD_UPDATE(val, 1, 1) +#define PX30_LVDS_FORMAT(val) FIELD_PREP_WM16(GENMASK(14, 13), (val)) +#define PX30_LVDS_MODE_EN(val) FIELD_PREP_WM16(BIT(12), (val)) +#define PX30_LVDS_MSBSEL(val) FIELD_PREP_WM16(BIT(11), (val)) +#define PX30_LVDS_P2S_EN(val) FIELD_PREP_WM16(BIT(6), (val)) +#define PX30_LVDS_VOP_SEL(val) FIELD_PREP_WM16(BIT(1), (val)) #endif /* _ROCKCHIP_LVDS_ */ diff --git a/drivers/gpu/drm/rockchip/rockchip_vop2_reg.c b/drivers/gpu/drm/rockchip/rockchip_vop2_reg.c index 32c4ed685739..38c49030c7ab 100644 --- a/drivers/gpu/drm/rockchip/rockchip_vop2_reg.c +++ b/drivers/gpu/drm/rockchip/rockchip_vop2_reg.c @@ -7,6 +7,7 @@ #include <linux/bitfield.h> #include <linux/kernel.h> #include <linux/component.h> +#include <linux/hw_bitfield.h> #include <linux/mod_devicetable.h> #include <linux/platform_device.h> #include <linux/of.h> @@ -1695,8 +1696,9 @@ static unsigned long rk3588_set_intf_mux(struct vop2_video_port *vp, int id, u32 die |= RK3588_SYS_DSP_INFACE_EN_HDMI0 | FIELD_PREP(RK3588_SYS_DSP_INFACE_EN_EDP_HDMI0_MUX, vp->id); val = rk3588_get_hdmi_pol(polflags); - regmap_write(vop2->vop_grf, RK3588_GRF_VOP_CON2, HIWORD_UPDATE(1, 1, 1)); - regmap_write(vop2->vo1_grf, RK3588_GRF_VO1_CON0, HIWORD_UPDATE(val, 6, 5)); + regmap_write(vop2->vop_grf, RK3588_GRF_VOP_CON2, FIELD_PREP_WM16(BIT(1), 1)); + regmap_write(vop2->vo1_grf, RK3588_GRF_VO1_CON0, + FIELD_PREP_WM16(GENMASK(6, 5), val)); break; case ROCKCHIP_VOP2_EP_HDMI1: div &= ~RK3588_DSP_IF_EDP_HDMI1_DCLK_DIV; @@ -1707,8 +1709,9 @@ static unsigned long rk3588_set_intf_mux(struct vop2_video_port *vp, int id, u32 die |= RK3588_SYS_DSP_INFACE_EN_HDMI1 | FIELD_PREP(RK3588_SYS_DSP_INFACE_EN_EDP_HDMI1_MUX, vp->id); val = rk3588_get_hdmi_pol(polflags); - regmap_write(vop2->vop_grf, RK3588_GRF_VOP_CON2, HIWORD_UPDATE(1, 4, 4)); - regmap_write(vop2->vo1_grf, RK3588_GRF_VO1_CON0, HIWORD_UPDATE(val, 8, 7)); + regmap_write(vop2->vop_grf, RK3588_GRF_VOP_CON2, FIELD_PREP_WM16(BIT(4), 1)); + regmap_write(vop2->vo1_grf, RK3588_GRF_VO1_CON0, + FIELD_PREP_WM16(GENMASK(8, 7), val)); break; case ROCKCHIP_VOP2_EP_EDP0: div &= ~RK3588_DSP_IF_EDP_HDMI0_DCLK_DIV; @@ -1718,7 +1721,7 @@ static unsigned long rk3588_set_intf_mux(struct vop2_video_port *vp, int id, u32 die &= ~RK3588_SYS_DSP_INFACE_EN_EDP_HDMI0_MUX; die |= RK3588_SYS_DSP_INFACE_EN_EDP0 | FIELD_PREP(RK3588_SYS_DSP_INFACE_EN_EDP_HDMI0_MUX, vp->id); - regmap_write(vop2->vop_grf, RK3588_GRF_VOP_CON2, HIWORD_UPDATE(1, 0, 0)); + regmap_write(vop2->vop_grf, RK3588_GRF_VOP_CON2, FIELD_PREP_WM16(BIT(0), 1)); break; case ROCKCHIP_VOP2_EP_EDP1: div &= ~RK3588_DSP_IF_EDP_HDMI1_DCLK_DIV; @@ -1728,7 +1731,7 @@ static unsigned long rk3588_set_intf_mux(struct vop2_video_port *vp, int id, u32 die &= ~RK3588_SYS_DSP_INFACE_EN_EDP_HDMI1_MUX; die |= RK3588_SYS_DSP_INFACE_EN_EDP1 | FIELD_PREP(RK3588_SYS_DSP_INFACE_EN_EDP_HDMI1_MUX, vp->id); - regmap_write(vop2->vop_grf, RK3588_GRF_VOP_CON2, HIWORD_UPDATE(1, 3, 3)); + regmap_write(vop2->vop_grf, RK3588_GRF_VOP_CON2, FIELD_PREP_WM16(BIT(3), 1)); break; case ROCKCHIP_VOP2_EP_MIPI0: div &= ~RK3588_DSP_IF_MIPI0_PCLK_DIV; @@ -2052,12 +2055,55 @@ 
static void vop2_setup_alpha(struct vop2_video_port *vp) } } +static u32 rk3568_vop2_read_port_mux(struct vop2 *vop2) +{ + return vop2_readl(vop2, RK3568_OVL_PORT_SEL); +} + +static void rk3568_vop2_wait_for_port_mux_done(struct vop2 *vop2) +{ + u32 port_mux_sel; + int ret; + + /* + * Spin until the previous port_mux figuration is done. + */ + ret = readx_poll_timeout_atomic(rk3568_vop2_read_port_mux, vop2, port_mux_sel, + port_mux_sel == vop2->old_port_sel, 0, 50 * 1000); + if (ret) + DRM_DEV_ERROR(vop2->dev, "wait port_mux done timeout: 0x%x--0x%x\n", + port_mux_sel, vop2->old_port_sel); +} + +static u32 rk3568_vop2_read_layer_cfg(struct vop2 *vop2) +{ + return vop2_readl(vop2, RK3568_OVL_LAYER_SEL); +} + +static void rk3568_vop2_wait_for_layer_cfg_done(struct vop2 *vop2, u32 cfg) +{ + u32 atv_layer_cfg; + int ret; + + /* + * Spin until the previous layer configuration is done. + */ + ret = readx_poll_timeout_atomic(rk3568_vop2_read_layer_cfg, vop2, atv_layer_cfg, + atv_layer_cfg == cfg, 0, 50 * 1000); + if (ret) + DRM_DEV_ERROR(vop2->dev, "wait layer cfg done timeout: 0x%x--0x%x\n", + atv_layer_cfg, cfg); +} + static void rk3568_vop2_setup_layer_mixer(struct vop2_video_port *vp) { struct vop2 *vop2 = vp->vop2; struct drm_plane *plane; u32 layer_sel = 0; u32 port_sel; + u32 old_layer_sel = 0; + u32 atv_layer_sel = 0; + u32 old_port_sel = 0; u8 layer_id; u8 old_layer_id; u8 layer_sel_id; @@ -2069,19 +2115,18 @@ static void rk3568_vop2_setup_layer_mixer(struct vop2_video_port *vp) struct vop2_video_port *vp2 = &vop2->vps[2]; struct rockchip_crtc_state *vcstate = to_rockchip_crtc_state(vp->crtc.state); + mutex_lock(&vop2->ovl_lock); ovl_ctrl = vop2_readl(vop2, RK3568_OVL_CTRL); ovl_ctrl &= ~RK3568_OVL_CTRL__LAYERSEL_REGDONE_IMD; ovl_ctrl &= ~RK3568_OVL_CTRL__LAYERSEL_REGDONE_SEL; - ovl_ctrl |= FIELD_PREP(RK3568_OVL_CTRL__LAYERSEL_REGDONE_SEL, vp->id); if (vcstate->yuv_overlay) ovl_ctrl |= RK3568_OVL_CTRL__YUV_MODE(vp->id); else ovl_ctrl &= ~RK3568_OVL_CTRL__YUV_MODE(vp->id); - vop2_writel(vop2, RK3568_OVL_CTRL, ovl_ctrl); - - port_sel = vop2_readl(vop2, RK3568_OVL_PORT_SEL); + old_port_sel = vop2->old_port_sel; + port_sel = old_port_sel; port_sel &= RK3568_OVL_PORT_SEL__SEL_PORT; if (vp0->nlayers) @@ -2102,7 +2147,13 @@ static void rk3568_vop2_setup_layer_mixer(struct vop2_video_port *vp) else port_sel |= FIELD_PREP(RK3568_OVL_PORT_SET__PORT2_MUX, 8); - layer_sel = vop2_readl(vop2, RK3568_OVL_LAYER_SEL); + /* Fixed value for rk3588 */ + if (vop2->version == VOP_VERSION_RK3588) + port_sel |= FIELD_PREP(RK3588_OVL_PORT_SET__PORT3_MUX, 7); + + atv_layer_sel = vop2_readl(vop2, RK3568_OVL_LAYER_SEL); + old_layer_sel = vop2->old_layer_sel; + layer_sel = old_layer_sel; ofs = 0; for (i = 0; i < vp->id; i++) @@ -2186,8 +2237,37 @@ static void rk3568_vop2_setup_layer_mixer(struct vop2_video_port *vp) old_win->data->layer_sel_id[vp->id]); } + vop2->old_layer_sel = layer_sel; + vop2->old_port_sel = port_sel; + /* + * As the RK3568_OVL_LAYER_SEL and RK3568_OVL_PORT_SEL are shared by all Video Ports, + * and the configuration take effect by one Video Port's vsync. + * When performing layer migration or change the zpos of layers, there are two things + * to be observed and followed: + * 1. When a layer is migrated from one VP to another, the configuration of the layer + * can only take effect after the Port mux configuration is enabled. + * + * 2. When we change the zpos of layers, we must ensure that the change for the previous + * VP takes effect before we proceed to change the next VP. 
Otherwise, the new + * configuration might overwrite the previous one for the previous VP, or it could + * lead to the configuration of the previous VP being take effect along with the VSYNC + * of the new VP. + */ + if (layer_sel != old_layer_sel || port_sel != old_port_sel) + ovl_ctrl |= FIELD_PREP(RK3568_OVL_CTRL__LAYERSEL_REGDONE_SEL, vp->id); + vop2_writel(vop2, RK3568_OVL_CTRL, ovl_ctrl); + + if (port_sel != old_port_sel) { + vop2_writel(vop2, RK3568_OVL_PORT_SEL, port_sel); + vop2_cfg_done(vp); + rk3568_vop2_wait_for_port_mux_done(vop2); + } + + if (layer_sel != old_layer_sel && atv_layer_sel != old_layer_sel) + rk3568_vop2_wait_for_layer_cfg_done(vop2, vop2->old_layer_sel); + vop2_writel(vop2, RK3568_OVL_LAYER_SEL, layer_sel); - vop2_writel(vop2, RK3568_OVL_PORT_SEL, port_sel); + mutex_unlock(&vop2->ovl_lock); } static void rk3568_vop2_setup_dly_for_windows(struct vop2_video_port *vp) diff --git a/drivers/gpu/drm/scheduler/sched_entity.c b/drivers/gpu/drm/scheduler/sched_entity.c index 5635b3a826d8..5a4697f636f2 100644 --- a/drivers/gpu/drm/scheduler/sched_entity.c +++ b/drivers/gpu/drm/scheduler/sched_entity.c @@ -285,9 +285,9 @@ long drm_sched_entity_flush(struct drm_sched_entity *entity, long timeout) return 0; sched = entity->rq->sched; - /** - * The client will not queue more IBs during this fini, consume existing - * queued IBs or discard them on SIGKILL + /* + * The client will not queue more jobs during this fini - consume + * existing queued ones, or discard them on SIGKILL. */ if (current->flags & PF_EXITING) { if (timeout) @@ -300,7 +300,7 @@ long drm_sched_entity_flush(struct drm_sched_entity *entity, long timeout) drm_sched_entity_is_idle(entity)); } - /* For killed process disable any more IBs enqueue right now */ + /* For a killed process disallow further enqueueing of jobs. */ last_user = cmpxchg(&entity->last_user, current->group_leader, NULL); if ((!last_user || last_user == current->group_leader) && (current->flags & PF_EXITING) && (current->exit_code == SIGKILL)) @@ -324,9 +324,9 @@ EXPORT_SYMBOL(drm_sched_entity_flush); void drm_sched_entity_fini(struct drm_sched_entity *entity) { /* - * If consumption of existing IBs wasn't completed. Forcefully remove - * them here. Also makes sure that the scheduler won't touch this entity - * any more. + * If consumption of existing jobs wasn't completed forcefully remove + * them. Also makes sure that the scheduler won't touch this entity any + * more. */ drm_sched_entity_kill(entity); @@ -355,17 +355,6 @@ void drm_sched_entity_destroy(struct drm_sched_entity *entity) } EXPORT_SYMBOL(drm_sched_entity_destroy); -/* drm_sched_entity_clear_dep - callback to clear the entities dependency */ -static void drm_sched_entity_clear_dep(struct dma_fence *f, - struct dma_fence_cb *cb) -{ - struct drm_sched_entity *entity = - container_of(cb, struct drm_sched_entity, cb); - - entity->dependency = NULL; - dma_fence_put(f); -} - /* * drm_sched_entity_wakeup - callback to clear the entity's dependency and * wake up the scheduler @@ -376,7 +365,8 @@ static void drm_sched_entity_wakeup(struct dma_fence *f, struct drm_sched_entity *entity = container_of(cb, struct drm_sched_entity, cb); - drm_sched_entity_clear_dep(f, cb); + entity->dependency = NULL; + dma_fence_put(f); drm_sched_wakeup(entity->rq->sched); } @@ -401,7 +391,8 @@ EXPORT_SYMBOL(drm_sched_entity_set_priority); * Add a callback to the current dependency of the entity to wake up the * scheduler when the entity becomes available. 
*/ -static bool drm_sched_entity_add_dependency_cb(struct drm_sched_entity *entity) +static bool drm_sched_entity_add_dependency_cb(struct drm_sched_entity *entity, + struct drm_sched_job *sched_job) { struct drm_gpu_scheduler *sched = entity->rq->sched; struct dma_fence *fence = entity->dependency; @@ -429,15 +420,12 @@ static bool drm_sched_entity_add_dependency_cb(struct drm_sched_entity *entity) fence = dma_fence_get(&s_fence->scheduled); dma_fence_put(entity->dependency); entity->dependency = fence; - if (!dma_fence_add_callback(fence, &entity->cb, - drm_sched_entity_clear_dep)) - return true; - - /* Ignore it when it is already scheduled */ - dma_fence_put(fence); - return false; } + if (trace_drm_sched_job_unschedulable_enabled() && + !test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &entity->dependency->flags)) + trace_drm_sched_job_unschedulable(sched_job, entity->dependency); + if (!dma_fence_add_callback(entity->dependency, &entity->cb, drm_sched_entity_wakeup)) return true; @@ -478,10 +466,8 @@ struct drm_sched_job *drm_sched_entity_pop_job(struct drm_sched_entity *entity) while ((entity->dependency = drm_sched_job_dependency(sched_job, entity))) { - if (drm_sched_entity_add_dependency_cb(entity)) { - trace_drm_sched_job_unschedulable(sched_job, entity->dependency); + if (drm_sched_entity_add_dependency_cb(entity, sched_job)) return NULL; - } } /* skip jobs from entity that marked guilty */ diff --git a/drivers/gpu/drm/scheduler/sched_main.c b/drivers/gpu/drm/scheduler/sched_main.c index c63543132f9d..c39f0245e3a9 100644 --- a/drivers/gpu/drm/scheduler/sched_main.c +++ b/drivers/gpu/drm/scheduler/sched_main.c @@ -84,12 +84,6 @@ #define CREATE_TRACE_POINTS #include "gpu_scheduler_trace.h" -#ifdef CONFIG_LOCKDEP -static struct lockdep_map drm_sched_lockdep_map = { - .name = "drm_sched_lockdep_map" -}; -#endif - int drm_sched_policy = DRM_SCHED_POLICY_FIFO; /** @@ -269,38 +263,14 @@ drm_sched_rq_select_entity_rr(struct drm_gpu_scheduler *sched, entity = rq->current_entity; if (entity) { list_for_each_entry_continue(entity, &rq->entities, list) { - if (drm_sched_entity_is_ready(entity)) { - /* If we can't queue yet, preserve the current - * entity in terms of fairness. - */ - if (!drm_sched_can_queue(sched, entity)) { - spin_unlock(&rq->lock); - return ERR_PTR(-ENOSPC); - } - - rq->current_entity = entity; - reinit_completion(&entity->entity_idle); - spin_unlock(&rq->lock); - return entity; - } + if (drm_sched_entity_is_ready(entity)) + goto found; } } list_for_each_entry(entity, &rq->entities, list) { - if (drm_sched_entity_is_ready(entity)) { - /* If we can't queue yet, preserve the current entity in - * terms of fairness. - */ - if (!drm_sched_can_queue(sched, entity)) { - spin_unlock(&rq->lock); - return ERR_PTR(-ENOSPC); - } - - rq->current_entity = entity; - reinit_completion(&entity->entity_idle); - spin_unlock(&rq->lock); - return entity; - } + if (drm_sched_entity_is_ready(entity)) + goto found; if (entity == rq->current_entity) break; @@ -309,6 +279,22 @@ drm_sched_rq_select_entity_rr(struct drm_gpu_scheduler *sched, spin_unlock(&rq->lock); return NULL; + +found: + if (!drm_sched_can_queue(sched, entity)) { + /* + * If scheduler cannot take more jobs signal the caller to not + * consider lower priority queues. 
+ */ + entity = ERR_PTR(-ENOSPC); + } else { + rq->current_entity = entity; + reinit_completion(&entity->entity_idle); + } + + spin_unlock(&rq->lock); + + return entity; } /** @@ -363,32 +349,16 @@ static void drm_sched_run_job_queue(struct drm_gpu_scheduler *sched) } /** - * __drm_sched_run_free_queue - enqueue free-job work + * drm_sched_run_free_queue - enqueue free-job work * @sched: scheduler instance */ -static void __drm_sched_run_free_queue(struct drm_gpu_scheduler *sched) +static void drm_sched_run_free_queue(struct drm_gpu_scheduler *sched) { if (!READ_ONCE(sched->pause_submit)) queue_work(sched->submit_wq, &sched->work_free_job); } /** - * drm_sched_run_free_queue - enqueue free-job work if ready - * @sched: scheduler instance - */ -static void drm_sched_run_free_queue(struct drm_gpu_scheduler *sched) -{ - struct drm_sched_job *job; - - spin_lock(&sched->job_list_lock); - job = list_first_entry_or_null(&sched->pending_list, - struct drm_sched_job, list); - if (job && dma_fence_is_signaled(&job->s_fence->finished)) - __drm_sched_run_free_queue(sched); - spin_unlock(&sched->job_list_lock); -} - -/** * drm_sched_job_done - complete a job * @s_job: pointer to the job which is done * @@ -407,7 +377,7 @@ static void drm_sched_job_done(struct drm_sched_job *s_job, int result) dma_fence_get(&s_fence->finished); drm_sched_fence_finished(s_fence, result); dma_fence_put(&s_fence->finished); - __drm_sched_run_free_queue(sched); + drm_sched_run_free_queue(sched); } /** @@ -537,11 +507,37 @@ static void drm_sched_job_begin(struct drm_sched_job *s_job) spin_unlock(&sched->job_list_lock); } +/** + * drm_sched_job_reinsert_on_false_timeout - reinsert the job on a false timeout + * @sched: scheduler instance + * @job: job to be reinserted on the pending list + * + * In the case of a "false timeout" - when a timeout occurs but the GPU isn't + * hung and is making progress, the scheduler must reinsert the job back into + * @sched->pending_list. Otherwise, the job and its resources won't be freed + * through the &struct drm_sched_backend_ops.free_job callback. + * + * This function must be used in "false timeout" cases only. + */ +static void drm_sched_job_reinsert_on_false_timeout(struct drm_gpu_scheduler *sched, + struct drm_sched_job *job) +{ + spin_lock(&sched->job_list_lock); + list_add(&job->list, &sched->pending_list); + + /* After reinserting the job, the scheduler enqueues the free-job work + * again if ready. Otherwise, a signaled job could be added to the + * pending list, but never freed. + */ + drm_sched_run_free_queue(sched); + spin_unlock(&sched->job_list_lock); +} + static void drm_sched_job_timedout(struct work_struct *work) { struct drm_gpu_scheduler *sched; struct drm_sched_job *job; - enum drm_gpu_sched_stat status = DRM_GPU_SCHED_STAT_NOMINAL; + enum drm_gpu_sched_stat status = DRM_GPU_SCHED_STAT_RESET; sched = container_of(work, struct drm_gpu_scheduler, work_tdr.work); @@ -570,6 +566,9 @@ static void drm_sched_job_timedout(struct work_struct *work) job->sched->ops->free_job(job); sched->free_guilty = false; } + + if (status == DRM_GPU_SCHED_STAT_NO_HANG) + drm_sched_job_reinsert_on_false_timeout(sched, job); } else { spin_unlock(&sched->job_list_lock); } @@ -592,6 +591,10 @@ static void drm_sched_job_timedout(struct work_struct *work) * This function is typically used for reset recovery (see the docu of * drm_sched_backend_ops.timedout_job() for details). Do not call it for * scheduler teardown, i.e., before calling drm_sched_fini(). 
+ * + * As it's only used for reset recovery, drivers must not call this function + * in their &struct drm_sched_backend_ops.timedout_job callback when they + * skip a reset using &enum drm_gpu_sched_stat.DRM_GPU_SCHED_STAT_NO_HANG. */ void drm_sched_stop(struct drm_gpu_scheduler *sched, struct drm_sched_job *bad) { @@ -677,6 +680,10 @@ EXPORT_SYMBOL(drm_sched_stop); * drm_sched_backend_ops.timedout_job() for details). Do not call it for * scheduler startup. The scheduler itself is fully operational after * drm_sched_init() succeeded. + * + * As it's only used for reset recovery, drivers must not call this function + * in their &struct drm_sched_backend_ops.timedout_job callback when they + * skip a reset using &enum drm_gpu_sched_stat.DRM_GPU_SCHED_STAT_NO_HANG. */ void drm_sched_start(struct drm_gpu_scheduler *sched, int errno) { @@ -958,13 +965,14 @@ int drm_sched_job_add_resv_dependencies(struct drm_sched_job *job, dma_resv_assert_held(resv); dma_resv_for_each_fence(&cursor, resv, usage, fence) { - /* Make sure to grab an additional ref on the added fence */ - dma_fence_get(fence); - ret = drm_sched_job_add_dependency(job, fence); - if (ret) { - dma_fence_put(fence); + /* + * As drm_sched_job_add_dependency always consumes the fence + * reference (even when it fails), and dma_resv_for_each_fence + * is not obtaining one, we need to grab one before calling. + */ + ret = drm_sched_job_add_dependency(job, dma_fence_get(fence)); + if (ret) return ret; - } } return 0; } @@ -1106,12 +1114,16 @@ drm_sched_select_entity(struct drm_gpu_scheduler *sched) * drm_sched_get_finished_job - fetch the next finished job to be destroyed * * @sched: scheduler instance + * @have_more: are there more finished jobs on the list + * + * Informs the caller through @have_more whether there are more finished jobs + * besides the returned one. * * Returns the next finished job from the pending list (if there is one) * ready for it to be destroyed. 
*/ static struct drm_sched_job * -drm_sched_get_finished_job(struct drm_gpu_scheduler *sched) +drm_sched_get_finished_job(struct drm_gpu_scheduler *sched, bool *have_more) { struct drm_sched_job *job, *next; @@ -1119,22 +1131,25 @@ drm_sched_get_finished_job(struct drm_gpu_scheduler *sched) job = list_first_entry_or_null(&sched->pending_list, struct drm_sched_job, list); - if (job && dma_fence_is_signaled(&job->s_fence->finished)) { /* remove job from pending_list */ list_del_init(&job->list); /* cancel this job's TO timer */ cancel_delayed_work(&sched->work_tdr); - /* make the scheduled timestamp more accurate */ + + *have_more = false; next = list_first_entry_or_null(&sched->pending_list, typeof(*next), list); - if (next) { + /* make the scheduled timestamp more accurate */ if (test_bit(DMA_FENCE_FLAG_TIMESTAMP_BIT, &next->s_fence->scheduled.flags)) next->s_fence->scheduled.timestamp = dma_fence_timestamp(&job->s_fence->finished); + + *have_more = dma_fence_is_signaled(&next->s_fence->finished); + /* start TO timer for next job */ drm_sched_start_timeout(sched); } @@ -1193,12 +1208,15 @@ static void drm_sched_free_job_work(struct work_struct *w) struct drm_gpu_scheduler *sched = container_of(w, struct drm_gpu_scheduler, work_free_job); struct drm_sched_job *job; + bool have_more; - job = drm_sched_get_finished_job(sched); - if (job) + job = drm_sched_get_finished_job(sched, &have_more); + if (job) { sched->ops->free_job(job); + if (have_more) + drm_sched_run_free_queue(sched); + } - drm_sched_run_free_queue(sched); drm_sched_run_job_queue(sched); } @@ -1261,6 +1279,25 @@ static void drm_sched_run_job_work(struct work_struct *w) drm_sched_run_job_queue(sched); } +static struct workqueue_struct *drm_sched_alloc_wq(const char *name) +{ +#if (IS_ENABLED(CONFIG_LOCKDEP)) + static struct lockdep_map map = { + .name = "drm_sched_lockdep_map" + }; + + /* + * Avoid leaking a lockdep map on each drm sched creation and + * destruction by using a single lockdep map for all drm sched + * allocated submit_wq. + */ + + return alloc_ordered_workqueue_lockdep_map(name, WQ_MEM_RECLAIM, &map); +#else + return alloc_ordered_workqueue(name, WQ_MEM_RECLAIM); +#endif +} + /** * drm_sched_init - Init a gpu scheduler instance * @@ -1301,13 +1338,7 @@ int drm_sched_init(struct drm_gpu_scheduler *sched, const struct drm_sched_init_ sched->submit_wq = args->submit_wq; sched->own_submit_wq = false; } else { -#ifdef CONFIG_LOCKDEP - sched->submit_wq = alloc_ordered_workqueue_lockdep_map(args->name, - WQ_MEM_RECLAIM, - &drm_sched_lockdep_map); -#else - sched->submit_wq = alloc_ordered_workqueue(args->name, WQ_MEM_RECLAIM); -#endif + sched->submit_wq = drm_sched_alloc_wq(args->name); if (!sched->submit_wq) return -ENOMEM; @@ -1353,6 +1384,18 @@ Out_check_own: } EXPORT_SYMBOL(drm_sched_init); +static void drm_sched_cancel_remaining_jobs(struct drm_gpu_scheduler *sched) +{ + struct drm_sched_job *job, *tmp; + + /* All other accessors are stopped. No locking necessary. */ + list_for_each_entry_safe_reverse(job, tmp, &sched->pending_list, list) { + sched->ops->cancel_job(job); + list_del(&job->list); + sched->ops->free_job(job); + } +} + /** * drm_sched_fini - Destroy a gpu scheduler * @@ -1360,19 +1403,11 @@ EXPORT_SYMBOL(drm_sched_init); * * Tears down and cleans up the scheduler. * - * This stops submission of new jobs to the hardware through - * drm_sched_backend_ops.run_job(). Consequently, drm_sched_backend_ops.free_job() - * will not be called for all jobs still in drm_gpu_scheduler.pending_list. 
- * There is no solution for this currently. Thus, it is up to the driver to make - * sure that: - * - * a) drm_sched_fini() is only called after for all submitted jobs - * drm_sched_backend_ops.free_job() has been called or that - * b) the jobs for which drm_sched_backend_ops.free_job() has not been called - * after drm_sched_fini() ran are freed manually. - * - * FIXME: Take care of the above problem and prevent this function from leaking - * the jobs in drm_gpu_scheduler.pending_list under any circumstances. + * This stops submission of new jobs to the hardware through &struct + * drm_sched_backend_ops.run_job. If &struct drm_sched_backend_ops.cancel_job + * is implemented, all jobs will be canceled through it and afterwards cleaned + * up through &struct drm_sched_backend_ops.free_job. If cancel_job is not + * implemented, memory could leak. */ void drm_sched_fini(struct drm_gpu_scheduler *sched) { @@ -1390,6 +1425,22 @@ void drm_sched_fini(struct drm_gpu_scheduler *sched) * Prevents reinsertion and marks job_queue as idle, * it will be removed from the rq in drm_sched_entity_fini() * eventually + * + * FIXME: + * This lacks the proper spin_lock(&s_entity->lock) and + * is, therefore, a race condition. Most notably, it + * can race with drm_sched_entity_push_job(). The lock + * cannot be taken here, however, because this would + * lead to lock inversion -> deadlock. + * + * The best solution probably is to enforce the life + * time rule of all entities having to be torn down + * before their scheduler. Then, however, locking could + * be dropped alltogether from this function. + * + * For now, this remains a potential race in all + * drivers that keep entities alive for longer than + * the scheduler. */ s_entity->stopped = true; spin_unlock(&rq->lock); @@ -1402,11 +1453,18 @@ void drm_sched_fini(struct drm_gpu_scheduler *sched) /* Confirm no work left behind accessing device structures */ cancel_delayed_work_sync(&sched->work_tdr); + /* Avoid memory leaks if supported by the driver. */ + if (sched->ops->cancel_job) + drm_sched_cancel_remaining_jobs(sched); + if (sched->own_submit_wq) destroy_workqueue(sched->submit_wq); sched->ready = false; kfree(sched->sched_rq); sched->sched_rq = NULL; + + if (!list_empty(&sched->pending_list)) + dev_warn(sched->dev, "Tearing down scheduler while jobs are pending!\n"); } EXPORT_SYMBOL(drm_sched_fini); diff --git a/drivers/gpu/drm/scheduler/tests/mock_scheduler.c b/drivers/gpu/drm/scheduler/tests/mock_scheduler.c index 7f947ab9d322..8e9ae7d980eb 100644 --- a/drivers/gpu/drm/scheduler/tests/mock_scheduler.c +++ b/drivers/gpu/drm/scheduler/tests/mock_scheduler.c @@ -63,7 +63,7 @@ static void drm_mock_sched_job_complete(struct drm_mock_sched_job *job) lockdep_assert_held(&sched->lock); job->flags |= DRM_MOCK_SCHED_JOB_DONE; - list_move_tail(&job->link, &sched->done_list); + list_del(&job->link); dma_fence_signal_locked(&job->hw_fence); complete(&job->done); } @@ -200,38 +200,82 @@ static struct dma_fence *mock_sched_run_job(struct drm_sched_job *sched_job) return &job->hw_fence; } +/* + * Normally, drivers would take appropriate measures in this callback, such as + * killing the entity the faulty job is associated with, resetting the hardware + * and / or resubmitting non-faulty jobs. + * + * For the mock scheduler, there are no hardware rings to be resetted nor jobs + * to be resubmitted. 
Thus, this function merely ensures that + * a) timedout fences get signaled properly and removed from the pending list + * b) the mock scheduler framework gets informed about the timeout via a flag + * c) The drm_sched_job, not longer needed, gets freed + */ static enum drm_gpu_sched_stat mock_sched_timedout_job(struct drm_sched_job *sched_job) { + struct drm_mock_scheduler *sched = drm_sched_to_mock_sched(sched_job->sched); struct drm_mock_sched_job *job = drm_sched_job_to_mock_job(sched_job); + unsigned long flags; + + if (job->flags & DRM_MOCK_SCHED_JOB_DONT_RESET) { + job->flags |= DRM_MOCK_SCHED_JOB_RESET_SKIPPED; + return DRM_GPU_SCHED_STAT_NO_HANG; + } - job->flags |= DRM_MOCK_SCHED_JOB_TIMEDOUT; + spin_lock_irqsave(&sched->lock, flags); + if (!dma_fence_is_signaled_locked(&job->hw_fence)) { + list_del(&job->link); + job->flags |= DRM_MOCK_SCHED_JOB_TIMEDOUT; + dma_fence_set_error(&job->hw_fence, -ETIMEDOUT); + dma_fence_signal_locked(&job->hw_fence); + } + spin_unlock_irqrestore(&sched->lock, flags); + + dma_fence_put(&job->hw_fence); + drm_sched_job_cleanup(sched_job); + /* Mock job itself is freed by the kunit framework. */ - return DRM_GPU_SCHED_STAT_NOMINAL; + return DRM_GPU_SCHED_STAT_RESET; } static void mock_sched_free_job(struct drm_sched_job *sched_job) { - struct drm_mock_scheduler *sched = - drm_sched_to_mock_sched(sched_job->sched); struct drm_mock_sched_job *job = drm_sched_job_to_mock_job(sched_job); - unsigned long flags; - /* Remove from the scheduler done list. */ - spin_lock_irqsave(&sched->lock, flags); - list_del(&job->link); - spin_unlock_irqrestore(&sched->lock, flags); dma_fence_put(&job->hw_fence); - drm_sched_job_cleanup(sched_job); /* Mock job itself is freed by the kunit framework. */ } +static void mock_sched_cancel_job(struct drm_sched_job *sched_job) +{ + struct drm_mock_scheduler *sched = drm_sched_to_mock_sched(sched_job->sched); + struct drm_mock_sched_job *job = drm_sched_job_to_mock_job(sched_job); + unsigned long flags; + + hrtimer_cancel(&job->timer); + + spin_lock_irqsave(&sched->lock, flags); + if (!dma_fence_is_signaled_locked(&job->hw_fence)) { + list_del(&job->link); + dma_fence_set_error(&job->hw_fence, -ECANCELED); + dma_fence_signal_locked(&job->hw_fence); + } + spin_unlock_irqrestore(&sched->lock, flags); + + /* + * The GPU Scheduler will call drm_sched_backend_ops.free_job(), still. + * Mock job itself is freed by the kunit framework. + */ +} + static const struct drm_sched_backend_ops drm_mock_scheduler_ops = { .run_job = mock_sched_run_job, .timedout_job = mock_sched_timedout_job, - .free_job = mock_sched_free_job + .free_job = mock_sched_free_job, + .cancel_job = mock_sched_cancel_job, }; /** @@ -265,7 +309,6 @@ struct drm_mock_scheduler *drm_mock_sched_new(struct kunit *test, long timeout) sched->hw_timeline.context = dma_fence_context_alloc(1); atomic_set(&sched->hw_timeline.next_seqno, 0); INIT_LIST_HEAD(&sched->job_list); - INIT_LIST_HEAD(&sched->done_list); spin_lock_init(&sched->lock); return sched; @@ -280,38 +323,6 @@ struct drm_mock_scheduler *drm_mock_sched_new(struct kunit *test, long timeout) */ void drm_mock_sched_fini(struct drm_mock_scheduler *sched) { - struct drm_mock_sched_job *job, *next; - unsigned long flags; - LIST_HEAD(list); - - drm_sched_wqueue_stop(&sched->base); - - /* Force complete all unfinished jobs. 
*/ - spin_lock_irqsave(&sched->lock, flags); - list_for_each_entry_safe(job, next, &sched->job_list, link) - list_move_tail(&job->link, &list); - spin_unlock_irqrestore(&sched->lock, flags); - - list_for_each_entry(job, &list, link) - hrtimer_cancel(&job->timer); - - spin_lock_irqsave(&sched->lock, flags); - list_for_each_entry_safe(job, next, &list, link) - drm_mock_sched_job_complete(job); - spin_unlock_irqrestore(&sched->lock, flags); - - /* - * Free completed jobs and jobs not yet processed by the DRM scheduler - * free worker. - */ - spin_lock_irqsave(&sched->lock, flags); - list_for_each_entry_safe(job, next, &sched->done_list, link) - list_move_tail(&job->link, &list); - spin_unlock_irqrestore(&sched->lock, flags); - - list_for_each_entry_safe(job, next, &list, link) - mock_sched_free_job(&job->base); - drm_sched_fini(&sched->base); } diff --git a/drivers/gpu/drm/scheduler/tests/sched_tests.h b/drivers/gpu/drm/scheduler/tests/sched_tests.h index fbba38137f0c..7f31d35780cc 100644 --- a/drivers/gpu/drm/scheduler/tests/sched_tests.h +++ b/drivers/gpu/drm/scheduler/tests/sched_tests.h @@ -11,7 +11,6 @@ #include <linux/hrtimer.h> #include <linux/ktime.h> #include <linux/list.h> -#include <linux/atomic.h> #include <linux/mutex.h> #include <linux/types.h> @@ -49,7 +48,6 @@ struct drm_mock_scheduler { spinlock_t lock; struct list_head job_list; - struct list_head done_list; struct { u64 context; @@ -96,8 +94,10 @@ struct drm_mock_sched_job { struct completion done; -#define DRM_MOCK_SCHED_JOB_DONE 0x1 -#define DRM_MOCK_SCHED_JOB_TIMEDOUT 0x2 +#define DRM_MOCK_SCHED_JOB_DONE 0x1 +#define DRM_MOCK_SCHED_JOB_TIMEDOUT 0x2 +#define DRM_MOCK_SCHED_JOB_DONT_RESET 0x4 +#define DRM_MOCK_SCHED_JOB_RESET_SKIPPED 0x8 unsigned long flags; struct list_head link; diff --git a/drivers/gpu/drm/scheduler/tests/tests_basic.c b/drivers/gpu/drm/scheduler/tests/tests_basic.c index 7230057e0594..82a41a456b0a 100644 --- a/drivers/gpu/drm/scheduler/tests/tests_basic.c +++ b/drivers/gpu/drm/scheduler/tests/tests_basic.c @@ -5,6 +5,8 @@ #include "sched_tests.h" +#define MOCK_TIMEOUT (HZ / 5) + /* * DRM scheduler basic tests should check the basic functional correctness of * the scheduler, including some very light smoke testing. More targeted tests, @@ -28,7 +30,7 @@ static void drm_sched_basic_exit(struct kunit *test) static int drm_sched_timeout_init(struct kunit *test) { - test->priv = drm_mock_sched_new(test, HZ); + test->priv = drm_mock_sched_new(test, MOCK_TIMEOUT); return 0; } @@ -204,6 +206,47 @@ static struct kunit_suite drm_sched_basic = { .test_cases = drm_sched_basic_tests, }; +static void drm_sched_basic_cancel(struct kunit *test) +{ + struct drm_mock_sched_entity *entity; + struct drm_mock_scheduler *sched; + struct drm_mock_sched_job *job; + bool done; + + /* + * Check that drm_sched_fini() uses the cancel_job() callback to cancel + * jobs that are still pending. 
+ */ + + sched = drm_mock_sched_new(test, MAX_SCHEDULE_TIMEOUT); + entity = drm_mock_sched_entity_new(test, DRM_SCHED_PRIORITY_NORMAL, + sched); + + job = drm_mock_sched_job_new(test, entity); + + drm_mock_sched_job_submit(job); + + done = drm_mock_sched_job_wait_scheduled(job, HZ); + KUNIT_ASSERT_TRUE(test, done); + + drm_mock_sched_entity_free(entity); + drm_mock_sched_fini(sched); + + KUNIT_ASSERT_EQ(test, job->hw_fence.error, -ECANCELED); +} + +static struct kunit_case drm_sched_cancel_tests[] = { + KUNIT_CASE(drm_sched_basic_cancel), + {} +}; + +static struct kunit_suite drm_sched_cancel = { + .name = "drm_sched_basic_cancel_tests", + .init = drm_sched_basic_init, + .exit = drm_sched_basic_exit, + .test_cases = drm_sched_cancel_tests, +}; + static void drm_sched_basic_timeout(struct kunit *test) { struct drm_mock_scheduler *sched = test->priv; @@ -227,14 +270,14 @@ static void drm_sched_basic_timeout(struct kunit *test) done = drm_mock_sched_job_wait_scheduled(job, HZ); KUNIT_ASSERT_TRUE(test, done); - done = drm_mock_sched_job_wait_finished(job, HZ / 2); + done = drm_mock_sched_job_wait_finished(job, MOCK_TIMEOUT / 2); KUNIT_ASSERT_FALSE(test, done); KUNIT_ASSERT_EQ(test, job->flags & DRM_MOCK_SCHED_JOB_TIMEDOUT, 0); - done = drm_mock_sched_job_wait_finished(job, HZ); + done = drm_mock_sched_job_wait_finished(job, MOCK_TIMEOUT); KUNIT_ASSERT_FALSE(test, done); KUNIT_ASSERT_EQ(test, @@ -244,8 +287,51 @@ static void drm_sched_basic_timeout(struct kunit *test) drm_mock_sched_entity_free(entity); } +static void drm_sched_skip_reset(struct kunit *test) +{ + struct drm_mock_scheduler *sched = test->priv; + struct drm_mock_sched_entity *entity; + struct drm_mock_sched_job *job; + unsigned int i; + bool done; + + /* + * Submit a single job against a scheduler with the timeout configured + * and verify that if the job is still running, the timeout handler + * will skip the reset and allow the job to complete. 
+ */ + + entity = drm_mock_sched_entity_new(test, + DRM_SCHED_PRIORITY_NORMAL, + sched); + job = drm_mock_sched_job_new(test, entity); + + job->flags = DRM_MOCK_SCHED_JOB_DONT_RESET; + + drm_mock_sched_job_submit(job); + + done = drm_mock_sched_job_wait_scheduled(job, HZ); + KUNIT_ASSERT_TRUE(test, done); + + done = drm_mock_sched_job_wait_finished(job, 2 * MOCK_TIMEOUT); + KUNIT_ASSERT_FALSE(test, done); + + KUNIT_ASSERT_EQ(test, + job->flags & DRM_MOCK_SCHED_JOB_RESET_SKIPPED, + DRM_MOCK_SCHED_JOB_RESET_SKIPPED); + + i = drm_mock_sched_advance(sched, 1); + KUNIT_ASSERT_EQ(test, i, 1); + + done = drm_mock_sched_job_wait_finished(job, HZ); + KUNIT_ASSERT_TRUE(test, done); + + drm_mock_sched_entity_free(entity); +} + static struct kunit_case drm_sched_timeout_tests[] = { KUNIT_CASE(drm_sched_basic_timeout), + KUNIT_CASE(drm_sched_skip_reset), {} }; @@ -471,6 +557,7 @@ static struct kunit_suite drm_sched_credits = { kunit_test_suites(&drm_sched_basic, &drm_sched_timeout, + &drm_sched_cancel, &drm_sched_priority, &drm_sched_modify_sched, &drm_sched_credits); diff --git a/drivers/gpu/drm/sitronix/st7571-i2c.c b/drivers/gpu/drm/sitronix/st7571-i2c.c index eec846892962..a6c4a6738ded 100644 --- a/drivers/gpu/drm/sitronix/st7571-i2c.c +++ b/drivers/gpu/drm/sitronix/st7571-i2c.c @@ -68,6 +68,9 @@ #define ST7571_SET_COLOR_MODE(c) (0x10 | FIELD_PREP(GENMASK(0, 0), (c))) #define ST7571_COMMAND_SET_NORMAL (0x00) +/* ST7567 commands */ +#define ST7567_SET_LCD_BIAS(m) (0xa2 | FIELD_PREP(GENMASK(0, 0), (m))) + #define ST7571_PAGE_HEIGHT 8 #define DRIVER_NAME "st7571" @@ -92,6 +95,7 @@ struct st7571_panel_constraints { struct st7571_panel_data { int (*init)(struct st7571_device *st7571); + int (*parse_dt)(struct st7571_device *st7571); struct st7571_panel_constraints constraints; }; @@ -147,6 +151,7 @@ struct st7571_device { bool ignore_nak; bool grayscale; + bool inverted; u32 height_mm; u32 width_mm; u32 startline; @@ -214,10 +219,11 @@ static int st7571_send_command_list(struct st7571_device *st7571, return ret; } -static inline u8 st7571_transform_xy(const char *p, int x, int y) +static inline u8 st7571_transform_xy(const char *p, int x, int y, u8 bpp) { int xrest = x % 8; u8 result = 0; + u8 row_len = 16 * bpp; /* * Transforms an (x, y) pixel coordinate into a vertical 8-bit @@ -232,7 +238,7 @@ static inline u8 st7571_transform_xy(const char *p, int x, int y) for (int i = 0; i < 8; i++) { int row_idx = y + i; - u8 byte = p[row_idx * 16 + x]; + u8 byte = p[row_idx * row_len + x]; u8 bit = (byte >> xrest) & 1; result |= (bit << i); @@ -299,11 +305,11 @@ static void st7571_prepare_buffer_grayscale(struct st7571_device *st7571, struct iosys_map dst; switch (fb->format->format) { - case DRM_FORMAT_XRGB8888: /* Only support XRGB8888 in monochrome mode */ - dst_pitch = DIV_ROUND_UP(drm_rect_width(rect), 8); + case DRM_FORMAT_XRGB8888: + dst_pitch = DIV_ROUND_UP(drm_rect_width(rect), 4); iosys_map_set_vaddr(&dst, st7571->hwbuf); - drm_fb_xrgb8888_to_mono(&dst, &dst_pitch, vmap, fb, rect, fmtcnv_state); + drm_fb_xrgb8888_to_gray2(&dst, &dst_pitch, vmap, fb, rect, fmtcnv_state); break; case DRM_FORMAT_R1: @@ -329,7 +335,7 @@ static int st7571_fb_update_rect_monochrome(struct drm_framebuffer *fb, struct d for (int y = rect->y1; y < rect->y2; y += ST7571_PAGE_HEIGHT) { for (int x = rect->x1; x < rect->x2; x++) - row[x] = st7571_transform_xy(st7571->hwbuf, x, y); + row[x] = st7571_transform_xy(st7571->hwbuf, x, y, 1); st7571_set_position(st7571, rect->x1, y); @@ -354,14 +360,13 @@ static int 
st7571_fb_update_rect_grayscale(struct drm_framebuffer *fb, struct dr rect->y2 = min_t(unsigned int, round_up(rect->y2, ST7571_PAGE_HEIGHT), st7571->nlines); switch (format) { - case DRM_FORMAT_XRGB8888: - /* Threated as monochrome (R1) */ - fallthrough; case DRM_FORMAT_R1: - x1 = rect->x1; - x2 = rect->x2; + x1 = rect->x1 * 1; + x2 = rect->x2 * 1; break; case DRM_FORMAT_R2: + fallthrough; + case DRM_FORMAT_XRGB8888: x1 = rect->x1 * 2; x2 = rect->x2 * 2; break; @@ -369,7 +374,7 @@ static int st7571_fb_update_rect_grayscale(struct drm_framebuffer *fb, struct dr for (int y = rect->y1; y < rect->y2; y += ST7571_PAGE_HEIGHT) { for (int x = x1; x < x2; x++) - row[x] = st7571_transform_xy(st7571->hwbuf, x, y); + row[x] = st7571_transform_xy(st7571->hwbuf, x, y, 2); st7571_set_position(st7571, rect->x1, y); @@ -382,15 +387,15 @@ static int st7571_fb_update_rect_grayscale(struct drm_framebuffer *fb, struct dr * even if the format is monochrome. * * The bit values maps to the following grayscale: - * 0 0 = White - * 0 1 = Light gray - * 1 0 = Dark gray - * 1 1 = Black + * 0 0 = Black + * 0 1 = Dark gray + * 1 0 = Light gray + * 1 1 = White * * For monochrome formats, write the same value twice to get * either a black or white pixel. */ - if (format == DRM_FORMAT_R1 || format == DRM_FORMAT_XRGB8888) + if (format == DRM_FORMAT_R1) regmap_bulk_write(st7571->regmap, ST7571_DATA_MODE, row + x, 1); } } @@ -550,8 +555,8 @@ static const struct drm_crtc_funcs st7571_crtc_funcs = { * Encoder */ -static void ssd130x_encoder_atomic_enable(struct drm_encoder *encoder, - struct drm_atomic_state *state) +static void st7571_encoder_atomic_enable(struct drm_encoder *encoder, + struct drm_atomic_state *state) { struct drm_device *drm = encoder->dev; struct st7571_device *st7571 = drm_to_st7571(drm); @@ -565,8 +570,8 @@ static void ssd130x_encoder_atomic_enable(struct drm_encoder *encoder, st7571_send_command_list(st7571, &command, 1); } -static void ssd130x_encoder_atomic_disable(struct drm_encoder *encoder, - struct drm_atomic_state *state) +static void st7571_encoder_atomic_disable(struct drm_encoder *encoder, + struct drm_atomic_state *state) { struct drm_device *drm = encoder->dev; struct st7571_device *st7571 = drm_to_st7571(drm); @@ -581,8 +586,8 @@ static const struct drm_encoder_funcs st7571_encoder_funcs = { }; static const struct drm_encoder_helper_funcs st7571_encoder_helper_funcs = { - .atomic_enable = ssd130x_encoder_atomic_enable, - .atomic_disable = ssd130x_encoder_atomic_disable, + .atomic_enable = st7571_encoder_atomic_enable, + .atomic_disable = st7571_encoder_atomic_disable, }; /* @@ -773,6 +778,33 @@ static int st7571_validate_parameters(struct st7571_device *st7571) return 0; } +static int st7567_parse_dt(struct st7571_device *st7567) +{ + struct device *dev = &st7567->client->dev; + struct device_node *np = dev->of_node; + struct display_timing dt; + int ret; + + ret = of_get_display_timing(np, "panel-timing", &dt); + if (ret) { + dev_err(dev, "Failed to get display timing from DT\n"); + return ret; + } + + of_property_read_u32(np, "width-mm", &st7567->width_mm); + of_property_read_u32(np, "height-mm", &st7567->height_mm); + st7567->inverted = of_property_read_bool(np, "sitronix,inverted"); + + st7567->pformat = &st7571_monochrome; + st7567->bpp = 1; + + st7567->startline = dt.vfront_porch.typ; + st7567->nlines = dt.vactive.typ; + st7567->ncols = dt.hactive.typ; + + return 0; +} + static int st7571_parse_dt(struct st7571_device *st7571) { struct device *dev = &st7571->client->dev; @@ -789,6 
+821,7 @@ static int st7571_parse_dt(struct st7571_device *st7571) of_property_read_u32(np, "width-mm", &st7571->width_mm); of_property_read_u32(np, "height-mm", &st7571->height_mm); st7571->grayscale = of_property_read_bool(np, "sitronix,grayscale"); + st7571->inverted = of_property_read_bool(np, "sitronix,inverted"); if (st7571->grayscale) { st7571->pformat = &st7571_grayscale; @@ -804,7 +837,9 @@ static int st7571_parse_dt(struct st7571_device *st7571) st7571->reset = devm_gpiod_get(dev, "reset", GPIOD_OUT_HIGH); if (IS_ERR(st7571->reset)) - return PTR_ERR(st7571->reset); + return dev_err_probe(dev, PTR_ERR(st7571->reset), + "Failed to get reset gpio\n"); + return 0; } @@ -816,6 +851,38 @@ static void st7571_reset(struct st7571_device *st7571) gpiod_set_value_cansleep(st7571->reset, 0); } +static int st7567_lcd_init(struct st7571_device *st7567) +{ + /* + * Most of the initialization sequence is taken directly from the + * referential initial code in the ST7567 datasheet. + */ + u8 commands[] = { + ST7571_DISPLAY_OFF, + + ST7567_SET_LCD_BIAS(1), + + ST7571_SET_SEG_SCAN_DIR(0), + ST7571_SET_COM_SCAN_DIR(1), + + ST7571_SET_REGULATOR_REG(4), + ST7571_SET_CONTRAST_MSB, + ST7571_SET_CONTRAST_LSB(0x20), + + ST7571_SET_START_LINE_MSB, + ST7571_SET_START_LINE_LSB(st7567->startline), + + ST7571_SET_POWER(0x4), /* Power Control, VC: ON, VR: OFF, VF: OFF */ + ST7571_SET_POWER(0x6), /* Power Control, VC: ON, VR: ON, VF: OFF */ + ST7571_SET_POWER(0x7), /* Power Control, VC: ON, VR: ON, VF: ON */ + + ST7571_SET_REVERSE(st7567->inverted ? 1 : 0), + ST7571_SET_ENTIRE_DISPLAY_ON(0), + }; + + return st7571_send_command_list(st7567, commands, ARRAY_SIZE(commands)); +} + static int st7571_lcd_init(struct st7571_device *st7571) { /* @@ -853,7 +920,7 @@ static int st7571_lcd_init(struct st7571_device *st7571) ST7571_SET_COLOR_MODE(st7571->pformat->mode), ST7571_COMMAND_SET_NORMAL, - ST7571_SET_REVERSE(0), + ST7571_SET_REVERSE(st7571->inverted ? 
1 : 0), ST7571_SET_ENTIRE_DISPLAY_ON(0), }; @@ -879,7 +946,7 @@ static int st7571_probe(struct i2c_client *client) i2c_set_clientdata(client, st7571); st7571->pdata = device_get_match_data(&client->dev); - ret = st7571_parse_dt(st7571); + ret = st7571->pdata->parse_dt(st7571); if (ret) return ret; @@ -960,8 +1027,21 @@ static void st7571_remove(struct i2c_client *client) drm_dev_unplug(&st7571->dev); } -struct st7571_panel_data st7571_config = { +static const struct st7571_panel_data st7567_config = { + .init = st7567_lcd_init, + .parse_dt = st7567_parse_dt, + .constraints = { + .min_nlines = 1, + .max_nlines = 64, + .min_ncols = 128, + .max_ncols = 128, + .support_grayscale = false, + }, +}; + +static const struct st7571_panel_data st7571_config = { .init = st7571_lcd_init, + .parse_dt = st7571_parse_dt, .constraints = { .min_nlines = 1, .max_nlines = 128, @@ -972,12 +1052,14 @@ struct st7571_panel_data st7571_config = { }; static const struct of_device_id st7571_of_match[] = { + { .compatible = "sitronix,st7567", .data = &st7567_config }, { .compatible = "sitronix,st7571", .data = &st7571_config }, {}, }; MODULE_DEVICE_TABLE(of, st7571_of_match); static const struct i2c_device_id st7571_id[] = { + { "st7567", 0 }, { "st7571", 0 }, { } }; diff --git a/drivers/gpu/drm/solomon/ssd130x-spi.c b/drivers/gpu/drm/solomon/ssd130x-spi.c index 7c935870f7d2..b52f5fd592a1 100644 --- a/drivers/gpu/drm/solomon/ssd130x-spi.c +++ b/drivers/gpu/drm/solomon/ssd130x-spi.c @@ -74,8 +74,7 @@ static int ssd130x_spi_probe(struct spi_device *spi) t = devm_kzalloc(dev, sizeof(*t), GFP_KERNEL); if (!t) - return dev_err_probe(dev, -ENOMEM, - "Failed to allocate SPI transport data\n"); + return -ENOMEM; t->spi = spi; t->dc = dc; diff --git a/drivers/gpu/drm/solomon/ssd130x.c b/drivers/gpu/drm/solomon/ssd130x.c index dd2006d51c7a..eec43d1a5595 100644 --- a/drivers/gpu/drm/solomon/ssd130x.c +++ b/drivers/gpu/drm/solomon/ssd130x.c @@ -974,7 +974,7 @@ static void ssd130x_clear_screen(struct ssd130x_device *ssd130x, u8 *data_array) static void ssd132x_clear_screen(struct ssd130x_device *ssd130x, u8 *data_array) { - unsigned int columns = DIV_ROUND_UP(ssd130x->height, SSD132X_SEGMENT_WIDTH); + unsigned int columns = DIV_ROUND_UP(ssd130x->width, SSD132X_SEGMENT_WIDTH); unsigned int height = ssd130x->height; memset(data_array, 0, columns * height); diff --git a/drivers/gpu/drm/sti/sti_hda.c b/drivers/gpu/drm/sti/sti_hda.c index d202b6c1eb8f..2c015f563de9 100644 --- a/drivers/gpu/drm/sti/sti_hda.c +++ b/drivers/gpu/drm/sti/sti_hda.c @@ -246,6 +246,7 @@ struct sti_hda { struct device dev; struct drm_device *drm_dev; struct drm_display_mode mode; + struct drm_bridge bridge; void __iomem *regs; void __iomem *video_dacs_ctrl; struct clk *clk_pix; @@ -262,6 +263,11 @@ struct sti_hda_connector { #define to_sti_hda_connector(x) \ container_of(x, struct sti_hda_connector, drm_connector) +static struct sti_hda *drm_bridge_to_sti_hda(struct drm_bridge *bridge) +{ + return container_of(bridge, struct sti_hda, bridge); +} + static u32 hda_read(struct sti_hda *hda, int offset) { return readl(hda->regs + offset); @@ -401,7 +407,7 @@ static void sti_hda_configure_awg(struct sti_hda *hda, u32 *awg_instr, int nb) static void sti_hda_disable(struct drm_bridge *bridge) { - struct sti_hda *hda = bridge->driver_private; + struct sti_hda *hda = drm_bridge_to_sti_hda(bridge); u32 val; if (!hda->enabled) @@ -426,7 +432,7 @@ static void sti_hda_disable(struct drm_bridge *bridge) static void sti_hda_pre_enable(struct drm_bridge *bridge) { - struct 
sti_hda *hda = bridge->driver_private; + struct sti_hda *hda = drm_bridge_to_sti_hda(bridge); u32 val, i, mode_idx; u32 src_filter_y, src_filter_c; u32 *coef_y, *coef_c; @@ -517,7 +523,7 @@ static void sti_hda_set_mode(struct drm_bridge *bridge, const struct drm_display_mode *mode, const struct drm_display_mode *adjusted_mode) { - struct sti_hda *hda = bridge->driver_private; + struct sti_hda *hda = drm_bridge_to_sti_hda(bridge); u32 mode_idx; int hddac_rate; int ret; @@ -677,7 +683,6 @@ static int sti_hda_bind(struct device *dev, struct device *master, void *data) struct drm_encoder *encoder; struct sti_hda_connector *connector; struct drm_connector *drm_connector; - struct drm_bridge *bridge; int err; /* Set the drm device handle */ @@ -693,13 +698,7 @@ static int sti_hda_bind(struct device *dev, struct device *master, void *data) connector->hda = hda; - bridge = devm_kzalloc(dev, sizeof(*bridge), GFP_KERNEL); - if (!bridge) - return -ENOMEM; - - bridge->driver_private = hda; - bridge->funcs = &sti_hda_bridge_funcs; - drm_bridge_attach(encoder, bridge, NULL, 0); + drm_bridge_attach(encoder, &hda->bridge, NULL, 0); connector->encoder = encoder; @@ -745,9 +744,9 @@ static int sti_hda_probe(struct platform_device *pdev) DRM_INFO("%s\n", __func__); - hda = devm_kzalloc(dev, sizeof(*hda), GFP_KERNEL); - if (!hda) - return -ENOMEM; + hda = devm_drm_bridge_alloc(dev, struct sti_hda, bridge, &sti_hda_bridge_funcs); + if (IS_ERR(hda)) + return PTR_ERR(hda); hda->dev = pdev->dev; hda->regs = devm_platform_ioremap_resource_byname(pdev, "hda-reg"); diff --git a/drivers/gpu/drm/sti/sti_hdmi.c b/drivers/gpu/drm/sti/sti_hdmi.c index 37b8d619066e..4e7c3d78b2b9 100644 --- a/drivers/gpu/drm/sti/sti_hdmi.c +++ b/drivers/gpu/drm/sti/sti_hdmi.c @@ -168,6 +168,11 @@ struct sti_hdmi_connector { #define to_sti_hdmi_connector(x) \ container_of(x, struct sti_hdmi_connector, drm_connector) +static struct sti_hdmi *drm_bridge_to_sti_hdmi(struct drm_bridge *bridge) +{ + return container_of(bridge, struct sti_hdmi, bridge); +} + static const struct drm_prop_enum_list colorspace_mode_names[] = { { HDMI_COLORSPACE_RGB, "rgb" }, { HDMI_COLORSPACE_YUV422, "yuv422" }, @@ -749,7 +754,7 @@ static void hdmi_debugfs_init(struct sti_hdmi *hdmi, struct drm_minor *minor) static void sti_hdmi_disable(struct drm_bridge *bridge) { - struct sti_hdmi *hdmi = bridge->driver_private; + struct sti_hdmi *hdmi = drm_bridge_to_sti_hdmi(bridge); u32 val = hdmi_read(hdmi, HDMI_CFG); @@ -881,7 +886,7 @@ static int hdmi_audio_configure(struct sti_hdmi *hdmi) static void sti_hdmi_pre_enable(struct drm_bridge *bridge) { - struct sti_hdmi *hdmi = bridge->driver_private; + struct sti_hdmi *hdmi = drm_bridge_to_sti_hdmi(bridge); DRM_DEBUG_DRIVER("\n"); @@ -936,7 +941,7 @@ static void sti_hdmi_set_mode(struct drm_bridge *bridge, const struct drm_display_mode *mode, const struct drm_display_mode *adjusted_mode) { - struct sti_hdmi *hdmi = bridge->driver_private; + struct sti_hdmi *hdmi = drm_bridge_to_sti_hdmi(bridge); int ret; DRM_DEBUG_DRIVER("\n"); @@ -1273,7 +1278,6 @@ static int sti_hdmi_bind(struct device *dev, struct device *master, void *data) struct sti_hdmi_connector *connector; struct cec_connector_info conn_info; struct drm_connector *drm_connector; - struct drm_bridge *bridge; int err; /* Set the drm device handle */ @@ -1289,13 +1293,7 @@ static int sti_hdmi_bind(struct device *dev, struct device *master, void *data) connector->hdmi = hdmi; - bridge = devm_kzalloc(dev, sizeof(*bridge), GFP_KERNEL); - if (!bridge) - return -EINVAL; - - 
bridge->driver_private = hdmi; - bridge->funcs = &sti_hdmi_bridge_funcs; - drm_bridge_attach(encoder, bridge, NULL, 0); + drm_bridge_attach(encoder, &hdmi->bridge, NULL, 0); connector->encoder = encoder; @@ -1385,9 +1383,9 @@ static int sti_hdmi_probe(struct platform_device *pdev) DRM_INFO("%s\n", __func__); - hdmi = devm_kzalloc(dev, sizeof(*hdmi), GFP_KERNEL); - if (!hdmi) - return -ENOMEM; + hdmi = devm_drm_bridge_alloc(dev, struct sti_hdmi, bridge, &sti_hdmi_bridge_funcs); + if (IS_ERR(hdmi)) + return PTR_ERR(hdmi); ddc = of_parse_phandle(pdev->dev.of_node, "ddc", 0); if (ddc) { diff --git a/drivers/gpu/drm/sti/sti_hdmi.h b/drivers/gpu/drm/sti/sti_hdmi.h index 6d4c3f57bc46..91d43dd46f13 100644 --- a/drivers/gpu/drm/sti/sti_hdmi.h +++ b/drivers/gpu/drm/sti/sti_hdmi.h @@ -12,6 +12,7 @@ #include <media/cec-notifier.h> +#include <drm/drm_bridge.h> #include <drm/drm_modes.h> #include <drm/drm_property.h> @@ -86,6 +87,7 @@ struct sti_hdmi { struct hdmi_audio_params audio; struct drm_connector *drm_connector; struct cec_notifier *notifier; + struct drm_bridge bridge; }; u32 hdmi_read(struct sti_hdmi *hdmi, int offset); diff --git a/drivers/gpu/drm/sti/sti_hqvdp.c b/drivers/gpu/drm/sti/sti_hqvdp.c index 03684062309b..b76606e9a82d 100644 --- a/drivers/gpu/drm/sti/sti_hqvdp.c +++ b/drivers/gpu/drm/sti/sti_hqvdp.c @@ -744,7 +744,7 @@ static bool sti_hqvdp_check_hw_scaling(struct sti_hqvdp *hqvdp, inv_zy = DIV_ROUND_UP(src_h, dst_h); - return (inv_zy <= lfw) ? true : false; + return inv_zy <= lfw; } /** diff --git a/drivers/gpu/drm/stm/drv.c b/drivers/gpu/drm/stm/drv.c index 8ebcaf953782..ab00d1a6140c 100644 --- a/drivers/gpu/drm/stm/drv.c +++ b/drivers/gpu/drm/stm/drv.c @@ -236,8 +236,18 @@ static void stm_drm_platform_shutdown(struct platform_device *pdev) drm_atomic_helper_shutdown(platform_get_drvdata(pdev)); } +static struct ltdc_plat_data stm_drm_plat_data = { + .pad_max_freq_hz = 90000000, +}; + +static struct ltdc_plat_data stm_drm_plat_data_mp25 = { + .pad_max_freq_hz = 150000000, +}; + static const struct of_device_id drv_dt_ids[] = { - { .compatible = "st,stm32-ltdc"}, + { .compatible = "st,stm32-ltdc", .data = &stm_drm_plat_data, }, + { .compatible = "st,stm32mp251-ltdc", .data = &stm_drm_plat_data_mp25, }, + { .compatible = "st,stm32mp255-ltdc", .data = &stm_drm_plat_data_mp25, }, { /* end node */ }, }; MODULE_DEVICE_TABLE(of, drv_dt_ids); diff --git a/drivers/gpu/drm/stm/ltdc.c b/drivers/gpu/drm/stm/ltdc.c index ba315c66a04d..d1501e86a5b1 100644 --- a/drivers/gpu/drm/stm/ltdc.c +++ b/drivers/gpu/drm/stm/ltdc.c @@ -14,6 +14,7 @@ #include <linux/interrupt.h> #include <linux/media-bus-format.h> #include <linux/module.h> +#include <linux/of.h> #include <linux/of_graph.h> #include <linux/pinctrl/consumer.h> #include <linux/platform_device.h> @@ -51,6 +52,7 @@ #define HWVER_10300 0x010300 #define HWVER_20101 0x020101 #define HWVER_40100 0x040100 +#define HWVER_40101 0x040101 /* * The address of some registers depends on the HW version: such registers have @@ -641,7 +643,7 @@ static inline void ltdc_set_ycbcr_config(struct drm_plane *plane, u32 drm_pix_fm break; default: /* RGB or not a YCbCr supported format */ - DRM_ERROR("Unsupported pixel format: %u\n", drm_pix_fmt); + drm_err(plane->dev, "Unsupported pixel format: %u\n", drm_pix_fmt); return; } @@ -664,18 +666,19 @@ static inline void ltdc_set_ycbcr_coeffs(struct drm_plane *plane) u32 lofs = plane->index * LAY_OFS; if (enc != DRM_COLOR_YCBCR_BT601 && enc != DRM_COLOR_YCBCR_BT709) { - DRM_ERROR("color encoding %d not supported, use 
bt601 by default\n", enc); + drm_err(plane->dev, "color encoding %d not supported, use bt601 by default\n", enc); /* set by default color encoding to DRM_COLOR_YCBCR_BT601 */ enc = DRM_COLOR_YCBCR_BT601; } if (ran != DRM_COLOR_YCBCR_LIMITED_RANGE && ran != DRM_COLOR_YCBCR_FULL_RANGE) { - DRM_ERROR("color range %d not supported, use limited range by default\n", ran); + drm_err(plane->dev, + "color range %d not supported, use limited range by default\n", ran); /* set by default color range to DRM_COLOR_YCBCR_LIMITED_RANGE */ ran = DRM_COLOR_YCBCR_LIMITED_RANGE; } - DRM_DEBUG_DRIVER("Color encoding=%d, range=%d\n", enc, ran); + drm_err(plane->dev, "Color encoding=%d, range=%d\n", enc, ran); regmap_write(ldev->regmap, LTDC_L1CYR0R + lofs, ltdc_ycbcr2rgb_coeffs[enc][ran][0]); regmap_write(ldev->regmap, LTDC_L1CYR1R + lofs, @@ -774,7 +777,7 @@ static void ltdc_crtc_atomic_enable(struct drm_crtc *crtc, struct ltdc_device *ldev = crtc_to_ltdc(crtc); struct drm_device *ddev = crtc->dev; - DRM_DEBUG_DRIVER("\n"); + drm_dbg_driver(crtc->dev, "\n"); pm_runtime_get_sync(ddev->dev); @@ -798,7 +801,7 @@ static void ltdc_crtc_atomic_disable(struct drm_crtc *crtc, struct drm_device *ddev = crtc->dev; int layer_index = 0; - DRM_DEBUG_DRIVER("\n"); + drm_dbg_driver(crtc->dev, "\n"); drm_crtc_vblank_off(crtc); @@ -835,9 +838,15 @@ ltdc_crtc_mode_valid(struct drm_crtc *crtc, int target_max = target + CLK_TOLERANCE_HZ; int result; + if (ldev->lvds_clk) { + result = clk_round_rate(ldev->lvds_clk, target); + drm_dbg_driver(crtc->dev, "lvds pixclk rate target %d, available %d\n", + target, result); + } + result = clk_round_rate(ldev->pixel_clk, target); - DRM_DEBUG_DRIVER("clk rate target %d, available %d\n", target, result); + drm_dbg_driver(crtc->dev, "clk rate target %d, available %d\n", target, result); /* Filter modes according to the max frequency supported by the pads */ if (result > ldev->caps.pad_max_freq_hz) @@ -872,14 +881,14 @@ static bool ltdc_crtc_mode_fixup(struct drm_crtc *crtc, int rate = mode->clock * 1000; if (clk_set_rate(ldev->pixel_clk, rate) < 0) { - DRM_ERROR("Cannot set rate (%dHz) for pixel clk\n", rate); + drm_err(crtc->dev, "Cannot set rate (%dHz) for pixel clk\n", rate); return false; } adjusted_mode->clock = clk_get_rate(ldev->pixel_clk) / 1000; - DRM_DEBUG_DRIVER("requested clock %dkHz, adjusted clock %dkHz\n", - mode->clock, adjusted_mode->clock); + drm_dbg_driver(crtc->dev, "requested clock %dkHz, adjusted clock %dkHz\n", + mode->clock, adjusted_mode->clock); return true; } @@ -934,20 +943,20 @@ static void ltdc_crtc_mode_set_nofb(struct drm_crtc *crtc) if (!pm_runtime_active(ddev->dev)) { ret = pm_runtime_get_sync(ddev->dev); if (ret) { - DRM_ERROR("Failed to set mode, cannot get sync\n"); + drm_err(crtc->dev, "Failed to set mode, cannot get sync\n"); return; } } - DRM_DEBUG_DRIVER("CRTC:%d mode:%s\n", crtc->base.id, mode->name); - DRM_DEBUG_DRIVER("Video mode: %dx%d", mode->hdisplay, mode->vdisplay); - DRM_DEBUG_DRIVER(" hfp %d hbp %d hsl %d vfp %d vbp %d vsl %d\n", - mode->hsync_start - mode->hdisplay, - mode->htotal - mode->hsync_end, - mode->hsync_end - mode->hsync_start, - mode->vsync_start - mode->vdisplay, - mode->vtotal - mode->vsync_end, - mode->vsync_end - mode->vsync_start); + drm_dbg_driver(crtc->dev, "CRTC:%d mode:%s\n", crtc->base.id, mode->name); + drm_dbg_driver(crtc->dev, "Video mode: %dx%d", mode->hdisplay, mode->vdisplay); + drm_dbg_driver(crtc->dev, " hfp %d hbp %d hsl %d vfp %d vbp %d vsl %d\n", + mode->hsync_start - mode->hdisplay, + mode->htotal - 
mode->hsync_end, + mode->hsync_end - mode->hsync_start, + mode->vsync_start - mode->vdisplay, + mode->vtotal - mode->vsync_end, + mode->vsync_end - mode->vsync_start); /* Convert video timings to ltdc timings */ hsync = mode->hsync_end - mode->hsync_start - 1; @@ -1033,7 +1042,7 @@ static void ltdc_crtc_atomic_flush(struct drm_crtc *crtc, struct drm_device *ddev = crtc->dev; struct drm_pending_vblank_event *event = crtc->state->event; - DRM_DEBUG_ATOMIC("\n"); + drm_dbg_atomic(crtc->dev, "\n"); ltdc_crtc_update_clut(crtc); @@ -1121,7 +1130,7 @@ static int ltdc_crtc_enable_vblank(struct drm_crtc *crtc) struct ltdc_device *ldev = crtc_to_ltdc(crtc); struct drm_crtc_state *state = crtc->state; - DRM_DEBUG_DRIVER("\n"); + drm_dbg_driver(crtc->dev, "\n"); if (state->enable) regmap_set_bits(ldev->regmap, LTDC_IER, IER_LIE); @@ -1135,7 +1144,7 @@ static void ltdc_crtc_disable_vblank(struct drm_crtc *crtc) { struct ltdc_device *ldev = crtc_to_ltdc(crtc); - DRM_DEBUG_DRIVER("\n"); + drm_dbg_driver(crtc->dev, "\n"); regmap_clear_bits(ldev->regmap, LTDC_IER, IER_LIE); } @@ -1144,11 +1153,11 @@ static int ltdc_crtc_set_crc_source(struct drm_crtc *crtc, const char *source) struct ltdc_device *ldev; int ret; - DRM_DEBUG_DRIVER("\n"); - if (!crtc) return -ENODEV; + drm_dbg_driver(crtc->dev, "\n"); + ldev = crtc_to_ltdc(crtc); if (source && strcmp(source, "auto") == 0) { @@ -1168,14 +1177,14 @@ static int ltdc_crtc_set_crc_source(struct drm_crtc *crtc, const char *source) static int ltdc_crtc_verify_crc_source(struct drm_crtc *crtc, const char *source, size_t *values_cnt) { - DRM_DEBUG_DRIVER("\n"); - if (!crtc) return -ENODEV; + drm_dbg_driver(crtc->dev, "\n"); + if (source && strcmp(source, "auto") != 0) { - DRM_DEBUG_DRIVER("Unknown CRC source %s for %s\n", - source, crtc->name); + drm_dbg_driver(crtc->dev, "Unknown CRC source %s for %s\n", + source, crtc->name); return -EINVAL; } @@ -1233,7 +1242,7 @@ static int ltdc_plane_atomic_check(struct drm_plane *plane, struct drm_framebuffer *fb = new_plane_state->fb; u32 src_w, src_h; - DRM_DEBUG_DRIVER("\n"); + drm_dbg_driver(plane->dev, "\n"); if (!fb) return 0; @@ -1244,7 +1253,7 @@ static int ltdc_plane_atomic_check(struct drm_plane *plane, /* Reject scaling */ if (src_w != new_plane_state->crtc_w || src_h != new_plane_state->crtc_h) { - DRM_DEBUG_DRIVER("Scaling is not supported"); + drm_dbg_driver(plane->dev, "Scaling is not supported"); return -EINVAL; } @@ -1270,7 +1279,7 @@ static void ltdc_plane_atomic_update(struct drm_plane *plane, enum ltdc_pix_fmt pf; if (!newstate->crtc || !fb) { - DRM_DEBUG_DRIVER("fb or crtc NULL"); + drm_dbg_driver(plane->dev, "fb or crtc NULL"); return; } @@ -1280,11 +1289,11 @@ static void ltdc_plane_atomic_update(struct drm_plane *plane, src_w = newstate->src_w >> 16; src_h = newstate->src_h >> 16; - DRM_DEBUG_DRIVER("plane:%d fb:%d (%dx%d)@(%d,%d) -> (%dx%d)@(%d,%d)\n", - plane->base.id, fb->base.id, - src_w, src_h, src_x, src_y, - newstate->crtc_w, newstate->crtc_h, - newstate->crtc_x, newstate->crtc_y); + drm_dbg_driver(plane->dev, "plane:%d fb:%d (%dx%d)@(%d,%d) -> (%dx%d)@(%d,%d)\n", + plane->base.id, fb->base.id, + src_w, src_h, src_x, src_y, + newstate->crtc_w, newstate->crtc_h, + newstate->crtc_x, newstate->crtc_y); regmap_read(ldev->regmap, LTDC_BPCR, &bpcr); @@ -1312,8 +1321,8 @@ static void ltdc_plane_atomic_update(struct drm_plane *plane, val = ltdc_set_flexible_pixel_format(plane, pf); if (val == NB_PF) { - DRM_ERROR("Pixel format %.4s not supported\n", - (char *)&fb->format->format); + drm_err(fb->dev, 
"Pixel format %.4s not supported\n", + (char *)&fb->format->format); val = 0; /* set by default ARGB 32 bits */ } regmap_write_bits(ldev->regmap, LTDC_L1PFCR + lofs, LXPFCR_PF, val); @@ -1350,7 +1359,7 @@ static void ltdc_plane_atomic_update(struct drm_plane *plane, if (newstate->rotation & DRM_MODE_REFLECT_Y) paddr += (fb->pitches[0] * (y1 - y0)); - DRM_DEBUG_DRIVER("fb: phys 0x%08x", paddr); + drm_dbg_driver(fb->dev, "fb: phys 0x%08x", paddr); regmap_write(ldev->regmap, LTDC_L1CFBAR + lofs, paddr); /* Configures the color frame buffer pitch in bytes & line length */ @@ -1517,8 +1526,8 @@ static void ltdc_plane_atomic_disable(struct drm_plane *plane, regmap_write_bits(ldev->regmap, LTDC_L1RCR + lofs, LXRCR_IMR | LXRCR_VBR | LXRCR_GRMSK, LXRCR_VBR); - DRM_DEBUG_DRIVER("CRTC:%d plane:%d\n", - oldstate->crtc->base.id, plane->base.id); + drm_dbg_driver(plane->dev, "CRTC:%d plane:%d\n", + oldstate->crtc->base.id, plane->base.id); } static void ltdc_plane_atomic_print_state(struct drm_printer *p, @@ -1632,7 +1641,7 @@ static struct drm_plane *ltdc_plane_create(struct drm_device *ddev, drm_plane_create_alpha_property(plane); - DRM_DEBUG_DRIVER("plane:%d created\n", plane->base.id); + drm_dbg_driver(plane->dev, "plane:%d created\n", plane->base.id); return plane; } @@ -1647,7 +1656,7 @@ static int ltdc_crtc_init(struct drm_device *ddev, struct drm_crtc *crtc) primary = ltdc_plane_create(ddev, DRM_PLANE_TYPE_PRIMARY, 0); if (!primary) { - DRM_ERROR("Can not create primary plane\n"); + drm_err(ddev, "Can not create primary plane\n"); return -EINVAL; } @@ -1668,7 +1677,7 @@ static int ltdc_crtc_init(struct drm_device *ddev, struct drm_crtc *crtc) ret = drmm_crtc_init_with_planes(ddev, crtc, primary, NULL, <dc_crtc_funcs, NULL); if (ret) { - DRM_ERROR("Can not initialize CRTC\n"); + drm_err(ddev, "Can not initialize CRTC\n"); return ret; } @@ -1677,13 +1686,13 @@ static int ltdc_crtc_init(struct drm_device *ddev, struct drm_crtc *crtc) drm_mode_crtc_set_gamma_size(crtc, CLUT_SIZE); drm_crtc_enable_color_mgmt(crtc, 0, false, CLUT_SIZE); - DRM_DEBUG_DRIVER("CRTC:%d created\n", crtc->base.id); + drm_dbg_driver(ddev, "CRTC:%d created\n", crtc->base.id); /* Add planes. Note : the first layer is used by primary plane */ for (i = 1; i < ldev->caps.nb_layers; i++) { overlay = ltdc_plane_create(ddev, DRM_PLANE_TYPE_OVERLAY, i); if (!overlay) { - DRM_ERROR("Can not create overlay plane %d\n", i); + drm_err(ddev, "Can not create overlay plane %d\n", i); return -ENOMEM; } if (ldev->caps.dynamic_zorder) @@ -1704,7 +1713,7 @@ static void ltdc_encoder_disable(struct drm_encoder *encoder) struct drm_device *ddev = encoder->dev; struct ltdc_device *ldev = ddev->dev_private; - DRM_DEBUG_DRIVER("\n"); + drm_dbg_driver(encoder->dev, "\n"); /* Disable LTDC */ regmap_clear_bits(ldev->regmap, LTDC_GCR, GCR_LTDCEN); @@ -1718,7 +1727,7 @@ static void ltdc_encoder_enable(struct drm_encoder *encoder) struct drm_device *ddev = encoder->dev; struct ltdc_device *ldev = ddev->dev_private; - DRM_DEBUG_DRIVER("\n"); + drm_dbg_driver(encoder->dev, "\n"); /* set fifo underrun threshold register */ if (ldev->caps.fifo_threshold) @@ -1734,7 +1743,7 @@ static void ltdc_encoder_mode_set(struct drm_encoder *encoder, { struct drm_device *ddev = encoder->dev; - DRM_DEBUG_DRIVER("\n"); + drm_dbg_driver(encoder->dev, "\n"); /* * Set to default state the pinctrl only with DPI type. 
@@ -1770,7 +1779,7 @@ static int ltdc_encoder_init(struct drm_device *ddev, struct drm_bridge *bridge) if (ret) return ret; - DRM_DEBUG_DRIVER("Bridge encoder:%d created\n", encoder->base.id); + drm_dbg_driver(encoder->dev, "Bridge encoder:%d created\n", encoder->base.id); return 0; } @@ -1779,6 +1788,7 @@ static int ltdc_get_caps(struct drm_device *ddev) { struct ltdc_device *ldev = ddev->dev_private; u32 bus_width_log2, lcr, gc2r; + const struct ltdc_plat_data *pdata = of_device_get_match_data(ddev->dev); /* * at least 1 layer must be managed & the number of layers @@ -1794,6 +1804,8 @@ static int ltdc_get_caps(struct drm_device *ddev) ldev->caps.bus_width = 8 << bus_width_log2; regmap_read(ldev->regmap, LTDC_IDR, &ldev->caps.hw_version); + ldev->caps.pad_max_freq_hz = pdata->pad_max_freq_hz; + switch (ldev->caps.hw_version) { case HWVER_10200: case HWVER_10300: @@ -1811,7 +1823,6 @@ static int ltdc_get_caps(struct drm_device *ddev) * does not work on 2nd layer. */ ldev->caps.non_alpha_only_l1 = true; - ldev->caps.pad_max_freq_hz = 90000000; if (ldev->caps.hw_version == HWVER_10200) ldev->caps.pad_max_freq_hz = 65000000; ldev->caps.nb_irq = 2; @@ -1842,6 +1853,7 @@ static int ltdc_get_caps(struct drm_device *ddev) ldev->caps.fifo_threshold = false; break; case HWVER_40100: + case HWVER_40101: ldev->caps.layer_ofs = LAY_OFS_1; ldev->caps.layer_regs = ltdc_layer_regs_a2; ldev->caps.pix_fmt_hw = ltdc_pix_fmt_a2; @@ -1849,7 +1861,6 @@ static int ltdc_get_caps(struct drm_device *ddev) ldev->caps.pix_fmt_nb = ARRAY_SIZE(ltdc_drm_fmt_a2); ldev->caps.pix_fmt_flex = true; ldev->caps.non_alpha_only_l1 = false; - ldev->caps.pad_max_freq_hz = 90000000; ldev->caps.nb_irq = 2; ldev->caps.ycbcr_input = true; ldev->caps.ycbcr_output = true; @@ -1870,8 +1881,12 @@ void ltdc_suspend(struct drm_device *ddev) { struct ltdc_device *ldev = ddev->dev_private; - DRM_DEBUG_DRIVER("\n"); + drm_dbg_driver(ddev, "\n"); clk_disable_unprepare(ldev->pixel_clk); + if (ldev->bus_clk) + clk_disable_unprepare(ldev->bus_clk); + if (ldev->lvds_clk) + clk_disable_unprepare(ldev->lvds_clk); } int ltdc_resume(struct drm_device *ddev) @@ -1879,15 +1894,29 @@ int ltdc_resume(struct drm_device *ddev) struct ltdc_device *ldev = ddev->dev_private; int ret; - DRM_DEBUG_DRIVER("\n"); + drm_dbg_driver(ddev, "\n"); ret = clk_prepare_enable(ldev->pixel_clk); if (ret) { - DRM_ERROR("failed to enable pixel clock (%d)\n", ret); + drm_err(ddev, "failed to enable pixel clock (%d)\n", ret); return ret; } - return 0; + if (ldev->bus_clk) { + ret = clk_prepare_enable(ldev->bus_clk); + if (ret) { + drm_err(ddev, "failed to enable bus clock (%d)\n", ret); + return ret; + } + } + + if (ldev->lvds_clk) { + ret = clk_prepare_enable(ldev->lvds_clk); + if (ret) + drm_err(ddev, "failed to prepare lvds clock\n"); + } + + return ret; } int ltdc_load(struct drm_device *ddev) @@ -1903,7 +1932,7 @@ int ltdc_load(struct drm_device *ddev) int irq, i, nb_endpoints; int ret = -ENODEV; - DRM_DEBUG_DRIVER("\n"); + drm_dbg_driver(ddev, "\n"); /* Get number of endpoints */ nb_endpoints = of_graph_get_endpoint_count(np); @@ -1913,15 +1942,29 @@ int ltdc_load(struct drm_device *ddev) ldev->pixel_clk = devm_clk_get(dev, "lcd"); if (IS_ERR(ldev->pixel_clk)) { if (PTR_ERR(ldev->pixel_clk) != -EPROBE_DEFER) - DRM_ERROR("Unable to get lcd clock\n"); + drm_err(ddev, "Unable to get lcd clock\n"); return PTR_ERR(ldev->pixel_clk); } if (clk_prepare_enable(ldev->pixel_clk)) { - DRM_ERROR("Unable to prepare pixel clock\n"); + drm_err(ddev, "Unable to prepare pixel clock\n"); 
return -ENODEV; } + if (of_device_is_compatible(np, "st,stm32mp251-ltdc") || + of_device_is_compatible(np, "st,stm32mp255-ltdc")) { + ldev->bus_clk = devm_clk_get(dev, "bus"); + if (IS_ERR(ldev->bus_clk)) + return dev_err_probe(dev, PTR_ERR(ldev->bus_clk), + "Unable to get bus clock\n"); + + ret = clk_prepare_enable(ldev->bus_clk); + if (ret) { + drm_err(ddev, "Unable to prepare bus clock\n"); + return ret; + } + } + /* Get endpoints if any */ for (i = 0; i < nb_endpoints; i++) { ret = drm_of_find_panel_or_bridge(np, 0, i, &panel, &bridge); @@ -1939,7 +1982,7 @@ int ltdc_load(struct drm_device *ddev) if (panel) { bridge = drmm_panel_bridge_add(ddev, panel); if (IS_ERR(bridge)) { - DRM_ERROR("panel-bridge endpoint %d\n", i); + drm_err(ddev, "panel-bridge endpoint %d\n", i); ret = PTR_ERR(bridge); goto err; } @@ -1949,12 +1992,16 @@ int ltdc_load(struct drm_device *ddev) ret = ltdc_encoder_init(ddev, bridge); if (ret) { if (ret != -EPROBE_DEFER) - DRM_ERROR("init encoder endpoint %d\n", i); + drm_err(ddev, "init encoder endpoint %d\n", i); goto err; } } } + ldev->lvds_clk = devm_clk_get(dev, "lvds"); + if (IS_ERR(ldev->lvds_clk)) + ldev->lvds_clk = NULL; + rstc = devm_reset_control_get_exclusive(dev, NULL); mutex_init(&ldev->err_lock); @@ -1967,29 +2014,29 @@ int ltdc_load(struct drm_device *ddev) ldev->regs = devm_platform_ioremap_resource(pdev, 0); if (IS_ERR(ldev->regs)) { - DRM_ERROR("Unable to get ltdc registers\n"); + drm_err(ddev, "Unable to get ltdc registers\n"); ret = PTR_ERR(ldev->regs); goto err; } ldev->regmap = devm_regmap_init_mmio(&pdev->dev, ldev->regs, &stm32_ltdc_regmap_cfg); if (IS_ERR(ldev->regmap)) { - DRM_ERROR("Unable to regmap ltdc registers\n"); + drm_err(ddev, "Unable to regmap ltdc registers\n"); ret = PTR_ERR(ldev->regmap); goto err; } ret = ltdc_get_caps(ddev); if (ret) { - DRM_ERROR("hardware identifier (0x%08x) not supported!\n", - ldev->caps.hw_version); + drm_err(ddev, "hardware identifier (0x%08x) not supported!\n", + ldev->caps.hw_version); goto err; } /* Disable all interrupts */ regmap_clear_bits(ldev->regmap, LTDC_IER, IER_MASK); - DRM_DEBUG_DRIVER("ltdc hw version 0x%08x\n", ldev->caps.hw_version); + drm_dbg_driver(ddev, "ltdc hw version 0x%08x\n", ldev->caps.hw_version); /* initialize default value for fifo underrun threshold & clear interrupt error counters */ ldev->transfer_err = 0; @@ -2008,32 +2055,35 @@ int ltdc_load(struct drm_device *ddev) ltdc_irq_thread, IRQF_ONESHOT, dev_name(dev), ddev); if (ret) { - DRM_ERROR("Failed to register LTDC interrupt\n"); + drm_err(ddev, "Failed to register LTDC interrupt\n"); goto err; } } crtc = drmm_kzalloc(ddev, sizeof(*crtc), GFP_KERNEL); if (!crtc) { - DRM_ERROR("Failed to allocate crtc\n"); + drm_err(ddev, "Failed to allocate crtc\n"); ret = -ENOMEM; goto err; } ret = ltdc_crtc_init(ddev, crtc); if (ret) { - DRM_ERROR("Failed to init crtc\n"); + drm_err(ddev, "Failed to init crtc\n"); goto err; } ret = drm_vblank_init(ddev, NB_CRTC); if (ret) { - DRM_ERROR("Failed calling drm_vblank_init()\n"); + drm_err(ddev, "Failed calling drm_vblank_init()\n"); goto err; } clk_disable_unprepare(ldev->pixel_clk); + if (ldev->bus_clk) + clk_disable_unprepare(ldev->bus_clk); + pinctrl_pm_select_sleep_state(ddev->dev); pm_runtime_enable(ddev->dev); @@ -2042,12 +2092,15 @@ int ltdc_load(struct drm_device *ddev) err: clk_disable_unprepare(ldev->pixel_clk); + if (ldev->bus_clk) + clk_disable_unprepare(ldev->bus_clk); + return ret; } void ltdc_unload(struct drm_device *ddev) { - DRM_DEBUG_DRIVER("\n"); + drm_dbg_driver(ddev, 
"\n"); pm_runtime_disable(ddev->dev); } diff --git a/drivers/gpu/drm/stm/ltdc.h b/drivers/gpu/drm/stm/ltdc.h index 9d488043ffdb..17b51a7ce28e 100644 --- a/drivers/gpu/drm/stm/ltdc.h +++ b/drivers/gpu/drm/stm/ltdc.h @@ -40,10 +40,16 @@ struct fps_info { ktime_t last_timestamp; }; +struct ltdc_plat_data { + int pad_max_freq_hz; /* max frequency supported by pad */ +}; + struct ltdc_device { void __iomem *regs; struct regmap *regmap; struct clk *pixel_clk; /* lcd pixel clock */ + struct clk *lvds_clk; /* lvds pixel clock */ + struct clk *bus_clk; /* bus clock */ struct mutex err_lock; /* protecting error_status */ struct ltdc_caps caps; u32 irq_status; diff --git a/drivers/gpu/drm/sysfb/drm_sysfb_helper.h b/drivers/gpu/drm/sysfb/drm_sysfb_helper.h index 1424b63dde99..89633e30ca62 100644 --- a/drivers/gpu/drm/sysfb/drm_sysfb_helper.h +++ b/drivers/gpu/drm/sysfb/drm_sysfb_helper.h @@ -132,7 +132,7 @@ int drm_sysfb_plane_helper_get_scanout_buffer(struct drm_plane *plane, struct drm_sysfb_crtc_state { struct drm_crtc_state base; - /* Primary-plane format; required for color mgmt. */ + /* CRTC input color format; required for color mgmt. */ const struct drm_format_info *format; }; diff --git a/drivers/gpu/drm/sysfb/drm_sysfb_modeset.c b/drivers/gpu/drm/sysfb/drm_sysfb_modeset.c index 1bcdb5ee8f09..ddb4a7523ee6 100644 --- a/drivers/gpu/drm/sysfb/drm_sysfb_modeset.c +++ b/drivers/gpu/drm/sysfb/drm_sysfb_modeset.c @@ -210,7 +210,12 @@ int drm_sysfb_plane_helper_atomic_check(struct drm_plane *plane, else if (!new_plane_state->visible) return 0; - if (new_fb->format != sysfb->fb_format) { + new_crtc_state = drm_atomic_get_new_crtc_state(new_state, new_plane_state->crtc); + + new_sysfb_crtc_state = to_drm_sysfb_crtc_state(new_crtc_state); + new_sysfb_crtc_state->format = sysfb->fb_format; + + if (new_fb->format != new_sysfb_crtc_state->format) { void *buf; /* format conversion necessary; reserve buffer */ @@ -220,11 +225,6 @@ int drm_sysfb_plane_helper_atomic_check(struct drm_plane *plane, return -ENOMEM; } - new_crtc_state = drm_atomic_get_new_crtc_state(new_state, new_plane_state->crtc); - - new_sysfb_crtc_state = to_drm_sysfb_crtc_state(new_crtc_state); - new_sysfb_crtc_state->format = new_fb->format; - return 0; } EXPORT_SYMBOL(drm_sysfb_plane_helper_atomic_check); @@ -238,7 +238,9 @@ void drm_sysfb_plane_helper_atomic_update(struct drm_plane *plane, struct drm_at struct drm_shadow_plane_state *shadow_plane_state = to_drm_shadow_plane_state(plane_state); struct drm_framebuffer *fb = plane_state->fb; unsigned int dst_pitch = sysfb->fb_pitch; - const struct drm_format_info *dst_format = sysfb->fb_format; + struct drm_crtc_state *crtc_state = drm_atomic_get_new_crtc_state(state, plane_state->crtc); + struct drm_sysfb_crtc_state *sysfb_crtc_state = to_drm_sysfb_crtc_state(crtc_state); + const struct drm_format_info *dst_format = sysfb_crtc_state->format; struct drm_atomic_helper_damage_iter iter; struct drm_rect damage; int ret, idx; @@ -370,16 +372,19 @@ EXPORT_SYMBOL(drm_sysfb_crtc_helper_atomic_check); void drm_sysfb_crtc_reset(struct drm_crtc *crtc) { + struct drm_sysfb_device *sysfb = to_drm_sysfb_device(crtc->dev); struct drm_sysfb_crtc_state *sysfb_crtc_state; if (crtc->state) drm_sysfb_crtc_state_destroy(to_drm_sysfb_crtc_state(crtc->state)); sysfb_crtc_state = kzalloc(sizeof(*sysfb_crtc_state), GFP_KERNEL); - if (sysfb_crtc_state) + if (sysfb_crtc_state) { + sysfb_crtc_state->format = sysfb->fb_format; __drm_atomic_helper_crtc_reset(crtc, &sysfb_crtc_state->base); - else + } else { 
__drm_atomic_helper_crtc_reset(crtc, NULL); + } } EXPORT_SYMBOL(drm_sysfb_crtc_reset); diff --git a/drivers/gpu/drm/sysfb/drm_sysfb_screen_info.c b/drivers/gpu/drm/sysfb/drm_sysfb_screen_info.c index 0b3fb874a51f..885864168c54 100644 --- a/drivers/gpu/drm/sysfb/drm_sysfb_screen_info.c +++ b/drivers/gpu/drm/sysfb/drm_sysfb_screen_info.c @@ -79,22 +79,19 @@ const struct drm_format_info *drm_sysfb_get_format_si(struct drm_device *dev, const struct screen_info *si) { const struct drm_format_info *format = NULL; - u32 bits_per_pixel; + struct pixel_format pixel; size_t i; + int ret; - bits_per_pixel = __screen_info_lfb_bits_per_pixel(si); + ret = screen_info_pixel_format(si, &pixel); + if (ret) + return NULL; for (i = 0; i < nformats; ++i) { - const struct pixel_format *f = &formats[i].pixel; - - if (bits_per_pixel == f->bits_per_pixel && - si->red_size == f->red.length && - si->red_pos == f->red.offset && - si->green_size == f->green.length && - si->green_pos == f->green.offset && - si->blue_size == f->blue.length && - si->blue_pos == f->blue.offset) { - format = drm_format_info(formats[i].fourcc); + const struct drm_sysfb_format *f = &formats[i]; + + if (pixel_format_equal(&pixel, &f->pixel)) { + format = drm_format_info(f->fourcc); break; } } diff --git a/drivers/gpu/drm/sysfb/simpledrm.c b/drivers/gpu/drm/sysfb/simpledrm.c index 8530a3ef8a7a..0358164a623c 100644 --- a/drivers/gpu/drm/sysfb/simpledrm.c +++ b/drivers/gpu/drm/sysfb/simpledrm.c @@ -4,7 +4,7 @@ #include <linux/clk.h> #include <linux/of_clk.h> #include <linux/minmax.h> -#include <linux/of_address.h> +#include <linux/of_reserved_mem.h> #include <linux/platform_data/simplefb.h> #include <linux/platform_device.h> #include <linux/pm_domain.h> @@ -179,22 +179,17 @@ simplefb_get_format_of(struct drm_device *dev, struct device_node *of_node) static struct resource * simplefb_get_memory_of(struct drm_device *dev, struct device_node *of_node) { - struct device_node *np; - struct resource *res; + struct resource r, *res; int err; - np = of_parse_phandle(of_node, "memory-region", 0); - if (!np) + err = of_reserved_mem_region_to_resource(of_node, 0, &r); + if (err) return NULL; - res = devm_kzalloc(dev->dev, sizeof(*res), GFP_KERNEL); + res = devm_kmemdup(dev->dev, &r, sizeof(r), GFP_KERNEL); if (!res) return ERR_PTR(-ENOMEM); - err = of_address_to_resource(np, 0, res); - if (err) - return ERR_PTR(err); - if (of_property_present(of_node, "reg")) drm_warn(dev, "preferring \"memory-region\" over \"reg\" property\n"); diff --git a/drivers/gpu/drm/sysfb/vesadrm.c b/drivers/gpu/drm/sysfb/vesadrm.c index 2b107958942c..16a4b52d45c6 100644 --- a/drivers/gpu/drm/sysfb/vesadrm.c +++ b/drivers/gpu/drm/sysfb/vesadrm.c @@ -46,6 +46,7 @@ static const struct drm_format_info *vesadrm_get_format_si(struct drm_device *de { PIXEL_FORMAT_RGB888, DRM_FORMAT_RGB888, }, { PIXEL_FORMAT_XRGB8888, DRM_FORMAT_XRGB8888, }, { PIXEL_FORMAT_XBGR8888, DRM_FORMAT_XBGR8888, }, + { PIXEL_FORMAT_C8, DRM_FORMAT_C8, }, }; return drm_sysfb_get_format_si(dev, formats, ARRAY_SIZE(formats), si); @@ -82,7 +83,7 @@ static struct vesadrm_device *to_vesadrm_device(struct drm_device *dev) } /* - * Palette + * Color LUT */ static void vesadrm_vga_cmap_write(struct vesadrm_device *vesa, unsigned int index, @@ -128,7 +129,7 @@ static void vesadrm_pmi_cmap_write(struct vesadrm_device *vesa, unsigned int ind } #endif -static void vesadrm_set_gamma_lut(struct drm_crtc *crtc, unsigned int index, +static void vesadrm_set_color_lut(struct drm_crtc *crtc, unsigned int index, u16 red, u16 green, 
u16 blue) { struct drm_device *dev = crtc->dev; @@ -149,15 +150,15 @@ static void vesadrm_fill_gamma_lut(struct vesadrm_device *vesa, switch (format->format) { case DRM_FORMAT_XRGB1555: - drm_crtc_fill_gamma_555(crtc, vesadrm_set_gamma_lut); + drm_crtc_fill_gamma_555(crtc, vesadrm_set_color_lut); break; case DRM_FORMAT_RGB565: - drm_crtc_fill_gamma_565(crtc, vesadrm_set_gamma_lut); + drm_crtc_fill_gamma_565(crtc, vesadrm_set_color_lut); break; case DRM_FORMAT_RGB888: case DRM_FORMAT_XRGB8888: case DRM_FORMAT_BGRX8888: - drm_crtc_fill_gamma_888(crtc, vesadrm_set_gamma_lut); + drm_crtc_fill_gamma_888(crtc, vesadrm_set_color_lut); break; default: drm_warn_once(dev, "Unsupported format %p4cc for gamma correction\n", @@ -175,15 +176,53 @@ static void vesadrm_load_gamma_lut(struct vesadrm_device *vesa, switch (format->format) { case DRM_FORMAT_XRGB1555: - drm_crtc_load_gamma_555_from_888(crtc, lut, vesadrm_set_gamma_lut); + drm_crtc_load_gamma_555_from_888(crtc, lut, vesadrm_set_color_lut); break; case DRM_FORMAT_RGB565: - drm_crtc_load_gamma_565_from_888(crtc, lut, vesadrm_set_gamma_lut); + drm_crtc_load_gamma_565_from_888(crtc, lut, vesadrm_set_color_lut); break; case DRM_FORMAT_RGB888: case DRM_FORMAT_XRGB8888: case DRM_FORMAT_BGRX8888: - drm_crtc_load_gamma_888(crtc, lut, vesadrm_set_gamma_lut); + drm_crtc_load_gamma_888(crtc, lut, vesadrm_set_color_lut); + break; + default: + drm_warn_once(dev, "Unsupported format %p4cc for gamma correction\n", + &format->format); + break; + } +} + +static void vesadrm_fill_palette_lut(struct vesadrm_device *vesa, + const struct drm_format_info *format) +{ + struct drm_device *dev = &vesa->sysfb.dev; + struct drm_crtc *crtc = &vesa->crtc; + + switch (format->format) { + case DRM_FORMAT_C8: + drm_crtc_fill_palette_8(crtc, vesadrm_set_color_lut); + break; + case DRM_FORMAT_RGB332: + drm_crtc_fill_palette_332(crtc, vesadrm_set_color_lut); + break; + default: + drm_warn_once(dev, "Unsupported format %p4cc for palette\n", + &format->format); + break; + } +} + +static void vesadrm_load_palette_lut(struct vesadrm_device *vesa, + const struct drm_format_info *format, + struct drm_color_lut *lut) +{ + struct drm_device *dev = &vesa->sysfb.dev; + struct drm_crtc *crtc = &vesa->crtc; + + switch (format->format) { + case DRM_FORMAT_C8: + drm_crtc_load_palette_8(crtc, lut, vesadrm_set_color_lut); break; default: drm_warn_once(dev, "Unsupported format %p4cc for gamma correction\n", @@ -200,8 +239,67 @@ static const u64 vesadrm_primary_plane_format_modifiers[] = { DRM_SYSFB_PLANE_FORMAT_MODIFIERS, }; +static int vesadrm_primary_plane_helper_atomic_check(struct drm_plane *plane, + struct drm_atomic_state *new_state) +{ + struct drm_sysfb_device *sysfb = to_drm_sysfb_device(plane->dev); + struct drm_plane_state *new_plane_state = drm_atomic_get_new_plane_state(new_state, plane); + struct drm_framebuffer *new_fb = new_plane_state->fb; + struct drm_crtc_state *new_crtc_state; + struct drm_sysfb_crtc_state *new_sysfb_crtc_state; + int ret; + + ret = drm_sysfb_plane_helper_atomic_check(plane, new_state); + if (ret) + return ret; + else if (!new_plane_state->visible) + return 0; + + /* + * Fix up format conversion for specific cases + */ + + switch (sysfb->fb_format->format) { + case DRM_FORMAT_C8: + new_crtc_state = drm_atomic_get_new_crtc_state(new_state, new_plane_state->crtc); + new_sysfb_crtc_state = to_drm_sysfb_crtc_state(new_crtc_state); + + switch (new_fb->format->format) { + case DRM_FORMAT_XRGB8888: + /* + * Reduce XRGB8888 to RGB332. 
Each resulting pixel is an index + * into the C8 hardware palette, which stores RGB332 colors. + */ + if (new_sysfb_crtc_state->format->format != DRM_FORMAT_RGB332) { + new_sysfb_crtc_state->format = + drm_format_info(DRM_FORMAT_RGB332); + new_crtc_state->color_mgmt_changed = true; + } + break; + case DRM_FORMAT_C8: + /* + * Restore original output. Emulation of XRGB8888 set RBG332 + * output format and hardware palette. This needs to be undone + * when we switch back to DRM_FORMAT_C8. + */ + if (new_sysfb_crtc_state->format->format == DRM_FORMAT_RGB332) { + new_sysfb_crtc_state->format = sysfb->fb_format; + new_crtc_state->color_mgmt_changed = true; + } + break; + } + break; + } + + return 0; +} + static const struct drm_plane_helper_funcs vesadrm_primary_plane_helper_funcs = { - DRM_SYSFB_PLANE_HELPER_FUNCS, + DRM_GEM_SHADOW_PLANE_HELPER_FUNCS, + .atomic_check = vesadrm_primary_plane_helper_atomic_check, + .atomic_update = drm_sysfb_plane_helper_atomic_update, + .atomic_disable = drm_sysfb_plane_helper_atomic_disable, + .get_scanout_buffer = drm_sysfb_plane_helper_get_scanout_buffer, }; static const struct drm_plane_funcs vesadrm_primary_plane_funcs = { @@ -223,15 +321,36 @@ static void vesadrm_crtc_helper_atomic_flush(struct drm_crtc *crtc, * plane's color format. */ if (crtc_state->enable && crtc_state->color_mgmt_changed) { - if (sysfb_crtc_state->format == sysfb->fb_format) { - if (crtc_state->gamma_lut) - vesadrm_load_gamma_lut(vesa, - sysfb_crtc_state->format, - crtc_state->gamma_lut->data); - else + switch (sysfb->fb_format->format) { + /* + * Index formats + */ + case DRM_FORMAT_C8: + if (sysfb_crtc_state->format->format == DRM_FORMAT_RGB332) { + vesadrm_fill_palette_lut(vesa, sysfb_crtc_state->format); + } else if (crtc->state->gamma_lut) { + vesadrm_load_palette_lut(vesa, + sysfb_crtc_state->format, + crtc_state->gamma_lut->data); + } else { + vesadrm_fill_palette_lut(vesa, sysfb_crtc_state->format); + } + break; + /* + * Component formats + */ + default: + if (sysfb_crtc_state->format == sysfb->fb_format) { + if (crtc_state->gamma_lut) + vesadrm_load_gamma_lut(vesa, + sysfb_crtc_state->format, + crtc_state->gamma_lut->data); + else + vesadrm_fill_gamma_lut(vesa, sysfb_crtc_state->format); + } else { vesadrm_fill_gamma_lut(vesa, sysfb_crtc_state->format); - } else { - vesadrm_fill_gamma_lut(vesa, sysfb_crtc_state->format); + } + break; } } } @@ -334,14 +453,19 @@ static struct vesadrm_device *vesadrm_device_create(struct drm_driver *drv, if (!__screen_info_vbe_mode_nonvga(si)) { vesa->cmap_write = vesadrm_vga_cmap_write; -#if defined(CONFIG_X86_32) } else { +#if defined(CONFIG_X86_32) phys_addr_t pmi_base = __screen_info_vesapm_info_base(si); - const u16 *pmi_addr = phys_to_virt(pmi_base); - vesa->pmi.PrimaryPalette = (u8 *)pmi_addr + pmi_addr[2]; - vesa->cmap_write = vesadrm_pmi_cmap_write; + if (pmi_base) { + const u16 *pmi_addr = phys_to_virt(pmi_base); + + vesa->pmi.PrimaryPalette = (u8 *)pmi_addr + pmi_addr[2]; + vesa->cmap_write = vesadrm_pmi_cmap_write; + } else #endif + if (format->is_color_indexed) + drm_warn(dev, "hardware palette is unchangeable, colors may be incorrect\n"); } #if defined(CONFIG_FIRMWARE_EDID) diff --git a/drivers/gpu/drm/tegra/drm.h b/drivers/gpu/drm/tegra/drm.h index 0b65e69f3a8a..1dd3670f37db 100644 --- a/drivers/gpu/drm/tegra/drm.h +++ b/drivers/gpu/drm/tegra/drm.h @@ -185,11 +185,13 @@ bool tegra_fb_is_bottom_up(struct drm_framebuffer *framebuffer); int tegra_fb_get_tiling(struct drm_framebuffer *framebuffer, struct tegra_bo_tiling *tiling); 
struct drm_framebuffer *tegra_fb_alloc(struct drm_device *drm, + const struct drm_format_info *info, const struct drm_mode_fb_cmd2 *mode_cmd, struct tegra_bo **planes, unsigned int num_planes); struct drm_framebuffer *tegra_fb_create(struct drm_device *drm, struct drm_file *file, + const struct drm_format_info *info, const struct drm_mode_fb_cmd2 *cmd); #ifdef CONFIG_DRM_FBDEV_EMULATION diff --git a/drivers/gpu/drm/tegra/fb.c b/drivers/gpu/drm/tegra/fb.c index 46170753699d..dd041089f797 100644 --- a/drivers/gpu/drm/tegra/fb.c +++ b/drivers/gpu/drm/tegra/fb.c @@ -102,6 +102,7 @@ static const struct drm_framebuffer_funcs tegra_fb_funcs = { }; struct drm_framebuffer *tegra_fb_alloc(struct drm_device *drm, + const struct drm_format_info *info, const struct drm_mode_fb_cmd2 *mode_cmd, struct tegra_bo **planes, unsigned int num_planes) @@ -114,7 +115,7 @@ struct drm_framebuffer *tegra_fb_alloc(struct drm_device *drm, if (!fb) return ERR_PTR(-ENOMEM); - drm_helper_mode_fill_fb_struct(drm, fb, mode_cmd); + drm_helper_mode_fill_fb_struct(drm, fb, info, mode_cmd); for (i = 0; i < fb->format->num_planes; i++) fb->obj[i] = &planes[i]->gem; @@ -132,9 +133,9 @@ struct drm_framebuffer *tegra_fb_alloc(struct drm_device *drm, struct drm_framebuffer *tegra_fb_create(struct drm_device *drm, struct drm_file *file, + const struct drm_format_info *info, const struct drm_mode_fb_cmd2 *cmd) { - const struct drm_format_info *info = drm_get_format_info(drm, cmd); struct tegra_bo *planes[4]; struct drm_gem_object *gem; struct drm_framebuffer *fb; @@ -166,7 +167,7 @@ struct drm_framebuffer *tegra_fb_create(struct drm_device *drm, planes[i] = to_tegra_bo(gem); } - fb = tegra_fb_alloc(drm, cmd, planes, i); + fb = tegra_fb_alloc(drm, info, cmd, planes, i); if (IS_ERR(fb)) { err = PTR_ERR(fb); goto unreference; diff --git a/drivers/gpu/drm/tegra/fbdev.c b/drivers/gpu/drm/tegra/fbdev.c index cd9d798f8870..1b70f5e164af 100644 --- a/drivers/gpu/drm/tegra/fbdev.c +++ b/drivers/gpu/drm/tegra/fbdev.c @@ -106,7 +106,9 @@ int tegra_fbdev_driver_fbdev_probe(struct drm_fb_helper *helper, return PTR_ERR(info); } - fb = tegra_fb_alloc(drm, &cmd, &bo, 1); + fb = tegra_fb_alloc(drm, + drm_get_format_info(drm, cmd.pixel_format, cmd.modifier[0]), + &cmd, &bo, 1); if (IS_ERR(fb)) { err = PTR_ERR(fb); dev_err(drm->dev, "failed to allocate DRM framebuffer: %d\n", diff --git a/drivers/gpu/drm/tegra/gem.c b/drivers/gpu/drm/tegra/gem.c index dbc1394f96b8..8ede07fb7a21 100644 --- a/drivers/gpu/drm/tegra/gem.c +++ b/drivers/gpu/drm/tegra/gem.c @@ -523,7 +523,7 @@ void tegra_bo_free_object(struct drm_gem_object *gem) if (tegra->domain) { tegra_bo_iommu_unmap(tegra, bo); - if (gem->import_attach) { + if (drm_gem_is_imported(gem)) { dma_buf_unmap_attachment_unlocked(gem->import_attach, bo->sgt, DMA_TO_DEVICE); dma_buf_detach(gem->import_attach->dmabuf, gem->import_attach); diff --git a/drivers/gpu/drm/tegra/nvdec.c b/drivers/gpu/drm/tegra/nvdec.c index 2d9a0a3f6c38..7a38664e890e 100644 --- a/drivers/gpu/drm/tegra/nvdec.c +++ b/drivers/gpu/drm/tegra/nvdec.c @@ -261,10 +261,8 @@ static int nvdec_load_falcon_firmware(struct nvdec *nvdec) if (!client->group) { virt = dma_alloc_coherent(nvdec->dev, size, &iova, GFP_KERNEL); - - err = dma_mapping_error(nvdec->dev, iova); - if (err < 0) - return err; + if (!virt) + return -ENOMEM; } else { virt = tegra_drm_alloc(tegra, size, &iova); if (IS_ERR(virt)) diff --git a/drivers/gpu/drm/tests/drm_exec_test.c b/drivers/gpu/drm/tests/drm_exec_test.c index d6c4dd1194a0..3a20c788c51f 100644 --- 
a/drivers/gpu/drm/tests/drm_exec_test.c +++ b/drivers/gpu/drm/tests/drm_exec_test.c @@ -150,14 +150,22 @@ static void test_prepare(struct kunit *test) static void test_prepare_array(struct kunit *test) { struct drm_exec_priv *priv = test->priv; - struct drm_gem_object gobj1 = { }; - struct drm_gem_object gobj2 = { }; - struct drm_gem_object *array[] = { &gobj1, &gobj2 }; + struct drm_gem_object *gobj1; + struct drm_gem_object *gobj2; + struct drm_gem_object *array[] = { + (gobj1 = kunit_kzalloc(test, sizeof(*gobj1), GFP_KERNEL)), + (gobj2 = kunit_kzalloc(test, sizeof(*gobj2), GFP_KERNEL)), + }; struct drm_exec exec; int ret; - drm_gem_private_object_init(priv->drm, &gobj1, PAGE_SIZE); - drm_gem_private_object_init(priv->drm, &gobj2, PAGE_SIZE); + if (!gobj1 || !gobj2) { + KUNIT_FAIL(test, "Failed to allocate GEM objects.\n"); + return; + } + + drm_gem_private_object_init(priv->drm, gobj1, PAGE_SIZE); + drm_gem_private_object_init(priv->drm, gobj2, PAGE_SIZE); drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 0); drm_exec_until_all_locked(&exec) @@ -166,8 +174,8 @@ static void test_prepare_array(struct kunit *test) KUNIT_EXPECT_EQ(test, ret, 0); drm_exec_fini(&exec); - drm_gem_private_object_fini(&gobj1); - drm_gem_private_object_fini(&gobj2); + drm_gem_private_object_fini(gobj1); + drm_gem_private_object_fini(gobj2); } static void test_multiple_loops(struct kunit *test) diff --git a/drivers/gpu/drm/tests/drm_format_helper_test.c b/drivers/gpu/drm/tests/drm_format_helper_test.c index ad06762db671..981dada8f3a8 100644 --- a/drivers/gpu/drm/tests/drm_format_helper_test.c +++ b/drivers/gpu/drm/tests/drm_format_helper_test.c @@ -735,13 +735,13 @@ static void drm_test_fb_xrgb8888_to_rgb565(struct kunit *test) NULL : &result->dst_pitch; drm_fb_xrgb8888_to_rgb565(&dst, dst_pitch, &src, &fb, &params->clip, - &fmtcnv_state, false); + &fmtcnv_state); buf = le16buf_to_cpu(test, (__force const __le16 *)buf, dst_size / sizeof(__le16)); KUNIT_EXPECT_MEMEQ(test, buf, result->expected, dst_size); buf = dst.vaddr; /* restore original value of buf */ - drm_fb_xrgb8888_to_rgb565(&dst, &result->dst_pitch, &src, &fb, &params->clip, - &fmtcnv_state, true); + drm_fb_xrgb8888_to_rgb565be(&dst, &result->dst_pitch, &src, &fb, &params->clip, + &fmtcnv_state); buf = le16buf_to_cpu(test, (__force const __le16 *)buf, dst_size / sizeof(__le16)); KUNIT_EXPECT_MEMEQ(test, buf, result->expected_swab, dst_size); @@ -749,7 +749,7 @@ static void drm_test_fb_xrgb8888_to_rgb565(struct kunit *test) memset(buf, 0, dst_size); drm_fb_xrgb8888_to_rgb565(&dst, dst_pitch, &src, &fb, &params->clip, - &fmtcnv_state, false); + &fmtcnv_state); buf = le16buf_to_cpu(test, (__force const __le16 *)buf, dst_size / sizeof(__le16)); KUNIT_EXPECT_MEMEQ(test, buf, result->expected, dst_size); } @@ -1033,13 +1033,14 @@ static void drm_test_fb_xrgb8888_to_xrgb2101010(struct kunit *test) NULL : &result->dst_pitch; drm_fb_xrgb8888_to_xrgb2101010(&dst, dst_pitch, &src, &fb, &params->clip, &fmtcnv_state); - buf = le32buf_to_cpu(test, buf, dst_size / sizeof(u32)); + buf = le32buf_to_cpu(test, (__force const __le32 *)buf, dst_size / sizeof(u32)); KUNIT_EXPECT_MEMEQ(test, buf, result->expected, dst_size); buf = dst.vaddr; /* restore original value of buf */ memset(buf, 0, dst_size); drm_fb_xrgb8888_to_xrgb2101010(&dst, dst_pitch, &src, &fb, &params->clip, &fmtcnv_state); + buf = le32buf_to_cpu(test, (__force const __le32 *)buf, dst_size / sizeof(u32)); KUNIT_EXPECT_MEMEQ(test, buf, result->expected, dst_size); } diff --git a/drivers/gpu/drm/tests/drm_framebuffer_test.c 
b/drivers/gpu/drm/tests/drm_framebuffer_test.c index 6ea04cc8f324..9b8e01e8cd91 100644 --- a/drivers/gpu/drm/tests/drm_framebuffer_test.c +++ b/drivers/gpu/drm/tests/drm_framebuffer_test.c @@ -363,6 +363,7 @@ struct drm_framebuffer_test_priv { static struct drm_framebuffer *fb_create_mock(struct drm_device *dev, struct drm_file *file_priv, + const struct drm_format_info *info, const struct drm_mode_fb_cmd2 *mode_cmd) { struct drm_framebuffer_test_priv *priv = container_of(dev, typeof(*priv), dev); diff --git a/drivers/gpu/drm/tests/drm_kunit_edid.h b/drivers/gpu/drm/tests/drm_kunit_edid.h index 02e2761b3b1f..c59c8528a3f7 100644 --- a/drivers/gpu/drm/tests/drm_kunit_edid.h +++ b/drivers/gpu/drm/tests/drm_kunit_edid.h @@ -46,6 +46,13 @@ * Monitor ranges (GTF): 50-70 Hz V, 30-70 kHz H, max dotclock 150 MHz * Dummy Descriptor: * Checksum: 0xab + * + * ---------------- + * + * edid-decode 1.30.0-5367 + * edid-decode SHA: 41ebf7135691 2025-05-01 10:19:22 + * + * EDID conformity: PASS */ static const unsigned char test_edid_dvi_1080p[] = { 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x31, 0xd8, 0x2a, 0x00, @@ -62,6 +69,10 @@ static const unsigned char test_edid_dvi_1080p[] = { }; /* + * + * This edid is intentionally broken with the 100MHz limit. It's meant + * to be used only with tests in unusual situations. + * * edid-decode (hex): * * 00 ff ff ff ff ff ff 00 31 d8 2a 00 00 00 00 00 @@ -73,14 +84,14 @@ static const unsigned char test_edid_dvi_1080p[] = { * 46 1e 46 0f 00 0a 20 20 20 20 20 20 00 00 00 10 * 00 00 00 00 00 00 00 00 00 00 00 00 00 00 01 92 * - * 02 03 1b 81 e3 05 00 20 41 10 e2 00 4a 6d 03 0c - * 00 12 34 00 14 20 00 00 00 00 00 00 00 00 00 00 + * 02 03 15 81 e3 05 00 20 41 10 e2 00 4a 67 03 0c + * 00 12 34 00 14 00 00 00 00 00 00 00 00 00 00 00 * 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 * 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 * 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 * 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 * 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 - * 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 e4 + * 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 10 * * ---------------- * @@ -135,8 +146,19 @@ static const unsigned char test_edid_dvi_1080p[] = { * Vendor-Specific Data Block (HDMI), OUI 00-0C-03: * Source physical address: 1.2.3.4 * Maximum TMDS clock: 100 MHz - * Extended HDMI video details: - * Checksum: 0xe4 Unused space in Extension Block: 100 bytes + * Checksum: 0x10 Unused space in Extension Block: 106 bytes + * + * ---------------- + * + * edid-decode 1.30.0-5367 + * edid-decode SHA: 41ebf7135691 2025-05-01 10:19:22 + * + * Failures: + * + * EDID: + * CTA-861: The maximum HDMI TMDS clock is 100000 kHz, but one or more video timings go up to 148500 kHz. 
+ * + * EDID conformity: FAIL */ static const unsigned char test_edid_hdmi_1080p_rgb_max_100mhz[] = { 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x31, 0xd8, 0x2a, 0x00, @@ -147,11 +169,11 @@ static const unsigned char test_edid_hdmi_1080p_rgb_max_100mhz[] = { 0x2d, 0x40, 0x58, 0x2c, 0x45, 0x00, 0x40, 0x84, 0x63, 0x00, 0x00, 0x1e, 0x00, 0x00, 0x00, 0xfc, 0x00, 0x54, 0x65, 0x73, 0x74, 0x20, 0x45, 0x44, 0x49, 0x44, 0x0a, 0x20, 0x20, 0x20, 0x00, 0x00, 0x00, 0xfd, 0x00, 0x32, - 0x46, 0x00, 0x00, 0xc4, 0x00, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x46, 0x1e, 0x46, 0x0f, 0x00, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x41, 0x02, 0x03, 0x1b, 0x81, - 0xe3, 0x05, 0x00, 0x20, 0x41, 0x10, 0xe2, 0x00, 0x4a, 0x6d, 0x03, 0x0c, - 0x00, 0x12, 0x34, 0x00, 0x14, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x92, 0x02, 0x03, 0x15, 0x81, + 0xe3, 0x05, 0x00, 0x20, 0x41, 0x10, 0xe2, 0x00, 0x4a, 0x67, 0x03, 0x0c, + 0x00, 0x12, 0x34, 0x00, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, @@ -160,7 +182,7 @@ static const unsigned char test_edid_hdmi_1080p_rgb_max_100mhz[] = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0xe4 + 0x00, 0x00, 0x00, 0x10 }; /* @@ -175,14 +197,14 @@ static const unsigned char test_edid_hdmi_1080p_rgb_max_100mhz[] = { * 46 1e 46 0f 00 0a 20 20 20 20 20 20 00 00 00 10 * 00 00 00 00 00 00 00 00 00 00 00 00 00 00 01 92 * - * 02 03 1b 81 e3 05 00 20 41 10 e2 00 4a 6d 03 0c - * 00 12 34 00 28 20 00 00 00 00 00 00 00 00 00 00 + * 02 03 15 81 e3 05 00 20 41 10 e2 00 4a 67 03 0c + * 00 12 34 00 28 00 00 00 00 00 00 00 00 00 00 00 * 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 * 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 * 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 * 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 * 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 - * 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 d0 + * 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 fc * * ---------------- * @@ -237,8 +259,14 @@ static const unsigned char test_edid_hdmi_1080p_rgb_max_100mhz[] = { * Vendor-Specific Data Block (HDMI), OUI 00-0C-03: * Source physical address: 1.2.3.4 * Maximum TMDS clock: 200 MHz - * Extended HDMI video details: - * Checksum: 0xd0 Unused space in Extension Block: 100 bytes + * Checksum: 0xfc Unused space in Extension Block: 106 bytes + * + * ---------------- + * + * edid-decode 1.30.0-5367 + * edid-decode SHA: 41ebf7135691 2025-05-01 10:19:22 + * + * EDID conformity: PASS */ static const unsigned char test_edid_hdmi_1080p_rgb_max_200mhz[] = { 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x31, 0xd8, 0x2a, 0x00, @@ -249,11 +277,11 @@ static const unsigned char test_edid_hdmi_1080p_rgb_max_200mhz[] = { 0x2d, 0x40, 0x58, 0x2c, 0x45, 0x00, 0x40, 0x84, 0x63, 0x00, 0x00, 0x1e, 0x00, 0x00, 0x00, 0xfc, 0x00, 0x54, 0x65, 0x73, 0x74, 0x20, 0x45, 0x44, 0x49, 0x44, 0x0a, 0x20, 0x20, 0x20, 0x00, 0x00, 0x00, 0xfd, 0x00, 0x32, - 0x46, 0x00, 0x00, 0xc4, 0x00, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x46, 0x1e, 0x46, 0x0f, 
0x00, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x41, 0x02, 0x03, 0x1b, 0x81, - 0xe3, 0x05, 0x00, 0x20, 0x41, 0x10, 0xe2, 0x00, 0x4a, 0x6d, 0x03, 0x0c, - 0x00, 0x12, 0x34, 0x00, 0x28, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x92, 0x02, 0x03, 0x15, 0x81, + 0xe3, 0x05, 0x00, 0x20, 0x41, 0x10, 0xe2, 0x00, 0x4a, 0x67, 0x03, 0x0c, + 0x00, 0x12, 0x34, 0x00, 0x28, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, @@ -262,7 +290,7 @@ static const unsigned char test_edid_hdmi_1080p_rgb_max_200mhz[] = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0xd0 + 0x00, 0x00, 0x00, 0xfc }; /* @@ -277,14 +305,14 @@ static const unsigned char test_edid_hdmi_1080p_rgb_max_200mhz[] = { * 46 1e 46 0f 00 0a 20 20 20 20 20 20 00 00 00 10 * 00 00 00 00 00 00 00 00 00 00 00 00 00 00 01 92 * - * 02 03 1b 81 e3 05 00 20 41 10 e2 00 4a 6d 03 0c - * 00 12 34 00 28 20 00 00 00 00 00 00 00 00 00 00 + * 02 03 15 81 e3 05 00 20 41 10 e2 00 4a 67 03 0c + * 00 12 34 00 44 00 00 00 00 00 00 00 00 00 00 00 * 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 * 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 * 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 * 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 * 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 - * 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 d0 + * 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 e0 * * ---------------- * @@ -339,8 +367,14 @@ static const unsigned char test_edid_hdmi_1080p_rgb_max_200mhz[] = { * Vendor-Specific Data Block (HDMI), OUI 00-0C-03: * Source physical address: 1.2.3.4 * Maximum TMDS clock: 340 MHz - * Extended HDMI video details: - * Checksum: 0xd0 Unused space in Extension Block: 100 bytes + * Checksum: 0xe0 Unused space in Extension Block: 106 bytes + * + * ---------------- + * + * edid-decode 1.30.0-5367 + * edid-decode SHA: 41ebf7135691 2025-05-01 10:19:22 + * + * EDID conformity: PASS */ static const unsigned char test_edid_hdmi_1080p_rgb_max_340mhz[] = { 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x31, 0xd8, 0x2a, 0x00, @@ -351,11 +385,11 @@ static const unsigned char test_edid_hdmi_1080p_rgb_max_340mhz[] = { 0x2d, 0x40, 0x58, 0x2c, 0x45, 0x00, 0x40, 0x84, 0x63, 0x00, 0x00, 0x1e, 0x00, 0x00, 0x00, 0xfc, 0x00, 0x54, 0x65, 0x73, 0x74, 0x20, 0x45, 0x44, 0x49, 0x44, 0x0a, 0x20, 0x20, 0x20, 0x00, 0x00, 0x00, 0xfd, 0x00, 0x32, - 0x46, 0x00, 0x00, 0xc4, 0x00, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x46, 0x1e, 0x46, 0x0f, 0x00, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x41, 0x02, 0x03, 0x1b, 0x81, - 0xe3, 0x05, 0x00, 0x20, 0x41, 0x10, 0xe2, 0x00, 0x4a, 0x6d, 0x03, 0x0c, - 0x00, 0x12, 0x34, 0x00, 0x44, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x92, 0x02, 0x03, 0x15, 0x81, + 0xe3, 0x05, 0x00, 0x20, 0x41, 0x10, 0xe2, 0x00, 0x4a, 0x67, 0x03, 0x0c, + 0x00, 0x12, 0x34, 0x00, 0x44, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, @@ -364,7 +398,7 @@ static const unsigned char test_edid_hdmi_1080p_rgb_max_340mhz[] = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0xd0 + 0x00, 0x00, 0x00, 0xe0 }; /* @@ -379,14 +413,14 @@ static const unsigned char test_edid_hdmi_1080p_rgb_max_340mhz[] = { * 46 1e 46 0f 00 0a 20 20 20 20 20 20 00 00 00 10 * 00 00 00 00 00 00 00 00 00 00 00 00 00 00 01 7a * - * 02 03 1b b1 e3 05 00 20 41 10 e2 00 ca 6d 03 0c - * 00 12 34 78 28 20 00 00 00 00 00 00 00 00 00 00 + * 02 03 15 b1 e3 05 00 20 41 10 e2 00 ca 67 03 0c + * 00 12 34 78 28 00 00 00 00 00 00 00 00 00 00 00 * 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 * 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 * 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 * 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 * 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 - * 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 a8 + * 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 d4 * * ---------------- * @@ -447,8 +481,14 @@ static const unsigned char test_edid_hdmi_1080p_rgb_max_340mhz[] = { * DC_30bit * DC_Y444 * Maximum TMDS clock: 200 MHz - * Extended HDMI video details: - * Checksum: 0xa8 Unused space in Extension Block: 100 bytes + * Checksum: 0xd4 Unused space in Extension Block: 106 bytes + * + * ---------------- + * + * edid-decode 1.30.0-5367 + * edid-decode SHA: 41ebf7135691 2025-05-01 10:19:22 + * + * EDID conformity: PASS */ static const unsigned char test_edid_hdmi_1080p_rgb_yuv_dc_max_200mhz[] = { 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x31, 0xd8, 0x2a, 0x00, @@ -461,9 +501,9 @@ static const unsigned char test_edid_hdmi_1080p_rgb_yuv_dc_max_200mhz[] = { 0x49, 0x44, 0x0a, 0x20, 0x20, 0x20, 0x00, 0x00, 0x00, 0xfd, 0x00, 0x32, 0x46, 0x1e, 0x46, 0x0f, 0x00, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x7a, 0x02, 0x03, 0x1b, 0xb1, - 0xe3, 0x05, 0x00, 0x20, 0x41, 0x10, 0xe2, 0x00, 0xca, 0x6d, 0x03, 0x0c, - 0x00, 0x12, 0x34, 0x78, 0x28, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x7a, 0x02, 0x03, 0x15, 0xb1, + 0xe3, 0x05, 0x00, 0x20, 0x41, 0x10, 0xe2, 0x00, 0xca, 0x67, 0x03, 0x0c, + 0x00, 0x12, 0x34, 0x78, 0x28, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, @@ -472,7 +512,7 @@ static const unsigned char test_edid_hdmi_1080p_rgb_yuv_dc_max_200mhz[] = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0xa8 + 0x00, 0x00, 0x00, 0xd4 }; /* @@ -487,14 +527,14 @@ static const unsigned char test_edid_hdmi_1080p_rgb_yuv_dc_max_200mhz[] = { * 46 1e 46 0f 00 0a 20 20 20 20 20 20 00 00 00 10 * 00 00 00 00 00 00 00 00 00 00 00 00 00 00 01 8a * - * 02 03 1b b1 e3 05 00 20 41 10 e2 00 ca 6d 03 0c - * 00 12 34 78 44 20 00 00 
00 00 00 00 00 00 00 00 + * 02 03 15 b1 e3 05 00 20 41 10 e2 00 ca 67 03 0c + * 00 12 34 78 44 00 00 00 00 00 00 00 00 00 00 00 * 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 * 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 * 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 * 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 * 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 - * 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 8c + * 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 b8 * * ---------------- * @@ -555,8 +595,14 @@ static const unsigned char test_edid_hdmi_1080p_rgb_yuv_dc_max_200mhz[] = { * DC_30bit * DC_Y444 * Maximum TMDS clock: 340 MHz - * Extended HDMI video details: - * Checksum: 0x8c Unused space in Extension Block: 100 bytes + * Checksum: 0xb8 Unused space in Extension Block: 106 bytes + * + * ---------------- + * + * edid-decode 1.30.0-5367 + * edid-decode SHA: 41ebf7135691 2025-05-01 10:19:22 + * + * EDID conformity: PASS */ static const unsigned char test_edid_hdmi_1080p_rgb_yuv_dc_max_340mhz[] = { 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x31, 0xd8, 0x2a, 0x00, @@ -569,9 +615,9 @@ static const unsigned char test_edid_hdmi_1080p_rgb_yuv_dc_max_340mhz[] = { 0x49, 0x44, 0x0a, 0x20, 0x20, 0x20, 0x00, 0x00, 0x00, 0xfd, 0x00, 0x32, 0x46, 0x1e, 0x46, 0x0f, 0x00, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x8a, 0x02, 0x03, 0x1b, 0xb1, - 0xe3, 0x05, 0x00, 0x20, 0x41, 0x10, 0xe2, 0x00, 0xca, 0x6d, 0x03, 0x0c, - 0x00, 0x12, 0x34, 0x78, 0x44, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x8a, 0x02, 0x03, 0x15, 0xb1, + 0xe3, 0x05, 0x00, 0x20, 0x41, 0x10, 0xe2, 0x00, 0xca, 0x67, 0x03, 0x0c, + 0x00, 0x12, 0x34, 0x78, 0x44, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, @@ -580,7 +626,7 @@ static const unsigned char test_edid_hdmi_1080p_rgb_yuv_dc_max_340mhz[] = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x8c + 0x00, 0x00, 0x00, 0xb8 }; /* diff --git a/drivers/gpu/drm/tidss/Makefile b/drivers/gpu/drm/tidss/Makefile index 312645271014..b6d6becf1683 100644 --- a/drivers/gpu/drm/tidss/Makefile +++ b/drivers/gpu/drm/tidss/Makefile @@ -7,6 +7,7 @@ tidss-y := tidss_crtc.o \ tidss_irq.o \ tidss_plane.o \ tidss_scale_coefs.o \ - tidss_dispc.o + tidss_dispc.o \ + tidss_oldi.o obj-$(CONFIG_DRM_TIDSS) += tidss.o diff --git a/drivers/gpu/drm/tidss/tidss_crtc.c b/drivers/gpu/drm/tidss/tidss_crtc.c index a2f40a5c7703..da89fd01c337 100644 --- a/drivers/gpu/drm/tidss/tidss_crtc.c +++ b/drivers/gpu/drm/tidss/tidss_crtc.c @@ -91,7 +91,7 @@ static int tidss_crtc_atomic_check(struct drm_crtc *crtc, struct dispc_device *dispc = tidss->dispc; struct tidss_crtc *tcrtc = to_tidss_crtc(crtc); u32 hw_videoport = tcrtc->hw_videoport; - const struct drm_display_mode *mode; + struct drm_display_mode *mode; enum drm_mode_status ok; dev_dbg(ddev->dev, "%s\n", __func__); @@ -108,6 +108,9 @@ static int tidss_crtc_atomic_check(struct drm_crtc *crtc, return -EINVAL; } + if (drm_atomic_crtc_needs_modeset(crtc_state)) + drm_mode_set_crtcinfo(mode, 0); + return 
dispc_vp_bus_check(dispc, hw_videoport, crtc_state); } @@ -225,7 +228,7 @@ static void tidss_crtc_atomic_enable(struct drm_crtc *crtc, tidss_runtime_get(tidss); r = dispc_vp_set_clk_rate(tidss->dispc, tcrtc->hw_videoport, - mode->clock * 1000); + mode->crtc_clock * 1000); if (r != 0) return; diff --git a/drivers/gpu/drm/tidss/tidss_dispc.c b/drivers/gpu/drm/tidss/tidss_dispc.c index 21363ccbd763..7c8c15a5c39b 100644 --- a/drivers/gpu/drm/tidss/tidss_dispc.c +++ b/drivers/gpu/drm/tidss/tidss_dispc.c @@ -4,6 +4,7 @@ * Author: Jyri Sarha <jsarha@ti.com> */ +#include <linux/bitfield.h> #include <linux/clk.h> #include <linux/delay.h> #include <linux/dma-mapping.h> @@ -146,7 +147,7 @@ static const u16 tidss_am65x_common_regs[DISPC_COMMON_REG_TABLE_LEN] = { const struct dispc_features dispc_am65x_feats = { .max_pclk_khz = { [DISPC_VP_DPI] = 165000, - [DISPC_VP_OLDI] = 165000, + [DISPC_VP_OLDI_AM65X] = 165000, }, .scaling = { @@ -176,7 +177,7 @@ const struct dispc_features dispc_am65x_feats = { .vp_name = { "vp1", "vp2" }, .ovr_name = { "ovr1", "ovr2" }, .vpclk_name = { "vp1", "vp2" }, - .vp_bus_type = { DISPC_VP_OLDI, DISPC_VP_DPI }, + .vp_bus_type = { DISPC_VP_OLDI_AM65X, DISPC_VP_DPI }, .vp_feat = { .color = { .has_ctm = true, @@ -491,7 +492,7 @@ struct dispc_device { void __iomem *base_ovr[TIDSS_MAX_PORTS]; void __iomem *base_vp[TIDSS_MAX_PORTS]; - struct regmap *oldi_io_ctrl; + struct regmap *am65x_oldi_io_ctrl; struct clk *vp_clk[TIDSS_MAX_PORTS]; @@ -566,84 +567,81 @@ static u32 dispc_vp_read(struct dispc_device *dispc, u32 hw_videoport, u16 reg) return ioread32(base + reg); } -/* - * TRM gives bitfields as start:end, where start is the higher bit - * number. For example 7:0 - */ - -static u32 FLD_MASK(u32 start, u32 end) -{ - return ((1 << (start - end + 1)) - 1) << end; -} - -static u32 FLD_VAL(u32 val, u32 start, u32 end) -{ - return (val << end) & FLD_MASK(start, end); -} - -static u32 FLD_GET(u32 val, u32 start, u32 end) -{ - return (val & FLD_MASK(start, end)) >> end; -} - -static u32 FLD_MOD(u32 orig, u32 val, u32 start, u32 end) -{ - return (orig & ~FLD_MASK(start, end)) | FLD_VAL(val, start, end); -} - -static u32 REG_GET(struct dispc_device *dispc, u32 idx, u32 start, u32 end) +int tidss_configure_oldi(struct tidss_device *tidss, u32 hw_videoport, + u32 oldi_cfg) { - return FLD_GET(dispc_read(dispc, idx), start, end); -} + u32 count = 0; + u32 oldi_reset_bit = BIT(5 + hw_videoport); -static void REG_FLD_MOD(struct dispc_device *dispc, u32 idx, u32 val, - u32 start, u32 end) -{ - dispc_write(dispc, idx, FLD_MOD(dispc_read(dispc, idx), val, - start, end)); -} + dispc_vp_write(tidss->dispc, hw_videoport, DISPC_VP_DSS_OLDI_CFG, oldi_cfg); -static u32 VID_REG_GET(struct dispc_device *dispc, u32 hw_plane, u32 idx, - u32 start, u32 end) -{ - return FLD_GET(dispc_vid_read(dispc, hw_plane, idx), start, end); -} + while (!(oldi_reset_bit & dispc_read(tidss->dispc, DSS_SYSSTATUS)) && + count < 10000) + count++; -static void VID_REG_FLD_MOD(struct dispc_device *dispc, u32 hw_plane, u32 idx, - u32 val, u32 start, u32 end) -{ - dispc_vid_write(dispc, hw_plane, idx, - FLD_MOD(dispc_vid_read(dispc, hw_plane, idx), - val, start, end)); -} + if (!(oldi_reset_bit & dispc_read(tidss->dispc, DSS_SYSSTATUS))) + return -ETIMEDOUT; -static u32 VP_REG_GET(struct dispc_device *dispc, u32 vp, u32 idx, - u32 start, u32 end) -{ - return FLD_GET(dispc_vp_read(dispc, vp, idx), start, end); + return 0; } -static void VP_REG_FLD_MOD(struct dispc_device *dispc, u32 vp, u32 idx, u32 val, - u32 start, u32 end) +void 
tidss_disable_oldi(struct tidss_device *tidss, u32 hw_videoport) { - dispc_vp_write(dispc, vp, idx, FLD_MOD(dispc_vp_read(dispc, vp, idx), - val, start, end)); + dispc_vp_write(tidss->dispc, hw_videoport, DISPC_VP_DSS_OLDI_CFG, 0); } -__maybe_unused -static u32 OVR_REG_GET(struct dispc_device *dispc, u32 ovr, u32 idx, - u32 start, u32 end) -{ - return FLD_GET(dispc_ovr_read(dispc, ovr, idx), start, end); -} +/* + * TRM gives bitfields as start:end, where start is the higher bit + * number. For example 7:0 + */ -static void OVR_REG_FLD_MOD(struct dispc_device *dispc, u32 ovr, u32 idx, - u32 val, u32 start, u32 end) -{ - dispc_ovr_write(dispc, ovr, idx, - FLD_MOD(dispc_ovr_read(dispc, ovr, idx), - val, start, end)); -} +#define REG_GET(dispc, idx, mask) \ + ((u32)FIELD_GET((mask), dispc_read((dispc), (idx)))) + +#define REG_FLD_MOD(dispc, idx, val, mask) \ + ({ \ + struct dispc_device *_dispc = (dispc); \ + u32 _idx = (idx); \ + u32 _reg = dispc_read(_dispc, _idx); \ + FIELD_MODIFY((mask), &_reg, (val)); \ + dispc_write(_dispc, _idx, _reg); \ + }) + +#define VID_REG_GET(dispc, hw_plane, idx, mask) \ + ((u32)FIELD_GET((mask), dispc_vid_read((dispc), (hw_plane), (idx)))) + +#define VID_REG_FLD_MOD(dispc, hw_plane, idx, val, mask) \ + ({ \ + struct dispc_device *_dispc = (dispc); \ + u32 _hw_plane = (hw_plane); \ + u32 _idx = (idx); \ + u32 _reg = dispc_vid_read(_dispc, _hw_plane, _idx); \ + FIELD_MODIFY((mask), &_reg, (val)); \ + dispc_vid_write(_dispc, _hw_plane, _idx, _reg); \ + }) + +#define VP_REG_GET(dispc, vp, idx, mask) \ + ((u32)FIELD_GET((mask), dispc_vp_read((dispc), (vp), (idx)))) + +#define VP_REG_FLD_MOD(dispc, vp, idx, val, mask) \ + ({ \ + struct dispc_device *_dispc = (dispc); \ + u32 _vp = (vp); \ + u32 _idx = (idx); \ + u32 _reg = dispc_vp_read(_dispc, _vp, _idx); \ + FIELD_MODIFY((mask), &_reg, (val)); \ + dispc_vp_write(_dispc, _vp, _idx, _reg); \ + }) + +#define OVR_REG_FLD_MOD(dispc, ovr, idx, val, mask) \ + ({ \ + struct dispc_device *_dispc = (dispc); \ + u32 _ovr = (ovr); \ + u32 _idx = (idx); \ + u32 _reg = dispc_ovr_read(_dispc, _ovr, _idx); \ + FIELD_MODIFY((mask), &_reg, (val)); \ + dispc_ovr_write(_dispc, _ovr, _idx, _reg); \ + }) static dispc_irq_t dispc_vp_irq_from_raw(u32 stat, u32 hw_videoport) { @@ -1016,13 +1014,11 @@ void dispc_set_irqenable(struct dispc_device *dispc, dispc_irq_t mask) } } -enum dispc_oldi_mode_reg_val { SPWG_18 = 0, JEIDA_24 = 1, SPWG_24 = 2 }; - struct dispc_bus_format { u32 bus_fmt; u32 data_width; bool is_oldi_fmt; - enum dispc_oldi_mode_reg_val oldi_mode_reg_val; + enum oldi_mode_reg_val am65x_oldi_mode_reg_val; }; static const struct dispc_bus_format dispc_bus_formats[] = { @@ -1066,7 +1062,7 @@ int dispc_vp_bus_check(struct dispc_device *dispc, u32 hw_videoport, return -EINVAL; } - if (dispc->feat->vp_bus_type[hw_videoport] != DISPC_VP_OLDI && + if (dispc->feat->vp_bus_type[hw_videoport] != DISPC_VP_OLDI_AM65X && fmt->is_oldi_fmt) { dev_dbg(dispc->dev, "%s: %s is not OLDI-port\n", __func__, dispc->feat->vp_name[hw_videoport]); @@ -1076,23 +1072,23 @@ int dispc_vp_bus_check(struct dispc_device *dispc, u32 hw_videoport, return 0; } -static void dispc_oldi_tx_power(struct dispc_device *dispc, bool power) +static void dispc_am65x_oldi_tx_power(struct dispc_device *dispc, bool power) { - u32 val = power ? 0 : OLDI_PWRDN_TX; + u32 val = power ? 
0 : AM65X_OLDI_PWRDN_TX; - if (WARN_ON(!dispc->oldi_io_ctrl)) + if (WARN_ON(!dispc->am65x_oldi_io_ctrl)) return; - regmap_update_bits(dispc->oldi_io_ctrl, OLDI_DAT0_IO_CTRL, - OLDI_PWRDN_TX, val); - regmap_update_bits(dispc->oldi_io_ctrl, OLDI_DAT1_IO_CTRL, - OLDI_PWRDN_TX, val); - regmap_update_bits(dispc->oldi_io_ctrl, OLDI_DAT2_IO_CTRL, - OLDI_PWRDN_TX, val); - regmap_update_bits(dispc->oldi_io_ctrl, OLDI_DAT3_IO_CTRL, - OLDI_PWRDN_TX, val); - regmap_update_bits(dispc->oldi_io_ctrl, OLDI_CLK_IO_CTRL, - OLDI_PWRDN_TX, val); + regmap_update_bits(dispc->am65x_oldi_io_ctrl, AM65X_OLDI_DAT0_IO_CTRL, + AM65X_OLDI_PWRDN_TX, val); + regmap_update_bits(dispc->am65x_oldi_io_ctrl, AM65X_OLDI_DAT1_IO_CTRL, + AM65X_OLDI_PWRDN_TX, val); + regmap_update_bits(dispc->am65x_oldi_io_ctrl, AM65X_OLDI_DAT2_IO_CTRL, + AM65X_OLDI_PWRDN_TX, val); + regmap_update_bits(dispc->am65x_oldi_io_ctrl, AM65X_OLDI_DAT3_IO_CTRL, + AM65X_OLDI_PWRDN_TX, val); + regmap_update_bits(dispc->am65x_oldi_io_ctrl, AM65X_OLDI_CLK_IO_CTRL, + AM65X_OLDI_PWRDN_TX, val); } static void dispc_set_num_datalines(struct dispc_device *dispc, @@ -1118,11 +1114,12 @@ static void dispc_set_num_datalines(struct dispc_device *dispc, v = 3; } - VP_REG_FLD_MOD(dispc, hw_videoport, DISPC_VP_CONTROL, v, 10, 8); + VP_REG_FLD_MOD(dispc, hw_videoport, DISPC_VP_CONTROL, v, + DISPC_VP_CONTROL_DATALINES_MASK); } -static void dispc_enable_oldi(struct dispc_device *dispc, u32 hw_videoport, - const struct dispc_bus_format *fmt) +static void dispc_enable_am65x_oldi(struct dispc_device *dispc, u32 hw_videoport, + const struct dispc_bus_format *fmt) { u32 oldi_cfg = 0; u32 oldi_reset_bit = BIT(5 + hw_videoport); @@ -1141,7 +1138,8 @@ static void dispc_enable_oldi(struct dispc_device *dispc, u32 hw_videoport, oldi_cfg |= BIT(7); /* DEPOL */ - oldi_cfg = FLD_MOD(oldi_cfg, fmt->oldi_mode_reg_val, 3, 1); + FIELD_MODIFY(DISPC_VP_DSS_OLDI_CFG_MAP_MASK, &oldi_cfg, + fmt->am65x_oldi_mode_reg_val); oldi_cfg |= BIT(12); /* SOFTRST */ @@ -1170,10 +1168,10 @@ void dispc_vp_prepare(struct dispc_device *dispc, u32 hw_videoport, if (WARN_ON(!fmt)) return; - if (dispc->feat->vp_bus_type[hw_videoport] == DISPC_VP_OLDI) { - dispc_oldi_tx_power(dispc, true); + if (dispc->feat->vp_bus_type[hw_videoport] == DISPC_VP_OLDI_AM65X) { + dispc_am65x_oldi_tx_power(dispc, true); - dispc_enable_oldi(dispc, hw_videoport, fmt); + dispc_enable_am65x_oldi(dispc, hw_videoport, fmt); } } @@ -1194,23 +1192,23 @@ void dispc_vp_enable(struct dispc_device *dispc, u32 hw_videoport, dispc_set_num_datalines(dispc, hw_videoport, fmt->data_width); - hfp = mode->hsync_start - mode->hdisplay; - hsw = mode->hsync_end - mode->hsync_start; - hbp = mode->htotal - mode->hsync_end; + hfp = mode->crtc_hsync_start - mode->crtc_hdisplay; + hsw = mode->crtc_hsync_end - mode->crtc_hsync_start; + hbp = mode->crtc_htotal - mode->crtc_hsync_end; - vfp = mode->vsync_start - mode->vdisplay; - vsw = mode->vsync_end - mode->vsync_start; - vbp = mode->vtotal - mode->vsync_end; + vfp = mode->crtc_vsync_start - mode->crtc_vdisplay; + vsw = mode->crtc_vsync_end - mode->crtc_vsync_start; + vbp = mode->crtc_vtotal - mode->crtc_vsync_end; dispc_vp_write(dispc, hw_videoport, DISPC_VP_TIMING_H, - FLD_VAL(hsw - 1, 7, 0) | - FLD_VAL(hfp - 1, 19, 8) | - FLD_VAL(hbp - 1, 31, 20)); + FIELD_PREP(DISPC_VP_TIMING_H_SYNC_PULSE_MASK, hsw - 1) | + FIELD_PREP(DISPC_VP_TIMING_H_FRONT_PORCH_MASK, hfp - 1) | + FIELD_PREP(DISPC_VP_TIMING_H_BACK_PORCH_MASK, hbp - 1)); dispc_vp_write(dispc, hw_videoport, DISPC_VP_TIMING_V, - FLD_VAL(vsw - 1, 7, 0) | - 
FLD_VAL(vfp, 19, 8) | - FLD_VAL(vbp, 31, 20)); + FIELD_PREP(DISPC_VP_TIMING_V_SYNC_PULSE_MASK, vsw - 1) | + FIELD_PREP(DISPC_VP_TIMING_V_FRONT_PORCH_MASK, vfp) | + FIELD_PREP(DISPC_VP_TIMING_V_BACK_PORCH_MASK, vbp)); ivs = !!(mode->flags & DRM_MODE_FLAG_NVSYNC); @@ -1229,48 +1227,55 @@ void dispc_vp_enable(struct dispc_device *dispc, u32 hw_videoport, align = true; /* always use DE_HIGH for OLDI */ - if (dispc->feat->vp_bus_type[hw_videoport] == DISPC_VP_OLDI) + if (dispc->feat->vp_bus_type[hw_videoport] == DISPC_VP_OLDI_AM65X) ieo = false; dispc_vp_write(dispc, hw_videoport, DISPC_VP_POL_FREQ, - FLD_VAL(align, 18, 18) | - FLD_VAL(onoff, 17, 17) | - FLD_VAL(rf, 16, 16) | - FLD_VAL(ieo, 15, 15) | - FLD_VAL(ipc, 14, 14) | - FLD_VAL(ihs, 13, 13) | - FLD_VAL(ivs, 12, 12)); + FIELD_PREP(DISPC_VP_POL_FREQ_ALIGN_MASK, align) | + FIELD_PREP(DISPC_VP_POL_FREQ_ONOFF_MASK, onoff) | + FIELD_PREP(DISPC_VP_POL_FREQ_RF_MASK, rf) | + FIELD_PREP(DISPC_VP_POL_FREQ_IEO_MASK, ieo) | + FIELD_PREP(DISPC_VP_POL_FREQ_IPC_MASK, ipc) | + FIELD_PREP(DISPC_VP_POL_FREQ_IHS_MASK, ihs) | + FIELD_PREP(DISPC_VP_POL_FREQ_IVS_MASK, ivs)); dispc_vp_write(dispc, hw_videoport, DISPC_VP_SIZE_SCREEN, - FLD_VAL(mode->hdisplay - 1, 11, 0) | - FLD_VAL(mode->vdisplay - 1, 27, 16)); + FIELD_PREP(DISPC_VP_SIZE_SCREEN_HDISPLAY_MASK, + mode->crtc_hdisplay - 1) | + FIELD_PREP(DISPC_VP_SIZE_SCREEN_VDISPLAY_MASK, + mode->crtc_vdisplay - 1)); - VP_REG_FLD_MOD(dispc, hw_videoport, DISPC_VP_CONTROL, 1, 0, 0); + VP_REG_FLD_MOD(dispc, hw_videoport, DISPC_VP_CONTROL, 1, + DISPC_VP_CONTROL_ENABLE_MASK); } void dispc_vp_disable(struct dispc_device *dispc, u32 hw_videoport) { - VP_REG_FLD_MOD(dispc, hw_videoport, DISPC_VP_CONTROL, 0, 0, 0); + VP_REG_FLD_MOD(dispc, hw_videoport, DISPC_VP_CONTROL, 0, + DISPC_VP_CONTROL_ENABLE_MASK); } void dispc_vp_unprepare(struct dispc_device *dispc, u32 hw_videoport) { - if (dispc->feat->vp_bus_type[hw_videoport] == DISPC_VP_OLDI) { + if (dispc->feat->vp_bus_type[hw_videoport] == DISPC_VP_OLDI_AM65X) { dispc_vp_write(dispc, hw_videoport, DISPC_VP_DSS_OLDI_CFG, 0); - dispc_oldi_tx_power(dispc, false); + dispc_am65x_oldi_tx_power(dispc, false); } } bool dispc_vp_go_busy(struct dispc_device *dispc, u32 hw_videoport) { - return VP_REG_GET(dispc, hw_videoport, DISPC_VP_CONTROL, 5, 5); + return VP_REG_GET(dispc, hw_videoport, DISPC_VP_CONTROL, + DISPC_VP_CONTROL_GOBIT_MASK); } void dispc_vp_go(struct dispc_device *dispc, u32 hw_videoport) { - WARN_ON(VP_REG_GET(dispc, hw_videoport, DISPC_VP_CONTROL, 5, 5)); - VP_REG_FLD_MOD(dispc, hw_videoport, DISPC_VP_CONTROL, 1, 5, 5); + WARN_ON(VP_REG_GET(dispc, hw_videoport, DISPC_VP_CONTROL, + DISPC_VP_CONTROL_GOBIT_MASK)); + VP_REG_FLD_MOD(dispc, hw_videoport, DISPC_VP_CONTROL, 1, + DISPC_VP_CONTROL_GOBIT_MASK); } enum c8_to_c12_mode { C8_TO_C12_REPLICATE, C8_TO_C12_MAX, C8_TO_C12_MIN }; @@ -1420,7 +1425,6 @@ void dispc_vp_disable_clk(struct dispc_device *dispc, u32 hw_videoport) * Calculate the percentage difference between the requested pixel clock rate * and the effective rate resulting from calculating the clock divider value. 
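
For reference, the timing hunks above are part of a tree-wide switch in this driver from the hand-rolled FLD_VAL()/FLD_MOD() helpers (start/end bit pairs) to the generic <linux/bitfield.h> macros keyed by GENMASK() masks. A minimal standalone sketch of the mapping, assuming only the bitfield.h semantics the patch itself uses (this is not code from the patch):

#include <linux/bitfield.h>
#include <linux/bits.h>
#include <linux/types.h>

/* Old: FLD_VAL(hsw - 1, 7, 0) | FLD_VAL(hfp - 1, 19, 8) | FLD_VAL(hbp - 1, 31, 20) */
static u32 pack_timing_h(u32 hsw, u32 hfp, u32 hbp)
{
	return FIELD_PREP(GENMASK(7, 0), hsw - 1) |	/* sync pulse width */
	       FIELD_PREP(GENMASK(19, 8), hfp - 1) |	/* front porch */
	       FIELD_PREP(GENMASK(31, 20), hbp - 1);	/* back porch */
}

/* Old: reg = FLD_MOD(reg, val, 10, 8).  The new REG_FLD_MOD()-style macros
 * perform the same read-modify-write via FIELD_MODIFY().
 */
static void set_datalines_field(u32 *reg, u32 val)
{
	FIELD_MODIFY(GENMASK(10, 8), reg, val);
}

GENMASK(10, 8) here corresponds to DISPC_VP_CONTROL_DATALINES_MASK added later in tidss_dispc_regs.h; the named masks keep the register layout in one place instead of scattering bit numbers through the code.
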
*/ -static unsigned int dispc_pclk_diff(unsigned long rate, unsigned long real_rate) { int r = rate / 100, rr = real_rate / 100; @@ -1471,11 +1475,11 @@ static void dispc_am65x_ovr_set_plane(struct dispc_device *dispc, u32 hw_id = dispc->feat->vid_info[hw_plane].hw_id; OVR_REG_FLD_MOD(dispc, hw_videoport, DISPC_OVR_ATTRIBUTES(layer), - hw_id, 4, 1); - OVR_REG_FLD_MOD(dispc, hw_videoport, DISPC_OVR_ATTRIBUTES(layer), - x, 17, 6); - OVR_REG_FLD_MOD(dispc, hw_videoport, DISPC_OVR_ATTRIBUTES(layer), - y, 30, 19); + hw_id, DISPC_OVR_ATTRIBUTES_CHANNELIN_MASK); + OVR_REG_FLD_MOD(dispc, hw_videoport, DISPC_OVR_ATTRIBUTES(layer), x, + DISPC_OVR_ATTRIBUTES_POSX_MASK); + OVR_REG_FLD_MOD(dispc, hw_videoport, DISPC_OVR_ATTRIBUTES(layer), y, + DISPC_OVR_ATTRIBUTES_POSY_MASK); } static void dispc_j721e_ovr_set_plane(struct dispc_device *dispc, @@ -1485,11 +1489,11 @@ static void dispc_j721e_ovr_set_plane(struct dispc_device *dispc, u32 hw_id = dispc->feat->vid_info[hw_plane].hw_id; OVR_REG_FLD_MOD(dispc, hw_videoport, DISPC_OVR_ATTRIBUTES(layer), - hw_id, 4, 1); - OVR_REG_FLD_MOD(dispc, hw_videoport, DISPC_OVR_ATTRIBUTES2(layer), - x, 13, 0); - OVR_REG_FLD_MOD(dispc, hw_videoport, DISPC_OVR_ATTRIBUTES2(layer), - y, 29, 16); + hw_id, DISPC_OVR_ATTRIBUTES_CHANNELIN_MASK); + OVR_REG_FLD_MOD(dispc, hw_videoport, DISPC_OVR_ATTRIBUTES2(layer), x, + DISPC_OVR_ATTRIBUTES2_POSX_MASK); + OVR_REG_FLD_MOD(dispc, hw_videoport, DISPC_OVR_ATTRIBUTES2(layer), y, + DISPC_OVR_ATTRIBUTES2_POSY_MASK); } void dispc_ovr_set_plane(struct dispc_device *dispc, u32 hw_plane, @@ -1524,7 +1528,7 @@ void dispc_ovr_enable_layer(struct dispc_device *dispc, return; OVR_REG_FLD_MOD(dispc, hw_videoport, DISPC_OVR_ATTRIBUTES(layer), - !!enable, 0, 0); + !!enable, DISPC_OVR_ATTRIBUTES_ENABLE_MASK); } /* CSC */ @@ -1560,14 +1564,14 @@ struct dispc_csc_coef { static void dispc_csc_offset_regval(const struct dispc_csc_coef *csc, u32 *regval) { -#define OVAL(x, y) (FLD_VAL(x, 15, 3) | FLD_VAL(y, 31, 19)) +#define OVAL(x, y) (FIELD_PREP(GENMASK(15, 3), x) | FIELD_PREP(GENMASK(31, 19), y)) regval[5] = OVAL(csc->preoffset[0], csc->preoffset[1]); regval[6] = OVAL(csc->preoffset[2], csc->postoffset[0]); regval[7] = OVAL(csc->postoffset[1], csc->postoffset[2]); #undef OVAL } -#define CVAL(x, y) (FLD_VAL(x, 10, 0) | FLD_VAL(y, 26, 16)) +#define CVAL(x, y) (FIELD_PREP(GENMASK(10, 0), x) | FIELD_PREP(GENMASK(26, 16), y)) static void dispc_csc_yuv2rgb_regval(const struct dispc_csc_coef *csc, u32 *regval) { @@ -1747,7 +1751,8 @@ static void dispc_vid_csc_setup(struct dispc_device *dispc, u32 hw_plane, static void dispc_vid_csc_enable(struct dispc_device *dispc, u32 hw_plane, bool enable) { - VID_REG_FLD_MOD(dispc, hw_plane, DISPC_VID_ATTRIBUTES, !!enable, 9, 9); + VID_REG_FLD_MOD(dispc, hw_plane, DISPC_VID_ATTRIBUTES, !!enable, + DISPC_VID_ATTRIBUTES_COLORCONVENABLE_MASK); } /* SCALER */ @@ -1806,7 +1811,8 @@ static void dispc_vid_write_fir_coefs(struct dispc_device *dispc, c1 = coefs->c1[phase]; c2 = coefs->c2[phase]; - c12 = FLD_VAL(c1, 19, 10) | FLD_VAL(c2, 29, 20); + c12 = FIELD_PREP(GENMASK(19, 10), c1) | FIELD_PREP(GENMASK(29, 20), + c2); dispc_vid_write(dispc, hw_plane, reg, c12); } @@ -2003,20 +2009,20 @@ static void dispc_vid_set_scaling(struct dispc_device *dispc, u32 fourcc) { /* HORIZONTAL RESIZE ENABLE */ - VID_REG_FLD_MOD(dispc, hw_plane, DISPC_VID_ATTRIBUTES, - sp->scale_x, 7, 7); + VID_REG_FLD_MOD(dispc, hw_plane, DISPC_VID_ATTRIBUTES, sp->scale_x, + DISPC_VID_ATTRIBUTES_HRESIZEENABLE_MASK); /* VERTICAL RESIZE ENABLE */ - 
VID_REG_FLD_MOD(dispc, hw_plane, DISPC_VID_ATTRIBUTES, - sp->scale_y, 8, 8); + VID_REG_FLD_MOD(dispc, hw_plane, DISPC_VID_ATTRIBUTES, sp->scale_y, + DISPC_VID_ATTRIBUTES_VRESIZEENABLE_MASK); /* Skip the rest if no scaling is used */ if (!sp->scale_x && !sp->scale_y) return; /* VERTICAL 5-TAPS */ - VID_REG_FLD_MOD(dispc, hw_plane, DISPC_VID_ATTRIBUTES, - sp->five_taps, 21, 21); + VID_REG_FLD_MOD(dispc, hw_plane, DISPC_VID_ATTRIBUTES, sp->five_taps, + DISPC_VID_ATTRIBUTES_VERTICALTAPS_MASK); if (dispc_fourcc_is_yuv(fourcc)) { if (sp->scale_x) { @@ -2106,7 +2112,7 @@ static void dispc_plane_set_pixel_format(struct dispc_device *dispc, if (dispc_color_formats[i].fourcc == fourcc) { VID_REG_FLD_MOD(dispc, hw_plane, DISPC_VID_ATTRIBUTES, dispc_color_formats[i].dss_code, - 6, 1); + DISPC_VID_ATTRIBUTES_FORMAT_MASK); return; } } @@ -2228,7 +2234,8 @@ void dispc_plane_setup(struct dispc_device *dispc, u32 hw_plane, dispc_vid_write(dispc, hw_plane, DISPC_VID_BA_EXT_1, (u64)dma_addr >> 32); dispc_vid_write(dispc, hw_plane, DISPC_VID_PICTURE_SIZE, - (scale.in_w - 1) | ((scale.in_h - 1) << 16)); + FIELD_PREP(DISPC_VID_PICTURE_SIZE_MEMSIZEY_MASK, scale.in_h - 1) | + FIELD_PREP(DISPC_VID_PICTURE_SIZE_MEMSIZEX_MASK, scale.in_w - 1)); /* For YUV422 format we use the macropixel size for pixel inc */ if (fourcc == DRM_FORMAT_YUYV || fourcc == DRM_FORMAT_UYVY) @@ -2265,8 +2272,10 @@ void dispc_plane_setup(struct dispc_device *dispc, u32 hw_plane, if (!lite) { dispc_vid_write(dispc, hw_plane, DISPC_VID_SIZE, - (state->crtc_w - 1) | - ((state->crtc_h - 1) << 16)); + FIELD_PREP(DISPC_VID_SIZE_SIZEY_MASK, + state->crtc_h - 1) | + FIELD_PREP(DISPC_VID_SIZE_SIZEX_MASK, + state->crtc_w - 1)); dispc_vid_set_scaling(dispc, hw_plane, &scale, fourcc); } @@ -2280,38 +2289,45 @@ void dispc_plane_setup(struct dispc_device *dispc, u32 hw_plane, } dispc_vid_write(dispc, hw_plane, DISPC_VID_GLOBAL_ALPHA, - 0xFF & (state->alpha >> 8)); + FIELD_PREP(DISPC_VID_GLOBAL_ALPHA_GLOBALALPHA_MASK, + state->alpha >> 8)); if (state->pixel_blend_mode == DRM_MODE_BLEND_PREMULTI) VID_REG_FLD_MOD(dispc, hw_plane, DISPC_VID_ATTRIBUTES, 1, - 28, 28); + DISPC_VID_ATTRIBUTES_PREMULTIPLYALPHA_MASK); else VID_REG_FLD_MOD(dispc, hw_plane, DISPC_VID_ATTRIBUTES, 0, - 28, 28); + DISPC_VID_ATTRIBUTES_PREMULTIPLYALPHA_MASK); } void dispc_plane_enable(struct dispc_device *dispc, u32 hw_plane, bool enable) { - VID_REG_FLD_MOD(dispc, hw_plane, DISPC_VID_ATTRIBUTES, !!enable, 0, 0); + VID_REG_FLD_MOD(dispc, hw_plane, DISPC_VID_ATTRIBUTES, !!enable, + DISPC_VID_ATTRIBUTES_ENABLE_MASK); } static u32 dispc_vid_get_fifo_size(struct dispc_device *dispc, u32 hw_plane) { - return VID_REG_GET(dispc, hw_plane, DISPC_VID_BUF_SIZE_STATUS, 15, 0); + return VID_REG_GET(dispc, hw_plane, DISPC_VID_BUF_SIZE_STATUS, + DISPC_VID_BUF_SIZE_STATUS_BUFSIZE_MASK); } static void dispc_vid_set_mflag_threshold(struct dispc_device *dispc, u32 hw_plane, u32 low, u32 high) { dispc_vid_write(dispc, hw_plane, DISPC_VID_MFLAG_THRESHOLD, - FLD_VAL(high, 31, 16) | FLD_VAL(low, 15, 0)); + FIELD_PREP(DISPC_VID_MFLAG_THRESHOLD_HT_MFLAG_MASK, high) | + FIELD_PREP(DISPC_VID_MFLAG_THRESHOLD_LT_MFLAG_MASK, low)); } static void dispc_vid_set_buf_threshold(struct dispc_device *dispc, u32 hw_plane, u32 low, u32 high) { dispc_vid_write(dispc, hw_plane, DISPC_VID_BUF_THRESHOLD, - FLD_VAL(high, 31, 16) | FLD_VAL(low, 15, 0)); + FIELD_PREP(DISPC_VID_BUF_THRESHOLD_BUFHIGHTHRESHOLD_MASK, + high) | + FIELD_PREP(DISPC_VID_BUF_THRESHOLD_BUFLOWTHRESHOLD_MASK, + low)); } static void dispc_k2g_plane_init(struct 
dispc_device *dispc) @@ -2321,9 +2337,11 @@ static void dispc_k2g_plane_init(struct dispc_device *dispc) dev_dbg(dispc->dev, "%s()\n", __func__); /* MFLAG_CTRL = ENABLED */ - REG_FLD_MOD(dispc, DISPC_GLOBAL_MFLAG_ATTRIBUTE, 2, 1, 0); + REG_FLD_MOD(dispc, DISPC_GLOBAL_MFLAG_ATTRIBUTE, 2, + DISPC_GLOBAL_MFLAG_ATTRIBUTE_MFLAG_CTRL_MASK); /* MFLAG_START = MFLAGNORMALSTARTMODE */ - REG_FLD_MOD(dispc, DISPC_GLOBAL_MFLAG_ATTRIBUTE, 0, 6, 6); + REG_FLD_MOD(dispc, DISPC_GLOBAL_MFLAG_ATTRIBUTE, 0, + DISPC_GLOBAL_MFLAG_ATTRIBUTE_MFLAG_START_MASK); for (hw_plane = 0; hw_plane < dispc->feat->num_vids; hw_plane++) { u32 size = dispc_vid_get_fifo_size(dispc, hw_plane); @@ -2360,7 +2378,7 @@ static void dispc_k2g_plane_init(struct dispc_device *dispc) * register is ignored. */ VID_REG_FLD_MOD(dispc, hw_plane, DISPC_VID_ATTRIBUTES, 1, - 19, 19); + DISPC_VID_ATTRIBUTES_BUFPRELOAD_MASK); } } @@ -2372,13 +2390,15 @@ static void dispc_k3_plane_init(struct dispc_device *dispc) dev_dbg(dispc->dev, "%s()\n", __func__); - REG_FLD_MOD(dispc, DSS_CBA_CFG, cba_lo_pri, 2, 0); - REG_FLD_MOD(dispc, DSS_CBA_CFG, cba_hi_pri, 5, 3); + REG_FLD_MOD(dispc, DSS_CBA_CFG, cba_lo_pri, DSS_CBA_CFG_PRI_LO_MASK); + REG_FLD_MOD(dispc, DSS_CBA_CFG, cba_hi_pri, DSS_CBA_CFG_PRI_HI_MASK); /* MFLAG_CTRL = ENABLED */ - REG_FLD_MOD(dispc, DISPC_GLOBAL_MFLAG_ATTRIBUTE, 2, 1, 0); + REG_FLD_MOD(dispc, DISPC_GLOBAL_MFLAG_ATTRIBUTE, 2, + DISPC_GLOBAL_MFLAG_ATTRIBUTE_MFLAG_CTRL_MASK); /* MFLAG_START = MFLAGNORMALSTARTMODE */ - REG_FLD_MOD(dispc, DISPC_GLOBAL_MFLAG_ATTRIBUTE, 0, 6, 6); + REG_FLD_MOD(dispc, DISPC_GLOBAL_MFLAG_ATTRIBUTE, 0, + DISPC_GLOBAL_MFLAG_ATTRIBUTE_MFLAG_START_MASK); for (hw_plane = 0; hw_plane < dispc->feat->num_vids; hw_plane++) { u32 size = dispc_vid_get_fifo_size(dispc, hw_plane); @@ -2411,7 +2431,7 @@ static void dispc_k3_plane_init(struct dispc_device *dispc) /* Prefech up to PRELOAD value */ VID_REG_FLD_MOD(dispc, hw_plane, DISPC_VID_ATTRIBUTES, 0, - 19, 19); + DISPC_VID_ATTRIBUTES_BUFPRELOAD_MASK); } } @@ -2441,7 +2461,8 @@ static void dispc_vp_init(struct dispc_device *dispc) /* Enable the gamma Shadow bit-field for all VPs*/ for (i = 0; i < dispc->feat->num_vps; i++) - VP_REG_FLD_MOD(dispc, i, DISPC_VP_CONFIG, 1, 2, 2); + VP_REG_FLD_MOD(dispc, i, DISPC_VP_CONFIG, 1, + DISPC_VP_CONFIG_GAMMAENABLE_MASK); } static void dispc_initial_config(struct dispc_device *dispc) @@ -2452,8 +2473,8 @@ static void dispc_initial_config(struct dispc_device *dispc) /* Note: Hardcoded DPI routing on J721E for now */ if (dispc->feat->subrev == DISPC_J721E) { dispc_write(dispc, DISPC_CONNECTIONS, - FLD_VAL(2, 3, 0) | /* VP1 to DPI0 */ - FLD_VAL(8, 7, 4) /* VP3 to DPI1 */ + FIELD_PREP(DISPC_CONNECTIONS_DPI_0_CONN_MASK, 2) | /* VP1 to DPI0 */ + FIELD_PREP(DISPC_CONNECTIONS_DPI_1_CONN_MASK, 8) /* VP3 to DPI1 */ ); } } @@ -2631,8 +2652,8 @@ static void dispc_k2g_cpr_from_ctm(const struct drm_color_ctm *ctm, cpr->m[CSC_BB] = dispc_S31_32_to_s2_8(ctm->matrix[8]); } -#define CVAL(xR, xG, xB) (FLD_VAL(xR, 9, 0) | FLD_VAL(xG, 20, 11) | \ - FLD_VAL(xB, 31, 22)) +#define CVAL(xR, xG, xB) (FIELD_PREP(GENMASK(9, 0), xR) | FIELD_PREP(GENMASK(20, 11), xG) | \ + FIELD_PREP(GENMASK(31, 22), xB)) static void dispc_k2g_vp_csc_cpr_regval(const struct dispc_csc_coef *csc, u32 *regval) @@ -2674,8 +2695,8 @@ static void dispc_k2g_vp_set_ctm(struct dispc_device *dispc, u32 hw_videoport, cprenable = 1; } - VP_REG_FLD_MOD(dispc, hw_videoport, DISPC_VP_CONFIG, - cprenable, 15, 15); + VP_REG_FLD_MOD(dispc, hw_videoport, DISPC_VP_CONFIG, cprenable, + 
DISPC_VP_CONFIG_CPR_MASK); } static s16 dispc_S31_32_to_s3_8(s64 coef) @@ -2740,8 +2761,8 @@ static void dispc_k3_vp_set_ctm(struct dispc_device *dispc, u32 hw_videoport, colorconvenable = 1; } - VP_REG_FLD_MOD(dispc, hw_videoport, DISPC_VP_CONFIG, - colorconvenable, 24, 24); + VP_REG_FLD_MOD(dispc, hw_videoport, DISPC_VP_CONFIG, colorconvenable, + DISPC_VP_CONFIG_COLORCONVENABLE_MASK); } static void dispc_vp_set_color_mgmt(struct dispc_device *dispc, @@ -2796,26 +2817,26 @@ int dispc_runtime_resume(struct dispc_device *dispc) clk_prepare_enable(dispc->fclk); - if (REG_GET(dispc, DSS_SYSSTATUS, 0, 0) == 0) + if (REG_GET(dispc, DSS_SYSSTATUS, DSS_SYSSTATUS_DISPC_FUNC_RESETDONE) == 0) dev_warn(dispc->dev, "DSS FUNC RESET not done!\n"); dev_dbg(dispc->dev, "OMAP DSS7 rev 0x%x\n", dispc_read(dispc, DSS_REVISION)); dev_dbg(dispc->dev, "VP RESETDONE %d,%d,%d\n", - REG_GET(dispc, DSS_SYSSTATUS, 1, 1), - REG_GET(dispc, DSS_SYSSTATUS, 2, 2), - REG_GET(dispc, DSS_SYSSTATUS, 3, 3)); + REG_GET(dispc, DSS_SYSSTATUS, GENMASK(1, 1)), + REG_GET(dispc, DSS_SYSSTATUS, GENMASK(2, 2)), + REG_GET(dispc, DSS_SYSSTATUS, GENMASK(3, 3))); if (dispc->feat->subrev == DISPC_AM625 || dispc->feat->subrev == DISPC_AM65X) dev_dbg(dispc->dev, "OLDI RESETDONE %d,%d,%d\n", - REG_GET(dispc, DSS_SYSSTATUS, 5, 5), - REG_GET(dispc, DSS_SYSSTATUS, 6, 6), - REG_GET(dispc, DSS_SYSSTATUS, 7, 7)); + REG_GET(dispc, DSS_SYSSTATUS, GENMASK(5, 5)), + REG_GET(dispc, DSS_SYSSTATUS, GENMASK(6, 6)), + REG_GET(dispc, DSS_SYSSTATUS, GENMASK(7, 7))); dev_dbg(dispc->dev, "DISPC IDLE %d\n", - REG_GET(dispc, DSS_SYSSTATUS, 9, 9)); + REG_GET(dispc, DSS_SYSSTATUS, DSS_SYSSTATUS_DISPC_IDLE_STATUS)); dispc_initial_config(dispc); @@ -2852,15 +2873,15 @@ static int dispc_iomap_resource(struct platform_device *pdev, const char *name, static int dispc_init_am65x_oldi_io_ctrl(struct device *dev, struct dispc_device *dispc) { - dispc->oldi_io_ctrl = + dispc->am65x_oldi_io_ctrl = syscon_regmap_lookup_by_phandle(dev->of_node, "ti,am65x-oldi-io-ctrl"); - if (PTR_ERR(dispc->oldi_io_ctrl) == -ENODEV) { - dispc->oldi_io_ctrl = NULL; - } else if (IS_ERR(dispc->oldi_io_ctrl)) { + if (PTR_ERR(dispc->am65x_oldi_io_ctrl) == -ENODEV) { + dispc->am65x_oldi_io_ctrl = NULL; + } else if (IS_ERR(dispc->am65x_oldi_io_ctrl)) { dev_err(dev, "%s: syscon_regmap_lookup_by_phandle failed %ld\n", - __func__, PTR_ERR(dispc->oldi_io_ctrl)); - return PTR_ERR(dispc->oldi_io_ctrl); + __func__, PTR_ERR(dispc->am65x_oldi_io_ctrl)); + return PTR_ERR(dispc->am65x_oldi_io_ctrl); } return 0; } @@ -2892,7 +2913,8 @@ static void dispc_softreset_k2g(struct dispc_device *dispc) spin_unlock_irqrestore(&dispc->tidss->irq_lock, flags); for (unsigned int vp_idx = 0; vp_idx < dispc->feat->num_vps; ++vp_idx) - VP_REG_FLD_MOD(dispc, vp_idx, DISPC_VP_CONTROL, 0, 0, 0); + VP_REG_FLD_MOD(dispc, vp_idx, DISPC_VP_CONTROL, 0, + DISPC_VP_CONTROL_ENABLE_MASK); } static int dispc_softreset(struct dispc_device *dispc) @@ -2906,7 +2928,7 @@ static int dispc_softreset(struct dispc_device *dispc) } /* Soft reset */ - REG_FLD_MOD(dispc, DSS_SYSCONFIG, 1, 1, 1); + REG_FLD_MOD(dispc, DSS_SYSCONFIG, 1, DSS_SYSCONFIG_SOFTRESET_MASK); /* Wait for reset to complete */ ret = readl_poll_timeout(dispc->base_common + DSS_SYSSTATUS, val, val & 1, 100, 5000); diff --git a/drivers/gpu/drm/tidss/tidss_dispc.h b/drivers/gpu/drm/tidss/tidss_dispc.h index 28958514b8f5..60c1b400eb89 100644 --- a/drivers/gpu/drm/tidss/tidss_dispc.h +++ b/drivers/gpu/drm/tidss/tidss_dispc.h @@ -7,11 +7,14 @@ #ifndef __TIDSS_DISPC_H__ #define 
__TIDSS_DISPC_H__ +#include <drm/drm_color_mgmt.h> + #include "tidss_drv.h" struct dispc_device; struct drm_crtc_state; +struct drm_plane_state; enum tidss_gamma_type { TIDSS_GAMMA_8BIT, TIDSS_GAMMA_10BIT }; @@ -58,7 +61,7 @@ struct dispc_errata { enum dispc_vp_bus_type { DISPC_VP_DPI, /* DPI output */ - DISPC_VP_OLDI, /* OLDI (LVDS) output */ + DISPC_VP_OLDI_AM65X, /* OLDI (LVDS) output for AM65x DSS */ DISPC_VP_INTERNAL, /* SoC internal routing */ DISPC_VP_TIED_OFF, /* Tied off / Unavailable */ DISPC_VP_MAX_BUS_TYPE, @@ -101,6 +104,11 @@ extern const struct dispc_features dispc_am62l_feats; extern const struct dispc_features dispc_am65x_feats; extern const struct dispc_features dispc_j721e_feats; +int tidss_configure_oldi(struct tidss_device *tidss, u32 hw_videoport, + u32 oldi_cfg); +void tidss_disable_oldi(struct tidss_device *tidss, u32 hw_videoport); +unsigned int dispc_pclk_diff(unsigned long rate, unsigned long real_rate); + void dispc_set_irqenable(struct dispc_device *dispc, dispc_irq_t mask); dispc_irq_t dispc_read_and_clear_irqstatus(struct dispc_device *dispc); diff --git a/drivers/gpu/drm/tidss/tidss_dispc_regs.h b/drivers/gpu/drm/tidss/tidss_dispc_regs.h index e88148e44937..382027dddce8 100644 --- a/drivers/gpu/drm/tidss/tidss_dispc_regs.h +++ b/drivers/gpu/drm/tidss/tidss_dispc_regs.h @@ -56,7 +56,12 @@ enum dispc_common_regs { #define DSS_REVISION REG(DSS_REVISION) #define DSS_SYSCONFIG REG(DSS_SYSCONFIG) +#define DSS_SYSCONFIG_SOFTRESET_MASK GENMASK(1, 1) + #define DSS_SYSSTATUS REG(DSS_SYSSTATUS) +#define DSS_SYSSTATUS_DISPC_IDLE_STATUS GENMASK(9, 9) +#define DSS_SYSSTATUS_DISPC_FUNC_RESETDONE GENMASK(0, 0) + #define DISPC_IRQ_EOI REG(DISPC_IRQ_EOI) #define DISPC_IRQSTATUS_RAW REG(DISPC_IRQSTATUS_RAW) #define DISPC_IRQSTATUS REG(DISPC_IRQSTATUS) @@ -70,9 +75,15 @@ enum dispc_common_regs { #define WB_IRQSTATUS REG(WB_IRQSTATUS) #define DISPC_GLOBAL_MFLAG_ATTRIBUTE REG(DISPC_GLOBAL_MFLAG_ATTRIBUTE) +#define DISPC_GLOBAL_MFLAG_ATTRIBUTE_MFLAG_START_MASK GENMASK(6, 6) +#define DISPC_GLOBAL_MFLAG_ATTRIBUTE_MFLAG_CTRL_MASK GENMASK(1, 0) + #define DISPC_GLOBAL_OUTPUT_ENABLE REG(DISPC_GLOBAL_OUTPUT_ENABLE) #define DISPC_GLOBAL_BUFFER REG(DISPC_GLOBAL_BUFFER) #define DSS_CBA_CFG REG(DSS_CBA_CFG) +#define DSS_CBA_CFG_PRI_HI_MASK GENMASK(5, 3) +#define DSS_CBA_CFG_PRI_LO_MASK GENMASK(2, 0) + #define DISPC_DBG_CONTROL REG(DISPC_DBG_CONTROL) #define DISPC_DBG_STATUS REG(DISPC_DBG_STATUS) #define DISPC_CLKGATING_DISABLE REG(DISPC_CLKGATING_DISABLE) @@ -88,6 +99,9 @@ enum dispc_common_regs { #define FBDC_CONSTANT_COLOR_0 REG(FBDC_CONSTANT_COLOR_0) #define FBDC_CONSTANT_COLOR_1 REG(FBDC_CONSTANT_COLOR_1) #define DISPC_CONNECTIONS REG(DISPC_CONNECTIONS) +#define DISPC_CONNECTIONS_DPI_1_CONN_MASK GENMASK(7, 4) +#define DISPC_CONNECTIONS_DPI_0_CONN_MASK GENMASK(3, 0) + #define DISPC_MSS_VP1 REG(DISPC_MSS_VP1) #define DISPC_MSS_VP3 REG(DISPC_MSS_VP3) @@ -102,13 +116,27 @@ enum dispc_common_regs { #define DISPC_VID_ACCUV2_0 0x18 #define DISPC_VID_ACCUV2_1 0x1c #define DISPC_VID_ATTRIBUTES 0x20 +#define DISPC_VID_ATTRIBUTES_PREMULTIPLYALPHA_MASK GENMASK(28, 28) +#define DISPC_VID_ATTRIBUTES_VERTICALTAPS_MASK GENMASK(21, 21) +#define DISPC_VID_ATTRIBUTES_BUFPRELOAD_MASK GENMASK(19, 19) +#define DISPC_VID_ATTRIBUTES_COLORCONVENABLE_MASK GENMASK(9, 9) +#define DISPC_VID_ATTRIBUTES_VRESIZEENABLE_MASK GENMASK(8, 8) +#define DISPC_VID_ATTRIBUTES_HRESIZEENABLE_MASK GENMASK(7, 7) +#define DISPC_VID_ATTRIBUTES_FORMAT_MASK GENMASK(6, 1) +#define DISPC_VID_ATTRIBUTES_ENABLE_MASK GENMASK(0, 0) + #define 
DISPC_VID_ATTRIBUTES2 0x24 #define DISPC_VID_BA_0 0x28 #define DISPC_VID_BA_1 0x2c #define DISPC_VID_BA_UV_0 0x30 #define DISPC_VID_BA_UV_1 0x34 #define DISPC_VID_BUF_SIZE_STATUS 0x38 +#define DISPC_VID_BUF_SIZE_STATUS_BUFSIZE_MASK GENMASK(15, 0) + #define DISPC_VID_BUF_THRESHOLD 0x3c +#define DISPC_VID_BUF_THRESHOLD_BUFHIGHTHRESHOLD_MASK GENMASK(31, 16) +#define DISPC_VID_BUF_THRESHOLD_BUFLOWTHRESHOLD_MASK GENMASK(15, 0) + #define DISPC_VID_CSC_COEF(n) (0x40 + (n) * 4) #define DISPC_VID_FIRH 0x5c @@ -137,15 +165,26 @@ enum dispc_common_regs { #define DISPC_VID_FIR_COEF_V12_C(phase) (0x1bc + (phase) * 4) #define DISPC_VID_GLOBAL_ALPHA 0x1fc +#define DISPC_VID_GLOBAL_ALPHA_GLOBALALPHA_MASK GENMASK(7, 0) + #define DISPC_VID_K2G_IRQENABLE 0x200 /* K2G */ #define DISPC_VID_K2G_IRQSTATUS 0x204 /* K2G */ #define DISPC_VID_MFLAG_THRESHOLD 0x208 +#define DISPC_VID_MFLAG_THRESHOLD_HT_MFLAG_MASK GENMASK(31, 16) +#define DISPC_VID_MFLAG_THRESHOLD_LT_MFLAG_MASK GENMASK(15, 0) + #define DISPC_VID_PICTURE_SIZE 0x20c +#define DISPC_VID_PICTURE_SIZE_MEMSIZEY_MASK GENMASK(27, 16) +#define DISPC_VID_PICTURE_SIZE_MEMSIZEX_MASK GENMASK(11, 0) + #define DISPC_VID_PIXEL_INC 0x210 #define DISPC_VID_K2G_POSITION 0x214 /* K2G */ #define DISPC_VID_PRELOAD 0x218 #define DISPC_VID_ROW_INC 0x21c #define DISPC_VID_SIZE 0x220 +#define DISPC_VID_SIZE_SIZEY_MASK GENMASK(27, 16) +#define DISPC_VID_SIZE_SIZEX_MASK GENMASK(11, 0) + #define DISPC_VID_BA_EXT_0 0x22c #define DISPC_VID_BA_EXT_1 0x230 #define DISPC_VID_BA_UV_EXT_0 0x234 @@ -173,11 +212,27 @@ enum dispc_common_regs { #define DISPC_OVR_TRANS_COLOR_MIN 0x18 #define DISPC_OVR_TRANS_COLOR_MIN2 0x1c #define DISPC_OVR_ATTRIBUTES(n) (0x20 + (n) * 4) +#define DISPC_OVR_ATTRIBUTES_POSY_MASK GENMASK(30, 19) +#define DISPC_OVR_ATTRIBUTES_POSX_MASK GENMASK(17, 6) +#define DISPC_OVR_ATTRIBUTES_CHANNELIN_MASK GENMASK(4, 1) +#define DISPC_OVR_ATTRIBUTES_ENABLE_MASK GENMASK(0, 0) + #define DISPC_OVR_ATTRIBUTES2(n) (0x34 + (n) * 4) /* J721E */ +#define DISPC_OVR_ATTRIBUTES2_POSY_MASK GENMASK(29, 16) +#define DISPC_OVR_ATTRIBUTES2_POSX_MASK GENMASK(13, 0) + /* VP */ #define DISPC_VP_CONFIG 0x0 +#define DISPC_VP_CONFIG_COLORCONVENABLE_MASK GENMASK(24, 24) +#define DISPC_VP_CONFIG_CPR_MASK GENMASK(15, 15) +#define DISPC_VP_CONFIG_GAMMAENABLE_MASK GENMASK(2, 2) + #define DISPC_VP_CONTROL 0x4 +#define DISPC_VP_CONTROL_DATALINES_MASK GENMASK(10, 8) +#define DISPC_VP_CONTROL_GOBIT_MASK GENMASK(5, 5) +#define DISPC_VP_CONTROL_ENABLE_MASK GENMASK(0, 0) + #define DISPC_VP_CSC_COEF0 0x8 #define DISPC_VP_CSC_COEF1 0xc #define DISPC_VP_CSC_COEF2 0x10 @@ -189,9 +244,28 @@ enum dispc_common_regs { #define DISPC_VP_DATA_CYCLE_2 0x1c #define DISPC_VP_LINE_NUMBER 0x44 #define DISPC_VP_POL_FREQ 0x4c +#define DISPC_VP_POL_FREQ_ALIGN_MASK GENMASK(18, 18) +#define DISPC_VP_POL_FREQ_ONOFF_MASK GENMASK(17, 17) +#define DISPC_VP_POL_FREQ_RF_MASK GENMASK(16, 16) +#define DISPC_VP_POL_FREQ_IEO_MASK GENMASK(15, 15) +#define DISPC_VP_POL_FREQ_IPC_MASK GENMASK(14, 14) +#define DISPC_VP_POL_FREQ_IHS_MASK GENMASK(13, 13) +#define DISPC_VP_POL_FREQ_IVS_MASK GENMASK(12, 12) + #define DISPC_VP_SIZE_SCREEN 0x50 +#define DISPC_VP_SIZE_SCREEN_HDISPLAY_MASK GENMASK(11, 0) +#define DISPC_VP_SIZE_SCREEN_VDISPLAY_MASK GENMASK(27, 16) + #define DISPC_VP_TIMING_H 0x54 +#define DISPC_VP_TIMING_H_SYNC_PULSE_MASK GENMASK(7, 0) +#define DISPC_VP_TIMING_H_FRONT_PORCH_MASK GENMASK(19, 8) +#define DISPC_VP_TIMING_H_BACK_PORCH_MASK GENMASK(31, 20) + #define DISPC_VP_TIMING_V 0x58 +#define DISPC_VP_TIMING_V_SYNC_PULSE_MASK 
GENMASK(7, 0) +#define DISPC_VP_TIMING_V_FRONT_PORCH_MASK GENMASK(19, 8) +#define DISPC_VP_TIMING_V_BACK_PORCH_MASK GENMASK(31, 20) + #define DISPC_VP_CSC_COEF3 0x5c #define DISPC_VP_CSC_COEF4 0x60 #define DISPC_VP_CSC_COEF5 0x64 @@ -220,24 +294,43 @@ enum dispc_common_regs { #define DISPC_VP_SAFETY_LFSR_SEED 0x110 #define DISPC_VP_GAMMA_TABLE 0x120 #define DISPC_VP_DSS_OLDI_CFG 0x160 +#define DISPC_VP_DSS_OLDI_CFG_MAP_MASK GENMASK(3, 1) + #define DISPC_VP_DSS_OLDI_STATUS 0x164 #define DISPC_VP_DSS_OLDI_LB 0x168 #define DISPC_VP_DSS_MERGE_SPLIT 0x16c /* J721E */ #define DISPC_VP_DSS_DMA_THREADSIZE 0x170 /* J721E */ #define DISPC_VP_DSS_DMA_THREADSIZE_STATUS 0x174 /* J721E */ +/* OLDI Config Bits (DISPC_VP_DSS_OLDI_CFG) */ +#define OLDI_ENABLE BIT(0) +#define OLDI_MAP (BIT(1) | BIT(2) | BIT(3)) +#define OLDI_SRC BIT(4) +#define OLDI_CLONE_MODE BIT(5) +#define OLDI_MASTERSLAVE BIT(6) +#define OLDI_DEPOL BIT(7) +#define OLDI_MSB BIT(8) +#define OLDI_LBEN BIT(9) +#define OLDI_LBDATA BIT(10) +#define OLDI_DUALMODESYNC BIT(11) +#define OLDI_SOFTRST BIT(12) +#define OLDI_TPATCFG BIT(13) + +/* LVDS Format values for OLDI_MAP field in DISPC_VP_OLDI_CFG register */ +enum oldi_mode_reg_val { SPWG_18 = 0, JEIDA_24 = 1, SPWG_24 = 2 }; + /* * OLDI IO_CTRL register offsets. On AM654 the registers are found * from CTRL_MMR0, there the syscon regmap should map 0x14 bytes from * CTRLMMR0P1_OLDI_DAT0_IO_CTRL to CTRLMMR0P1_OLDI_CLK_IO_CTRL * register range. */ -#define OLDI_DAT0_IO_CTRL 0x00 -#define OLDI_DAT1_IO_CTRL 0x04 -#define OLDI_DAT2_IO_CTRL 0x08 -#define OLDI_DAT3_IO_CTRL 0x0C -#define OLDI_CLK_IO_CTRL 0x10 +#define AM65X_OLDI_DAT0_IO_CTRL 0x00 +#define AM65X_OLDI_DAT1_IO_CTRL 0x04 +#define AM65X_OLDI_DAT2_IO_CTRL 0x08 +#define AM65X_OLDI_DAT3_IO_CTRL 0x0C +#define AM65X_OLDI_CLK_IO_CTRL 0x10 -#define OLDI_PWRDN_TX BIT(8) +#define AM65X_OLDI_PWRDN_TX BIT(8) #endif /* __TIDSS_DISPC_REGS_H */ diff --git a/drivers/gpu/drm/tidss/tidss_drv.c b/drivers/gpu/drm/tidss/tidss_drv.c index f2a4f659f574..27d9a8fd541f 100644 --- a/drivers/gpu/drm/tidss/tidss_drv.c +++ b/drivers/gpu/drm/tidss/tidss_drv.c @@ -8,6 +8,7 @@ #include <linux/of.h> #include <linux/module.h> #include <linux/pm_runtime.h> +#include <linux/aperture.h> #include <drm/clients/drm_client_setup.h> #include <drm/drm_atomic.h> @@ -24,6 +25,7 @@ #include "tidss_drv.h" #include "tidss_kms.h" #include "tidss_irq.h" +#include "tidss_oldi.h" /* Power management */ @@ -147,6 +149,10 @@ static int tidss_probe(struct platform_device *pdev) return ret; } + ret = tidss_oldi_init(tidss); + if (ret) + return dev_err_probe(dev, ret, "failed to init OLDI\n"); + pm_runtime_enable(dev); pm_runtime_set_autosuspend_delay(dev, 1000); @@ -187,12 +193,20 @@ static int tidss_probe(struct platform_device *pdev) goto err_irq_uninstall; } + /* Remove possible early fb before setting up the fbdev */ + ret = aperture_remove_all_conflicting_devices(tidss_driver.name); + if (ret) + goto err_drm_dev_unreg; + drm_client_setup(ddev, NULL); dev_dbg(dev, "%s done\n", __func__); return 0; +err_drm_dev_unreg: + drm_dev_unregister(ddev); + err_irq_uninstall: tidss_irq_uninstall(ddev); @@ -203,6 +217,8 @@ err_runtime_suspend: pm_runtime_dont_use_autosuspend(dev); pm_runtime_disable(dev); + tidss_oldi_deinit(tidss); + return ret; } @@ -227,6 +243,8 @@ static void tidss_remove(struct platform_device *pdev) pm_runtime_dont_use_autosuspend(dev); pm_runtime_disable(dev); + tidss_oldi_deinit(tidss); + /* devm allocated dispc goes away with the dev so mark it NULL */ dispc_remove(tidss); diff 
--git a/drivers/gpu/drm/tidss/tidss_drv.h b/drivers/gpu/drm/tidss/tidss_drv.h index 7f4f4282bc04..84454a4855d1 100644 --- a/drivers/gpu/drm/tidss/tidss_drv.h +++ b/drivers/gpu/drm/tidss/tidss_drv.h @@ -9,10 +9,14 @@ #include <linux/spinlock.h> +#include <drm/drm_device.h> + #define TIDSS_MAX_PORTS 4 #define TIDSS_MAX_PLANES 4 +#define TIDSS_MAX_OLDI_TXES 2 typedef u32 dispc_irq_t; +struct tidss_oldi; struct tidss_device { struct drm_device ddev; /* DRM device for DSS */ @@ -27,6 +31,9 @@ struct tidss_device { unsigned int num_planes; struct drm_plane *planes[TIDSS_MAX_PLANES]; + unsigned int num_oldis; + struct tidss_oldi *oldis[TIDSS_MAX_OLDI_TXES]; + unsigned int irq; /* protects the irq masks field and irqenable/irqstatus registers */ diff --git a/drivers/gpu/drm/tidss/tidss_encoder.c b/drivers/gpu/drm/tidss/tidss_encoder.c index 95b4aeff2775..81a04f767770 100644 --- a/drivers/gpu/drm/tidss/tidss_encoder.c +++ b/drivers/gpu/drm/tidss/tidss_encoder.c @@ -90,14 +90,18 @@ int tidss_encoder_create(struct tidss_device *tidss, struct drm_connector *connector; int ret; - t_enc = drmm_simple_encoder_alloc(&tidss->ddev, struct tidss_encoder, - encoder, encoder_type); + t_enc = devm_drm_bridge_alloc(tidss->dev, struct tidss_encoder, + bridge, &tidss_bridge_funcs); if (IS_ERR(t_enc)) return PTR_ERR(t_enc); + ret = drm_simple_encoder_init(&tidss->ddev, &t_enc->encoder, + encoder_type); + if (ret) + return ret; + t_enc->tidss = tidss; t_enc->next_bridge = next_bridge; - t_enc->bridge.funcs = &tidss_bridge_funcs; enc = &t_enc->encoder; enc->possible_crtcs = possible_crtcs; diff --git a/drivers/gpu/drm/tidss/tidss_kms.c b/drivers/gpu/drm/tidss/tidss_kms.c index 19432c08ec6b..c34eb90cddbe 100644 --- a/drivers/gpu/drm/tidss/tidss_kms.c +++ b/drivers/gpu/drm/tidss/tidss_kms.c @@ -144,7 +144,7 @@ static int tidss_dispc_modeset_init(struct tidss_device *tidss) dev_dbg(dev, "Setting up panel for port %d\n", i); switch (feat->vp_bus_type[i]) { - case DISPC_VP_OLDI: + case DISPC_VP_OLDI_AM65X: enc_type = DRM_MODE_ENCODER_LVDS; conn_type = DRM_MODE_CONNECTOR_LVDS; break; diff --git a/drivers/gpu/drm/tidss/tidss_oldi.c b/drivers/gpu/drm/tidss/tidss_oldi.c new file mode 100644 index 000000000000..7688251beba2 --- /dev/null +++ b/drivers/gpu/drm/tidss/tidss_oldi.c @@ -0,0 +1,597 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright (C) 2025 - Texas Instruments Incorporated + * + * Aradhya Bhatia <a-bhatia1@ti.com> + */ + +#include <linux/clk.h> +#include <linux/of.h> +#include <linux/of_graph.h> +#include <linux/mfd/syscon.h> +#include <linux/media-bus-format.h> +#include <linux/regmap.h> + +#include <drm/drm_atomic_helper.h> +#include <drm/drm_bridge.h> +#include <drm/drm_of.h> + +#include "tidss_dispc.h" +#include "tidss_dispc_regs.h" +#include "tidss_oldi.h" + +struct tidss_oldi { + struct tidss_device *tidss; + struct device *dev; + + struct drm_bridge bridge; + struct drm_bridge *next_bridge; + + enum tidss_oldi_link_type link_type; + const struct oldi_bus_format *bus_format; + u32 oldi_instance; + int companion_instance; /* -1 when OLDI TX operates in Single-Link */ + u32 parent_vp; + + struct clk *serial; + struct regmap *io_ctrl; +}; + +struct oldi_bus_format { + u32 bus_fmt; + u32 data_width; + enum oldi_mode_reg_val oldi_mode_reg_val; + u32 input_bus_fmt; +}; + +static const struct oldi_bus_format oldi_bus_formats[] = { + { MEDIA_BUS_FMT_RGB666_1X7X3_SPWG, 18, SPWG_18, MEDIA_BUS_FMT_RGB666_1X18 }, + { MEDIA_BUS_FMT_RGB888_1X7X4_SPWG, 24, SPWG_24, MEDIA_BUS_FMT_RGB888_1X24 }, + { 
MEDIA_BUS_FMT_RGB888_1X7X4_JEIDA, 24, JEIDA_24, MEDIA_BUS_FMT_RGB888_1X24 }, +}; + +#define OLDI_IDLE_CLK_HZ 25000000 /*25 MHz */ + +static inline struct tidss_oldi * +drm_bridge_to_tidss_oldi(struct drm_bridge *bridge) +{ + return container_of(bridge, struct tidss_oldi, bridge); +} + +static int tidss_oldi_bridge_attach(struct drm_bridge *bridge, + struct drm_encoder *encoder, + enum drm_bridge_attach_flags flags) +{ + struct tidss_oldi *oldi = drm_bridge_to_tidss_oldi(bridge); + + if (!oldi->next_bridge) { + dev_err(oldi->dev, + "%s: OLDI%u Failure attach next bridge\n", + __func__, oldi->oldi_instance); + return -ENODEV; + } + + if (!(flags & DRM_BRIDGE_ATTACH_NO_CONNECTOR)) { + dev_err(oldi->dev, + "%s: OLDI%u DRM_BRIDGE_ATTACH_NO_CONNECTOR is mandatory.\n", + __func__, oldi->oldi_instance); + return -EINVAL; + } + + return drm_bridge_attach(encoder, oldi->next_bridge, bridge, flags); +} + +static int +tidss_oldi_set_serial_clk(struct tidss_oldi *oldi, unsigned long rate) +{ + unsigned long new_rate; + int ret; + + ret = clk_set_rate(oldi->serial, rate); + if (ret) { + dev_err(oldi->dev, + "OLDI%u: failed to set serial clk rate to %lu Hz\n", + oldi->oldi_instance, rate); + return ret; + } + + new_rate = clk_get_rate(oldi->serial); + + if (dispc_pclk_diff(rate, new_rate) > 5) + dev_warn(oldi->dev, + "OLDI%u Clock rate %lu differs over 5%% from requested %lu\n", + oldi->oldi_instance, new_rate, rate); + + dev_dbg(oldi->dev, "OLDI%u: new rate %lu Hz (requested %lu Hz)\n", + oldi->oldi_instance, clk_get_rate(oldi->serial), rate); + + return 0; +} + +static void tidss_oldi_tx_power(struct tidss_oldi *oldi, bool enable) +{ + u32 mask; + + /* + * The power control bits are Active Low, and remain powered off by + * default. That is, the bits are set to 1. To power on the OLDI TXes, + * the bits must be cleared to 0. Since there are cases where not all + * OLDI TXes are being used, the power logic selectively powers them + * on. + * Setting the variable 'val' to particular bit masks, makes sure that + * the undesired OLDI TXes remain powered off. + */ + + if (enable) { + switch (oldi->link_type) { + case OLDI_MODE_SINGLE_LINK: + /* Power-on only the required OLDI TX's IO*/ + mask = OLDI_PWRDOWN_TX(oldi->oldi_instance) | OLDI_PWRDN_BG; + break; + case OLDI_MODE_CLONE_SINGLE_LINK: + case OLDI_MODE_DUAL_LINK: + /* Power-on both the OLDI TXes' IOs */ + mask = OLDI_PWRDOWN_TX(oldi->oldi_instance) | + OLDI_PWRDOWN_TX(oldi->companion_instance) | + OLDI_PWRDN_BG; + break; + default: + /* + * This code execution should never reach here as any + * OLDI with an unsupported OLDI mode would never get + * registered in the first place. + * However, power-off the OLDI in concern just in case. + */ + mask = OLDI_PWRDOWN_TX(oldi->oldi_instance); + enable = false; + break; + } + } else { + switch (oldi->link_type) { + case OLDI_MODE_CLONE_SINGLE_LINK: + case OLDI_MODE_DUAL_LINK: + mask = OLDI_PWRDOWN_TX(oldi->oldi_instance) | + OLDI_PWRDOWN_TX(oldi->companion_instance) | + OLDI_PWRDN_BG; + break; + case OLDI_MODE_SINGLE_LINK: + default: + mask = OLDI_PWRDOWN_TX(oldi->oldi_instance); + break; + } + } + + regmap_update_bits(oldi->io_ctrl, OLDI_PD_CTRL, mask, enable ? 0 : mask); +} + +static int tidss_oldi_config(struct tidss_oldi *oldi) +{ + const struct oldi_bus_format *bus_fmt = NULL; + u32 oldi_cfg = 0; + int ret; + + bus_fmt = oldi->bus_format; + + /* + * MASTERSLAVE and SRC bits of OLDI Config are always set to 0. 
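
A quick note on the clock handling above: tidss_oldi_set_serial_clk() reuses the now-exported dispc_pclk_diff() to warn when the achieved serializer rate is more than 5% off the request, and the requested rate is seven times the pixel clock (LVDS shifts seven bits per data lane per pixel clock, see tidss_oldi_atomic_pre_enable() further down). A hedged sketch of the arithmetic, assuming dispc_pclk_diff() returns a percentage as its comment states (the helper name below is illustrative only, not from the patch):

#include <linux/types.h>

/* drm_display_mode.clock is in kHz, so a 60 MHz panel clock asks for
 * 60000 * 7 * 1000 = 420 MHz on the "serial" clock.  If the clock framework
 * rounds that to, say, 442 MHz, the deviation is |442 - 420| / 420 ~= 5.2%,
 * which exceeds the 5% threshold and triggers the dev_warn() above.
 */
static unsigned long oldi_serial_rate_hz(unsigned int mode_clock_khz)
{
	return (unsigned long)mode_clock_khz * 7 * 1000;
}
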
+ */ + + if (bus_fmt->data_width == 24) + oldi_cfg |= OLDI_MSB; + else if (bus_fmt->data_width != 18) + dev_warn(oldi->dev, + "OLDI%u: DSS port width %d not supported\n", + oldi->oldi_instance, bus_fmt->data_width); + + oldi_cfg |= OLDI_DEPOL; + + oldi_cfg = (oldi_cfg & (~OLDI_MAP)) | (bus_fmt->oldi_mode_reg_val << 1); + + oldi_cfg |= OLDI_SOFTRST; + + oldi_cfg |= OLDI_ENABLE; + + switch (oldi->link_type) { + case OLDI_MODE_SINGLE_LINK: + /* All configuration is done for this mode. */ + break; + + case OLDI_MODE_CLONE_SINGLE_LINK: + oldi_cfg |= OLDI_CLONE_MODE; + break; + + case OLDI_MODE_DUAL_LINK: + /* data-mapping field also indicates dual-link mode */ + oldi_cfg |= BIT(3); + oldi_cfg |= OLDI_DUALMODESYNC; + break; + + default: + dev_err(oldi->dev, "OLDI%u: Unsupported mode.\n", + oldi->oldi_instance); + return -EINVAL; + } + + ret = tidss_configure_oldi(oldi->tidss, oldi->parent_vp, oldi_cfg); + if (ret == -ETIMEDOUT) + dev_warn(oldi->dev, "OLDI%u: timeout waiting for OLDI reset done.\n", + oldi->oldi_instance); + + return ret; +} + +static void tidss_oldi_atomic_pre_enable(struct drm_bridge *bridge, + struct drm_atomic_state *state) +{ + struct tidss_oldi *oldi = drm_bridge_to_tidss_oldi(bridge); + struct drm_connector *connector; + struct drm_connector_state *conn_state; + struct drm_crtc_state *crtc_state; + struct drm_display_mode *mode; + + if (oldi->link_type == OLDI_MODE_SECONDARY_CLONE_SINGLE_LINK) + return; + + connector = drm_atomic_get_new_connector_for_encoder(state, + bridge->encoder); + if (WARN_ON(!connector)) + return; + + conn_state = drm_atomic_get_new_connector_state(state, connector); + if (WARN_ON(!conn_state)) + return; + + crtc_state = drm_atomic_get_new_crtc_state(state, conn_state->crtc); + if (WARN_ON(!crtc_state)) + return; + + mode = &crtc_state->adjusted_mode; + + /* Configure the OLDI params*/ + tidss_oldi_config(oldi); + + /* Set the OLDI serial clock (7 times the pixel clock) */ + tidss_oldi_set_serial_clk(oldi, mode->clock * 7 * 1000); + + /* Enable OLDI IO power */ + tidss_oldi_tx_power(oldi, true); +} + +static void tidss_oldi_atomic_post_disable(struct drm_bridge *bridge, + struct drm_atomic_state *state) +{ + struct tidss_oldi *oldi = drm_bridge_to_tidss_oldi(bridge); + + if (oldi->link_type == OLDI_MODE_SECONDARY_CLONE_SINGLE_LINK) + return; + + /* Disable OLDI IO power */ + tidss_oldi_tx_power(oldi, false); + + /* Set the OLDI serial clock to IDLE Frequency */ + tidss_oldi_set_serial_clk(oldi, OLDI_IDLE_CLK_HZ); + + /* Clear OLDI Config */ + tidss_disable_oldi(oldi->tidss, oldi->parent_vp); +} + +#define MAX_INPUT_SEL_FORMATS 1 + +static u32 *tidss_oldi_atomic_get_input_bus_fmts(struct drm_bridge *bridge, + struct drm_bridge_state *bridge_state, + struct drm_crtc_state *crtc_state, + struct drm_connector_state *conn_state, + u32 output_fmt, + unsigned int *num_input_fmts) +{ + struct tidss_oldi *oldi = drm_bridge_to_tidss_oldi(bridge); + u32 *input_fmts; + int i; + + *num_input_fmts = 0; + + for (i = 0; i < ARRAY_SIZE(oldi_bus_formats); i++) + if (oldi_bus_formats[i].bus_fmt == output_fmt) + break; + + if (i == ARRAY_SIZE(oldi_bus_formats)) + return NULL; + + input_fmts = kcalloc(MAX_INPUT_SEL_FORMATS, sizeof(*input_fmts), + GFP_KERNEL); + if (!input_fmts) + return NULL; + + *num_input_fmts = 1; + input_fmts[0] = oldi_bus_formats[i].input_bus_fmt; + oldi->bus_format = &oldi_bus_formats[i]; + + return input_fmts; +} + +static const struct drm_bridge_funcs tidss_oldi_bridge_funcs = { + .attach = tidss_oldi_bridge_attach, + .atomic_pre_enable = 
tidss_oldi_atomic_pre_enable, + .atomic_post_disable = tidss_oldi_atomic_post_disable, + .atomic_get_input_bus_fmts = tidss_oldi_atomic_get_input_bus_fmts, + .atomic_duplicate_state = drm_atomic_helper_bridge_duplicate_state, + .atomic_destroy_state = drm_atomic_helper_bridge_destroy_state, + .atomic_reset = drm_atomic_helper_bridge_reset, +}; + +static int get_oldi_mode(struct device_node *oldi_tx, int *companion_instance) +{ + struct device_node *companion; + struct device_node *port0, *port1; + u32 companion_reg; + bool secondary_oldi = false; + int pixel_order; + + /* + * Find if the OLDI is paired with another OLDI for combined OLDI + * operation (dual-link or clone). + */ + companion = of_parse_phandle(oldi_tx, "ti,companion-oldi", 0); + if (!companion) + /* + * The OLDI TX does not have a companion, nor is it a + * secondary OLDI. It will operate independently. + */ + return OLDI_MODE_SINGLE_LINK; + + if (of_property_read_u32(companion, "reg", &companion_reg)) + return OLDI_MODE_UNSUPPORTED; + + if (companion_reg > (TIDSS_MAX_OLDI_TXES - 1)) + /* Invalid companion OLDI reg value. */ + return OLDI_MODE_UNSUPPORTED; + + *companion_instance = (int)companion_reg; + + if (of_property_read_bool(oldi_tx, "ti,secondary-oldi")) + secondary_oldi = true; + + /* + * We need to work out if the sink is expecting us to function in + * dual-link mode. We do this by looking at the DT port nodes, the + * OLDI TX ports are connected to. If they are marked as expecting + * even pixels and odd pixels, then we need to enable dual-link. + */ + port0 = of_graph_get_port_by_id(oldi_tx, 1); + port1 = of_graph_get_port_by_id(companion, 1); + pixel_order = drm_of_lvds_get_dual_link_pixel_order(port0, port1); + of_node_put(port0); + of_node_put(port1); + of_node_put(companion); + + switch (pixel_order) { + case -EINVAL: + /* + * The dual-link properties were not found in at least + * one of the sink nodes. Since 2 OLDI ports are present + * in the DT, it can be safely assumed that the required + * configuration is Clone Mode. + */ + return (secondary_oldi ? OLDI_MODE_SECONDARY_CLONE_SINGLE_LINK : + OLDI_MODE_CLONE_SINGLE_LINK); + + case DRM_LVDS_DUAL_LINK_ODD_EVEN_PIXELS: + /* + * Primary OLDI can only support "ODD" pixels. So, from its + * perspective, the pixel order has to be ODD-EVEN. + */ + return (secondary_oldi ? OLDI_MODE_UNSUPPORTED : + OLDI_MODE_DUAL_LINK); + + case DRM_LVDS_DUAL_LINK_EVEN_ODD_PIXELS: + /* + * Secondary OLDI can only support "EVEN" pixels. So, from its + * perspective, the pixel order has to be EVEN-ODD. + */ + return (secondary_oldi ? 
OLDI_MODE_SECONDARY_DUAL_LINK : + OLDI_MODE_UNSUPPORTED); + + default: + return OLDI_MODE_UNSUPPORTED; + } +} + +static int get_parent_dss_vp(struct device_node *oldi_tx, u32 *parent_vp) +{ + struct device_node *ep, *dss_port; + int ret; + + ep = of_graph_get_endpoint_by_regs(oldi_tx, OLDI_INPUT_PORT, -1); + if (ep) { + dss_port = of_graph_get_remote_port(ep); + if (!dss_port) { + ret = -ENODEV; + goto err_return_ep_port; + } + + ret = of_property_read_u32(dss_port, "reg", parent_vp); + + of_node_put(dss_port); +err_return_ep_port: + of_node_put(ep); + return ret; + } + + return -ENODEV; +} + +static const struct drm_bridge_timings default_tidss_oldi_timings = { + .input_bus_flags = DRM_BUS_FLAG_SYNC_SAMPLE_NEGEDGE + | DRM_BUS_FLAG_DE_HIGH, +}; + +void tidss_oldi_deinit(struct tidss_device *tidss) +{ + for (int i = 0; i < tidss->num_oldis; i++) { + if (tidss->oldis[i]) { + drm_bridge_remove(&tidss->oldis[i]->bridge); + tidss->oldis[i] = NULL; + } + } +} + +int tidss_oldi_init(struct tidss_device *tidss) +{ + struct tidss_oldi *oldi; + struct device_node *child; + struct drm_bridge *bridge; + u32 parent_vp, oldi_instance; + int companion_instance = -1; + enum tidss_oldi_link_type link_type = OLDI_MODE_UNSUPPORTED; + struct device_node *oldi_parent; + int ret = 0; + + tidss->num_oldis = 0; + + oldi_parent = of_get_child_by_name(tidss->dev->of_node, "oldi-transmitters"); + if (!oldi_parent) + /* Return gracefully */ + return 0; + + for_each_available_child_of_node(oldi_parent, child) { + ret = get_parent_dss_vp(child, &parent_vp); + if (ret) { + if (ret == -ENODEV) { + /* + * ENODEV means that this particular OLDI node + * is not connected with the DSS, which is not + * a harmful case. There could be another OLDI + * which may still be connected. + * Continue to search for that. + */ + continue; + } + goto err_put_node; + } + + ret = of_property_read_u32(child, "reg", &oldi_instance); + if (ret) + goto err_put_node; + + /* + * Now that it's confirmed that OLDI is connected with DSS, + * let's continue getting the OLDI sinks ahead and other OLDI + * properties. + */ + bridge = devm_drm_of_get_bridge(tidss->dev, child, + OLDI_OUTPUT_PORT, 0); + if (IS_ERR(bridge)) { + /* + * Either there was no OLDI sink in the devicetree, or + * the OLDI sink has not been added yet. In any case, + * return. + * We don't want to have an OLDI node connected to DSS + * but not to any sink. + */ + ret = dev_err_probe(tidss->dev, PTR_ERR(bridge), + "no panel/bridge for OLDI%u.\n", + oldi_instance); + goto err_put_node; + } + + link_type = get_oldi_mode(child, &companion_instance); + if (link_type == OLDI_MODE_UNSUPPORTED) { + ret = dev_err_probe(tidss->dev, -EINVAL, + "OLDI%u: Unsupported OLDI connection.\n", + oldi_instance); + goto err_put_node; + } else if ((link_type == OLDI_MODE_SECONDARY_CLONE_SINGLE_LINK) || + (link_type == OLDI_MODE_CLONE_SINGLE_LINK)) { + /* + * The OLDI driver cannot support OLDI clone mode + * properly at present. + * The clone mode requires 2 working encoder-bridge + * pipelines, generating from the same crtc. The DRM + * framework does not support this at present. If + * there were to be, say, 2 OLDI sink bridges each + * connected to an OLDI TXes, they couldn't both be + * supported simultaneously. + * This driver still has some code pertaining to OLDI + * clone mode configuration in DSS hardware for future, + * when there is a better infrastructure in the DRM + * framework to support 2 encoder-bridge pipelines + * simultaneously. 
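
For context on the mode detection above: get_oldi_mode() keys the dual-link versus clone decision off the standard dual-LVDS pixel-order properties on the sink's two input ports, via the drm_of helper the patch already calls. A minimal sketch of that check, assuming the documented drm_of_lvds_get_dual_link_pixel_order() return values (the wrapper name is a placeholder, not from the patch):

#include <linux/of.h>
#include <drm/drm_of.h>

/* Returns true when the sink ports advertise dual-lvds-odd-pixels /
 * dual-lvds-even-pixels, i.e. the panel wants the two OLDI TXes in
 * dual-link mode rather than clone mode.
 */
static bool oldi_sinks_request_dual_link(struct device_node *port0,
					 struct device_node *port1)
{
	int order = drm_of_lvds_get_dual_link_pixel_order(port0, port1);

	return order == DRM_LVDS_DUAL_LINK_ODD_EVEN_PIXELS ||
	       order == DRM_LVDS_DUAL_LINK_EVEN_ODD_PIXELS;
}
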
+ * Till that time, this driver shall error out if it + * detects a clone mode configuration. + */ + ret = dev_err_probe(tidss->dev, -EOPNOTSUPP, + "The OLDI driver does not support Clone Mode at present.\n"); + goto err_put_node; + } else if (link_type == OLDI_MODE_SECONDARY_DUAL_LINK) { + /* + * This is the secondary OLDI node, which serves as a + * companion to the primary OLDI, when it is configured + * for the dual-link mode. Since the primary OLDI will + * be a part of bridge chain, no need to put this one + * too. Continue onto the next OLDI node. + */ + continue; + } + + oldi = devm_drm_bridge_alloc(tidss->dev, struct tidss_oldi, bridge, + &tidss_oldi_bridge_funcs); + if (IS_ERR(oldi)) { + ret = PTR_ERR(oldi); + goto err_put_node; + } + + oldi->parent_vp = parent_vp; + oldi->oldi_instance = oldi_instance; + oldi->companion_instance = companion_instance; + oldi->link_type = link_type; + oldi->dev = tidss->dev; + oldi->next_bridge = bridge; + + /* + * Only the primary OLDI needs to reference the io-ctrl system + * registers, and the serial clock. + * We don't require a check for secondary OLDI in dual-link mode + * because the driver will not create a drm_bridge instance. + * But the driver will need to create a drm_bridge instance, + * for secondary OLDI in clone mode (once it is supported). + */ + if (link_type != OLDI_MODE_SECONDARY_CLONE_SINGLE_LINK) { + oldi->io_ctrl = syscon_regmap_lookup_by_phandle(child, + "ti,oldi-io-ctrl"); + if (IS_ERR(oldi->io_ctrl)) { + ret = dev_err_probe(oldi->dev, PTR_ERR(oldi->io_ctrl), + "OLDI%u: syscon_regmap_lookup_by_phandle failed.\n", + oldi_instance); + goto err_put_node; + } + + oldi->serial = of_clk_get_by_name(child, "serial"); + if (IS_ERR(oldi->serial)) { + ret = dev_err_probe(oldi->dev, PTR_ERR(oldi->serial), + "OLDI%u: Failed to get serial clock.\n", + oldi_instance); + goto err_put_node; + } + } + + /* Register the bridge. 
*/ + oldi->bridge.of_node = child; + oldi->bridge.driver_private = oldi; + oldi->bridge.timings = &default_tidss_oldi_timings; + + tidss->oldis[tidss->num_oldis++] = oldi; + oldi->tidss = tidss; + + drm_bridge_add(&oldi->bridge); + } + + of_node_put(child); + of_node_put(oldi_parent); + + return 0; + +err_put_node: + of_node_put(child); + of_node_put(oldi_parent); + return ret; +} diff --git a/drivers/gpu/drm/tidss/tidss_oldi.h b/drivers/gpu/drm/tidss/tidss_oldi.h new file mode 100644 index 000000000000..8cd535c5ee65 --- /dev/null +++ b/drivers/gpu/drm/tidss/tidss_oldi.h @@ -0,0 +1,43 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Copyright (C) 2025 - Texas Instruments Incorporated + * + * Aradhya Bhatia <a-bhatia1@ti.com> + */ + +#ifndef __TIDSS_OLDI_H__ +#define __TIDSS_OLDI_H__ + +#include "tidss_drv.h" + +struct tidss_oldi; + +/* OLDI PORTS */ +#define OLDI_INPUT_PORT 0 +#define OLDI_OUTPUT_PORT 1 + +/* Control MMR Registers */ + +/* Register offsets */ +#define OLDI_PD_CTRL 0x100 +#define OLDI_LB_CTRL 0x104 + +/* Power control bits */ +#define OLDI_PWRDOWN_TX(n) BIT(n) + +/* LVDS Bandgap reference Enable/Disable */ +#define OLDI_PWRDN_BG BIT(8) + +enum tidss_oldi_link_type { + OLDI_MODE_UNSUPPORTED, + OLDI_MODE_SINGLE_LINK, + OLDI_MODE_CLONE_SINGLE_LINK, + OLDI_MODE_SECONDARY_CLONE_SINGLE_LINK, + OLDI_MODE_DUAL_LINK, + OLDI_MODE_SECONDARY_DUAL_LINK, +}; + +int tidss_oldi_init(struct tidss_device *tidss); +void tidss_oldi_deinit(struct tidss_device *tidss); + +#endif /* __TIDSS_OLDI_H__ */ diff --git a/drivers/gpu/drm/tidss/tidss_plane.h b/drivers/gpu/drm/tidss/tidss_plane.h index aecaf2728406..92c560c3a621 100644 --- a/drivers/gpu/drm/tidss/tidss_plane.h +++ b/drivers/gpu/drm/tidss/tidss_plane.h @@ -7,6 +7,8 @@ #ifndef __TIDSS_PLANE_H__ #define __TIDSS_PLANE_H__ +#include <drm/drm_plane.h> + #define to_tidss_plane(p) container_of((p), struct tidss_plane, plane) struct tidss_device; diff --git a/drivers/gpu/drm/tidss/tidss_scale_coefs.h b/drivers/gpu/drm/tidss/tidss_scale_coefs.h index 9c560d0fdac0..9824d02d9d1f 100644 --- a/drivers/gpu/drm/tidss/tidss_scale_coefs.h +++ b/drivers/gpu/drm/tidss/tidss_scale_coefs.h @@ -9,6 +9,8 @@ #include <linux/types.h> +struct device; + struct tidss_scale_coefs { s16 c2[16]; s16 c1[16]; diff --git a/drivers/gpu/drm/tiny/Kconfig b/drivers/gpu/drm/tiny/Kconfig index 06e54694a7f2..7d9e85e932d7 100644 --- a/drivers/gpu/drm/tiny/Kconfig +++ b/drivers/gpu/drm/tiny/Kconfig @@ -82,6 +82,21 @@ config DRM_PANEL_MIPI_DBI https://github.com/notro/panel-mipi-dbi/wiki. To compile this driver as a module, choose M here. +config DRM_PIXPAPER + tristate "DRM support for PIXPAPER display panels" + depends on DRM && SPI + select DRM_CLIENT_SELECTION + select DRM_GEM_SHMEM_HELPER + select DRM_KMS_HELPER + help + DRM driver for the Mayqueen Pixpaper e-ink display panel. + + This driver supports small e-paper displays connected over SPI, + with a resolution of 122x250 and XRGB8888 framebuffer format. + It is intended for low-power embedded applications. + + If M is selected, the module will be built as pixpaper.ko. 
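
The new DRM_PIXPAPER entry above pins the panel at 122x250 with an XRGB8888 framebuffer. Purely as an illustration of what that implies (not taken from pixpaper.c; the physical dimensions below are placeholders), a fixed mode for such a panel would typically be declared along these lines:

#include <drm/drm_modes.h>

/* 122x250 pixels; the millimetre values are illustrative placeholders. */
static const struct drm_display_mode pixpaper_mode_sketch =
	DRM_SIMPLE_MODE(122, 250, 24, 49);

/* One XRGB8888 scanline is 122 * 4 = 488 bytes, so a full frame in the
 * shmem GEM buffer is 488 * 250 = 122000 bytes before conversion to the
 * panel's native e-ink format.
 */
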
+ config TINYDRM_HX8357D tristate "DRM support for HX8357D display panels" depends on DRM && SPI diff --git a/drivers/gpu/drm/tiny/Makefile b/drivers/gpu/drm/tiny/Makefile index 4a9ff61ec254..48d30bf6152f 100644 --- a/drivers/gpu/drm/tiny/Makefile +++ b/drivers/gpu/drm/tiny/Makefile @@ -6,6 +6,7 @@ obj-$(CONFIG_DRM_BOCHS) += bochs.o obj-$(CONFIG_DRM_CIRRUS_QEMU) += cirrus-qemu.o obj-$(CONFIG_DRM_GM12U320) += gm12u320.o obj-$(CONFIG_DRM_PANEL_MIPI_DBI) += panel-mipi-dbi.o +obj-$(CONFIG_DRM_PIXPAPER) += pixpaper.o obj-$(CONFIG_TINYDRM_HX8357D) += hx8357d.o obj-$(CONFIG_TINYDRM_ILI9163) += ili9163.o obj-$(CONFIG_TINYDRM_ILI9225) += ili9225.o diff --git a/drivers/gpu/drm/tiny/bochs.c b/drivers/gpu/drm/tiny/bochs.c index 8d3b7c4fa6a4..d2d5e9f1269f 100644 --- a/drivers/gpu/drm/tiny/bochs.c +++ b/drivers/gpu/drm/tiny/bochs.c @@ -252,7 +252,7 @@ static int bochs_hw_init(struct bochs_device *bochs) } bochs->ioports = 1; } else { - dev_err(dev->dev, "I/O ports are not supported\n"); + drm_err(dev, "I/O ports are not supported\n"); return -EIO; } diff --git a/drivers/gpu/drm/tiny/pixpaper.c b/drivers/gpu/drm/tiny/pixpaper.c new file mode 100644 index 000000000000..32598fb2fee7 --- /dev/null +++ b/drivers/gpu/drm/tiny/pixpaper.c @@ -0,0 +1,1165 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * DRM driver for PIXPAPER e-ink panel + * + * Author: LiangCheng Wang <zaq14760@gmail.com>, + */ +#include <linux/delay.h> +#include <linux/module.h> +#include <linux/spi/spi.h> + +#include <drm/clients/drm_client_setup.h> +#include <drm/drm_atomic.h> +#include <drm/drm_atomic_helper.h> +#include <drm/drm_drv.h> +#include <drm/drm_fbdev_shmem.h> +#include <drm/drm_framebuffer.h> +#include <drm/drm_gem_atomic_helper.h> +#include <drm/drm_gem_shmem_helper.h> +#include <drm/drm_gem_framebuffer_helper.h> +#include <drm/drm_probe_helper.h> + +/* + * Note on Undocumented Commands/Registers: + * + * Several commands and register parameters defined in this header are not + * documented in the datasheet. Their values and usage have been derived + * through analysis of existing userspace example programs. + * + * These 'unknown' definitions are crucial for the proper initialization + * and stable operation of the panel. Modifying these values without + * thorough understanding may lead to display anomalies, panel damage, + * or unexpected behavior. 
+ */ + +/* Command definitions */ +#define PIXPAPER_CMD_PANEL_SETTING 0x00 /* R00H: Panel settings */ +#define PIXPAPER_CMD_POWER_SETTING 0x01 /* R01H: Power settings */ +#define PIXPAPER_CMD_POWER_OFF 0x02 /* R02H: Power off */ +#define PIXPAPER_CMD_POWER_OFF_SEQUENCE 0x03 /* R03H: Power off sequence */ +#define PIXPAPER_CMD_POWER_ON 0x04 /* R04H: Power on */ +#define PIXPAPER_CMD_BOOSTER_SOFT_START 0x06 /* R06H: Booster soft start */ +#define PIXPAPER_CMD_DEEP_SLEEP 0x07 /* R07H: Deep sleep */ +#define PIXPAPER_CMD_DATA_START_TRANSMISSION 0x10 +/* R10H: Data transmission start */ +#define PIXPAPER_CMD_DISPLAY_REFRESH 0x12 /* R12H: Display refresh */ +#define PIXPAPER_CMD_PLL_CONTROL 0x30 /* R30H: PLL control */ +#define PIXPAPER_CMD_TEMP_SENSOR_CALIB 0x41 +/* R41H: Temperature sensor calibration */ +#define PIXPAPER_CMD_UNKNOWN_4D 0x4D /* R4DH: Unknown command */ +#define PIXPAPER_CMD_VCOM_INTERVAL 0x50 /* R50H: VCOM interval */ +#define PIXPAPER_CMD_UNKNOWN_60 0x60 /* R60H: Unknown command */ +#define PIXPAPER_CMD_RESOLUTION_SETTING 0x61 /* R61H: Resolution settings */ +#define PIXPAPER_CMD_GATE_SOURCE_START 0x65 /* R65H: Gate/source start */ +#define PIXPAPER_CMD_UNKNOWN_B4 0xB4 /* RB4H: Unknown command */ +#define PIXPAPER_CMD_UNKNOWN_B5 0xB5 /* RB5H: Unknown command */ +#define PIXPAPER_CMD_UNKNOWN_E0 0xE0 /* RE0H: Unknown command */ +#define PIXPAPER_CMD_POWER_SAVING 0xE3 /* RE3H: Power saving */ +#define PIXPAPER_CMD_UNKNOWN_E7 0xE7 /* RE7H: Unknown command */ +#define PIXPAPER_CMD_UNKNOWN_E9 0xE9 /* RE9H: Unknown command */ + +/* R00H PSR - First Parameter */ +#define PIXPAPER_PSR_RST_N BIT(0) +/* Bit 0: RST_N, 1=no effect (default), 0=reset with booster OFF */ +#define PIXPAPER_PSR_SHD_N BIT(1) +/* Bit 1: SHD_N, 1=booster ON (default), 0=booster OFF */ +#define PIXPAPER_PSR_SHL BIT(2) +/* Bit 2: SHL, 1=shift right (default), 0=shift left */ +#define PIXPAPER_PSR_UD BIT(3) +/* Bit 3: UD, 1=scan up (default), 0=scan down */ +#define PIXPAPER_PSR_PST_MODE BIT(5) +/* Bit 5: PST_MODE, 0=frame scanning (default), 1=external */ +#define PIXPAPER_PSR_RES_MASK (3 << 6) +/* Bits 7-6: RES[1:0], resolution setting */ +#define PIXPAPER_PSR_RES_176x296 (0x0 << 6) /* 00: 176x296 */ +#define PIXPAPER_PSR_RES_128x296 (0x1 << 6) /* 01: 128x296 */ +#define PIXPAPER_PSR_RES_128x250 (0x2 << 6) /* 10: 128x250 */ +#define PIXPAPER_PSR_RES_112x204 (0x3 << 6) /* 11: 112x204 */ +#define PIXPAPER_PSR_CONFIG \ + (PIXPAPER_PSR_RST_N | PIXPAPER_PSR_SHD_N | PIXPAPER_PSR_SHL | \ + PIXPAPER_PSR_UD) +/* 0x0F: Default settings, resolution set by R61H */ + +/* R00H PSR - Second Parameter */ +#define PIXPAPER_PSR2_VC_LUTZ \ + (1 << 0) /* Bit 0: VC_LUTZ, 1=VCOM float after refresh (default), 0=no effect */ +#define PIXPAPER_PSR2_NORG \ + (1 << 1) /* Bit 1: NORG, 1=VCOM to GND before power off, 0=no effect (default) */ +#define PIXPAPER_PSR2_TIEG \ + (1 << 2) /* Bit 2: TIEG, 1=VGN to GND on power off, 0=no effect (default) */ +#define PIXPAPER_PSR2_TS_AUTO \ + (1 << 3) /* Bit 3: TS_AUTO, 1=sensor on RST_N low to high (default), 0=on booster */ +#define PIXPAPER_PSR2_VCMZ \ + (1 << 4) /* Bit 4: VCMZ, 1=VCOM always floating, 0=no effect (default) */ +#define PIXPAPER_PSR2_FOPT \ + (1 << 5) /* Bit 5: FOPT, 0=scan 1 frame (default), 1=no scan, HiZ */ +#define PIXPAPER_PSR_CONFIG2 \ + (PIXPAPER_PSR2_VC_LUTZ | \ + PIXPAPER_PSR2_TS_AUTO) /* 0x09: Default VCOM and temp sensor settings */ + +/* R01H PWR - Power Setting Register */ +/* First Parameter */ +#define PIXPAPER_PWR_VDG_EN \ + (1 << 0) /* Bit 0: VDG_EN, 1=internal 
DCDC for VGP/VGN (default), 0=external */ +#define PIXPAPER_PWR_VDS_EN \ + (1 << 1) /* Bit 1: VDS_EN, 1=internal regulator for VSP/VSN (default), 0=external */ +#define PIXPAPER_PWR_VSC_EN \ + (1 << 2) /* Bit 2: VSC_EN, 1=internal regulator for VSPL (default), 0=external */ +#define PIXPAPER_PWR_V_MODE \ + (1 << 3) /* Bit 3: V_MODE, 0=Mode0 (default), 1=Mode1 */ +#define PIXPAPER_PWR_CONFIG1 \ + (PIXPAPER_PWR_VDG_EN | PIXPAPER_PWR_VDS_EN | \ + PIXPAPER_PWR_VSC_EN) /* 0x07: Internal power for VGP/VGN, VSP/VSN, VSPL */ + +/* Second Parameter */ +#define PIXPAPER_PWR_VGPN_MASK \ + (3 << 0) /* Bits 1-0: VGPN, VGP/VGN voltage levels */ +#define PIXPAPER_PWR_VGPN_20V (0x0 << 0) /* 00: VGP=20V, VGN=-20V (default) */ +#define PIXPAPER_PWR_VGPN_17V (0x1 << 0) /* 01: VGP=17V, VGN=-17V */ +#define PIXPAPER_PWR_VGPN_15V (0x2 << 0) /* 10: VGP=15V, VGN=-15V */ +#define PIXPAPER_PWR_VGPN_10V (0x3 << 0) /* 11: VGP=10V, VGN=-10V */ +#define PIXPAPER_PWR_CONFIG2 PIXPAPER_PWR_VGPN_20V /* 0x00: VGP=20V, VGN=-20V */ + +/* Third, Fourth, Sixth Parameters (VSP_1, VSPL_0, VSPL_1) */ +#define PIXPAPER_PWR_VSP_8_2V 0x22 /* VSP_1/VSPL_1: 8.2V (34 decimal) */ +#define PIXPAPER_PWR_VSPL_15V 0x78 /* VSPL_0: 15V (120 decimal) */ + +/* Fifth Parameter (VSN_1) */ +#define PIXPAPER_PWR_VSN_4V 0x0A /* VSN_1: -4V (10 decimal) */ + +/* R03H PFS - Power Off Sequence Setting Register */ +/* First Parameter */ +#define PIXPAPER_PFS_T_VDS_OFF_MASK \ + (3 << 0) /* Bits 1-0: T_VDS_OFF, VSP/VSN power-off sequence */ +#define PIXPAPER_PFS_T_VDS_OFF_20MS (0x0 << 0) /* 00: 20 ms (default) */ +#define PIXPAPER_PFS_T_VDS_OFF_40MS (0x1 << 0) /* 01: 40 ms */ +#define PIXPAPER_PFS_T_VDS_OFF_60MS (0x2 << 0) /* 10: 60 ms */ +#define PIXPAPER_PFS_T_VDS_OFF_80MS (0x3 << 0) /* 11: 80 ms */ +#define PIXPAPER_PFS_T_VDPG_OFF_MASK \ + (3 << 4) /* Bits 5-4: T_VDPG_OFF, VGP/VGN power-off sequence */ +#define PIXPAPER_PFS_T_VDPG_OFF_20MS (0x0 << 4) /* 00: 20 ms (default) */ +#define PIXPAPER_PFS_T_VDPG_OFF_40MS (0x1 << 4) /* 01: 40 ms */ +#define PIXPAPER_PFS_T_VDPG_OFF_60MS (0x2 << 4) /* 10: 60 ms */ +#define PIXPAPER_PFS_T_VDPG_OFF_80MS (0x3 << 4) /* 11: 80 ms */ +#define PIXPAPER_PFS_CONFIG1 \ + (PIXPAPER_PFS_T_VDS_OFF_20MS | \ + PIXPAPER_PFS_T_VDPG_OFF_20MS) /* 0x10: Default 20 ms for VSP/VSN and VGP/VGN */ + +/* Second Parameter */ +#define PIXPAPER_PFS_VGP_EXT_MASK \ + (0xF << 0) /* Bits 3-0: VGP_EXT, VGP extension time */ +#define PIXPAPER_PFS_VGP_EXT_0MS (0x0 << 0) /* 0000: 0 ms */ +#define PIXPAPER_PFS_VGP_EXT_500MS (0x1 << 0) /* 0001: 500 ms */ +#define PIXPAPER_PFS_VGP_EXT_1000MS (0x2 << 0) /* 0010: 1000 ms */ +#define PIXPAPER_PFS_VGP_EXT_1500MS (0x3 << 0) /* 0011: 1500 ms */ +#define PIXPAPER_PFS_VGP_EXT_2000MS (0x4 << 0) /* 0100: 2000 ms (default) */ +#define PIXPAPER_PFS_VGP_EXT_2500MS (0x5 << 0) /* 0101: 2500 ms */ +#define PIXPAPER_PFS_VGP_EXT_3000MS (0x6 << 0) /* 0110: 3000 ms */ +#define PIXPAPER_PFS_VGP_EXT_3500MS (0x7 << 0) /* 0111: 3500 ms */ +#define PIXPAPER_PFS_VGP_EXT_4000MS (0x8 << 0) /* 1000: 4000 ms */ +#define PIXPAPER_PFS_VGP_EXT_4500MS (0x9 << 0) /* 1001: 4500 ms */ +#define PIXPAPER_PFS_VGP_EXT_5000MS (0xA << 0) /* 1010: 5000 ms */ +#define PIXPAPER_PFS_VGP_EXT_5500MS (0xB << 0) /* 1011: 5500 ms */ +#define PIXPAPER_PFS_VGP_EXT_6000MS (0xC << 0) /* 1100: 6000 ms */ +#define PIXPAPER_PFS_VGP_EXT_6500MS (0xD << 0) /* 1101: 6500 ms */ +#define PIXPAPER_PFS_VGP_LEN_MASK \ + (0xF << 4) /* Bits 7-4: VGP_LEN, VGP at 10V during power-off */ +#define PIXPAPER_PFS_VGP_LEN_0MS (0x0 << 4) /* 0000: 0 ms */ +#define 
PIXPAPER_PFS_VGP_LEN_500MS (0x1 << 4) /* 0001: 500 ms */ +#define PIXPAPER_PFS_VGP_LEN_1000MS (0x2 << 4) /* 0010: 1000 ms */ +#define PIXPAPER_PFS_VGP_LEN_1500MS (0x3 << 4) /* 0011: 1500 ms */ +#define PIXPAPER_PFS_VGP_LEN_2000MS (0x4 << 4) /* 0100: 2000 ms */ +#define PIXPAPER_PFS_VGP_LEN_2500MS (0x5 << 4) /* 0101: 2500 ms (default) */ +#define PIXPAPER_PFS_VGP_LEN_3000MS (0x6 << 4) /* 0110: 3000 ms */ +#define PIXPAPER_PFS_VGP_LEN_3500MS (0x7 << 4) /* 0111: 3500 ms */ +#define PIXPAPER_PFS_VGP_LEN_4000MS (0x8 << 4) /* 1000: 4000 ms */ +#define PIXPAPER_PFS_VGP_LEN_4500MS (0x9 << 4) /* 1001: 4500 ms */ +#define PIXPAPER_PFS_VGP_LEN_5000MS (0xA << 4) /* 1010: 5000 ms */ +#define PIXPAPER_PFS_VGP_LEN_5500MS (0xB << 4) /* 1011: 5500 ms */ +#define PIXPAPER_PFS_VGP_LEN_6000MS (0xC << 4) /* 1100: 6000 ms */ +#define PIXPAPER_PFS_VGP_LEN_6500MS (0xD << 4) /* 1101: 6500 ms */ +#define PIXPAPER_PFS_CONFIG2 \ + (PIXPAPER_PFS_VGP_EXT_1000MS | \ + PIXPAPER_PFS_VGP_LEN_2500MS) /* 0x54: VGP extension 1000 ms, VGP at 10V for 2500 ms */ + +/* Third Parameter */ +#define PIXPAPER_PFS_XON_LEN_MASK \ + (0xF << 0) /* Bits 3-0: XON_LEN, XON enable time */ +#define PIXPAPER_PFS_XON_LEN_0MS (0x0 << 0) /* 0000: 0 ms */ +#define PIXPAPER_PFS_XON_LEN_500MS (0x1 << 0) /* 0001: 500 ms */ +#define PIXPAPER_PFS_XON_LEN_1000MS (0x2 << 0) /* 0010: 1000 ms */ +#define PIXPAPER_PFS_XON_LEN_1500MS (0x3 << 0) /* 0011: 1500 ms */ +#define PIXPAPER_PFS_XON_LEN_2000MS (0x4 << 0) /* 0100: 2000 ms (default) */ +#define PIXPAPER_PFS_XON_LEN_2500MS (0x5 << 0) /* 0101: 2500 ms */ +#define PIXPAPER_PFS_XON_LEN_3000MS (0x6 << 0) /* 0110: 3000 ms */ +#define PIXPAPER_PFS_XON_LEN_3500MS (0x7 << 0) /* 0111: 3500 ms */ +#define PIXPAPER_PFS_XON_LEN_4000MS (0x8 << 0) /* 1000: 4000 ms */ +#define PIXPAPER_PFS_XON_LEN_4500MS (0x9 << 0) /* 1001: 4500 ms */ +#define PIXPAPER_PFS_XON_LEN_5000MS (0xA << 0) /* 1010: 5000 ms */ +#define PIXPAPER_PFS_XON_LEN_5500MS (0xB << 0) /* 1011: 5500 ms */ +#define PIXPAPER_PFS_XON_LEN_6000MS (0xC << 0) /* 1100: 6000 ms */ +#define PIXPAPER_PFS_XON_DLY_MASK \ + (0xF << 4) /* Bits 7-4: XON_DLY, XON delay time */ +#define PIXPAPER_PFS_XON_DLY_0MS (0x0 << 4) /* 0000: 0 ms */ +#define PIXPAPER_PFS_XON_DLY_500MS (0x1 << 4) /* 0001: 500 ms */ +#define PIXPAPER_PFS_XON_DLY_1000MS (0x2 << 4) /* 0010: 1000 ms */ +#define PIXPAPER_PFS_XON_DLY_1500MS (0x3 << 4) /* 0011: 1500 ms */ +#define PIXPAPER_PFS_XON_DLY_2000MS (0x4 << 4) /* 0100: 2000 ms (default) */ +#define PIXPAPER_PFS_XON_DLY_2500MS (0x5 << 4) /* 0101: 2500 ms */ +#define PIXPAPER_PFS_XON_DLY_3000MS (0x6 << 4) /* 0110: 3000 ms */ +#define PIXPAPER_PFS_XON_DLY_3500MS (0x7 << 4) /* 0111: 3500 ms */ +#define PIXPAPER_PFS_XON_DLY_4000MS (0x8 << 4) /* 1000: 4000 ms */ +#define PIXPAPER_PFS_XON_DLY_4500MS (0x9 << 4) /* 1001: 4500 ms */ +#define PIXPAPER_PFS_XON_DLY_5000MS (0xA << 4) /* 1010: 5000 ms */ +#define PIXPAPER_PFS_XON_DLY_5500MS (0xB << 4) /* 1011: 5500 ms */ +#define PIXPAPER_PFS_XON_DLY_6000MS (0xC << 4) /* 1100: 6000 ms */ +#define PIXPAPER_PFS_CONFIG3 \ + (PIXPAPER_PFS_XON_LEN_2000MS | \ + PIXPAPER_PFS_XON_DLY_2000MS) /* 0x44: XON enable and delay at 2000 ms */ + +/* R06H BTST - Booster Soft Start Command */ +/* First Parameter */ +#define PIXPAPER_BTST_PHA_SFT_MASK \ + (3 << 0) /* Bits 1-0: PHA_SFT, soft start period for phase A */ +#define PIXPAPER_BTST_PHA_SFT_10MS (0x0 << 0) /* 00: 10 ms (default) */ +#define PIXPAPER_BTST_PHA_SFT_20MS (0x1 << 0) /* 01: 20 ms */ +#define PIXPAPER_BTST_PHA_SFT_30MS (0x2 << 0) /* 10: 30 ms */ +#define 
PIXPAPER_BTST_PHA_SFT_40MS (0x3 << 0) /* 11: 40 ms */ +#define PIXPAPER_BTST_PHB_SFT_MASK \ + (3 << 2) /* Bits 3-2: PHB_SFT, soft start period for phase B */ +#define PIXPAPER_BTST_PHB_SFT_10MS (0x0 << 2) /* 00: 10 ms (default) */ +#define PIXPAPER_BTST_PHB_SFT_20MS (0x1 << 2) /* 01: 20 ms */ +#define PIXPAPER_BTST_PHB_SFT_30MS (0x2 << 2) /* 10: 30 ms */ +#define PIXPAPER_BTST_PHB_SFT_40MS (0x3 << 2) /* 11: 40 ms */ +#define PIXPAPER_BTST_CONFIG1 \ + (PIXPAPER_BTST_PHA_SFT_40MS | \ + PIXPAPER_BTST_PHB_SFT_40MS) /* 0x0F: 40 ms for phase A and B */ + +/* Second to Seventh Parameters (Driving Strength or Minimum OFF Time) */ +#define PIXPAPER_BTST_CONFIG2 0x0A /* Strength11 */ +#define PIXPAPER_BTST_CONFIG3 0x2F /* Period48 */ +#define PIXPAPER_BTST_CONFIG4 0x25 /* Strength38 */ +#define PIXPAPER_BTST_CONFIG5 0x22 /* Period35 */ +#define PIXPAPER_BTST_CONFIG6 0x2E /* Strength47 */ +#define PIXPAPER_BTST_CONFIG7 0x21 /* Period34 */ + +/* R12H: DRF (Display Refresh) */ +#define PIXPAPER_DRF_VCOM_AC 0x00 /* AC VCOM: VCOM follows LUTC (default) */ +#define PIXPAPER_DRF_VCOM_DC 0x01 /* DC VCOM: VCOM fixed to VCOMDC */ + +/* R30H PLL - PLL Control Register */ +/* First Parameter */ +#define PIXPAPER_PLL_FR_MASK (0x7 << 0) /* Bits 2-0: FR, frame rate */ +#define PIXPAPER_PLL_FR_12_5HZ (0x0 << 0) /* 000: 12.5 Hz */ +#define PIXPAPER_PLL_FR_25HZ (0x1 << 0) /* 001: 25 Hz */ +#define PIXPAPER_PLL_FR_50HZ (0x2 << 0) /* 010: 50 Hz (default) */ +#define PIXPAPER_PLL_FR_65HZ (0x3 << 0) /* 011: 65 Hz */ +#define PIXPAPER_PLL_FR_75HZ (0x4 << 0) /* 100: 75 Hz */ +#define PIXPAPER_PLL_FR_85HZ (0x5 << 0) /* 101: 85 Hz */ +#define PIXPAPER_PLL_FR_100HZ (0x6 << 0) /* 110: 100 Hz */ +#define PIXPAPER_PLL_FR_120HZ (0x7 << 0) /* 111: 120 Hz */ +#define PIXPAPER_PLL_DFR \ + (1 << 3) /* Bit 3: Dynamic frame rate, 0=disabled (default), 1=enabled */ +#define PIXPAPER_PLL_CONFIG \ + (PIXPAPER_PLL_FR_50HZ) /* 0x02: 50 Hz, dynamic frame rate disabled */ + +/* R41H TSE - Temperature Sensor Calibration Register */ +/* First Parameter */ +#define PIXPAPER_TSE_TO_MASK \ + (0xF << 0) /* Bits 3-0: TO[3:0], temperature offset */ +#define PIXPAPER_TSE_TO_POS_0C (0x0 << 0) /* 0000: +0°C (default) */ +#define PIXPAPER_TSE_TO_POS_0_5C (0x1 << 0) /* 0001: +0.5°C */ +#define PIXPAPER_TSE_TO_POS_1C (0x2 << 0) /* 0010: +1°C */ +#define PIXPAPER_TSE_TO_POS_1_5C (0x3 << 0) /* 0011: +1.5°C */ +#define PIXPAPER_TSE_TO_POS_2C (0x4 << 0) /* 0100: +2°C */ +#define PIXPAPER_TSE_TO_POS_2_5C (0x5 << 0) /* 0101: +2.5°C */ +#define PIXPAPER_TSE_TO_POS_3C (0x6 << 0) /* 0110: +3°C */ +#define PIXPAPER_TSE_TO_POS_3_5C (0x7 << 0) /* 0111: +3.5°C */ +#define PIXPAPER_TSE_TO_NEG_4C (0x8 << 0) /* 1000: -4°C */ +#define PIXPAPER_TSE_TO_NEG_3_5C (0x9 << 0) /* 1001: -3.5°C */ +#define PIXPAPER_TSE_TO_NEG_3C (0xA << 0) /* 1010: -3°C */ +#define PIXPAPER_TSE_TO_NEG_2_5C (0xB << 0) /* 1011: -2.5°C */ +#define PIXPAPER_TSE_TO_NEG_2C (0xC << 0) /* 1100: -2°C */ +#define PIXPAPER_TSE_TO_NEG_1_5C (0xD << 0) /* 1101: -1.5°C */ +#define PIXPAPER_TSE_TO_NEG_1C (0xE << 0) /* 1110: -1°C */ +#define PIXPAPER_TSE_TO_NEG_0_5C (0xF << 0) /* 1111: -0.5°C */ +#define PIXPAPER_TSE_TO_FINE_MASK \ + (0x3 << 4) /* Bits 5-4: TO[5:4], fine adjustment for positive offsets */ +#define PIXPAPER_TSE_TO_FINE_0C (0x0 << 4) /* 00: +0.0°C (default) */ +#define PIXPAPER_TSE_TO_FINE_0_25C (0x1 << 4) /* 01: +0.25°C */ +#define PIXPAPER_TSE_ENABLE \ + (0 << 7) /* Bit 7: TSE, 0=internal sensor enabled (default), 1=disabled (external) */ +#define PIXPAPER_TSE_DISABLE \ + (1 << 7) /* Bit 7: TSE, 
1=internal sensor disabled, use external */ +#define PIXPAPER_TSE_CONFIG \ + (PIXPAPER_TSE_TO_POS_0C | PIXPAPER_TSE_TO_FINE_0C | \ + PIXPAPER_TSE_ENABLE) /* 0x00: Internal sensor enabled, +0°C offset */ + +/* R4DH */ +#define PIXPAPER_UNKNOWN_4D_CONFIG \ + 0x78 /* This value is essential for initialization, derived from userspace examples. */ + +/* R50H CDI - VCOM and DATA Interval Setting Register */ +/* First Parameter */ +#define PIXPAPER_CDI_INTERVAL_MASK \ + (0xF << 0) /* Bits 3-0: CDI[3:0], VCOM and data interval (hsync) */ +#define PIXPAPER_CDI_17_HSYNC (0x0 << 0) /* 0000: 17 hsync */ +#define PIXPAPER_CDI_16_HSYNC (0x1 << 0) /* 0001: 16 hsync */ +#define PIXPAPER_CDI_15_HSYNC (0x2 << 0) /* 0010: 15 hsync */ +#define PIXPAPER_CDI_14_HSYNC (0x3 << 0) /* 0011: 14 hsync */ +#define PIXPAPER_CDI_13_HSYNC (0x4 << 0) /* 0100: 13 hsync */ +#define PIXPAPER_CDI_12_HSYNC (0x5 << 0) /* 0101: 12 hsync */ +#define PIXPAPER_CDI_11_HSYNC (0x6 << 0) /* 0110: 11 hsync */ +#define PIXPAPER_CDI_10_HSYNC (0x7 << 0) /* 0111: 10 hsync (default) */ +#define PIXPAPER_CDI_9_HSYNC (0x8 << 0) /* 1000: 9 hsync */ +#define PIXPAPER_CDI_8_HSYNC (0x9 << 0) /* 1001: 8 hsync */ +#define PIXPAPER_CDI_7_HSYNC (0xA << 0) /* 1010: 7 hsync */ +#define PIXPAPER_CDI_6_HSYNC (0xB << 0) /* 1011: 6 hsync */ +#define PIXPAPER_CDI_5_HSYNC (0xC << 0) /* 1100: 5 hsync */ +#define PIXPAPER_CDI_4_HSYNC (0xD << 0) /* 1101: 4 hsync */ +#define PIXPAPER_CDI_3_HSYNC (0xE << 0) /* 1110: 3 hsync */ +#define PIXPAPER_CDI_2_HSYNC (0xF << 0) /* 1111: 2 hsync */ +#define PIXPAPER_CDI_DDX \ + (1 << 4) /* Bit 4: DDX, 0=grayscale mapping 0, 1=grayscale mapping 1 (default) */ +#define PIXPAPER_CDI_VBD_MASK \ + (0x7 << 5) /* Bits 7-5: VBD[2:0], border data selection */ +#define PIXPAPER_CDI_VBD_FLOAT (0x0 << 5) /* 000: Floating (DDX=0 or 1) */ +#define PIXPAPER_CDI_VBD_GRAY3_DDX0 \ + (0x1 << 5) /* 001: Gray3 (border_buf=011) when DDX=0 */ +#define PIXPAPER_CDI_VBD_GRAY2_DDX0 \ + (0x2 << 5) /* 010: Gray2 (border_buf=010) when DDX=0 */ +#define PIXPAPER_CDI_VBD_GRAY1_DDX0 \ + (0x3 << 5) /* 011: Gray1 (border_buf=001) when DDX=0 */ +#define PIXPAPER_CDI_VBD_GRAY0_DDX0 \ + (0x4 << 5) /* 100: Gray0 (border_buf=000) when DDX=0 */ +#define PIXPAPER_CDI_VBD_GRAY0_DDX1 \ + (0x0 << 5) /* 000: Gray0 (border_buf=000) when DDX=1 */ +#define PIXPAPER_CDI_VBD_GRAY1_DDX1 \ + (0x1 << 5) /* 001: Gray1 (border_buf=001) when DDX=1 */ +#define PIXPAPER_CDI_VBD_GRAY2_DDX1 \ + (0x2 << 5) /* 010: Gray2 (border_buf=010) when DDX=1 */ +#define PIXPAPER_CDI_VBD_GRAY3_DDX1 \ + (0x3 << 5) /* 011: Gray3 (border_buf=011) when DDX=1 */ +#define PIXPAPER_CDI_VBD_FLOAT_DDX1 (0x4 << 5) /* 100: Floating when DDX=1 */ +#define PIXPAPER_CDI_CONFIG \ + (PIXPAPER_CDI_10_HSYNC | PIXPAPER_CDI_DDX | \ + PIXPAPER_CDI_VBD_GRAY1_DDX1) /* 0x37: 10 hsync, DDX=1, border Gray1 */ + +/* R60H */ +#define PIXPAPER_UNKNOWN_60_CONFIG1 \ + 0x02 /* This value is essential for initialization, derived from userspace examples. */ +#define PIXPAPER_UNKNOWN_60_CONFIG2 \ + 0x02 /* This value is essential for initialization, derived from userspace examples. 
*/ + +/* R61H TRES - Resolution Setting Register */ +#define PIXPAPER_TRES_HRES_H \ + ((PIXPAPER_PANEL_BUFFER_WIDTH >> 8) & \ + 0xFF) /* HRES[9:8]: High byte of horizontal resolution (128) */ +#define PIXPAPER_TRES_HRES_L \ + (PIXPAPER_PANEL_BUFFER_WIDTH & \ + 0xFF) /* HRES[7:0]: Low byte of horizontal resolution (128 = 0x80) */ +#define PIXPAPER_TRES_VRES_H \ + ((PIXPAPER_HEIGHT >> 8) & \ + 0xFF) /* VRES[9:8]: High byte of vertical resolution (250) */ +#define PIXPAPER_TRES_VRES_L \ + (PIXPAPER_HEIGHT & \ + 0xFF) /* VRES[7:0]: Low byte of vertical resolution (250 = 0xFA) */ + +/* R65H GSST - Gate/Source Start Setting Register */ +#define PIXPAPER_GSST_S_START 0x00 /* S_Start[7:0]: First source line (S0) */ +#define PIXPAPER_GSST_RESERVED 0x00 /* Reserved byte */ +#define PIXPAPER_GSST_G_START_H \ + 0x00 /* G_Start[8]: High bit of first gate line (G0) */ +#define PIXPAPER_GSST_G_START_L \ + 0x00 /* G_Start[7:0]: Low byte of first gate line (G0) */ + +/* RB4H */ +#define PIXPAPER_UNKNOWN_B4_CONFIG \ + 0xD0 /* This value is essential for initialization, derived from userspace examples. */ + +/* RB5H */ +#define PIXPAPER_UNKNOWN_B5_CONFIG \ + 0x03 /* This value is essential for initialization, derived from userspace examples. */ + +/* RE0H */ +#define PIXPAPER_UNKNOWN_E0_CONFIG \ + 0x00 /* This value is essential for initialization, derived from userspace examples. */ + +/* RE3H PWS - Power Saving Register */ +/* First Parameter */ +#define PIXPAPER_PWS_VCOM_W_MASK \ + (0xF \ + << 4) /* Bits 7-4: VCOM_W[3:0], VCOM power-saving width (line periods) */ +#define PIXPAPER_PWS_VCOM_W_0 (0x0 << 4) /* 0000: 0 line periods */ +#define PIXPAPER_PWS_VCOM_W_1 (0x1 << 4) /* 0001: 1 line period */ +#define PIXPAPER_PWS_VCOM_W_2 (0x2 << 4) /* 0010: 2 line periods */ +#define PIXPAPER_PWS_VCOM_W_3 (0x3 << 4) /* 0011: 3 line periods */ +#define PIXPAPER_PWS_VCOM_W_4 (0x4 << 4) /* 0100: 4 line periods */ +#define PIXPAPER_PWS_VCOM_W_5 (0x5 << 4) /* 0101: 5 line periods */ +#define PIXPAPER_PWS_VCOM_W_6 (0x6 << 4) /* 0110: 6 line periods */ +#define PIXPAPER_PWS_VCOM_W_7 (0x7 << 4) /* 0111: 7 line periods */ +#define PIXPAPER_PWS_VCOM_W_8 (0x8 << 4) /* 1000: 8 line periods */ +#define PIXPAPER_PWS_VCOM_W_9 (0x9 << 4) /* 1001: 9 line periods */ +#define PIXPAPER_PWS_VCOM_W_10 (0xA << 4) /* 1010: 10 line periods */ +#define PIXPAPER_PWS_VCOM_W_11 (0xB << 4) /* 1011: 11 line periods */ +#define PIXPAPER_PWS_VCOM_W_12 (0xC << 4) /* 1100: 12 line periods */ +#define PIXPAPER_PWS_VCOM_W_13 (0xD << 4) /* 1101: 13 line periods */ +#define PIXPAPER_PWS_VCOM_W_14 (0xE << 4) /* 1110: 14 line periods */ +#define PIXPAPER_PWS_VCOM_W_15 (0xF << 4) /* 1111: 15 line periods */ +#define PIXPAPER_PWS_SD_W_MASK \ + (0xF << 0) /* Bits 3-0: SD_W[3:0], source power-saving width (660 ns units) */ +#define PIXPAPER_PWS_SD_W_0 (0x0 << 0) /* 0000: 0 ns */ +#define PIXPAPER_PWS_SD_W_1 (0x1 << 0) /* 0001: 660 ns */ +#define PIXPAPER_PWS_SD_W_2 (0x2 << 0) /* 0010: 1320 ns */ +#define PIXPAPER_PWS_SD_W_3 (0x3 << 0) /* 0011: 1980 ns */ +#define PIXPAPER_PWS_SD_W_4 (0x4 << 0) /* 0100: 2640 ns */ +#define PIXPAPER_PWS_SD_W_5 (0x5 << 0) /* 0101: 3300 ns */ +#define PIXPAPER_PWS_SD_W_6 (0x6 << 0) /* 0110: 3960 ns */ +#define PIXPAPER_PWS_SD_W_7 (0x7 << 0) /* 0111: 4620 ns */ +#define PIXPAPER_PWS_SD_W_8 (0x8 << 0) /* 1000: 5280 ns */ +#define PIXPAPER_PWS_SD_W_9 (0x9 << 0) /* 1001: 5940 ns */ +#define PIXPAPER_PWS_SD_W_10 (0xA << 0) /* 1010: 6600 ns */ +#define PIXPAPER_PWS_SD_W_11 (0xB << 0) /* 1011: 7260 ns */ +#define PIXPAPER_PWS_SD_W_12 
(0xC << 0) /* 1100: 7920 ns */ +#define PIXPAPER_PWS_SD_W_13 (0xD << 0) /* 1101: 8580 ns */ +#define PIXPAPER_PWS_SD_W_14 (0xE << 0) /* 1110: 9240 ns */ +#define PIXPAPER_PWS_SD_W_15 (0xF << 0) /* 1111: 9900 ns */ +#define PIXPAPER_PWS_CONFIG \ + (PIXPAPER_PWS_VCOM_W_2 | \ + PIXPAPER_PWS_SD_W_2) /* 0x22: VCOM 2 line periods (160 µs), source 1320 ns */ + +/* RE7H */ +#define PIXPAPER_UNKNOWN_E7_CONFIG \ + 0x1C /* This value is essential for initialization, derived from userspace examples. */ + +/* RE9H */ +#define PIXPAPER_UNKNOWN_E9_CONFIG \ + 0x01 /* This value is essential for initialization, derived from userspace examples. */ + +MODULE_IMPORT_NS("DMA_BUF"); + +/* + * The panel has a visible resolution of 122x250. + * However, the controller requires the horizontal resolution to be aligned to 128 pixels. + * No porch or sync timing values are provided in the datasheet, so we define minimal + * placeholder values to satisfy the DRM framework. + */ + +/* Panel visible resolution */ +#define PIXPAPER_WIDTH 122 +#define PIXPAPER_HEIGHT 250 + +/* Controller requires 128 horizontal pixels total (for memory alignment) */ +#define PIXPAPER_HTOTAL 128 +#define PIXPAPER_HFP 2 +#define PIXPAPER_HSYNC 2 +#define PIXPAPER_HBP (PIXPAPER_HTOTAL - PIXPAPER_WIDTH - PIXPAPER_HFP - PIXPAPER_HSYNC) + +/* + * According to the datasheet, the total vertical blanking must be 55 lines, + * regardless of how the vertical back porch is set. + * Here we allocate VFP=2, VSYNC=2, and VBP=51 to sum up to 55 lines. + * Total vertical lines = 250 (visible) + 55 (blanking) = 305. + */ +#define PIXPAPER_VTOTAL (250 + 55) +#define PIXPAPER_VFP 2 +#define PIXPAPER_VSYNC 2 +#define PIXPAPER_VBP (55 - PIXPAPER_VFP - PIXPAPER_VSYNC) + +/* + * Pixel clock calculation: + * pixel_clock = htotal * vtotal * refresh_rate + * = 128 * 305 * 50 + * = 1,952,000 Hz = 1952 kHz + */ +#define PIXPAPER_PIXEL_CLOCK 1952 + +#define PIXPAPER_WIDTH_MM 24 /* approximate from 23.7046mm */ +#define PIXPAPER_HEIGHT_MM 49 /* approximate from 48.55mm */ + +#define PIXPAPER_SPI_BITS_PER_WORD 8 +#define PIXPAPER_SPI_SPEED_DEFAULT 1000000 + +#define PIXPAPER_PANEL_BUFFER_WIDTH 128 +#define PIXPAPER_PANEL_BUFFER_TWO_BYTES_PER_ROW (PIXPAPER_PANEL_BUFFER_WIDTH / 4) + +#define PIXPAPER_COLOR_THRESHOLD_LOW_CHANNEL 60 +#define PIXPAPER_COLOR_THRESHOLD_HIGH_CHANNEL 200 +#define PIXPAPER_COLOR_THRESHOLD_YELLOW_MIN_GREEN 180 + +struct pixpaper_error_ctx { + int errno_code; +}; + +struct pixpaper_panel { + struct drm_device drm; + struct drm_plane plane; + struct drm_crtc crtc; + struct drm_encoder encoder; + struct drm_connector connector; + + struct spi_device *spi; + struct gpio_desc *reset; + struct gpio_desc *busy; + struct gpio_desc *dc; +}; + +static inline struct pixpaper_panel *to_pixpaper_panel(struct drm_device *drm) +{ + return container_of(drm, struct pixpaper_panel, drm); +} + +static void pixpaper_wait_for_panel(struct pixpaper_panel *panel) +{ + unsigned int timeout_ms = 10000; + unsigned long timeout_jiffies = jiffies + msecs_to_jiffies(timeout_ms); + + usleep_range(1000, 1500); + while (gpiod_get_value_cansleep(panel->busy) != 1) { + if (time_after(jiffies, timeout_jiffies)) { + drm_warn(&panel->drm, "Busy wait timed out\n"); + return; + } + usleep_range(100, 200); + } +} + +static void pixpaper_spi_sync(struct spi_device *spi, struct spi_message *msg, + struct pixpaper_error_ctx *err) +{ + if (err->errno_code) + return; + + int ret = spi_sync(spi, msg); + + if (ret < 0) + err->errno_code = ret; +} + +static void pixpaper_send_cmd(struct 
pixpaper_panel *panel, u8 cmd, + struct pixpaper_error_ctx *err) +{ + if (err->errno_code) + return; + + struct spi_transfer xfer = { + .tx_buf = &cmd, + .len = 1, + }; + struct spi_message msg; + + spi_message_init(&msg); + spi_message_add_tail(&xfer, &msg); + + gpiod_set_value_cansleep(panel->dc, 0); + usleep_range(1, 5); + pixpaper_spi_sync(panel->spi, &msg, err); +} + +static void pixpaper_send_data(struct pixpaper_panel *panel, u8 data, + struct pixpaper_error_ctx *err) +{ + if (err->errno_code) + return; + + struct spi_transfer xfer = { + .tx_buf = &data, + .len = 1, + }; + struct spi_message msg; + + spi_message_init(&msg); + spi_message_add_tail(&xfer, &msg); + + gpiod_set_value_cansleep(panel->dc, 1); + usleep_range(1, 5); + pixpaper_spi_sync(panel->spi, &msg, err); +} + +static int pixpaper_panel_hw_init(struct pixpaper_panel *panel) +{ + struct pixpaper_error_ctx err = { .errno_code = 0 }; + + gpiod_set_value_cansleep(panel->reset, 0); + msleep(50); + gpiod_set_value_cansleep(panel->reset, 1); + msleep(50); + + pixpaper_wait_for_panel(panel); + + pixpaper_send_cmd(panel, PIXPAPER_CMD_UNKNOWN_4D, &err); + pixpaper_send_data(panel, PIXPAPER_UNKNOWN_4D_CONFIG, &err); + if (err.errno_code) + goto init_fail; + pixpaper_wait_for_panel(panel); + + pixpaper_send_cmd(panel, PIXPAPER_CMD_PANEL_SETTING, &err); + pixpaper_send_data(panel, PIXPAPER_PSR_CONFIG, &err); + pixpaper_send_data(panel, PIXPAPER_PSR_CONFIG2, &err); + if (err.errno_code) + goto init_fail; + pixpaper_wait_for_panel(panel); + + pixpaper_send_cmd(panel, PIXPAPER_CMD_POWER_SETTING, &err); + pixpaper_send_data(panel, PIXPAPER_PWR_CONFIG1, &err); + pixpaper_send_data(panel, PIXPAPER_PWR_CONFIG2, &err); + pixpaper_send_data(panel, PIXPAPER_PWR_VSP_8_2V, &err); + pixpaper_send_data(panel, PIXPAPER_PWR_VSPL_15V, &err); + pixpaper_send_data(panel, PIXPAPER_PWR_VSN_4V, &err); + pixpaper_send_data(panel, PIXPAPER_PWR_VSP_8_2V, &err); + if (err.errno_code) + goto init_fail; + pixpaper_wait_for_panel(panel); + + pixpaper_send_cmd(panel, PIXPAPER_CMD_POWER_OFF_SEQUENCE, &err); + pixpaper_send_data(panel, PIXPAPER_PFS_CONFIG1, &err); + pixpaper_send_data(panel, PIXPAPER_PFS_CONFIG2, &err); + pixpaper_send_data(panel, PIXPAPER_PFS_CONFIG3, &err); + if (err.errno_code) + goto init_fail; + pixpaper_wait_for_panel(panel); + + pixpaper_send_cmd(panel, PIXPAPER_CMD_BOOSTER_SOFT_START, &err); + pixpaper_send_data(panel, PIXPAPER_BTST_CONFIG1, &err); + pixpaper_send_data(panel, PIXPAPER_BTST_CONFIG2, &err); + pixpaper_send_data(panel, PIXPAPER_BTST_CONFIG3, &err); + pixpaper_send_data(panel, PIXPAPER_BTST_CONFIG4, &err); + pixpaper_send_data(panel, PIXPAPER_BTST_CONFIG5, &err); + pixpaper_send_data(panel, PIXPAPER_BTST_CONFIG6, &err); + pixpaper_send_data(panel, PIXPAPER_BTST_CONFIG7, &err); + if (err.errno_code) + goto init_fail; + pixpaper_wait_for_panel(panel); + + pixpaper_send_cmd(panel, PIXPAPER_CMD_PLL_CONTROL, &err); + pixpaper_send_data(panel, PIXPAPER_PLL_CONFIG, &err); + if (err.errno_code) + goto init_fail; + pixpaper_wait_for_panel(panel); + + pixpaper_send_cmd(panel, PIXPAPER_CMD_TEMP_SENSOR_CALIB, &err); + pixpaper_send_data(panel, PIXPAPER_TSE_CONFIG, &err); + if (err.errno_code) + goto init_fail; + pixpaper_wait_for_panel(panel); + + pixpaper_send_cmd(panel, PIXPAPER_CMD_VCOM_INTERVAL, &err); + pixpaper_send_data(panel, PIXPAPER_CDI_CONFIG, &err); + if (err.errno_code) + goto init_fail; + pixpaper_wait_for_panel(panel); + + pixpaper_send_cmd(panel, PIXPAPER_CMD_UNKNOWN_60, &err); + pixpaper_send_data(panel, 
PIXPAPER_UNKNOWN_60_CONFIG1, &err); + pixpaper_send_data(panel, PIXPAPER_UNKNOWN_60_CONFIG2, &err); + if (err.errno_code) + goto init_fail; + pixpaper_wait_for_panel(panel); + + pixpaper_send_cmd(panel, PIXPAPER_CMD_RESOLUTION_SETTING, &err); + pixpaper_send_data(panel, PIXPAPER_TRES_HRES_H, &err); + pixpaper_send_data(panel, PIXPAPER_TRES_HRES_L, &err); + pixpaper_send_data(panel, PIXPAPER_TRES_VRES_H, &err); + pixpaper_send_data(panel, PIXPAPER_TRES_VRES_L, &err); + if (err.errno_code) + goto init_fail; + pixpaper_wait_for_panel(panel); + + pixpaper_send_cmd(panel, PIXPAPER_CMD_GATE_SOURCE_START, &err); + pixpaper_send_data(panel, PIXPAPER_GSST_S_START, &err); + pixpaper_send_data(panel, PIXPAPER_GSST_RESERVED, &err); + pixpaper_send_data(panel, PIXPAPER_GSST_G_START_H, &err); + pixpaper_send_data(panel, PIXPAPER_GSST_G_START_L, &err); + if (err.errno_code) + goto init_fail; + pixpaper_wait_for_panel(panel); + + pixpaper_send_cmd(panel, PIXPAPER_CMD_UNKNOWN_E7, &err); + pixpaper_send_data(panel, PIXPAPER_UNKNOWN_E7_CONFIG, &err); + if (err.errno_code) + goto init_fail; + pixpaper_wait_for_panel(panel); + + pixpaper_send_cmd(panel, PIXPAPER_CMD_POWER_SAVING, &err); + pixpaper_send_data(panel, PIXPAPER_PWS_CONFIG, &err); + if (err.errno_code) + goto init_fail; + pixpaper_wait_for_panel(panel); + + pixpaper_send_cmd(panel, PIXPAPER_CMD_UNKNOWN_E0, &err); + pixpaper_send_data(panel, PIXPAPER_UNKNOWN_E0_CONFIG, &err); + if (err.errno_code) + goto init_fail; + pixpaper_wait_for_panel(panel); + + pixpaper_send_cmd(panel, PIXPAPER_CMD_UNKNOWN_B4, &err); + pixpaper_send_data(panel, PIXPAPER_UNKNOWN_B4_CONFIG, &err); + if (err.errno_code) + goto init_fail; + pixpaper_wait_for_panel(panel); + + pixpaper_send_cmd(panel, PIXPAPER_CMD_UNKNOWN_B5, &err); + pixpaper_send_data(panel, PIXPAPER_UNKNOWN_B5_CONFIG, &err); + if (err.errno_code) + goto init_fail; + pixpaper_wait_for_panel(panel); + + pixpaper_send_cmd(panel, PIXPAPER_CMD_UNKNOWN_E9, &err); + pixpaper_send_data(panel, PIXPAPER_UNKNOWN_E9_CONFIG, &err); + if (err.errno_code) + goto init_fail; + pixpaper_wait_for_panel(panel); + + return 0; + +init_fail: + drm_err(&panel->drm, "Hardware initialization failed (err=%d)\n", + err.errno_code); + return err.errno_code; +} + +/* + * Convert framebuffer pixels to 2-bit e-paper format: + * 00 - White + * 01 - Black + * 10 - Yellow + * 11 - Red + */ +static u8 pack_pixels_to_byte(__le32 *src_pixels, int i, int j, + struct drm_framebuffer *fb) +{ + u8 packed_byte = 0; + int k; + + for (k = 0; k < 4; k++) { + int current_pixel_x = j * 4 + k; + u8 two_bit_val; + + if (current_pixel_x < PIXPAPER_WIDTH) { + u32 pixel_offset = + (i * (fb->pitches[0] / 4)) + current_pixel_x; + u32 pixel = le32_to_cpu(src_pixels[pixel_offset]); + u32 r = (pixel >> 16) & 0xFF; + u32 g = (pixel >> 8) & 0xFF; + u32 b = pixel & 0xFF; + + if (r < PIXPAPER_COLOR_THRESHOLD_LOW_CHANNEL && + g < PIXPAPER_COLOR_THRESHOLD_LOW_CHANNEL && + b < PIXPAPER_COLOR_THRESHOLD_LOW_CHANNEL) { + two_bit_val = 0b00; + } else if (r > PIXPAPER_COLOR_THRESHOLD_HIGH_CHANNEL && + g > PIXPAPER_COLOR_THRESHOLD_HIGH_CHANNEL && + b > PIXPAPER_COLOR_THRESHOLD_HIGH_CHANNEL) { + two_bit_val = 0b01; + } else if (r > PIXPAPER_COLOR_THRESHOLD_HIGH_CHANNEL && + g < PIXPAPER_COLOR_THRESHOLD_LOW_CHANNEL && + b < PIXPAPER_COLOR_THRESHOLD_LOW_CHANNEL) { + two_bit_val = 0b11; + } else if (r > PIXPAPER_COLOR_THRESHOLD_HIGH_CHANNEL && + g > PIXPAPER_COLOR_THRESHOLD_YELLOW_MIN_GREEN && + b < PIXPAPER_COLOR_THRESHOLD_LOW_CHANNEL) { + two_bit_val = 0b10; + } else { + two_bit_val 
= 0b01; + } + } else { + two_bit_val = 0b01; + } + + packed_byte |= two_bit_val << ((3 - k) * 2); + } + + return packed_byte; +} + +static int pixpaper_plane_helper_atomic_check(struct drm_plane *plane, + struct drm_atomic_state *state) +{ + struct drm_plane_state *new_plane_state = + drm_atomic_get_new_plane_state(state, plane); + struct drm_crtc *new_crtc = new_plane_state->crtc; + struct drm_crtc_state *new_crtc_state = NULL; + int ret; + + if (new_crtc) + new_crtc_state = drm_atomic_get_new_crtc_state(state, new_crtc); + + ret = drm_atomic_helper_check_plane_state(new_plane_state, + new_crtc_state, DRM_PLANE_NO_SCALING, + DRM_PLANE_NO_SCALING, false, false); + if (ret) + return ret; + else if (!new_plane_state->visible) + return 0; + + return 0; +} + +static int pixpaper_crtc_helper_atomic_check(struct drm_crtc *crtc, + struct drm_atomic_state *state) +{ + struct drm_crtc_state *crtc_state = + drm_atomic_get_new_crtc_state(state, crtc); + + if (!crtc_state->enable) + return 0; + + return drm_atomic_helper_check_crtc_primary_plane(crtc_state); +} + +static void pixpaper_crtc_atomic_enable(struct drm_crtc *crtc, + struct drm_atomic_state *state) +{ + struct pixpaper_panel *panel = to_pixpaper_panel(crtc->dev); + struct drm_device *drm = &panel->drm; + int idx; + struct pixpaper_error_ctx err = { .errno_code = 0 }; + + if (!drm_dev_enter(drm, &idx)) + return; + + pixpaper_send_cmd(panel, PIXPAPER_CMD_POWER_ON, &err); + if (err.errno_code) { + drm_err_once(drm, "Failed to send PON command: %d\n", err.errno_code); + goto exit_drm_dev; + } + + pixpaper_wait_for_panel(panel); + + drm_dbg(drm, "Panel enabled and powered on\n"); + +exit_drm_dev: + drm_dev_exit(idx); +} + +static void pixpaper_crtc_atomic_disable(struct drm_crtc *crtc, + struct drm_atomic_state *state) +{ + struct pixpaper_panel *panel = to_pixpaper_panel(crtc->dev); + struct drm_device *drm = &panel->drm; + struct pixpaper_error_ctx err = { .errno_code = 0 }; + int idx; + + if (!drm_dev_enter(drm, &idx)) + return; + + pixpaper_send_cmd(panel, PIXPAPER_CMD_POWER_OFF, &err); + if (err.errno_code) { + drm_err_once(drm, "Failed to send POF command: %d\n", err.errno_code); + goto exit_drm_dev; + } + pixpaper_wait_for_panel(panel); + + drm_dbg(drm, "Panel disabled\n"); + +exit_drm_dev: + drm_dev_exit(idx); +} + +static void pixpaper_plane_atomic_update(struct drm_plane *plane, + struct drm_atomic_state *state) +{ + struct drm_plane_state *plane_state = + drm_atomic_get_new_plane_state(state, plane); + struct drm_shadow_plane_state *shadow_plane_state = + to_drm_shadow_plane_state(plane_state); + struct drm_crtc *crtc = plane_state->crtc; + struct pixpaper_panel *panel = to_pixpaper_panel(crtc->dev); + + struct drm_device *drm = &panel->drm; + struct drm_framebuffer *fb = plane_state->fb; + struct iosys_map map = shadow_plane_state->data[0]; + void *vaddr = map.vaddr; + int i, j, idx; + __le32 *src_pixels = NULL; + struct pixpaper_error_ctx err = { .errno_code = 0 }; + + if (!drm_dev_enter(drm, &idx)) + return; + + drm_dbg(drm, "Starting frame update (phys=%dx%d, buf_w=%d)\n", + PIXPAPER_WIDTH, PIXPAPER_HEIGHT, PIXPAPER_PANEL_BUFFER_WIDTH); + + if (!fb || !plane_state->visible) { + drm_err_once(drm, "No framebuffer or plane not visible, skipping update\n"); + goto update_cleanup; + } + + src_pixels = (__le32 *)vaddr; + + pixpaper_send_cmd(panel, PIXPAPER_CMD_DATA_START_TRANSMISSION, &err); + if (err.errno_code) + goto update_cleanup; + + pixpaper_wait_for_panel(panel); + + for (i = 0; i < PIXPAPER_HEIGHT; i++) { + for (j = 0; j < 
PIXPAPER_PANEL_BUFFER_TWO_BYTES_PER_ROW; j++) { + u8 packed_byte = + pack_pixels_to_byte(src_pixels, i, j, fb); + + pixpaper_wait_for_panel(panel); + pixpaper_send_data(panel, packed_byte, &err); + } + } + pixpaper_wait_for_panel(panel); + + pixpaper_send_cmd(panel, PIXPAPER_CMD_POWER_ON, &err); + if (err.errno_code) { + drm_err_once(drm, "Failed to send PON command: %d\n", err.errno_code); + goto update_cleanup; + } + pixpaper_wait_for_panel(panel); + + pixpaper_send_cmd(panel, PIXPAPER_CMD_DISPLAY_REFRESH, &err); + pixpaper_send_data(panel, PIXPAPER_DRF_VCOM_AC, &err); + if (err.errno_code) { + drm_err_once(drm, "Failed sending data after DRF: %d\n", err.errno_code); + goto update_cleanup; + } + pixpaper_wait_for_panel(panel); + +update_cleanup: + if (err.errno_code && err.errno_code != -ETIMEDOUT) + drm_err_once(drm, "Frame update function failed with error %d\n", err.errno_code); + + drm_dev_exit(idx); +} + +static const struct drm_display_mode pixpaper_mode = { + .clock = PIXPAPER_PIXEL_CLOCK, + .hdisplay = PIXPAPER_WIDTH, + .hsync_start = PIXPAPER_WIDTH + PIXPAPER_HFP, + .hsync_end = PIXPAPER_WIDTH + PIXPAPER_HFP + PIXPAPER_HSYNC, + .htotal = PIXPAPER_HTOTAL, + .vdisplay = PIXPAPER_HEIGHT, + .vsync_start = PIXPAPER_HEIGHT + PIXPAPER_VFP, + .vsync_end = PIXPAPER_HEIGHT + PIXPAPER_VFP + PIXPAPER_VSYNC, + .vtotal = PIXPAPER_VTOTAL, + .width_mm = PIXPAPER_WIDTH_MM, + .height_mm = PIXPAPER_HEIGHT_MM, + .type = DRM_MODE_TYPE_DRIVER | DRM_MODE_TYPE_PREFERRED, +}; + +static int pixpaper_connector_get_modes(struct drm_connector *connector) +{ + return drm_connector_helper_get_modes_fixed(connector, &pixpaper_mode); +} + +static const struct drm_plane_funcs pixpaper_plane_funcs = { + .update_plane = drm_atomic_helper_update_plane, + .disable_plane = drm_atomic_helper_disable_plane, + .destroy = drm_plane_cleanup, + DRM_GEM_SHADOW_PLANE_FUNCS, +}; + +static const struct drm_plane_helper_funcs pixpaper_plane_helper_funcs = { + DRM_GEM_SHADOW_PLANE_HELPER_FUNCS, + .atomic_check = pixpaper_plane_helper_atomic_check, + .atomic_update = pixpaper_plane_atomic_update, +}; + +static const struct drm_crtc_funcs pixpaper_crtc_funcs = { + .set_config = drm_atomic_helper_set_config, + .page_flip = drm_atomic_helper_page_flip, + .reset = drm_atomic_helper_crtc_reset, + .destroy = drm_crtc_cleanup, + .atomic_duplicate_state = drm_atomic_helper_crtc_duplicate_state, + .atomic_destroy_state = drm_atomic_helper_crtc_destroy_state, +}; + +static enum drm_mode_status +pixpaper_mode_valid(struct drm_crtc *crtc, const struct drm_display_mode *mode) +{ + if (mode->hdisplay == PIXPAPER_WIDTH && + mode->vdisplay == PIXPAPER_HEIGHT) { + return MODE_OK; + } + return MODE_BAD; +} + +static const struct drm_crtc_helper_funcs pixpaper_crtc_helper_funcs = { + .mode_valid = pixpaper_mode_valid, + .atomic_check = pixpaper_crtc_helper_atomic_check, + .atomic_enable = pixpaper_crtc_atomic_enable, + .atomic_disable = pixpaper_crtc_atomic_disable, +}; + +static const struct drm_encoder_funcs pixpaper_encoder_funcs = { + .destroy = drm_encoder_cleanup, +}; + +static const struct drm_connector_funcs pixpaper_connector_funcs = { + .reset = drm_atomic_helper_connector_reset, + .fill_modes = drm_helper_probe_single_connector_modes, + .destroy = drm_connector_cleanup, + .atomic_duplicate_state = drm_atomic_helper_connector_duplicate_state, + .atomic_destroy_state = drm_atomic_helper_connector_destroy_state, +}; + +static const struct drm_connector_helper_funcs pixpaper_connector_helper_funcs = { + .get_modes = 
pixpaper_connector_get_modes, +}; + +DEFINE_DRM_GEM_FOPS(pixpaper_fops); + +static struct drm_driver pixpaper_drm_driver = { + .driver_features = DRIVER_GEM | DRIVER_MODESET | DRIVER_ATOMIC, + .fops = &pixpaper_fops, + .name = "pixpaper", + .desc = "DRM driver for PIXPAPER e-ink", + .major = 1, + .minor = 0, + DRM_GEM_SHMEM_DRIVER_OPS, + DRM_FBDEV_SHMEM_DRIVER_OPS, +}; + +static const struct drm_mode_config_funcs pixpaper_mode_config_funcs = { + .fb_create = drm_gem_fb_create_with_dirty, + .atomic_check = drm_atomic_helper_check, + .atomic_commit = drm_atomic_helper_commit, +}; + +static int pixpaper_probe(struct spi_device *spi) +{ + struct device *dev = &spi->dev; + struct pixpaper_panel *panel; + struct drm_device *drm; + int ret; + + panel = devm_drm_dev_alloc(dev, &pixpaper_drm_driver, + struct pixpaper_panel, drm); + if (IS_ERR(panel)) + return PTR_ERR(panel); + + drm = &panel->drm; + panel->spi = spi; + spi_set_drvdata(spi, panel); + + spi->mode = SPI_MODE_0; + spi->bits_per_word = PIXPAPER_SPI_BITS_PER_WORD; + + if (!spi->max_speed_hz) { + drm_warn(drm, + "spi-max-frequency not specified in DT, using default %u Hz\n", + PIXPAPER_SPI_SPEED_DEFAULT); + spi->max_speed_hz = PIXPAPER_SPI_SPEED_DEFAULT; + } + + ret = spi_setup(spi); + if (ret < 0) { + drm_err(drm, "SPI setup failed: %d\n", ret); + return ret; + } + + if (!dev->dma_mask) + dev->dma_mask = &dev->coherent_dma_mask; + ret = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(32)); + if (ret) { + drm_err(drm, "Failed to set DMA mask: %d\n", ret); + return ret; + } + + panel->reset = devm_gpiod_get(dev, "reset", GPIOD_OUT_HIGH); + if (IS_ERR(panel->reset)) + return PTR_ERR(panel->reset); + + panel->busy = devm_gpiod_get(dev, "busy", GPIOD_IN); + if (IS_ERR(panel->busy)) + return PTR_ERR(panel->busy); + + panel->dc = devm_gpiod_get(dev, "dc", GPIOD_OUT_HIGH); + if (IS_ERR(panel->dc)) + return PTR_ERR(panel->dc); + + ret = pixpaper_panel_hw_init(panel); + if (ret) { + drm_err(drm, "Panel hardware initialization failed: %d\n", ret); + return ret; + } + + ret = drmm_mode_config_init(drm); + if (ret) + return ret; + drm->mode_config.funcs = &pixpaper_mode_config_funcs; + drm->mode_config.min_width = PIXPAPER_WIDTH; + drm->mode_config.max_width = PIXPAPER_WIDTH; + drm->mode_config.min_height = PIXPAPER_HEIGHT; + drm->mode_config.max_height = PIXPAPER_HEIGHT; + + ret = drm_universal_plane_init(drm, &panel->plane, 1, + &pixpaper_plane_funcs, + (const uint32_t[]){ DRM_FORMAT_XRGB8888 }, + 1, NULL, DRM_PLANE_TYPE_PRIMARY, NULL); + if (ret) + return ret; + drm_plane_helper_add(&panel->plane, &pixpaper_plane_helper_funcs); + + ret = drm_crtc_init_with_planes(drm, &panel->crtc, &panel->plane, NULL, + &pixpaper_crtc_funcs, NULL); + if (ret) + return ret; + drm_crtc_helper_add(&panel->crtc, &pixpaper_crtc_helper_funcs); + + ret = drm_encoder_init(drm, &panel->encoder, &pixpaper_encoder_funcs, + DRM_MODE_ENCODER_NONE, NULL); + if (ret) + return ret; + panel->encoder.possible_crtcs = drm_crtc_mask(&panel->crtc); + + ret = drm_connector_init(drm, &panel->connector, + &pixpaper_connector_funcs, + DRM_MODE_CONNECTOR_SPI); + if (ret) + return ret; + + drm_connector_helper_add(&panel->connector, + &pixpaper_connector_helper_funcs); + drm_connector_attach_encoder(&panel->connector, &panel->encoder); + + drm_mode_config_reset(drm); + + ret = drm_dev_register(drm, 0); + if (ret) + return ret; + + drm_client_setup(drm, NULL); + + return 0; +} + +static void pixpaper_remove(struct spi_device *spi) +{ + struct pixpaper_panel *panel = spi_get_drvdata(spi); + + if 
(!panel) + return; + + drm_dev_unplug(&panel->drm); + drm_atomic_helper_shutdown(&panel->drm); +} + +static const struct spi_device_id pixpaper_ids[] = { { "pixpaper", 0 }, {} }; +MODULE_DEVICE_TABLE(spi, pixpaper_ids); + +static const struct of_device_id pixpaper_dt_ids[] = { + { .compatible = "mayqueen,pixpaper" }, + {} +}; +MODULE_DEVICE_TABLE(of, pixpaper_dt_ids); + +static struct spi_driver pixpaper_spi_driver = { + .driver = { + .name = "pixpaper", + .of_match_table = pixpaper_dt_ids, + }, + .id_table = pixpaper_ids, + .probe = pixpaper_probe, + .remove = pixpaper_remove, +}; + +module_spi_driver(pixpaper_spi_driver); + +MODULE_AUTHOR("LiangCheng Wang"); +MODULE_DESCRIPTION("DRM SPI driver for PIXPAPER e-ink panel"); +MODULE_LICENSE("GPL"); diff --git a/drivers/gpu/drm/tiny/repaper.c b/drivers/gpu/drm/tiny/repaper.c index 5c3b51eb0a97..4824f863fdba 100644 --- a/drivers/gpu/drm/tiny/repaper.c +++ b/drivers/gpu/drm/tiny/repaper.c @@ -510,13 +510,12 @@ static void repaper_get_temperature(struct repaper_epd *epd) epd->factored_stage_time = epd->stage_time * factor10x / 10; } -static int repaper_fb_dirty(struct drm_framebuffer *fb, +static int repaper_fb_dirty(struct drm_framebuffer *fb, const struct iosys_map *vmap, struct drm_format_conv_state *fmtcnv_state) { - struct drm_gem_dma_object *dma_obj = drm_fb_dma_get_gem_obj(fb, 0); struct repaper_epd *epd = drm_to_epd(fb->dev); unsigned int dst_pitch = 0; - struct iosys_map dst, vmap; + struct iosys_map dst; struct drm_rect clip; int idx, ret = 0; u8 *buf = NULL; @@ -546,8 +545,7 @@ static int repaper_fb_dirty(struct drm_framebuffer *fb, goto out_free; iosys_map_set_vaddr(&dst, buf); - iosys_map_set_vaddr(&vmap, dma_obj->vaddr); - drm_fb_xrgb8888_to_mono(&dst, &dst_pitch, &vmap, fb, &clip, fmtcnv_state); + drm_fb_xrgb8888_to_mono(&dst, &dst_pitch, vmap, fb, &clip, fmtcnv_state); drm_gem_fb_end_cpu_access(fb, DMA_FROM_DEVICE); @@ -832,16 +830,15 @@ static void repaper_pipe_update(struct drm_simple_display_pipe *pipe, struct drm_plane_state *old_state) { struct drm_plane_state *state = pipe->plane.state; - struct drm_format_conv_state fmtcnv_state = DRM_FORMAT_CONV_STATE_INIT; + struct drm_shadow_plane_state *shadow_plane_state = to_drm_shadow_plane_state(state); struct drm_rect rect; if (!pipe->crtc.state->active) return; if (drm_atomic_helper_damage_merged(old_state, state, &rect)) - repaper_fb_dirty(state->fb, &fmtcnv_state); - - drm_format_conv_state_release(&fmtcnv_state); + repaper_fb_dirty(state->fb, shadow_plane_state->data, + &shadow_plane_state->fmtcnv_state); } static const struct drm_simple_display_pipe_funcs repaper_pipe_funcs = { @@ -849,6 +846,7 @@ static const struct drm_simple_display_pipe_funcs repaper_pipe_funcs = { .enable = repaper_pipe_enable, .disable = repaper_pipe_disable, .update = repaper_pipe_update, + DRM_GEM_SIMPLE_DISPLAY_PIPE_SHADOW_PLANE_FUNCS, }; static int repaper_connector_get_modes(struct drm_connector *connector) diff --git a/drivers/gpu/drm/tiny/sharp-memory.c b/drivers/gpu/drm/tiny/sharp-memory.c index 03d2850310c4..64272cd0f6e2 100644 --- a/drivers/gpu/drm/tiny/sharp-memory.c +++ b/drivers/gpu/drm/tiny/sharp-memory.c @@ -126,28 +126,28 @@ static inline void sharp_memory_set_tx_buffer_addresses(u8 *buffer, static void sharp_memory_set_tx_buffer_data(u8 *buffer, struct drm_framebuffer *fb, + const struct iosys_map *vmap, struct drm_rect clip, u32 pitch, struct drm_format_conv_state *fmtcnv_state) { int ret; - struct iosys_map dst, vmap; - struct drm_gem_dma_object *dma_obj = drm_fb_dma_get_gem_obj(fb, 
0); + struct iosys_map dst; ret = drm_gem_fb_begin_cpu_access(fb, DMA_FROM_DEVICE); if (ret) return; iosys_map_set_vaddr(&dst, buffer); - iosys_map_set_vaddr(&vmap, dma_obj->vaddr); - drm_fb_xrgb8888_to_mono(&dst, &pitch, &vmap, fb, &clip, fmtcnv_state); + drm_fb_xrgb8888_to_mono(&dst, &pitch, vmap, fb, &clip, fmtcnv_state); drm_gem_fb_end_cpu_access(fb, DMA_FROM_DEVICE); } static int sharp_memory_update_display(struct sharp_memory_device *smd, struct drm_framebuffer *fb, + const struct iosys_map *vmap, struct drm_rect clip, struct drm_format_conv_state *fmtcnv_state) { @@ -163,7 +163,7 @@ static int sharp_memory_update_display(struct sharp_memory_device *smd, sharp_memory_set_tx_buffer_mode(&tx_buffer[0], SHARP_MEMORY_DISPLAY_UPDATE_MODE, vcom); sharp_memory_set_tx_buffer_addresses(&tx_buffer[1], clip, pitch); - sharp_memory_set_tx_buffer_data(&tx_buffer[2], fb, clip, pitch, fmtcnv_state); + sharp_memory_set_tx_buffer_data(&tx_buffer[2], fb, vmap, clip, pitch, fmtcnv_state); ret = sharp_memory_spi_write(smd->spi, tx_buffer, tx_buffer_size); @@ -206,7 +206,8 @@ static int sharp_memory_clear_display(struct sharp_memory_device *smd) return ret; } -static void sharp_memory_fb_dirty(struct drm_framebuffer *fb, struct drm_rect *rect, +static void sharp_memory_fb_dirty(struct drm_framebuffer *fb, const struct iosys_map *vmap, + struct drm_rect *rect, struct drm_format_conv_state *fmtconv_state) { struct drm_rect clip; @@ -218,7 +219,7 @@ static void sharp_memory_fb_dirty(struct drm_framebuffer *fb, struct drm_rect *r clip.y1 = rect->y1; clip.y2 = rect->y2; - sharp_memory_update_display(smd, fb, clip, fmtconv_state); + sharp_memory_update_display(smd, fb, vmap, clip, fmtconv_state); } static int sharp_memory_plane_atomic_check(struct drm_plane *plane, @@ -242,7 +243,7 @@ static void sharp_memory_plane_atomic_update(struct drm_plane *plane, { struct drm_plane_state *old_state = drm_atomic_get_old_plane_state(state, plane); struct drm_plane_state *plane_state = plane->state; - struct drm_format_conv_state fmtcnv_state = DRM_FORMAT_CONV_STATE_INIT; + struct drm_shadow_plane_state *shadow_plane_state = to_drm_shadow_plane_state(plane_state); struct sharp_memory_device *smd; struct drm_rect rect; @@ -251,15 +252,15 @@ static void sharp_memory_plane_atomic_update(struct drm_plane *plane, return; if (drm_atomic_helper_damage_merged(old_state, plane_state, &rect)) - sharp_memory_fb_dirty(plane_state->fb, &rect, &fmtcnv_state); - - drm_format_conv_state_release(&fmtcnv_state); + sharp_memory_fb_dirty(plane_state->fb, shadow_plane_state->data, + &rect, &shadow_plane_state->fmtcnv_state); } static const struct drm_plane_helper_funcs sharp_memory_plane_helper_funcs = { .prepare_fb = drm_gem_plane_helper_prepare_fb, .atomic_check = sharp_memory_plane_atomic_check, .atomic_update = sharp_memory_plane_atomic_update, + DRM_GEM_SHADOW_PLANE_HELPER_FUNCS, }; static bool sharp_memory_format_mod_supported(struct drm_plane *plane, @@ -273,9 +274,7 @@ static const struct drm_plane_funcs sharp_memory_plane_funcs = { .update_plane = drm_atomic_helper_update_plane, .disable_plane = drm_atomic_helper_disable_plane, .destroy = drm_plane_cleanup, - .reset = drm_atomic_helper_plane_reset, - .atomic_duplicate_state = drm_atomic_helper_plane_duplicate_state, - .atomic_destroy_state = drm_atomic_helper_plane_destroy_state, + DRM_GEM_SHADOW_PLANE_FUNCS, .format_mod_supported = sharp_memory_format_mod_supported, }; diff --git a/drivers/gpu/drm/ttm/tests/ttm_bo_validate_test.c b/drivers/gpu/drm/ttm/tests/ttm_bo_validate_test.c 
index 3148f5d3dbd6..1bcc67977f48 100644 --- a/drivers/gpu/drm/ttm/tests/ttm_bo_validate_test.c +++ b/drivers/gpu/drm/ttm/tests/ttm_bo_validate_test.c @@ -542,14 +542,15 @@ static void ttm_bo_validate_no_placement_signaled(struct kunit *test) bo->ttm = old_tt; } - err = ttm_resource_alloc(bo, place, &bo->resource, NULL); - KUNIT_EXPECT_EQ(test, err, 0); - KUNIT_ASSERT_EQ(test, man->usage, size); - placement = kunit_kzalloc(test, sizeof(*placement), GFP_KERNEL); KUNIT_ASSERT_NOT_NULL(test, placement); ttm_bo_reserve(bo, false, false, NULL); + + err = ttm_resource_alloc(bo, place, &bo->resource, NULL); + KUNIT_EXPECT_EQ(test, err, 0); + KUNIT_ASSERT_EQ(test, man->usage, size); + err = ttm_bo_validate(bo, placement, &ctx); ttm_bo_unreserve(bo); @@ -757,56 +758,6 @@ static void ttm_bo_validate_move_fence_not_signaled(struct kunit *test) ttm_mock_manager_fini(priv->ttm_dev, snd_mem); } -static void ttm_bo_validate_swapout(struct kunit *test) -{ - unsigned long size_big, size = ALIGN(BO_SIZE, PAGE_SIZE); - enum ttm_bo_type bo_type = ttm_bo_type_device; - struct ttm_buffer_object *bo_small, *bo_big; - struct ttm_test_devices *priv = test->priv; - struct ttm_operation_ctx ctx = { }; - struct ttm_placement *placement; - u32 mem_type = TTM_PL_TT; - struct ttm_place *place; - struct sysinfo si; - int err; - - si_meminfo(&si); - size_big = ALIGN(((u64)si.totalram * si.mem_unit / 2), PAGE_SIZE); - - ttm_mock_manager_init(priv->ttm_dev, mem_type, size_big + size); - - place = ttm_place_kunit_init(test, mem_type, 0); - placement = ttm_placement_kunit_init(test, place, 1); - - bo_small = kunit_kzalloc(test, sizeof(*bo_small), GFP_KERNEL); - KUNIT_ASSERT_NOT_NULL(test, bo_small); - - drm_gem_private_object_init(priv->drm, &bo_small->base, size); - - err = ttm_bo_init_reserved(priv->ttm_dev, bo_small, bo_type, placement, - PAGE_SIZE, &ctx, NULL, NULL, - &dummy_ttm_bo_destroy); - KUNIT_EXPECT_EQ(test, err, 0); - dma_resv_unlock(bo_small->base.resv); - - bo_big = ttm_bo_kunit_init(test, priv, size_big, NULL); - - dma_resv_lock(bo_big->base.resv, NULL); - err = ttm_bo_validate(bo_big, placement, &ctx); - dma_resv_unlock(bo_big->base.resv); - - KUNIT_EXPECT_EQ(test, err, 0); - KUNIT_EXPECT_NOT_NULL(test, bo_big->resource); - KUNIT_EXPECT_EQ(test, bo_big->resource->mem_type, mem_type); - KUNIT_EXPECT_EQ(test, bo_small->resource->mem_type, TTM_PL_SYSTEM); - KUNIT_EXPECT_TRUE(test, bo_small->ttm->page_flags & TTM_TT_FLAG_SWAPPED); - - ttm_bo_put(bo_big); - ttm_bo_put(bo_small); - - ttm_mock_manager_fini(priv->ttm_dev, mem_type); -} - static void ttm_bo_validate_happy_evict(struct kunit *test) { u32 mem_type = TTM_PL_VRAM, mem_multihop = TTM_PL_TT, @@ -1201,7 +1152,6 @@ static struct kunit_case ttm_bo_validate_test_cases[] = { KUNIT_CASE(ttm_bo_validate_move_fence_signaled), KUNIT_CASE_PARAM(ttm_bo_validate_move_fence_not_signaled, ttm_bo_validate_wait_gen_params), - KUNIT_CASE(ttm_bo_validate_swapout), KUNIT_CASE(ttm_bo_validate_happy_evict), KUNIT_CASE(ttm_bo_validate_all_pinned_evict), KUNIT_CASE(ttm_bo_validate_allowed_only_evict), diff --git a/drivers/gpu/drm/ttm/ttm_backup.c b/drivers/gpu/drm/ttm/ttm_backup.c index cb1b8e5dadf5..32530c75f038 100644 --- a/drivers/gpu/drm/ttm/ttm_backup.c +++ b/drivers/gpu/drm/ttm/ttm_backup.c @@ -114,15 +114,8 @@ ttm_backup_backup_page(struct file *backup, struct page *page, if (writeback && !folio_mapped(to_folio) && folio_clear_dirty_for_io(to_folio)) { - struct writeback_control wbc = { - .sync_mode = WB_SYNC_NONE, - .nr_to_write = SWAP_CLUSTER_MAX, - .range_start = 0, - 
.range_end = LLONG_MAX, - .for_reclaim = 1, - }; folio_set_reclaim(to_folio); - ret = shmem_writeout(to_folio, &wbc); + ret = shmem_writeout(to_folio, NULL, NULL); if (!folio_test_writeback(to_folio)) folio_clear_reclaim(to_folio); /* diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c index bb9c5c8e16b5..29423ceeec5c 100644 --- a/drivers/gpu/drm/ttm/ttm_bo.c +++ b/drivers/gpu/drm/ttm/ttm_bo.c @@ -526,11 +526,11 @@ static s64 ttm_bo_evict_cb(struct ttm_lru_walk *walk, struct ttm_buffer_object * return 0; if (bo->deleted) { - lret = ttm_bo_wait_ctx(bo, walk->ctx); + lret = ttm_bo_wait_ctx(bo, walk->arg.ctx); if (!lret) ttm_bo_cleanup_memtype_use(bo); } else { - lret = ttm_bo_evict(bo, walk->ctx); + lret = ttm_bo_evict(bo, walk->arg.ctx); } if (lret) @@ -566,8 +566,10 @@ static int ttm_bo_evict_alloc(struct ttm_device *bdev, struct ttm_bo_evict_walk evict_walk = { .walk = { .ops = &ttm_evict_walk_ops, - .ctx = ctx, - .ticket = ticket, + .arg = { + .ctx = ctx, + .ticket = ticket, + } }, .place = place, .evictor = evictor, @@ -576,7 +578,7 @@ static int ttm_bo_evict_alloc(struct ttm_device *bdev, }; s64 lret; - evict_walk.walk.trylock_only = true; + evict_walk.walk.arg.trylock_only = true; lret = ttm_lru_walk_for_evict(&evict_walk.walk, bdev, man, 1); /* One more attempt if we hit low limit? */ @@ -590,12 +592,12 @@ static int ttm_bo_evict_alloc(struct ttm_device *bdev, /* Reset low limit */ evict_walk.try_low = evict_walk.hit_low = false; /* If ticket-locking, repeat while making progress. */ - evict_walk.walk.trylock_only = false; + evict_walk.walk.arg.trylock_only = false; retry: do { /* The walk may clear the evict_walk.walk.ticket field */ - evict_walk.walk.ticket = ticket; + evict_walk.walk.arg.ticket = ticket; evict_walk.evicted = 0; lret = ttm_lru_walk_for_evict(&evict_walk.walk, bdev, man, 1); } while (!lret && evict_walk.evicted); @@ -1106,7 +1108,7 @@ ttm_bo_swapout_cb(struct ttm_lru_walk *walk, struct ttm_buffer_object *bo) struct ttm_place place = {.mem_type = bo->resource->mem_type}; struct ttm_bo_swapout_walk *swapout_walk = container_of(walk, typeof(*swapout_walk), walk); - struct ttm_operation_ctx *ctx = walk->ctx; + struct ttm_operation_ctx *ctx = walk->arg.ctx; s64 ret; /* @@ -1217,8 +1219,10 @@ s64 ttm_bo_swapout(struct ttm_device *bdev, struct ttm_operation_ctx *ctx, struct ttm_bo_swapout_walk swapout_walk = { .walk = { .ops = &ttm_swap_ops, - .ctx = ctx, - .trylock_only = true, + .arg = { + .ctx = ctx, + .trylock_only = true, + }, }, .gfp_flags = gfp_flags, }; @@ -1279,3 +1283,18 @@ int ttm_bo_populate(struct ttm_buffer_object *bo, return 0; } EXPORT_SYMBOL(ttm_bo_populate); + +int ttm_bo_setup_export(struct ttm_buffer_object *bo, + struct ttm_operation_ctx *ctx) +{ + int ret; + + ret = ttm_bo_reserve(bo, false, false, NULL); + if (ret != 0) + return ret; + + ret = ttm_bo_populate(bo, ctx); + ttm_bo_unreserve(bo); + return ret; +} +EXPORT_SYMBOL(ttm_bo_setup_export); diff --git a/drivers/gpu/drm/ttm/ttm_bo_util.c b/drivers/gpu/drm/ttm/ttm_bo_util.c index b9a772b26fa1..acbbca9d5c92 100644 --- a/drivers/gpu/drm/ttm/ttm_bo_util.c +++ b/drivers/gpu/drm/ttm/ttm_bo_util.c @@ -258,6 +258,13 @@ static int ttm_buffer_object_transfer(struct ttm_buffer_object *bo, ret = dma_resv_trylock(&fbo->base.base._resv); WARN_ON(!ret); + ret = dma_resv_reserve_fences(&fbo->base.base._resv, 1); + if (ret) { + dma_resv_unlock(&fbo->base.base._resv); + kfree(fbo); + return ret; + } + if (fbo->base.resource) { ttm_resource_set_bo(fbo->base.resource, &fbo->base); bo->resource = 
NULL; @@ -266,12 +273,6 @@ static int ttm_buffer_object_transfer(struct ttm_buffer_object *bo, fbo->base.bulk_move = NULL; } - ret = dma_resv_reserve_fences(&fbo->base.base._resv, 1); - if (ret) { - kfree(fbo); - return ret; - } - ttm_bo_get(bo); fbo->bo = bo; @@ -382,6 +383,32 @@ static int ttm_bo_kmap_ttm(struct ttm_buffer_object *bo, } /** + * ttm_bo_kmap_try_from_panic + * + * @bo: The buffer object + * @page: The page to map + * + * Sets up a kernel virtual mapping using kmap_local_page_try_from_panic(). + * This should only be called from the panic handler, if you make sure the bo + * is the one being displayed, so is properly allocated, and protected. + * + * Returns the vaddr, that you can use to write to the bo, and that you should + * pass to kunmap_local() when you're done with this page, or NULL if the bo + * is in iomem. + */ +void *ttm_bo_kmap_try_from_panic(struct ttm_buffer_object *bo, unsigned long page) +{ + if (page + 1 > PFN_UP(bo->resource->size)) + return NULL; + + if (!bo->resource->bus.is_iomem && bo->ttm->pages && bo->ttm->pages[page]) + return kmap_local_page_try_from_panic(bo->ttm->pages[page]); + + return NULL; +} +EXPORT_SYMBOL(ttm_bo_kmap_try_from_panic); + +/** * ttm_bo_kmap * * @bo: The buffer object. @@ -773,14 +800,15 @@ error_destroy_tt: return ret; } -static bool ttm_lru_walk_trylock(struct ttm_operation_ctx *ctx, - struct ttm_buffer_object *bo, - bool *needs_unlock) +static bool ttm_lru_walk_trylock(struct ttm_bo_lru_cursor *curs, + struct ttm_buffer_object *bo) { - *needs_unlock = false; + struct ttm_operation_ctx *ctx = curs->arg->ctx; + + curs->needs_unlock = false; if (dma_resv_trylock(bo->base.resv)) { - *needs_unlock = true; + curs->needs_unlock = true; return true; } @@ -792,27 +820,27 @@ static bool ttm_lru_walk_trylock(struct ttm_operation_ctx *ctx, return false; } -static int ttm_lru_walk_ticketlock(struct ttm_lru_walk *walk, - struct ttm_buffer_object *bo, - bool *needs_unlock) +static int ttm_lru_walk_ticketlock(struct ttm_bo_lru_cursor *curs, + struct ttm_buffer_object *bo) { + struct ttm_lru_walk_arg *arg = curs->arg; struct dma_resv *resv = bo->base.resv; int ret; - if (walk->ctx->interruptible) - ret = dma_resv_lock_interruptible(resv, walk->ticket); + if (arg->ctx->interruptible) + ret = dma_resv_lock_interruptible(resv, arg->ticket); else - ret = dma_resv_lock(resv, walk->ticket); + ret = dma_resv_lock(resv, arg->ticket); if (!ret) { - *needs_unlock = true; + curs->needs_unlock = true; /* * Only a single ticketlock per loop. Ticketlocks are prone * to return -EDEADLK causing the eviction to fail, so * after waiting for the ticketlock, revert back to * trylocking for this walk. */ - walk->ticket = NULL; + arg->ticket = NULL; } else if (ret == -EDEADLK) { /* Caller needs to exit the ww transaction. */ ret = -ENOSPC; @@ -821,12 +849,6 @@ static int ttm_lru_walk_ticketlock(struct ttm_lru_walk *walk, return ret; } -static void ttm_lru_walk_unlock(struct ttm_buffer_object *bo, bool locked) -{ - if (locked) - dma_resv_unlock(bo->base.resv); -} - /** * ttm_lru_walk_for_evict() - Perform a LRU list walk, with actions taken on * valid items. 
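As context for the ttm_bo_kmap_try_from_panic() helper added above, here is a minimal, hedged sketch (not part of the patch) of how a driver's panic path might walk the pages of the bo known to back the scanout buffer; the function name is a placeholder, and the memset merely stands in for drawing the panic screen.

#include <linux/highmem.h>
#include <linux/pfn.h>
#include <linux/string.h>
#include <drm/ttm/ttm_bo.h>

/* Hypothetical helper called only from a panic handler. */
static void my_panic_clear_bo(struct ttm_buffer_object *bo)
{
	unsigned long page;

	for (page = 0; page < PFN_UP(bo->resource->size); page++) {
		void *vaddr = ttm_bo_kmap_try_from_panic(bo, page);

		if (!vaddr)
			break;	/* iomem or unpopulated page: take another path */

		memset(vaddr, 0, PAGE_SIZE);	/* e.g. clear before drawing */
		kunmap_local(vaddr);
	}
}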
@@ -861,64 +883,21 @@ static void ttm_lru_walk_unlock(struct ttm_buffer_object *bo, bool locked) s64 ttm_lru_walk_for_evict(struct ttm_lru_walk *walk, struct ttm_device *bdev, struct ttm_resource_manager *man, s64 target) { - struct ttm_resource_cursor cursor; - struct ttm_resource *res; + struct ttm_bo_lru_cursor cursor; + struct ttm_buffer_object *bo; s64 progress = 0; s64 lret; - spin_lock(&bdev->lru_lock); - ttm_resource_cursor_init(&cursor, man); - ttm_resource_manager_for_each_res(&cursor, res) { - struct ttm_buffer_object *bo = res->bo; - bool bo_needs_unlock = false; - bool bo_locked = false; - int mem_type; - - /* - * Attempt a trylock before taking a reference on the bo, - * since if we do it the other way around, and the trylock fails, - * we need to drop the lru lock to put the bo. - */ - if (ttm_lru_walk_trylock(walk->ctx, bo, &bo_needs_unlock)) - bo_locked = true; - else if (!walk->ticket || walk->ctx->no_wait_gpu || - walk->trylock_only) - continue; - - if (!ttm_bo_get_unless_zero(bo)) { - ttm_lru_walk_unlock(bo, bo_needs_unlock); - continue; - } - - mem_type = res->mem_type; - spin_unlock(&bdev->lru_lock); - - lret = 0; - if (!bo_locked) - lret = ttm_lru_walk_ticketlock(walk, bo, &bo_needs_unlock); - - /* - * Note that in between the release of the lru lock and the - * ticketlock, the bo may have switched resource, - * and also memory type, since the resource may have been - * freed and allocated again with a different memory type. - * In that case, just skip it. - */ - if (!lret && bo->resource && bo->resource->mem_type == mem_type) - lret = walk->ops->process_bo(walk, bo); - - ttm_lru_walk_unlock(bo, bo_needs_unlock); - ttm_bo_put(bo); + ttm_bo_lru_for_each_reserved_guarded(&cursor, man, &walk->arg, bo) { + lret = walk->ops->process_bo(walk, bo); if (lret == -EBUSY || lret == -EALREADY) lret = 0; progress = (lret < 0) ? lret : progress + lret; - - spin_lock(&bdev->lru_lock); if (progress < 0 || progress >= target) break; } - ttm_resource_cursor_fini(&cursor); - spin_unlock(&bdev->lru_lock); + if (IS_ERR(bo)) + return PTR_ERR(bo); return progress; } @@ -956,44 +935,87 @@ EXPORT_SYMBOL(ttm_bo_lru_cursor_fini); * ttm_bo_lru_cursor_init() - Initialize a struct ttm_bo_lru_cursor * @curs: The ttm_bo_lru_cursor to initialize. * @man: The ttm resource_manager whose LRU lists to iterate over. - * @ctx: The ttm_operation_ctx to govern the locking. + * @arg: The ttm_lru_walk_arg to govern the walk. * - * Initialize a struct ttm_bo_lru_cursor. Currently only trylocking - * or prelocked buffer objects are available as detailed by - * @ctx::resv and @ctx::allow_res_evict. Ticketlocking is not - * supported. + * Initialize a struct ttm_bo_lru_cursor. * * Return: Pointer to @curs. The function does not fail. 
*/ struct ttm_bo_lru_cursor * ttm_bo_lru_cursor_init(struct ttm_bo_lru_cursor *curs, struct ttm_resource_manager *man, - struct ttm_operation_ctx *ctx) + struct ttm_lru_walk_arg *arg) { memset(curs, 0, sizeof(*curs)); ttm_resource_cursor_init(&curs->res_curs, man); - curs->ctx = ctx; + curs->arg = arg; return curs; } EXPORT_SYMBOL(ttm_bo_lru_cursor_init); static struct ttm_buffer_object * -ttm_bo_from_res_reserved(struct ttm_resource *res, struct ttm_bo_lru_cursor *curs) +__ttm_bo_lru_cursor_next(struct ttm_bo_lru_cursor *curs) { - struct ttm_buffer_object *bo = res->bo; + spinlock_t *lru_lock = &curs->res_curs.man->bdev->lru_lock; + struct ttm_resource *res = NULL; + struct ttm_buffer_object *bo; + struct ttm_lru_walk_arg *arg = curs->arg; + bool first = !curs->bo; - if (!ttm_lru_walk_trylock(curs->ctx, bo, &curs->needs_unlock)) - return NULL; + ttm_bo_lru_cursor_cleanup_bo(curs); - if (!ttm_bo_get_unless_zero(bo)) { - if (curs->needs_unlock) - dma_resv_unlock(bo->base.resv); - return NULL; + spin_lock(lru_lock); + for (;;) { + int mem_type, ret = 0; + bool bo_locked = false; + + if (first) { + res = ttm_resource_manager_first(&curs->res_curs); + first = false; + } else { + res = ttm_resource_manager_next(&curs->res_curs); + } + if (!res) + break; + + bo = res->bo; + if (ttm_lru_walk_trylock(curs, bo)) + bo_locked = true; + else if (!arg->ticket || arg->ctx->no_wait_gpu || arg->trylock_only) + continue; + + if (!ttm_bo_get_unless_zero(bo)) { + if (curs->needs_unlock) + dma_resv_unlock(bo->base.resv); + continue; + } + + mem_type = res->mem_type; + spin_unlock(lru_lock); + if (!bo_locked) + ret = ttm_lru_walk_ticketlock(curs, bo); + + /* + * Note that in between the release of the lru lock and the + * ticketlock, the bo may have switched resource, + * and also memory type, since the resource may have been + * freed and allocated again with a different memory type. + * In that case, just skip it. + */ + curs->bo = bo; + if (!ret && bo->resource && bo->resource->mem_type == mem_type) + return bo; + + ttm_bo_lru_cursor_cleanup_bo(curs); + if (ret && ret != -EALREADY) + return ERR_PTR(ret); + + spin_lock(lru_lock); } - curs->bo = bo; - return bo; + spin_unlock(lru_lock); + return res ? bo : NULL; } /** @@ -1007,25 +1029,7 @@ ttm_bo_from_res_reserved(struct ttm_resource *res, struct ttm_bo_lru_cursor *cur */ struct ttm_buffer_object *ttm_bo_lru_cursor_next(struct ttm_bo_lru_cursor *curs) { - spinlock_t *lru_lock = &curs->res_curs.man->bdev->lru_lock; - struct ttm_resource *res = NULL; - struct ttm_buffer_object *bo; - - ttm_bo_lru_cursor_cleanup_bo(curs); - - spin_lock(lru_lock); - for (;;) { - res = ttm_resource_manager_next(&curs->res_curs); - if (!res) - break; - - bo = ttm_bo_from_res_reserved(res, curs); - if (bo) - break; - } - - spin_unlock(lru_lock); - return res ? bo : NULL; + return __ttm_bo_lru_cursor_next(curs); } EXPORT_SYMBOL(ttm_bo_lru_cursor_next); @@ -1039,21 +1043,8 @@ EXPORT_SYMBOL(ttm_bo_lru_cursor_next); */ struct ttm_buffer_object *ttm_bo_lru_cursor_first(struct ttm_bo_lru_cursor *curs) { - spinlock_t *lru_lock = &curs->res_curs.man->bdev->lru_lock; - struct ttm_buffer_object *bo; - struct ttm_resource *res; - - spin_lock(lru_lock); - res = ttm_resource_manager_first(&curs->res_curs); - if (!res) { - spin_unlock(lru_lock); - return NULL; - } - - bo = ttm_bo_from_res_reserved(res, curs); - spin_unlock(lru_lock); - - return bo ? 
bo : ttm_bo_lru_cursor_next(curs); + ttm_bo_lru_cursor_cleanup_bo(curs); + return __ttm_bo_lru_cursor_next(curs); } EXPORT_SYMBOL(ttm_bo_lru_cursor_first); diff --git a/drivers/gpu/drm/ttm/ttm_device.c b/drivers/gpu/drm/ttm/ttm_device.c index 816e2cba6016..c3e2fcbdd2cc 100644 --- a/drivers/gpu/drm/ttm/ttm_device.c +++ b/drivers/gpu/drm/ttm/ttm_device.c @@ -125,6 +125,28 @@ out: return ret; } +/** + * ttm_device_prepare_hibernation - move GTT BOs to shmem for hibernation. + * + * @bdev: A pointer to a struct ttm_device to prepare hibernation for. + * + * Return: 0 on success, negative number on failure. + */ +int ttm_device_prepare_hibernation(struct ttm_device *bdev) +{ + struct ttm_operation_ctx ctx = { + .interruptible = false, + .no_wait_gpu = false, + }; + int ret; + + do { + ret = ttm_device_swapout(bdev, &ctx, GFP_KERNEL); + } while (ret > 0); + return ret; +} +EXPORT_SYMBOL(ttm_device_prepare_hibernation); + /* * A buffer object shrink method that tries to swap out the first * buffer object on the global::swap_lru list. diff --git a/drivers/gpu/drm/tyr/Kconfig b/drivers/gpu/drm/tyr/Kconfig new file mode 100644 index 000000000000..4b55308fd2eb --- /dev/null +++ b/drivers/gpu/drm/tyr/Kconfig @@ -0,0 +1,19 @@ +# SPDX-License-Identifier: GPL-2.0 or MIT + +config DRM_TYR + tristate "Tyr (Rust DRM support for ARM Mali CSF-based GPUs)" + depends on DRM=y + depends on RUST + depends on ARM || ARM64 || COMPILE_TEST + depends on !GENERIC_ATOMIC64 # for IOMMU_IO_PGTABLE_LPAE + default n + help + Rust DRM driver for ARM Mali CSF-based GPUs. + + This driver is for Mali (or Immortalis) Valhall Gxxx GPUs. + + Note that the Mali-G68 and Mali-G78, while Valhall architecture, will + be supported with the panfrost driver as they are not CSF GPUs. + + if M is selected, the module will be called tyr. This driver is work + in progress and may not be functional. diff --git a/drivers/gpu/drm/tyr/Makefile b/drivers/gpu/drm/tyr/Makefile new file mode 100644 index 000000000000..ba545f65f2c0 --- /dev/null +++ b/drivers/gpu/drm/tyr/Makefile @@ -0,0 +1,3 @@ +# SPDX-License-Identifier: GPL-2.0 or MIT + +obj-$(CONFIG_DRM_TYR) += tyr.o diff --git a/drivers/gpu/drm/tyr/driver.rs b/drivers/gpu/drm/tyr/driver.rs new file mode 100644 index 000000000000..d5625dd1e41c --- /dev/null +++ b/drivers/gpu/drm/tyr/driver.rs @@ -0,0 +1,205 @@ +// SPDX-License-Identifier: GPL-2.0 or MIT + +use kernel::c_str; +use kernel::clk::Clk; +use kernel::clk::OptionalClk; +use kernel::device::Bound; +use kernel::device::Core; +use kernel::device::Device; +use kernel::devres::Devres; +use kernel::drm; +use kernel::drm::ioctl; +use kernel::new_mutex; +use kernel::of; +use kernel::platform; +use kernel::prelude::*; +use kernel::regulator; +use kernel::regulator::Regulator; +use kernel::sizes::SZ_2M; +use kernel::sync::Arc; +use kernel::sync::Mutex; +use kernel::time; +use kernel::types::ARef; + +use crate::file::File; +use crate::gem::TyrObject; +use crate::gpu; +use crate::gpu::GpuInfo; +use crate::regs; + +pub(crate) type IoMem = kernel::io::mem::IoMem<SZ_2M>; + +/// Convenience type alias for the DRM device type for this driver. +pub(crate) type TyrDevice = drm::Device<TyrDriver>; + +#[pin_data(PinnedDrop)] +pub(crate) struct TyrDriver { + device: ARef<TyrDevice>, +} + +#[pin_data(PinnedDrop)] +pub(crate) struct TyrData { + pub(crate) pdev: ARef<platform::Device>, + + #[pin] + clks: Mutex<Clocks>, + + #[pin] + regulators: Mutex<Regulators>, + + /// Some information on the GPU. + /// + /// This is mainly queried by userspace, i.e.: Mesa. 
+ pub(crate) gpu_info: GpuInfo, +} + +// Both `Clk` and `Regulator` do not implement `Send` or `Sync`, but they +// should. There are patches on the mailing list to address this, but they have +// not landed yet. +// +// For now, add this workaround so that this patch compiles with the promise +// that it will be removed in a future patch. +// +// SAFETY: This will be removed in a future patch. +unsafe impl Send for TyrData {} +// SAFETY: This will be removed in a future patch. +unsafe impl Sync for TyrData {} + +fn issue_soft_reset(dev: &Device<Bound>, iomem: &Devres<IoMem>) -> Result { + regs::GPU_CMD.write(dev, iomem, regs::GPU_CMD_SOFT_RESET)?; + + // TODO: We cannot poll, as there is no support in Rust currently, so we + // sleep. Change this when read_poll_timeout() is implemented in Rust. + kernel::time::delay::fsleep(time::Delta::from_millis(100)); + + if regs::GPU_IRQ_RAWSTAT.read(dev, iomem)? & regs::GPU_IRQ_RAWSTAT_RESET_COMPLETED == 0 { + dev_err!(dev, "GPU reset failed with errno\n"); + dev_err!( + dev, + "GPU_INT_RAWSTAT is {}\n", + regs::GPU_IRQ_RAWSTAT.read(dev, iomem)? + ); + + return Err(EIO); + } + + Ok(()) +} + +kernel::of_device_table!( + OF_TABLE, + MODULE_OF_TABLE, + <TyrDriver as platform::Driver>::IdInfo, + [ + (of::DeviceId::new(c_str!("rockchip,rk3588-mali")), ()), + (of::DeviceId::new(c_str!("arm,mali-valhall-csf")), ()) + ] +); + +impl platform::Driver for TyrDriver { + type IdInfo = (); + const OF_ID_TABLE: Option<of::IdTable<Self::IdInfo>> = Some(&OF_TABLE); + + fn probe( + pdev: &platform::Device<Core>, + _info: Option<&Self::IdInfo>, + ) -> Result<Pin<KBox<Self>>> { + let core_clk = Clk::get(pdev.as_ref(), Some(c_str!("core")))?; + let stacks_clk = OptionalClk::get(pdev.as_ref(), Some(c_str!("stacks")))?; + let coregroup_clk = OptionalClk::get(pdev.as_ref(), Some(c_str!("coregroup")))?; + + core_clk.prepare_enable()?; + stacks_clk.prepare_enable()?; + coregroup_clk.prepare_enable()?; + + let mali_regulator = Regulator::<regulator::Enabled>::get(pdev.as_ref(), c_str!("mali"))?; + let sram_regulator = Regulator::<regulator::Enabled>::get(pdev.as_ref(), c_str!("sram"))?; + + let request = pdev.io_request_by_index(0).ok_or(ENODEV)?; + let iomem = Arc::pin_init(request.iomap_sized::<SZ_2M>(), GFP_KERNEL)?; + + issue_soft_reset(pdev.as_ref(), &iomem)?; + gpu::l2_power_on(pdev.as_ref(), &iomem)?; + + let gpu_info = GpuInfo::new(pdev.as_ref(), &iomem)?; + gpu_info.log(pdev); + + let platform: ARef<platform::Device> = pdev.into(); + + let data = try_pin_init!(TyrData { + pdev: platform.clone(), + clks <- new_mutex!(Clocks { + core: core_clk, + stacks: stacks_clk, + coregroup: coregroup_clk, + }), + regulators <- new_mutex!(Regulators { + mali: mali_regulator, + sram: sram_regulator, + }), + gpu_info, + }); + + let tdev: ARef<TyrDevice> = drm::Device::new(pdev.as_ref(), data)?; + drm::driver::Registration::new_foreign_owned(&tdev, pdev.as_ref(), 0)?; + + let driver = KBox::pin_init(try_pin_init!(TyrDriver { device: tdev }), GFP_KERNEL)?; + + // We need this to be dev_info!() because dev_dbg!() does not work at + // all in Rust for now, and we need to see whether probe succeeded. + dev_info!(pdev.as_ref(), "Tyr initialized correctly.\n"); + Ok(driver) + } +} + +#[pinned_drop] +impl PinnedDrop for TyrDriver { + fn drop(self: Pin<&mut Self>) {} +} + +#[pinned_drop] +impl PinnedDrop for TyrData { + fn drop(self: Pin<&mut Self>) { + // TODO: the type-state pattern for Clks will fix this. 
+ let clks = self.clks.lock(); + clks.core.disable_unprepare(); + clks.stacks.disable_unprepare(); + clks.coregroup.disable_unprepare(); + } +} + +// We need to retain the name "panthor" to achieve drop-in compatibility with +// the C driver in the userspace stack. +const INFO: drm::DriverInfo = drm::DriverInfo { + major: 1, + minor: 5, + patchlevel: 0, + name: c_str!("panthor"), + desc: c_str!("ARM Mali Tyr DRM driver"), +}; + +#[vtable] +impl drm::Driver for TyrDriver { + type Data = TyrData; + type File = File; + type Object = drm::gem::Object<TyrObject>; + + const INFO: drm::DriverInfo = INFO; + + kernel::declare_drm_ioctls! { + (PANTHOR_DEV_QUERY, drm_panthor_dev_query, ioctl::RENDER_ALLOW, File::dev_query), + } +} + +#[pin_data] +struct Clocks { + core: Clk, + stacks: OptionalClk, + coregroup: OptionalClk, +} + +#[pin_data] +struct Regulators { + mali: Regulator<regulator::Enabled>, + sram: Regulator<regulator::Enabled>, +} diff --git a/drivers/gpu/drm/tyr/file.rs b/drivers/gpu/drm/tyr/file.rs new file mode 100644 index 000000000000..0ef432947b73 --- /dev/null +++ b/drivers/gpu/drm/tyr/file.rs @@ -0,0 +1,56 @@ +// SPDX-License-Identifier: GPL-2.0 or MIT + +use kernel::drm; +use kernel::prelude::*; +use kernel::uaccess::UserSlice; +use kernel::uapi; + +use crate::driver::TyrDevice; +use crate::TyrDriver; + +#[pin_data] +pub(crate) struct File {} + +/// Convenience type alias for our DRM `File` type +pub(crate) type DrmFile = drm::file::File<File>; + +impl drm::file::DriverFile for File { + type Driver = TyrDriver; + + fn open(_dev: &drm::Device<Self::Driver>) -> Result<Pin<KBox<Self>>> { + KBox::try_pin_init(try_pin_init!(Self {}), GFP_KERNEL) + } +} + +impl File { + pub(crate) fn dev_query( + tdev: &TyrDevice, + devquery: &mut uapi::drm_panthor_dev_query, + _file: &DrmFile, + ) -> Result<u32> { + if devquery.pointer == 0 { + match devquery.type_ { + uapi::drm_panthor_dev_query_type_DRM_PANTHOR_DEV_QUERY_GPU_INFO => { + devquery.size = core::mem::size_of_val(&tdev.gpu_info) as u32; + Ok(0) + } + _ => Err(EINVAL), + } + } else { + match devquery.type_ { + uapi::drm_panthor_dev_query_type_DRM_PANTHOR_DEV_QUERY_GPU_INFO => { + let mut writer = UserSlice::new( + UserPtr::from_addr(devquery.pointer as usize), + devquery.size as usize, + ) + .writer(); + + writer.write(&tdev.gpu_info)?; + + Ok(0) + } + _ => Err(EINVAL), + } + } + } +} diff --git a/drivers/gpu/drm/tyr/gem.rs b/drivers/gpu/drm/tyr/gem.rs new file mode 100644 index 000000000000..1273bf89dbd5 --- /dev/null +++ b/drivers/gpu/drm/tyr/gem.rs @@ -0,0 +1,18 @@ +// SPDX-License-Identifier: GPL-2.0 or MIT + +use crate::driver::TyrDevice; +use crate::driver::TyrDriver; +use kernel::drm::gem; +use kernel::prelude::*; + +/// GEM Object inner driver data +#[pin_data] +pub(crate) struct TyrObject {} + +impl gem::DriverObject for TyrObject { + type Driver = TyrDriver; + + fn new(_dev: &TyrDevice, _size: usize) -> impl PinInit<Self, Error> { + try_pin_init!(TyrObject {}) + } +} diff --git a/drivers/gpu/drm/tyr/gpu.rs b/drivers/gpu/drm/tyr/gpu.rs new file mode 100644 index 000000000000..6c582910dd5d --- /dev/null +++ b/drivers/gpu/drm/tyr/gpu.rs @@ -0,0 +1,219 @@ +// SPDX-License-Identifier: GPL-2.0 or MIT + +use kernel::bits::genmask_u32; +use kernel::device::Bound; +use kernel::device::Device; +use kernel::devres::Devres; +use kernel::platform; +use kernel::prelude::*; +use kernel::time; +use kernel::transmute::AsBytes; + +use crate::driver::IoMem; +use crate::regs; + +/// Struct containing information that can be queried by userspace. 
This is read from +/// the GPU's registers. +/// +/// # Invariants +/// +/// - The layout of this struct identical to the C `struct drm_panthor_gpu_info`. +#[repr(C)] +pub(crate) struct GpuInfo { + pub(crate) gpu_id: u32, + pub(crate) gpu_rev: u32, + pub(crate) csf_id: u32, + pub(crate) l2_features: u32, + pub(crate) tiler_features: u32, + pub(crate) mem_features: u32, + pub(crate) mmu_features: u32, + pub(crate) thread_features: u32, + pub(crate) max_threads: u32, + pub(crate) thread_max_workgroup_size: u32, + pub(crate) thread_max_barrier_size: u32, + pub(crate) coherency_features: u32, + pub(crate) texture_features: [u32; 4], + pub(crate) as_present: u32, + pub(crate) pad0: u32, + pub(crate) shader_present: u64, + pub(crate) l2_present: u64, + pub(crate) tiler_present: u64, + pub(crate) core_features: u32, + pub(crate) pad: u32, +} + +impl GpuInfo { + pub(crate) fn new(dev: &Device<Bound>, iomem: &Devres<IoMem>) -> Result<Self> { + let gpu_id = regs::GPU_ID.read(dev, iomem)?; + let csf_id = regs::GPU_CSF_ID.read(dev, iomem)?; + let gpu_rev = regs::GPU_REVID.read(dev, iomem)?; + let core_features = regs::GPU_CORE_FEATURES.read(dev, iomem)?; + let l2_features = regs::GPU_L2_FEATURES.read(dev, iomem)?; + let tiler_features = regs::GPU_TILER_FEATURES.read(dev, iomem)?; + let mem_features = regs::GPU_MEM_FEATURES.read(dev, iomem)?; + let mmu_features = regs::GPU_MMU_FEATURES.read(dev, iomem)?; + let thread_features = regs::GPU_THREAD_FEATURES.read(dev, iomem)?; + let max_threads = regs::GPU_THREAD_MAX_THREADS.read(dev, iomem)?; + let thread_max_workgroup_size = regs::GPU_THREAD_MAX_WORKGROUP_SIZE.read(dev, iomem)?; + let thread_max_barrier_size = regs::GPU_THREAD_MAX_BARRIER_SIZE.read(dev, iomem)?; + let coherency_features = regs::GPU_COHERENCY_FEATURES.read(dev, iomem)?; + + let texture_features = regs::GPU_TEXTURE_FEATURES0.read(dev, iomem)?; + + let as_present = regs::GPU_AS_PRESENT.read(dev, iomem)?; + + let shader_present = u64::from(regs::GPU_SHADER_PRESENT_LO.read(dev, iomem)?); + let shader_present = + shader_present | u64::from(regs::GPU_SHADER_PRESENT_HI.read(dev, iomem)?) << 32; + + let tiler_present = u64::from(regs::GPU_TILER_PRESENT_LO.read(dev, iomem)?); + let tiler_present = + tiler_present | u64::from(regs::GPU_TILER_PRESENT_HI.read(dev, iomem)?) << 32; + + let l2_present = u64::from(regs::GPU_L2_PRESENT_LO.read(dev, iomem)?); + let l2_present = l2_present | u64::from(regs::GPU_L2_PRESENT_HI.read(dev, iomem)?) << 32; + + Ok(Self { + gpu_id, + gpu_rev, + csf_id, + l2_features, + tiler_features, + mem_features, + mmu_features, + thread_features, + max_threads, + thread_max_workgroup_size, + thread_max_barrier_size, + coherency_features, + // TODO: Add texture_features_{1,2,3}. 
+ texture_features: [texture_features, 0, 0, 0], + as_present, + pad0: 0, + shader_present, + l2_present, + tiler_present, + core_features, + pad: 0, + }) + } + + pub(crate) fn log(&self, pdev: &platform::Device) { + let major = (self.gpu_id >> 16) & 0xff; + let minor = (self.gpu_id >> 8) & 0xff; + let status = self.gpu_id & 0xff; + + let model_name = if let Some(model) = GPU_MODELS + .iter() + .find(|&f| f.major == major && f.minor == minor) + { + model.name + } else { + "unknown" + }; + + dev_info!( + pdev.as_ref(), + "mali-{} id 0x{:x} major 0x{:x} minor 0x{:x} status 0x{:x}", + model_name, + self.gpu_id >> 16, + major, + minor, + status + ); + + dev_info!( + pdev.as_ref(), + "Features: L2:{:#x} Tiler:{:#x} Mem:{:#x} MMU:{:#x} AS:{:#x}", + self.l2_features, + self.tiler_features, + self.mem_features, + self.mmu_features, + self.as_present + ); + + dev_info!( + pdev.as_ref(), + "shader_present=0x{:016x} l2_present=0x{:016x} tiler_present=0x{:016x}", + self.shader_present, + self.l2_present, + self.tiler_present + ); + } + + /// Returns the number of virtual address bits supported by the GPU. + #[expect(dead_code)] + pub(crate) fn va_bits(&self) -> u32 { + self.mmu_features & genmask_u32(0..=7) + } + + /// Returns the number of physical address bits supported by the GPU. + #[expect(dead_code)] + pub(crate) fn pa_bits(&self) -> u32 { + (self.mmu_features >> 8) & genmask_u32(0..=7) + } +} + +// SAFETY: `GpuInfo`'s invariant guarantees that it is the same type that is +// already exposed to userspace by the C driver. This implies that it fulfills +// the requirements for `AsBytes`. +// +// This means: +// +// - No implicit padding, +// - No kernel pointers, +// - No interior mutability. +unsafe impl AsBytes for GpuInfo {} + +struct GpuModels { + name: &'static str, + major: u32, + minor: u32, +} + +const GPU_MODELS: [GpuModels; 1] = [GpuModels { + name: "g610", + major: 10, + minor: 7, +}]; + +#[allow(dead_code)] +pub(crate) struct GpuId { + pub(crate) arch_major: u32, + pub(crate) arch_minor: u32, + pub(crate) arch_rev: u32, + pub(crate) prod_major: u32, + pub(crate) ver_major: u32, + pub(crate) ver_minor: u32, + pub(crate) ver_status: u32, +} + +impl From<u32> for GpuId { + fn from(value: u32) -> Self { + GpuId { + arch_major: (value & genmask_u32(28..=31)) >> 28, + arch_minor: (value & genmask_u32(24..=27)) >> 24, + arch_rev: (value & genmask_u32(20..=23)) >> 20, + prod_major: (value & genmask_u32(16..=19)) >> 16, + ver_major: (value & genmask_u32(12..=15)) >> 12, + ver_minor: (value & genmask_u32(4..=11)) >> 4, + ver_status: value & genmask_u32(0..=3), + } + } +} + +/// Powers on the l2 block. +pub(crate) fn l2_power_on(dev: &Device<Bound>, iomem: &Devres<IoMem>) -> Result { + regs::L2_PWRON_LO.write(dev, iomem, 1)?; + + // TODO: We cannot poll, as there is no support in Rust currently, so we + // sleep. Change this when read_poll_timeout() is implemented in Rust. + kernel::time::delay::fsleep(time::Delta::from_millis(100)); + + if regs::L2_READY_LO.read(dev, iomem)? != 1 { + dev_err!(dev, "Failed to power on the GPU\n"); + return Err(EIO); + } + + Ok(()) +} diff --git a/drivers/gpu/drm/tyr/regs.rs b/drivers/gpu/drm/tyr/regs.rs new file mode 100644 index 000000000000..f46933aaa221 --- /dev/null +++ b/drivers/gpu/drm/tyr/regs.rs @@ -0,0 +1,108 @@ +// SPDX-License-Identifier: GPL-2.0 or MIT + +// We don't expect that all the registers and fields will be used, even in the +// future. +// +// Nevertheless, it is useful to have most of them defined, like the C driver +// does. 
+#![allow(dead_code)] + +use kernel::bits::bit_u32; +use kernel::device::Bound; +use kernel::device::Device; +use kernel::devres::Devres; +use kernel::prelude::*; + +use crate::driver::IoMem; + +/// Represents a register in the Register Set +/// +/// TODO: Replace this with the Nova `register!()` macro when it is available. +/// In particular, this will automatically give us 64bit register reads and +/// writes. +pub(crate) struct Register<const OFFSET: usize>; + +impl<const OFFSET: usize> Register<OFFSET> { + #[inline] + pub(crate) fn read(&self, dev: &Device<Bound>, iomem: &Devres<IoMem>) -> Result<u32> { + let value = (*iomem).access(dev)?.read32(OFFSET); + Ok(value) + } + + #[inline] + pub(crate) fn write(&self, dev: &Device<Bound>, iomem: &Devres<IoMem>, value: u32) -> Result { + (*iomem).access(dev)?.write32(value, OFFSET); + Ok(()) + } +} + +pub(crate) const GPU_ID: Register<0x0> = Register; +pub(crate) const GPU_L2_FEATURES: Register<0x4> = Register; +pub(crate) const GPU_CORE_FEATURES: Register<0x8> = Register; +pub(crate) const GPU_CSF_ID: Register<0x1c> = Register; +pub(crate) const GPU_REVID: Register<0x280> = Register; +pub(crate) const GPU_TILER_FEATURES: Register<0xc> = Register; +pub(crate) const GPU_MEM_FEATURES: Register<0x10> = Register; +pub(crate) const GPU_MMU_FEATURES: Register<0x14> = Register; +pub(crate) const GPU_AS_PRESENT: Register<0x18> = Register; +pub(crate) const GPU_IRQ_RAWSTAT: Register<0x20> = Register; + +pub(crate) const GPU_IRQ_RAWSTAT_FAULT: u32 = bit_u32(0); +pub(crate) const GPU_IRQ_RAWSTAT_PROTECTED_FAULT: u32 = bit_u32(1); +pub(crate) const GPU_IRQ_RAWSTAT_RESET_COMPLETED: u32 = bit_u32(8); +pub(crate) const GPU_IRQ_RAWSTAT_POWER_CHANGED_SINGLE: u32 = bit_u32(9); +pub(crate) const GPU_IRQ_RAWSTAT_POWER_CHANGED_ALL: u32 = bit_u32(10); +pub(crate) const GPU_IRQ_RAWSTAT_CLEAN_CACHES_COMPLETED: u32 = bit_u32(17); +pub(crate) const GPU_IRQ_RAWSTAT_DOORBELL_STATUS: u32 = bit_u32(18); +pub(crate) const GPU_IRQ_RAWSTAT_MCU_STATUS: u32 = bit_u32(19); + +pub(crate) const GPU_IRQ_CLEAR: Register<0x24> = Register; +pub(crate) const GPU_IRQ_MASK: Register<0x28> = Register; +pub(crate) const GPU_IRQ_STAT: Register<0x2c> = Register; +pub(crate) const GPU_CMD: Register<0x30> = Register; +pub(crate) const GPU_CMD_SOFT_RESET: u32 = 1 | (1 << 8); +pub(crate) const GPU_CMD_HARD_RESET: u32 = 1 | (2 << 8); +pub(crate) const GPU_THREAD_FEATURES: Register<0xac> = Register; +pub(crate) const GPU_THREAD_MAX_THREADS: Register<0xa0> = Register; +pub(crate) const GPU_THREAD_MAX_WORKGROUP_SIZE: Register<0xa4> = Register; +pub(crate) const GPU_THREAD_MAX_BARRIER_SIZE: Register<0xa8> = Register; +pub(crate) const GPU_TEXTURE_FEATURES0: Register<0xb0> = Register; +pub(crate) const GPU_SHADER_PRESENT_LO: Register<0x100> = Register; +pub(crate) const GPU_SHADER_PRESENT_HI: Register<0x104> = Register; +pub(crate) const GPU_TILER_PRESENT_LO: Register<0x110> = Register; +pub(crate) const GPU_TILER_PRESENT_HI: Register<0x114> = Register; +pub(crate) const GPU_L2_PRESENT_LO: Register<0x120> = Register; +pub(crate) const GPU_L2_PRESENT_HI: Register<0x124> = Register; +pub(crate) const L2_READY_LO: Register<0x160> = Register; +pub(crate) const L2_READY_HI: Register<0x164> = Register; +pub(crate) const L2_PWRON_LO: Register<0x1a0> = Register; +pub(crate) const L2_PWRON_HI: Register<0x1a4> = Register; +pub(crate) const L2_PWRTRANS_LO: Register<0x220> = Register; +pub(crate) const L2_PWRTRANS_HI: Register<0x204> = Register; +pub(crate) const L2_PWRACTIVE_LO: Register<0x260> = Register; 
+pub(crate) const L2_PWRACTIVE_HI: Register<0x264> = Register; + +pub(crate) const MCU_CONTROL: Register<0x700> = Register; +pub(crate) const MCU_CONTROL_ENABLE: u32 = 1; +pub(crate) const MCU_CONTROL_AUTO: u32 = 2; +pub(crate) const MCU_CONTROL_DISABLE: u32 = 0; + +pub(crate) const MCU_STATUS: Register<0x704> = Register; +pub(crate) const MCU_STATUS_DISABLED: u32 = 0; +pub(crate) const MCU_STATUS_ENABLED: u32 = 1; +pub(crate) const MCU_STATUS_HALT: u32 = 2; +pub(crate) const MCU_STATUS_FATAL: u32 = 3; + +pub(crate) const GPU_COHERENCY_FEATURES: Register<0x300> = Register; + +pub(crate) const JOB_IRQ_RAWSTAT: Register<0x1000> = Register; +pub(crate) const JOB_IRQ_CLEAR: Register<0x1004> = Register; +pub(crate) const JOB_IRQ_MASK: Register<0x1008> = Register; +pub(crate) const JOB_IRQ_STAT: Register<0x100c> = Register; + +pub(crate) const JOB_IRQ_GLOBAL_IF: u32 = bit_u32(31); + +pub(crate) const MMU_IRQ_RAWSTAT: Register<0x2000> = Register; +pub(crate) const MMU_IRQ_CLEAR: Register<0x2004> = Register; +pub(crate) const MMU_IRQ_MASK: Register<0x2008> = Register; +pub(crate) const MMU_IRQ_STAT: Register<0x200c> = Register; diff --git a/drivers/gpu/drm/tyr/tyr.rs b/drivers/gpu/drm/tyr/tyr.rs new file mode 100644 index 000000000000..861d1db43072 --- /dev/null +++ b/drivers/gpu/drm/tyr/tyr.rs @@ -0,0 +1,22 @@ +// SPDX-License-Identifier: GPL-2.0 or MIT + +//! Arm Mali Tyr DRM driver. +//! +//! The name "Tyr" is inspired by Norse mythology, reflecting Arm's tradition of +//! naming their GPUs after Nordic mythological figures and places. + +use crate::driver::TyrDriver; + +mod driver; +mod file; +mod gem; +mod gpu; +mod regs; + +kernel::module_platform_driver! { + type: TyrDriver, + name: "tyr", + authors: ["The Tyr driver authors"], + description: "Arm Mali Tyr DRM driver", + license: "Dual MIT/GPL", +} diff --git a/drivers/gpu/drm/udl/udl_drv.c b/drivers/gpu/drm/udl/udl_drv.c index ce5ae7cacb90..1922988625eb 100644 --- a/drivers/gpu/drm/udl/udl_drv.c +++ b/drivers/gpu/drm/udl/udl_drv.c @@ -57,7 +57,7 @@ static const struct drm_driver driver = { /* GEM hooks */ .fops = &udl_driver_fops, - DRM_GEM_SHMEM_DRIVER_OPS_NO_MAP_SGT, + DRM_GEM_SHMEM_DRIVER_OPS, DRM_FBDEV_SHMEM_DRIVER_OPS, .name = DRIVER_NAME, diff --git a/drivers/gpu/drm/v3d/v3d_bo.c b/drivers/gpu/drm/v3d/v3d_bo.c index bb7815599435..c41476ddde68 100644 --- a/drivers/gpu/drm/v3d/v3d_bo.c +++ b/drivers/gpu/drm/v3d/v3d_bo.c @@ -16,7 +16,6 @@ */ #include <linux/dma-buf.h> -#include <linux/pfn_t.h> #include <linux/vmalloc.h> #include "v3d_drv.h" diff --git a/drivers/gpu/drm/v3d/v3d_drv.c b/drivers/gpu/drm/v3d/v3d_drv.c index 5e997ae8bc9c..c5a3bbbc74c5 100644 --- a/drivers/gpu/drm/v3d/v3d_drv.c +++ b/drivers/gpu/drm/v3d/v3d_drv.c @@ -46,6 +46,7 @@ MODULE_PARM_DESC(super_pages, "Enable/Disable Super Pages support."); static int v3d_get_param_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv) { + struct v3d_file_priv *v3d_priv = file_priv->driver_priv; struct v3d_dev *v3d = to_v3d_dev(dev); struct drm_v3d_get_param *args = data; static const u32 reg_map[] = { @@ -107,6 +108,16 @@ static int v3d_get_param_ioctl(struct drm_device *dev, void *data, case DRM_V3D_PARAM_SUPPORTS_SUPER_PAGES: args->value = !!v3d->gemfs; return 0; + case DRM_V3D_PARAM_GLOBAL_RESET_COUNTER: + mutex_lock(&v3d->reset_lock); + args->value = v3d->reset_counter; + mutex_unlock(&v3d->reset_lock); + return 0; + case DRM_V3D_PARAM_CONTEXT_RESET_COUNTER: + mutex_lock(&v3d->reset_lock); + args->value = v3d_priv->reset_counter; + mutex_unlock(&v3d->reset_lock); 
+ return 0; default: DRM_DEBUG("Unknown parameter %d\n", args->param); return -EINVAL; @@ -146,12 +157,24 @@ v3d_open(struct drm_device *dev, struct drm_file *file) static void v3d_postclose(struct drm_device *dev, struct drm_file *file) { + struct v3d_dev *v3d = to_v3d_dev(dev); struct v3d_file_priv *v3d_priv = file->driver_priv; + unsigned long irqflags; enum v3d_queue q; - for (q = 0; q < V3D_MAX_QUEUES; q++) + for (q = 0; q < V3D_MAX_QUEUES; q++) { + struct v3d_queue_state *queue = &v3d->queue[q]; + struct v3d_job *job = queue->active_job; + drm_sched_entity_destroy(&v3d_priv->sched_entity[q]); + if (job && job->base.entity == &v3d_priv->sched_entity[q]) { + spin_lock_irqsave(&queue->queue_lock, irqflags); + job->file_priv = NULL; + spin_unlock_irqrestore(&queue->queue_lock, irqflags); + } + } + v3d_perfmon_close_file(v3d_priv); kfree(v3d_priv); } diff --git a/drivers/gpu/drm/v3d/v3d_drv.h b/drivers/gpu/drm/v3d/v3d_drv.h index b51f0b648a08..1884686985b8 100644 --- a/drivers/gpu/drm/v3d/v3d_drv.h +++ b/drivers/gpu/drm/v3d/v3d_drv.h @@ -58,6 +58,12 @@ struct v3d_queue_state { /* Stores the GPU stats for this queue in the global context. */ struct v3d_stats stats; + + /* Currently active job for this queue */ + struct v3d_job *active_job; + spinlock_t queue_lock; + /* Protect dma fence for signalling job completion */ + spinlock_t fence_lock; }; /* Performance monitor object. The perform lifetime is controlled by userspace @@ -101,6 +107,12 @@ enum v3d_gen { V3D_GEN_71 = 71, }; +enum v3d_irq { + V3D_CORE_IRQ, + V3D_HUB_IRQ, + V3D_MAX_IRQS, +}; + struct v3d_dev { struct drm_device drm; @@ -112,6 +124,8 @@ struct v3d_dev { bool single_irq_line; + int irq[V3D_MAX_IRQS]; + struct v3d_perfmon_info perfmon_info; void __iomem *hub_regs; @@ -151,18 +165,8 @@ struct v3d_dev { struct work_struct overflow_mem_work; - struct v3d_bin_job *bin_job; - struct v3d_render_job *render_job; - struct v3d_tfu_job *tfu_job; - struct v3d_csd_job *csd_job; - struct v3d_queue_state queue[V3D_MAX_QUEUES]; - /* Spinlock used to synchronize the overflow memory - * management against bin job submission. - */ - spinlock_t job_lock; - /* Used to track the active perfmon if any. */ struct v3d_perfmon *active_perfmon; @@ -196,6 +200,11 @@ struct v3d_dev { * all jobs. */ struct v3d_perfmon *global_perfmon; + + /* Global reset counter. The counter must be incremented when + * a GPU reset happens. It must be protected by @reset_lock. + */ + unsigned int reset_counter; }; static inline struct v3d_dev * @@ -225,6 +234,12 @@ struct v3d_file_priv { /* Stores the GPU stats for a specific queue for this fd. */ struct v3d_stats stats[V3D_MAX_QUEUES]; + + /* Per-fd reset counter, must be incremented when a job submitted + * by this fd causes a GPU reset. It must be protected by + * &struct v3d_dev->reset_lock. + */ + unsigned int reset_counter; }; struct v3d_bo { @@ -308,9 +323,9 @@ struct v3d_job { struct v3d_perfmon *perfmon; /* File descriptor of the process that submitted the job that could be used - * for collecting stats by process of GPU usage. + * to collect per-process information about the GPU. */ - struct drm_file *file; + struct v3d_file_priv *file_priv; /* Callback for the freeing of the job on refcount going to 0. 
*/ void (*free)(struct kref *ref); @@ -551,7 +566,7 @@ void v3d_get_stats(const struct v3d_stats *stats, u64 timestamp, /* v3d_fence.c */ extern const struct dma_fence_ops v3d_fence_ops; -struct dma_fence *v3d_fence_create(struct v3d_dev *v3d, enum v3d_queue queue); +struct dma_fence *v3d_fence_create(struct v3d_dev *v3d, enum v3d_queue q); /* v3d_gem.c */ int v3d_gem_init(struct drm_device *dev); @@ -595,7 +610,7 @@ void v3d_timestamp_query_info_free(struct v3d_timestamp_query_info *query_info, unsigned int count); void v3d_performance_query_info_free(struct v3d_performance_query_info *query_info, unsigned int count); -void v3d_job_update_stats(struct v3d_job *job, enum v3d_queue queue); +void v3d_job_update_stats(struct v3d_job *job, enum v3d_queue q); int v3d_sched_init(struct v3d_dev *v3d); void v3d_sched_fini(struct v3d_dev *v3d); diff --git a/drivers/gpu/drm/v3d/v3d_fence.c b/drivers/gpu/drm/v3d/v3d_fence.c index 89840ed212c0..c82500a1df73 100644 --- a/drivers/gpu/drm/v3d/v3d_fence.c +++ b/drivers/gpu/drm/v3d/v3d_fence.c @@ -3,8 +3,9 @@ #include "v3d_drv.h" -struct dma_fence *v3d_fence_create(struct v3d_dev *v3d, enum v3d_queue queue) +struct dma_fence *v3d_fence_create(struct v3d_dev *v3d, enum v3d_queue q) { + struct v3d_queue_state *queue = &v3d->queue[q]; struct v3d_fence *fence; fence = kzalloc(sizeof(*fence), GFP_KERNEL); @@ -12,10 +13,10 @@ struct dma_fence *v3d_fence_create(struct v3d_dev *v3d, enum v3d_queue queue) return ERR_PTR(-ENOMEM); fence->dev = &v3d->drm; - fence->queue = queue; - fence->seqno = ++v3d->queue[queue].emit_seqno; - dma_fence_init(&fence->base, &v3d_fence_ops, &v3d->job_lock, - v3d->queue[queue].fence_context, fence->seqno); + fence->queue = q; + fence->seqno = ++queue->emit_seqno; + dma_fence_init(&fence->base, &v3d_fence_ops, &queue->fence_lock, + queue->fence_context, fence->seqno); return &fence->base; } diff --git a/drivers/gpu/drm/v3d/v3d_gem.c b/drivers/gpu/drm/v3d/v3d_gem.c index d7d16da78db3..bb110d35f749 100644 --- a/drivers/gpu/drm/v3d/v3d_gem.c +++ b/drivers/gpu/drm/v3d/v3d_gem.c @@ -134,6 +134,8 @@ v3d_reset(struct v3d_dev *v3d) if (false) v3d_idle_axi(v3d, 0); + v3d_irq_disable(v3d); + v3d_idle_gca(v3d); v3d_reset_sms(v3d); v3d_reset_v3d(v3d); @@ -269,10 +271,12 @@ v3d_gem_init(struct drm_device *dev) queue->fence_context = dma_fence_context_alloc(1); memset(&queue->stats, 0, sizeof(queue->stats)); seqcount_init(&queue->stats.lock); + + spin_lock_init(&queue->queue_lock); + spin_lock_init(&queue->fence_lock); } spin_lock_init(&v3d->mm_lock); - spin_lock_init(&v3d->job_lock); ret = drmm_mutex_init(dev, &v3d->bo_lock); if (ret) return ret; @@ -322,6 +326,7 @@ void v3d_gem_destroy(struct drm_device *dev) { struct v3d_dev *v3d = to_v3d_dev(dev); + enum v3d_queue q; v3d_sched_fini(v3d); v3d_gemfs_fini(v3d); @@ -329,10 +334,8 @@ v3d_gem_destroy(struct drm_device *dev) /* Waiting for jobs to finish would need to be done before * unregistering V3D. 
*/ - WARN_ON(v3d->bin_job); - WARN_ON(v3d->render_job); - WARN_ON(v3d->tfu_job); - WARN_ON(v3d->csd_job); + for (q = 0; q < V3D_MAX_QUEUES; q++) + WARN_ON(v3d->queue[q].active_job); drm_mm_takedown(&v3d->mm); diff --git a/drivers/gpu/drm/v3d/v3d_gemfs.c b/drivers/gpu/drm/v3d/v3d_gemfs.c index 4c5e18590a5c..c1a30166c099 100644 --- a/drivers/gpu/drm/v3d/v3d_gemfs.c +++ b/drivers/gpu/drm/v3d/v3d_gemfs.c @@ -3,14 +3,16 @@ #include <linux/fs.h> #include <linux/mount.h> +#include <linux/fs_context.h> #include "v3d_drv.h" void v3d_gemfs_init(struct v3d_dev *v3d) { - char huge_opt[] = "huge=within_size"; struct file_system_type *type; + struct fs_context *fc; struct vfsmount *gemfs; + int ret; /* * By creating our own shmemfs mountpoint, we can pass in @@ -28,8 +30,16 @@ void v3d_gemfs_init(struct v3d_dev *v3d) if (!type) goto err; - gemfs = vfs_kern_mount(type, SB_KERNMOUNT, type->name, huge_opt); - if (IS_ERR(gemfs)) + fc = fs_context_for_mount(type, SB_KERNMOUNT); + if (IS_ERR(fc)) + goto err; + ret = vfs_parse_fs_string(fc, "source", "tmpfs"); + if (!ret) + ret = vfs_parse_fs_string(fc, "huge", "within_size"); + if (!ret) + gemfs = fc_mount_longterm(fc); + put_fs_context(fc); + if (ret) goto err; v3d->gemfs = gemfs; diff --git a/drivers/gpu/drm/v3d/v3d_irq.c b/drivers/gpu/drm/v3d/v3d_irq.c index 2cca5d3a26a2..31ecc5b4ba5a 100644 --- a/drivers/gpu/drm/v3d/v3d_irq.c +++ b/drivers/gpu/drm/v3d/v3d_irq.c @@ -42,6 +42,8 @@ v3d_overflow_mem_work(struct work_struct *work) container_of(work, struct v3d_dev, overflow_mem_work); struct drm_device *dev = &v3d->drm; struct v3d_bo *bo = v3d_bo_create(dev, NULL /* XXX: GMP */, 256 * 1024); + struct v3d_queue_state *queue = &v3d->queue[V3D_BIN]; + struct v3d_bin_job *bin_job; struct drm_gem_object *obj; unsigned long irqflags; @@ -60,15 +62,17 @@ v3d_overflow_mem_work(struct work_struct *work) * bin job got scheduled, that's fine. We'll just give them * some binner pool anyway. 
*/ - spin_lock_irqsave(&v3d->job_lock, irqflags); - if (!v3d->bin_job) { - spin_unlock_irqrestore(&v3d->job_lock, irqflags); + spin_lock_irqsave(&queue->queue_lock, irqflags); + bin_job = (struct v3d_bin_job *)queue->active_job; + + if (!bin_job) { + spin_unlock_irqrestore(&queue->queue_lock, irqflags); goto out; } drm_gem_object_get(obj); - list_add_tail(&bo->unref_head, &v3d->bin_job->render->unref_list); - spin_unlock_irqrestore(&v3d->job_lock, irqflags); + list_add_tail(&bo->unref_head, &bin_job->render->unref_list); + spin_unlock_irqrestore(&queue->queue_lock, irqflags); v3d_mmu_flush_all(v3d); @@ -79,6 +83,20 @@ out: drm_gem_object_put(obj); } +static void +v3d_irq_signal_fence(struct v3d_dev *v3d, enum v3d_queue q, + void (*trace_irq)(struct drm_device *, uint64_t)) +{ + struct v3d_queue_state *queue = &v3d->queue[q]; + struct v3d_fence *fence = to_v3d_fence(queue->active_job->irq_fence); + + v3d_job_update_stats(queue->active_job, q); + trace_irq(&v3d->drm, fence->seqno); + + queue->active_job = NULL; + dma_fence_signal(&fence->base); +} + static irqreturn_t v3d_irq(int irq, void *arg) { @@ -102,41 +120,17 @@ v3d_irq(int irq, void *arg) } if (intsts & V3D_INT_FLDONE) { - struct v3d_fence *fence = - to_v3d_fence(v3d->bin_job->base.irq_fence); - - v3d_job_update_stats(&v3d->bin_job->base, V3D_BIN); - trace_v3d_bcl_irq(&v3d->drm, fence->seqno); - - v3d->bin_job = NULL; - dma_fence_signal(&fence->base); - + v3d_irq_signal_fence(v3d, V3D_BIN, trace_v3d_bcl_irq); status = IRQ_HANDLED; } if (intsts & V3D_INT_FRDONE) { - struct v3d_fence *fence = - to_v3d_fence(v3d->render_job->base.irq_fence); - - v3d_job_update_stats(&v3d->render_job->base, V3D_RENDER); - trace_v3d_rcl_irq(&v3d->drm, fence->seqno); - - v3d->render_job = NULL; - dma_fence_signal(&fence->base); - + v3d_irq_signal_fence(v3d, V3D_RENDER, trace_v3d_rcl_irq); status = IRQ_HANDLED; } if (intsts & V3D_INT_CSDDONE(v3d->ver)) { - struct v3d_fence *fence = - to_v3d_fence(v3d->csd_job->base.irq_fence); - - v3d_job_update_stats(&v3d->csd_job->base, V3D_CSD); - trace_v3d_csd_irq(&v3d->drm, fence->seqno); - - v3d->csd_job = NULL; - dma_fence_signal(&fence->base); - + v3d_irq_signal_fence(v3d, V3D_CSD, trace_v3d_csd_irq); status = IRQ_HANDLED; } @@ -168,15 +162,7 @@ v3d_hub_irq(int irq, void *arg) V3D_WRITE(V3D_HUB_INT_CLR, intsts); if (intsts & V3D_HUB_INT_TFUC) { - struct v3d_fence *fence = - to_v3d_fence(v3d->tfu_job->base.irq_fence); - - v3d_job_update_stats(&v3d->tfu_job->base, V3D_TFU); - trace_v3d_tfu_irq(&v3d->drm, fence->seqno); - - v3d->tfu_job = NULL; - dma_fence_signal(&fence->base); - + v3d_irq_signal_fence(v3d, V3D_TFU, trace_v3d_tfu_irq); status = IRQ_HANDLED; } @@ -260,7 +246,7 @@ v3d_hub_irq(int irq, void *arg) int v3d_irq_init(struct v3d_dev *v3d) { - int irq1, ret, core; + int irq, ret, core; INIT_WORK(&v3d->overflow_mem_work, v3d_overflow_mem_work); @@ -271,17 +257,24 @@ v3d_irq_init(struct v3d_dev *v3d) V3D_CORE_WRITE(core, V3D_CTL_INT_CLR, V3D_CORE_IRQS(v3d->ver)); V3D_WRITE(V3D_HUB_INT_CLR, V3D_HUB_IRQS(v3d->ver)); - irq1 = platform_get_irq_optional(v3d_to_pdev(v3d), 1); - if (irq1 == -EPROBE_DEFER) - return irq1; - if (irq1 > 0) { - ret = devm_request_irq(v3d->drm.dev, irq1, + irq = platform_get_irq_optional(v3d_to_pdev(v3d), 1); + if (irq == -EPROBE_DEFER) + return irq; + if (irq > 0) { + v3d->irq[V3D_CORE_IRQ] = irq; + + ret = devm_request_irq(v3d->drm.dev, v3d->irq[V3D_CORE_IRQ], v3d_irq, IRQF_SHARED, "v3d_core0", v3d); if (ret) goto fail; - ret = devm_request_irq(v3d->drm.dev, - 
platform_get_irq(v3d_to_pdev(v3d), 0), + + irq = platform_get_irq(v3d_to_pdev(v3d), 0); + if (irq < 0) + return irq; + v3d->irq[V3D_HUB_IRQ] = irq; + + ret = devm_request_irq(v3d->drm.dev, v3d->irq[V3D_HUB_IRQ], v3d_hub_irq, IRQF_SHARED, "v3d_hub", v3d); if (ret) @@ -289,8 +282,12 @@ v3d_irq_init(struct v3d_dev *v3d) } else { v3d->single_irq_line = true; - ret = devm_request_irq(v3d->drm.dev, - platform_get_irq(v3d_to_pdev(v3d), 0), + irq = platform_get_irq(v3d_to_pdev(v3d), 0); + if (irq < 0) + return irq; + v3d->irq[V3D_CORE_IRQ] = irq; + + ret = devm_request_irq(v3d->drm.dev, v3d->irq[V3D_CORE_IRQ], v3d_irq, IRQF_SHARED, "v3d", v3d); if (ret) @@ -331,6 +328,12 @@ v3d_irq_disable(struct v3d_dev *v3d) V3D_CORE_WRITE(core, V3D_CTL_INT_MSK_SET, ~0); V3D_WRITE(V3D_HUB_INT_MSK_SET, ~0); + /* Finish any interrupt handler still in flight. */ + for (int i = 0; i < V3D_MAX_IRQS; i++) { + if (v3d->irq[i]) + synchronize_irq(v3d->irq[i]); + } + /* Clear any pending interrupts we might have left. */ for (core = 0; core < v3d->cores; core++) V3D_CORE_WRITE(core, V3D_CTL_INT_CLR, V3D_CORE_IRQS(v3d->ver)); diff --git a/drivers/gpu/drm/v3d/v3d_sched.c b/drivers/gpu/drm/v3d/v3d_sched.c index 35f131a46d07..0ec06bfbbebb 100644 --- a/drivers/gpu/drm/v3d/v3d_sched.c +++ b/drivers/gpu/drm/v3d/v3d_sched.c @@ -139,7 +139,7 @@ static void v3d_job_start_stats(struct v3d_job *job, enum v3d_queue queue) { struct v3d_dev *v3d = job->v3d; - struct v3d_file_priv *file = job->file->driver_priv; + struct v3d_file_priv *file = job->file_priv; struct v3d_stats *global_stats = &v3d->queue[queue].stats; struct v3d_stats *local_stats = &file->stats[queue]; u64 now = local_clock(); @@ -194,12 +194,11 @@ v3d_stats_update(struct v3d_stats *stats, u64 now) } void -v3d_job_update_stats(struct v3d_job *job, enum v3d_queue queue) +v3d_job_update_stats(struct v3d_job *job, enum v3d_queue q) { struct v3d_dev *v3d = job->v3d; - struct v3d_file_priv *file = job->file->driver_priv; - struct v3d_stats *global_stats = &v3d->queue[queue].stats; - struct v3d_stats *local_stats = &file->stats[queue]; + struct v3d_queue_state *queue = &v3d->queue[q]; + struct v3d_stats *global_stats = &queue->stats; u64 now = local_clock(); unsigned long flags; @@ -209,7 +208,12 @@ v3d_job_update_stats(struct v3d_job *job, enum v3d_queue queue) else preempt_disable(); - v3d_stats_update(local_stats, now); + /* Don't update the local stats if the file context has already closed */ + spin_lock(&queue->queue_lock); + if (job->file_priv) + v3d_stats_update(&job->file_priv->stats[q], now); + spin_unlock(&queue->queue_lock); + v3d_stats_update(global_stats, now); if (IS_ENABLED(CONFIG_LOCKDEP)) @@ -222,27 +226,28 @@ static struct dma_fence *v3d_bin_job_run(struct drm_sched_job *sched_job) { struct v3d_bin_job *job = to_bin_job(sched_job); struct v3d_dev *v3d = job->base.v3d; + struct v3d_queue_state *queue = &v3d->queue[V3D_BIN]; struct drm_device *dev = &v3d->drm; struct dma_fence *fence; unsigned long irqflags; if (unlikely(job->base.base.s_fence->finished.error)) { - spin_lock_irqsave(&v3d->job_lock, irqflags); - v3d->bin_job = NULL; - spin_unlock_irqrestore(&v3d->job_lock, irqflags); + spin_lock_irqsave(&queue->queue_lock, irqflags); + queue->active_job = NULL; + spin_unlock_irqrestore(&queue->queue_lock, irqflags); return NULL; } /* Lock required around bin_job update vs * v3d_overflow_mem_work(). 
*/ - spin_lock_irqsave(&v3d->job_lock, irqflags); - v3d->bin_job = job; + spin_lock_irqsave(&queue->queue_lock, irqflags); + queue->active_job = &job->base; /* Clear out the overflow allocation, so we don't * reuse the overflow attached to a previous job. */ V3D_CORE_WRITE(0, V3D_PTB_BPOS, 0); - spin_unlock_irqrestore(&v3d->job_lock, irqflags); + spin_unlock_irqrestore(&queue->queue_lock, irqflags); v3d_invalidate_caches(v3d); @@ -286,11 +291,11 @@ static struct dma_fence *v3d_render_job_run(struct drm_sched_job *sched_job) struct dma_fence *fence; if (unlikely(job->base.base.s_fence->finished.error)) { - v3d->render_job = NULL; + v3d->queue[V3D_RENDER].active_job = NULL; return NULL; } - v3d->render_job = job; + v3d->queue[V3D_RENDER].active_job = &job->base; /* Can we avoid this flush? We need to be careful of * scheduling, though -- imagine job0 rendering to texture and @@ -334,11 +339,11 @@ v3d_tfu_job_run(struct drm_sched_job *sched_job) struct dma_fence *fence; if (unlikely(job->base.base.s_fence->finished.error)) { - v3d->tfu_job = NULL; + v3d->queue[V3D_TFU].active_job = NULL; return NULL; } - v3d->tfu_job = job; + v3d->queue[V3D_TFU].active_job = &job->base; fence = v3d_fence_create(v3d, V3D_TFU); if (IS_ERR(fence)) @@ -382,11 +387,11 @@ v3d_csd_job_run(struct drm_sched_job *sched_job) int i, csd_cfg0_reg; if (unlikely(job->base.base.s_fence->finished.error)) { - v3d->csd_job = NULL; + v3d->queue[V3D_CSD].active_job = NULL; return NULL; } - v3d->csd_job = job; + v3d->queue[V3D_CSD].active_job = &job->base; v3d_invalidate_caches(v3d); @@ -570,7 +575,7 @@ static void v3d_reset_performance_queries(struct v3d_cpu_job *job) { struct v3d_performance_query_info *performance_query = &job->performance_query; - struct v3d_file_priv *v3d_priv = job->base.file->driver_priv; + struct v3d_file_priv *v3d_priv = job->base.file_priv; struct v3d_dev *v3d = job->base.v3d; struct v3d_perfmon *perfmon; @@ -600,7 +605,7 @@ v3d_write_performance_query_result(struct v3d_cpu_job *job, void *data, { struct v3d_performance_query_info *performance_query = &job->performance_query; - struct v3d_file_priv *v3d_priv = job->base.file->driver_priv; + struct v3d_file_priv *v3d_priv = job->base.file_priv; struct v3d_performance_query *perf_query = &performance_query->queries[query]; struct v3d_dev *v3d = job->base.v3d; @@ -696,6 +701,7 @@ v3d_cpu_job_run(struct drm_sched_job *sched_job) trace_v3d_cpu_job_end(&v3d->drm, job->job_type); v3d_job_update_stats(&job->base, V3D_CPU); + /* Synchronous operation, so no fence to wait on. */ return NULL; } @@ -711,19 +717,24 @@ v3d_cache_clean_job_run(struct drm_sched_job *sched_job) v3d_job_update_stats(job, V3D_CACHE_CLEAN); + /* Synchronous operation, so no fence to wait on. 
*/ return NULL; } static enum drm_gpu_sched_stat -v3d_gpu_reset_for_timeout(struct v3d_dev *v3d, struct drm_sched_job *sched_job) +v3d_gpu_reset_for_timeout(struct v3d_dev *v3d, struct drm_sched_job *sched_job, + enum v3d_queue q) { - enum v3d_queue q; + struct v3d_job *job = to_v3d_job(sched_job); + struct v3d_file_priv *v3d_priv = job->file_priv; + unsigned long irqflags; + enum v3d_queue i; mutex_lock(&v3d->reset_lock); /* block scheduler */ - for (q = 0; q < V3D_MAX_QUEUES; q++) - drm_sched_stop(&v3d->queue[q].sched, sched_job); + for (i = 0; i < V3D_MAX_QUEUES; i++) + drm_sched_stop(&v3d->queue[i].sched, sched_job); if (sched_job) drm_sched_increase_karma(sched_job); @@ -731,27 +742,22 @@ v3d_gpu_reset_for_timeout(struct v3d_dev *v3d, struct drm_sched_job *sched_job) /* get the GPU back into the init state */ v3d_reset(v3d); - for (q = 0; q < V3D_MAX_QUEUES; q++) - drm_sched_resubmit_jobs(&v3d->queue[q].sched); + v3d->reset_counter++; + spin_lock_irqsave(&v3d->queue[q].queue_lock, irqflags); + if (v3d_priv) + v3d_priv->reset_counter++; + spin_unlock_irqrestore(&v3d->queue[q].queue_lock, irqflags); + + for (i = 0; i < V3D_MAX_QUEUES; i++) + drm_sched_resubmit_jobs(&v3d->queue[i].sched); /* Unblock schedulers and restart their jobs. */ - for (q = 0; q < V3D_MAX_QUEUES; q++) { - drm_sched_start(&v3d->queue[q].sched, 0); - } + for (i = 0; i < V3D_MAX_QUEUES; i++) + drm_sched_start(&v3d->queue[i].sched, 0); mutex_unlock(&v3d->reset_lock); - return DRM_GPU_SCHED_STAT_NOMINAL; -} - -static void -v3d_sched_skip_reset(struct drm_sched_job *sched_job) -{ - struct drm_gpu_scheduler *sched = sched_job->sched; - - spin_lock(&sched->job_list_lock); - list_add(&sched_job->list, &sched->pending_list); - spin_unlock(&sched->job_list_lock); + return DRM_GPU_SCHED_STAT_RESET; } static enum drm_gpu_sched_stat @@ -772,11 +778,10 @@ v3d_cl_job_timedout(struct drm_sched_job *sched_job, enum v3d_queue q, *timedout_ctca = ctca; *timedout_ctra = ctra; - v3d_sched_skip_reset(sched_job); - return DRM_GPU_SCHED_STAT_NOMINAL; + return DRM_GPU_SCHED_STAT_NO_HANG; } - return v3d_gpu_reset_for_timeout(v3d, sched_job); + return v3d_gpu_reset_for_timeout(v3d, sched_job, q); } static enum drm_gpu_sched_stat @@ -798,11 +803,11 @@ v3d_render_job_timedout(struct drm_sched_job *sched_job) } static enum drm_gpu_sched_stat -v3d_generic_job_timedout(struct drm_sched_job *sched_job) +v3d_tfu_job_timedout(struct drm_sched_job *sched_job) { struct v3d_job *job = to_v3d_job(sched_job); - return v3d_gpu_reset_for_timeout(job->v3d, sched_job); + return v3d_gpu_reset_for_timeout(job->v3d, sched_job, V3D_TFU); } static enum drm_gpu_sched_stat @@ -818,11 +823,10 @@ v3d_csd_job_timedout(struct drm_sched_job *sched_job) if (job->timedout_batches != batches) { job->timedout_batches = batches; - v3d_sched_skip_reset(sched_job); - return DRM_GPU_SCHED_STAT_NOMINAL; + return DRM_GPU_SCHED_STAT_NO_HANG; } - return v3d_gpu_reset_for_timeout(v3d, sched_job); + return v3d_gpu_reset_for_timeout(v3d, sched_job, V3D_CSD); } static const struct drm_sched_backend_ops v3d_bin_sched_ops = { @@ -839,7 +843,7 @@ static const struct drm_sched_backend_ops v3d_render_sched_ops = { static const struct drm_sched_backend_ops v3d_tfu_sched_ops = { .run_job = v3d_tfu_job_run, - .timedout_job = v3d_generic_job_timedout, + .timedout_job = v3d_tfu_job_timedout, .free_job = v3d_sched_job_free, }; @@ -851,13 +855,11 @@ static const struct drm_sched_backend_ops v3d_csd_sched_ops = { static const struct drm_sched_backend_ops v3d_cache_clean_sched_ops = { .run_job = 
v3d_cache_clean_job_run, - .timedout_job = v3d_generic_job_timedout, .free_job = v3d_sched_job_free }; static const struct drm_sched_backend_ops v3d_cpu_sched_ops = { .run_job = v3d_cpu_job_run, - .timedout_job = v3d_generic_job_timedout, .free_job = v3d_cpu_job_free }; diff --git a/drivers/gpu/drm/v3d/v3d_submit.c b/drivers/gpu/drm/v3d/v3d_submit.c index 5171ffe9012d..f3652e90683c 100644 --- a/drivers/gpu/drm/v3d/v3d_submit.c +++ b/drivers/gpu/drm/v3d/v3d_submit.c @@ -166,7 +166,7 @@ v3d_job_init(struct v3d_dev *v3d, struct drm_file *file_priv, job->v3d = v3d; job->free = free; - job->file = file_priv; + job->file_priv = v3d_priv; ret = drm_sched_job_init(&job->base, &v3d_priv->sched_entity[queue], 1, v3d_priv, file_priv->client_id); diff --git a/drivers/gpu/drm/vc4/vc4_kms.c b/drivers/gpu/drm/vc4/vc4_kms.c index f5b167417428..8f983edb81ff 100644 --- a/drivers/gpu/drm/vc4/vc4_kms.c +++ b/drivers/gpu/drm/vc4/vc4_kms.c @@ -530,6 +530,7 @@ static int vc4_atomic_commit_setup(struct drm_atomic_state *state) static struct drm_framebuffer *vc4_fb_create(struct drm_device *dev, struct drm_file *file_priv, + const struct drm_format_info *info, const struct drm_mode_fb_cmd2 *mode_cmd) { struct vc4_dev *vc4 = to_vc4_dev(dev); @@ -568,7 +569,7 @@ static struct drm_framebuffer *vc4_fb_create(struct drm_device *dev, mode_cmd = &mode_cmd_local; } - return drm_gem_fb_create(dev, file_priv, mode_cmd); + return drm_gem_fb_create(dev, file_priv, info, mode_cmd); } /* Our CTM has some peculiar limitations: we can only enable it for one CRTC diff --git a/drivers/gpu/drm/vgem/vgem_drv.c b/drivers/gpu/drm/vgem/vgem_drv.c index 2752ab4f1c97..260c64733972 100644 --- a/drivers/gpu/drm/vgem/vgem_drv.c +++ b/drivers/gpu/drm/vgem/vgem_drv.c @@ -32,7 +32,7 @@ #include <linux/dma-buf.h> #include <linux/module.h> -#include <linux/platform_device.h> +#include <linux/device/faux.h> #include <linux/shmem_fs.h> #include <linux/vmalloc.h> @@ -52,7 +52,7 @@ static struct vgem_device { struct drm_device drm; - struct platform_device *platform; + struct faux_device *faux_dev; } *vgem_device; static int vgem_open(struct drm_device *dev, struct drm_file *file) @@ -127,27 +127,27 @@ static const struct drm_driver vgem_driver = { static int __init vgem_init(void) { int ret; - struct platform_device *pdev; + struct faux_device *fdev; - pdev = platform_device_register_simple("vgem", -1, NULL, 0); - if (IS_ERR(pdev)) - return PTR_ERR(pdev); + fdev = faux_device_create("vgem", NULL, NULL); + if (!fdev) + return -ENODEV; - if (!devres_open_group(&pdev->dev, NULL, GFP_KERNEL)) { + if (!devres_open_group(&fdev->dev, NULL, GFP_KERNEL)) { ret = -ENOMEM; goto out_unregister; } - dma_coerce_mask_and_coherent(&pdev->dev, + dma_coerce_mask_and_coherent(&fdev->dev, DMA_BIT_MASK(64)); - vgem_device = devm_drm_dev_alloc(&pdev->dev, &vgem_driver, + vgem_device = devm_drm_dev_alloc(&fdev->dev, &vgem_driver, struct vgem_device, drm); if (IS_ERR(vgem_device)) { ret = PTR_ERR(vgem_device); goto out_devres; } - vgem_device->platform = pdev; + vgem_device->faux_dev = fdev; /* Final step: expose the device/driver to userspace */ ret = drm_dev_register(&vgem_device->drm, 0); @@ -157,19 +157,19 @@ static int __init vgem_init(void) return 0; out_devres: - devres_release_group(&pdev->dev, NULL); + devres_release_group(&fdev->dev, NULL); out_unregister: - platform_device_unregister(pdev); + faux_device_destroy(fdev); return ret; } static void __exit vgem_exit(void) { - struct platform_device *pdev = vgem_device->platform; + struct faux_device *fdev = 
vgem_device->faux_dev; drm_dev_unregister(&vgem_device->drm); - devres_release_group(&pdev->dev, NULL); - platform_device_unregister(pdev); + devres_release_group(&fdev->dev, NULL); + faux_device_destroy(fdev); } module_init(vgem_init); diff --git a/drivers/gpu/drm/virtio/virtgpu_display.c b/drivers/gpu/drm/virtio/virtgpu_display.c index 59a45e74a641..c3315935d8bc 100644 --- a/drivers/gpu/drm/virtio/virtgpu_display.c +++ b/drivers/gpu/drm/virtio/virtgpu_display.c @@ -66,6 +66,7 @@ static const struct drm_framebuffer_funcs virtio_gpu_fb_funcs = { static int virtio_gpu_framebuffer_init(struct drm_device *dev, struct virtio_gpu_framebuffer *vgfb, + const struct drm_format_info *info, const struct drm_mode_fb_cmd2 *mode_cmd, struct drm_gem_object *obj) { @@ -73,7 +74,7 @@ virtio_gpu_framebuffer_init(struct drm_device *dev, vgfb->base.obj[0] = obj; - drm_helper_mode_fill_fb_struct(dev, &vgfb->base, mode_cmd); + drm_helper_mode_fill_fb_struct(dev, &vgfb->base, info, mode_cmd); ret = drm_framebuffer_init(dev, &vgfb->base, &virtio_gpu_fb_funcs); if (ret) { @@ -130,9 +131,8 @@ static void virtio_gpu_crtc_atomic_flush(struct drm_crtc *crtc, * in the plane update callback, and here we just check * whenever we must force the modeset. */ - if (drm_atomic_crtc_needs_modeset(crtc_state)) { + if (drm_atomic_crtc_needs_modeset(crtc_state)) output->needs_modeset = true; - } } static const struct drm_crtc_helper_funcs virtio_gpu_crtc_helper_funcs = { @@ -293,6 +293,7 @@ static int vgdev_output_init(struct virtio_gpu_device *vgdev, int index) static struct drm_framebuffer * virtio_gpu_user_framebuffer_create(struct drm_device *dev, struct drm_file *file_priv, + const struct drm_format_info *info, const struct drm_mode_fb_cmd2 *mode_cmd) { struct drm_gem_object *obj = NULL; @@ -314,7 +315,7 @@ virtio_gpu_user_framebuffer_create(struct drm_device *dev, return ERR_PTR(-ENOMEM); } - ret = virtio_gpu_framebuffer_init(dev, virtio_gpu_fb, mode_cmd, obj); + ret = virtio_gpu_framebuffer_init(dev, virtio_gpu_fb, info, mode_cmd, obj); if (ret) { kfree(virtio_gpu_fb); drm_gem_object_put(obj); diff --git a/drivers/gpu/drm/virtio/virtgpu_drv.c b/drivers/gpu/drm/virtio/virtgpu_drv.c index e32e680c7197..71c6ccad4b99 100644 --- a/drivers/gpu/drm/virtio/virtgpu_drv.c +++ b/drivers/gpu/drm/virtio/virtgpu_drv.c @@ -130,10 +130,10 @@ static void virtio_gpu_remove(struct virtio_device *vdev) static void virtio_gpu_shutdown(struct virtio_device *vdev) { - /* - * drm does its own synchronization on shutdown. - * Do nothing here, opt out of device reset. 
- */ + struct drm_device *dev = vdev->priv; + + /* stop talking to the device */ + drm_dev_unplug(dev); } static void virtio_gpu_config_changed(struct virtio_device *vdev) diff --git a/drivers/gpu/drm/virtio/virtgpu_drv.h b/drivers/gpu/drm/virtio/virtgpu_drv.h index f7def8b42068..f17660a71a3e 100644 --- a/drivers/gpu/drm/virtio/virtgpu_drv.h +++ b/drivers/gpu/drm/virtio/virtgpu_drv.h @@ -88,7 +88,6 @@ struct virtio_gpu_object_params { struct virtio_gpu_object { struct drm_gem_shmem_object base; - struct dma_buf *dma_buf; struct sg_table *sgt; uint32_t hw_res_handle; bool dumb; diff --git a/drivers/gpu/drm/virtio/virtgpu_kms.c b/drivers/gpu/drm/virtio/virtgpu_kms.c index 7dfb2006c561..1c15cbf326b7 100644 --- a/drivers/gpu/drm/virtio/virtgpu_kms.c +++ b/drivers/gpu/drm/virtio/virtgpu_kms.c @@ -162,18 +162,18 @@ int virtio_gpu_init(struct virtio_device *vdev, struct drm_device *dev) if (virtio_has_feature(vgdev->vdev, VIRTIO_GPU_F_VIRGL)) vgdev->has_virgl_3d = true; #endif - if (virtio_has_feature(vgdev->vdev, VIRTIO_GPU_F_EDID)) { + if (virtio_has_feature(vgdev->vdev, VIRTIO_GPU_F_EDID)) vgdev->has_edid = true; - } - if (virtio_has_feature(vgdev->vdev, VIRTIO_RING_F_INDIRECT_DESC)) { + + if (virtio_has_feature(vgdev->vdev, VIRTIO_RING_F_INDIRECT_DESC)) vgdev->has_indirect = true; - } - if (virtio_has_feature(vgdev->vdev, VIRTIO_GPU_F_RESOURCE_UUID)) { + + if (virtio_has_feature(vgdev->vdev, VIRTIO_GPU_F_RESOURCE_UUID)) vgdev->has_resource_assign_uuid = true; - } - if (virtio_has_feature(vgdev->vdev, VIRTIO_GPU_F_RESOURCE_BLOB)) { + + if (virtio_has_feature(vgdev->vdev, VIRTIO_GPU_F_RESOURCE_BLOB)) vgdev->has_resource_blob = true; - } + if (virtio_get_shm_region(vgdev->vdev, &vgdev->host_visible_region, VIRTIO_GPU_SHM_ID_HOST_VISIBLE)) { if (!devm_request_mem_region(&vgdev->vdev->dev, @@ -193,9 +193,9 @@ int virtio_gpu_init(struct virtio_device *vdev, struct drm_device *dev) (unsigned long)vgdev->host_visible_region.addr, (unsigned long)vgdev->host_visible_region.len); } - if (virtio_has_feature(vgdev->vdev, VIRTIO_GPU_F_CONTEXT_INIT)) { + + if (virtio_has_feature(vgdev->vdev, VIRTIO_GPU_F_CONTEXT_INIT)) vgdev->has_context_init = true; - } DRM_INFO("features: %cvirgl %cedid %cresource_blob %chost_visible", vgdev->has_virgl_3d ? 
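virtio_gpu_shutdown() above now unplugs the DRM device instead of opting out of the reset entirely. drm_dev_unplug() marks the device as gone, so any path that brackets hardware access with drm_dev_enter()/drm_dev_exit() bails out cleanly once shutdown has run. A rough sketch of that guard pattern (the function is hypothetical; drm_dev_enter()/drm_dev_exit() are the standard DRM helpers):

static int example_hw_access(struct drm_device *dev)
{
        int idx;

        /* Fails once drm_dev_unplug() has been called. */
        if (!drm_dev_enter(dev, &idx))
                return -ENODEV;

        /* ... safe to touch the virtqueues here ... */

        drm_dev_exit(idx);
        return 0;
}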
'+' : '-', diff --git a/drivers/gpu/drm/virtio/virtgpu_object.c b/drivers/gpu/drm/virtio/virtgpu_object.c index 5517cff8715c..e6363c887500 100644 --- a/drivers/gpu/drm/virtio/virtgpu_object.c +++ b/drivers/gpu/drm/virtio/virtgpu_object.c @@ -47,6 +47,7 @@ int virtio_gpu_resource_id_get(struct virtio_gpu_device *vgdev, uint32_t *resid) *resid = handle + 1; } else { int handle = ida_alloc(&vgdev->resource_ida, GFP_KERNEL); + if (handle < 0) return handle; *resid = handle + 1; @@ -56,9 +57,8 @@ int virtio_gpu_resource_id_get(struct virtio_gpu_device *vgdev, uint32_t *resid) static void virtio_gpu_resource_id_put(struct virtio_gpu_device *vgdev, uint32_t id) { - if (!virtio_gpu_virglrenderer_workaround) { + if (!virtio_gpu_virglrenderer_workaround) ida_free(&vgdev->resource_ida, id - 1); - } } void virtio_gpu_cleanup_object(struct virtio_gpu_object *bo) diff --git a/drivers/gpu/drm/virtio/virtgpu_plane.c b/drivers/gpu/drm/virtio/virtgpu_plane.c index 698ea7adb951..29e4b458ae57 100644 --- a/drivers/gpu/drm/virtio/virtgpu_plane.c +++ b/drivers/gpu/drm/virtio/virtgpu_plane.c @@ -120,7 +120,7 @@ static int virtio_gpu_plane_atomic_check(struct drm_plane *plane, crtc_state = drm_atomic_get_crtc_state(state, new_plane_state->crtc); if (IS_ERR(crtc_state)) - return PTR_ERR(crtc_state); + return PTR_ERR(crtc_state); ret = drm_atomic_helper_check_plane_state(new_plane_state, crtc_state, DRM_PLANE_NO_SCALING, diff --git a/drivers/gpu/drm/virtio/virtgpu_prime.c b/drivers/gpu/drm/virtio/virtgpu_prime.c index 722cde5e2d86..ce49282198cb 100644 --- a/drivers/gpu/drm/virtio/virtgpu_prime.c +++ b/drivers/gpu/drm/virtio/virtgpu_prime.c @@ -204,15 +204,16 @@ static void virtgpu_dma_buf_free_obj(struct drm_gem_object *obj) { struct virtio_gpu_object *bo = gem_to_virtio_gpu_obj(obj); struct virtio_gpu_device *vgdev = obj->dev->dev_private; + struct dma_buf_attachment *attach = obj->import_attach; if (drm_gem_is_imported(obj)) { - struct dma_buf *dmabuf = bo->dma_buf; + struct dma_buf *dmabuf = attach->dmabuf; dma_resv_lock(dmabuf->resv, NULL); virtgpu_dma_buf_unmap(bo); dma_resv_unlock(dmabuf->resv); - dma_buf_detach(dmabuf, obj->import_attach); + dma_buf_detach(dmabuf, attach); dma_buf_put(dmabuf); } @@ -332,7 +333,6 @@ struct drm_gem_object *virtgpu_gem_prime_import(struct drm_device *dev, obj->import_attach = attach; get_dma_buf(buf); - bo->dma_buf = buf; ret = virtgpu_dma_buf_init_obj(dev, bo, attach); if (ret < 0) diff --git a/drivers/gpu/drm/virtio/virtgpu_vq.c b/drivers/gpu/drm/virtio/virtgpu_vq.c index 55a15e247dd1..8181b22b9b46 100644 --- a/drivers/gpu/drm/virtio/virtgpu_vq.c +++ b/drivers/gpu/drm/virtio/virtgpu_vq.c @@ -248,6 +248,7 @@ void virtio_gpu_dequeue_ctrl_func(struct work_struct *work) if (resp->type != cpu_to_le32(VIRTIO_GPU_RESP_OK_NODATA)) { if (le32_to_cpu(resp->type) >= VIRTIO_GPU_RESP_ERR_UNSPEC) { struct virtio_gpu_ctrl_hdr *cmd; + cmd = virtio_gpu_vbuf_ctrl_hdr(entry); DRM_ERROR_RATELIMITED("response 0x%x (command 0x%x)\n", le32_to_cpu(resp->type), @@ -468,6 +469,7 @@ static int virtio_gpu_queue_fenced_ctrl_buffer(struct virtio_gpu_device *vgdev, if (vbuf->data_size) { if (is_vmalloc_addr(vbuf->data_buf)) { int sg_ents; + sgt = vmalloc_to_sgt(vbuf->data_buf, vbuf->data_size, &sg_ents); if (!sgt) { diff --git a/drivers/gpu/drm/vkms/tests/vkms_config_test.c b/drivers/gpu/drm/vkms/tests/vkms_config_test.c index ff4566cf9925..b0d78a81d2df 100644 --- a/drivers/gpu/drm/vkms/tests/vkms_config_test.c +++ b/drivers/gpu/drm/vkms/tests/vkms_config_test.c @@ -200,6 +200,7 @@ static void 
vkms_config_test_get_planes(struct kunit *test) KUNIT_ASSERT_EQ(test, n_planes, 0); plane_cfg1 = vkms_config_create_plane(config); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, plane_cfg1); vkms_config_for_each_plane(config, plane_cfg) { n_planes++; if (plane_cfg != plane_cfg1) @@ -209,6 +210,7 @@ static void vkms_config_test_get_planes(struct kunit *test) n_planes = 0; plane_cfg2 = vkms_config_create_plane(config); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, plane_cfg2); vkms_config_for_each_plane(config, plane_cfg) { n_planes++; if (plane_cfg != plane_cfg1 && plane_cfg != plane_cfg2) @@ -242,6 +244,7 @@ static void vkms_config_test_get_crtcs(struct kunit *test) KUNIT_FAIL(test, "Unexpected CRTC"); crtc_cfg1 = vkms_config_create_crtc(config); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, crtc_cfg1); KUNIT_ASSERT_EQ(test, vkms_config_get_num_crtcs(config), 1); vkms_config_for_each_crtc(config, crtc_cfg) { if (crtc_cfg != crtc_cfg1) @@ -249,6 +252,7 @@ static void vkms_config_test_get_crtcs(struct kunit *test) } crtc_cfg2 = vkms_config_create_crtc(config); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, crtc_cfg2); KUNIT_ASSERT_EQ(test, vkms_config_get_num_crtcs(config), 2); vkms_config_for_each_crtc(config, crtc_cfg) { if (crtc_cfg != crtc_cfg1 && crtc_cfg != crtc_cfg2) @@ -280,6 +284,7 @@ static void vkms_config_test_get_encoders(struct kunit *test) KUNIT_ASSERT_EQ(test, n_encoders, 0); encoder_cfg1 = vkms_config_create_encoder(config); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, encoder_cfg1); vkms_config_for_each_encoder(config, encoder_cfg) { n_encoders++; if (encoder_cfg != encoder_cfg1) @@ -289,6 +294,7 @@ static void vkms_config_test_get_encoders(struct kunit *test) n_encoders = 0; encoder_cfg2 = vkms_config_create_encoder(config); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, encoder_cfg2); vkms_config_for_each_encoder(config, encoder_cfg) { n_encoders++; if (encoder_cfg != encoder_cfg1 && encoder_cfg != encoder_cfg2) @@ -324,6 +330,7 @@ static void vkms_config_test_get_connectors(struct kunit *test) KUNIT_ASSERT_EQ(test, n_connectors, 0); connector_cfg1 = vkms_config_create_connector(config); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, connector_cfg1); vkms_config_for_each_connector(config, connector_cfg) { n_connectors++; if (connector_cfg != connector_cfg1) @@ -333,6 +340,7 @@ static void vkms_config_test_get_connectors(struct kunit *test) n_connectors = 0; connector_cfg2 = vkms_config_create_connector(config); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, connector_cfg2); vkms_config_for_each_connector(config, connector_cfg) { n_connectors++; if (connector_cfg != connector_cfg1 && @@ -370,7 +378,7 @@ static void vkms_config_test_invalid_plane_number(struct kunit *test) /* Invalid: Too many planes */ for (n = 0; n <= 32; n++) - vkms_config_create_plane(config); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, vkms_config_create_plane(config)); KUNIT_EXPECT_FALSE(test, vkms_config_is_valid(config)); @@ -395,6 +403,7 @@ static void vkms_config_test_valid_plane_type(struct kunit *test) /* Invalid: No primary plane */ plane_cfg = vkms_config_create_plane(config); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, plane_cfg); vkms_config_plane_set_type(plane_cfg, DRM_PLANE_TYPE_OVERLAY); err = vkms_config_plane_attach_crtc(plane_cfg, crtc_cfg); KUNIT_EXPECT_EQ(test, err, 0); @@ -402,11 +411,13 @@ static void vkms_config_test_valid_plane_type(struct kunit *test) /* Invalid: Multiple primary planes */ plane_cfg = vkms_config_create_plane(config); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, plane_cfg); vkms_config_plane_set_type(plane_cfg, DRM_PLANE_TYPE_PRIMARY); err = 
vkms_config_plane_attach_crtc(plane_cfg, crtc_cfg); KUNIT_EXPECT_EQ(test, err, 0); plane_cfg = vkms_config_create_plane(config); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, plane_cfg); vkms_config_plane_set_type(plane_cfg, DRM_PLANE_TYPE_PRIMARY); err = vkms_config_plane_attach_crtc(plane_cfg, crtc_cfg); KUNIT_EXPECT_EQ(test, err, 0); @@ -419,11 +430,13 @@ static void vkms_config_test_valid_plane_type(struct kunit *test) /* Invalid: Multiple cursor planes */ plane_cfg = vkms_config_create_plane(config); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, plane_cfg); vkms_config_plane_set_type(plane_cfg, DRM_PLANE_TYPE_CURSOR); err = vkms_config_plane_attach_crtc(plane_cfg, crtc_cfg); KUNIT_EXPECT_EQ(test, err, 0); plane_cfg = vkms_config_create_plane(config); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, plane_cfg); vkms_config_plane_set_type(plane_cfg, DRM_PLANE_TYPE_CURSOR); err = vkms_config_plane_attach_crtc(plane_cfg, crtc_cfg); KUNIT_EXPECT_EQ(test, err, 0); @@ -437,12 +450,16 @@ static void vkms_config_test_valid_plane_type(struct kunit *test) /* Invalid: Second CRTC without primary plane */ crtc_cfg = vkms_config_create_crtc(config); encoder_cfg = vkms_config_create_encoder(config); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, crtc_cfg); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, encoder_cfg); + err = vkms_config_encoder_attach_crtc(encoder_cfg, crtc_cfg); KUNIT_EXPECT_EQ(test, err, 0); KUNIT_EXPECT_FALSE(test, vkms_config_is_valid(config)); /* Valid: Second CRTC with a primary plane */ plane_cfg = vkms_config_create_plane(config); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, plane_cfg); vkms_config_plane_set_type(plane_cfg, DRM_PLANE_TYPE_PRIMARY); err = vkms_config_plane_attach_crtc(plane_cfg, crtc_cfg); KUNIT_EXPECT_EQ(test, err, 0); @@ -486,7 +503,7 @@ static void vkms_config_test_invalid_crtc_number(struct kunit *test) /* Invalid: Too many CRTCs */ for (n = 0; n <= 32; n++) - vkms_config_create_crtc(config); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, vkms_config_create_crtc(config)); KUNIT_EXPECT_FALSE(test, vkms_config_is_valid(config)); @@ -509,7 +526,7 @@ static void vkms_config_test_invalid_encoder_number(struct kunit *test) /* Invalid: Too many encoders */ for (n = 0; n <= 32; n++) - vkms_config_create_encoder(config); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, vkms_config_create_encoder(config)); KUNIT_EXPECT_FALSE(test, vkms_config_is_valid(config)); @@ -531,12 +548,15 @@ static void vkms_config_test_valid_encoder_possible_crtcs(struct kunit *test) /* Invalid: Encoder without a possible CRTC */ encoder_cfg = vkms_config_create_encoder(config); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, encoder_cfg); KUNIT_EXPECT_FALSE(test, vkms_config_is_valid(config)); /* Valid: Second CRTC with shared encoder */ crtc_cfg2 = vkms_config_create_crtc(config); - plane_cfg = vkms_config_create_plane(config); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, crtc_cfg2); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, plane_cfg); + vkms_config_plane_set_type(plane_cfg, DRM_PLANE_TYPE_PRIMARY); err = vkms_config_plane_attach_crtc(plane_cfg, crtc_cfg2); KUNIT_EXPECT_EQ(test, err, 0); @@ -577,7 +597,7 @@ static void vkms_config_test_invalid_connector_number(struct kunit *test) /* Invalid: Too many connectors */ for (n = 0; n <= 32; n++) - connector_cfg = vkms_config_create_connector(config); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, vkms_config_create_connector(config)); KUNIT_EXPECT_FALSE(test, vkms_config_is_valid(config)); @@ -669,13 +689,19 @@ static void vkms_config_test_plane_attach_crtc(struct kunit *test) KUNIT_ASSERT_NOT_ERR_OR_NULL(test, config); overlay_cfg = 
vkms_config_create_plane(config); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, overlay_cfg); vkms_config_plane_set_type(overlay_cfg, DRM_PLANE_TYPE_OVERLAY); + primary_cfg = vkms_config_create_plane(config); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, primary_cfg); vkms_config_plane_set_type(primary_cfg, DRM_PLANE_TYPE_PRIMARY); + cursor_cfg = vkms_config_create_plane(config); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, cursor_cfg); vkms_config_plane_set_type(cursor_cfg, DRM_PLANE_TYPE_CURSOR); crtc_cfg = vkms_config_create_crtc(config); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, crtc_cfg); /* No primary or cursor planes */ KUNIT_EXPECT_NULL(test, vkms_config_crtc_primary_plane(config, crtc_cfg)); @@ -735,6 +761,11 @@ static void vkms_config_test_plane_get_possible_crtcs(struct kunit *test) crtc_cfg1 = vkms_config_create_crtc(config); crtc_cfg2 = vkms_config_create_crtc(config); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, plane_cfg1); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, plane_cfg2); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, crtc_cfg1); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, crtc_cfg2); + /* No possible CRTCs */ vkms_config_plane_for_each_possible_crtc(plane_cfg1, idx, possible_crtc) KUNIT_FAIL(test, "Unexpected possible CRTC"); @@ -799,6 +830,11 @@ static void vkms_config_test_encoder_get_possible_crtcs(struct kunit *test) crtc_cfg1 = vkms_config_create_crtc(config); crtc_cfg2 = vkms_config_create_crtc(config); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, encoder_cfg1); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, encoder_cfg2); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, crtc_cfg1); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, crtc_cfg2); + /* No possible CRTCs */ vkms_config_encoder_for_each_possible_crtc(encoder_cfg1, idx, possible_crtc) KUNIT_FAIL(test, "Unexpected possible CRTC"); @@ -863,6 +899,11 @@ static void vkms_config_test_connector_get_possible_encoders(struct kunit *test) encoder_cfg1 = vkms_config_create_encoder(config); encoder_cfg2 = vkms_config_create_encoder(config); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, connector_cfg1); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, connector_cfg2); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, encoder_cfg1); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, encoder_cfg2); + /* No possible encoders */ vkms_config_connector_for_each_possible_encoder(connector_cfg1, idx, possible_encoder) diff --git a/drivers/gpu/drm/vkms/tests/vkms_format_test.c b/drivers/gpu/drm/vkms/tests/vkms_format_test.c index 2e1daef94831..a7788fbc45dc 100644 --- a/drivers/gpu/drm/vkms/tests/vkms_format_test.c +++ b/drivers/gpu/drm/vkms/tests/vkms_format_test.c @@ -14,20 +14,20 @@ MODULE_IMPORT_NS("EXPORTED_FOR_KUNIT_TESTING"); /** - * struct pixel_yuv_u8 - Internal representation of a pixel color. - * @y: Luma value, stored in 8 bits, without padding, using + * struct pixel_yuv_u16 - Internal representation of a pixel color. 
+ * @y: Luma value, stored in 16 bits, without padding, using * machine endianness - * @u: Blue difference chroma value, stored in 8 bits, without padding, using + * @u: Blue difference chroma value, stored in 16 bits, without padding, using * machine endianness - * @v: Red difference chroma value, stored in 8 bits, without padding, using + * @v: Red difference chroma value, stored in 16 bits, without padding, using * machine endianness */ -struct pixel_yuv_u8 { - u8 y, u, v; +struct pixel_yuv_u16 { + u16 y, u, v; }; /* - * struct yuv_u8_to_argb_u16_case - Reference values to test the color + * struct yuv_u16_to_argb_u16_case - Reference values to test the color * conversions in VKMS between YUV to ARGB * * @encoding: Encoding used to convert RGB to YUV @@ -39,13 +39,13 @@ struct pixel_yuv_u8 { * @format_pair.yuv: Same color as @format_pair.rgb, but converted to * YUV using @encoding and @range. */ -struct yuv_u8_to_argb_u16_case { +struct yuv_u16_to_argb_u16_case { enum drm_color_encoding encoding; enum drm_color_range range; size_t n_colors; struct format_pair { char *name; - struct pixel_yuv_u8 yuv; + struct pixel_yuv_u16 yuv; struct pixel_argb_u16 argb; } colors[TEST_BUFF_SIZE]; }; @@ -57,14 +57,14 @@ struct yuv_u8_to_argb_u16_case { * For more information got to the docs: * https://colour.readthedocs.io/en/master/generated/colour.RGB_to_YCbCr.html */ -static struct yuv_u8_to_argb_u16_case yuv_u8_to_argb_u16_cases[] = { +static struct yuv_u16_to_argb_u16_case yuv_u16_to_argb_u16_cases[] = { /* * colour.RGB_to_YCbCr(<rgb color in 16 bit form>, * K=colour.WEIGHTS_YCBCR["ITU-R BT.601"], * in_bits = 16, * in_legal = False, * in_int = True, - * out_bits = 8, + * out_bits = 16, * out_legal = False, * out_int = True) * @@ -76,13 +76,13 @@ static struct yuv_u8_to_argb_u16_case yuv_u8_to_argb_u16_cases[] = { .range = DRM_COLOR_YCBCR_FULL_RANGE, .n_colors = 6, .colors = { - { "white", { 0xff, 0x80, 0x80 }, { 0xffff, 0xffff, 0xffff, 0xffff }}, - { "gray", { 0x80, 0x80, 0x80 }, { 0xffff, 0x8080, 0x8080, 0x8080 }}, - { "black", { 0x00, 0x80, 0x80 }, { 0xffff, 0x0000, 0x0000, 0x0000 }}, - { "red", { 0x4c, 0x55, 0xff }, { 0xffff, 0xffff, 0x0000, 0x0000 }}, - { "green", { 0x96, 0x2c, 0x15 }, { 0xffff, 0x0000, 0xffff, 0x0000 }}, - { "blue", { 0x1d, 0xff, 0x6b }, { 0xffff, 0x0000, 0x0000, 0xffff }}, - }, + { "white", { 0xffff, 0x8000, 0x8000 }, { 0xffff, 0xffff, 0xffff, 0xffff }}, + { "gray", { 0x8080, 0x8000, 0x8000 }, { 0xffff, 0x8080, 0x8080, 0x8080 }}, + { "black", { 0x0000, 0x8000, 0x8000 }, { 0xffff, 0x0000, 0x0000, 0x0000 }}, + { "red", { 0x4c8b, 0x54ce, 0xffff }, { 0xffff, 0xffff, 0x0000, 0x0000 }}, + { "green", { 0x9645, 0x2b33, 0x14d1 }, { 0xffff, 0x0000, 0xffff, 0x0000 }}, + { "blue", { 0x1d2f, 0xffff, 0x6b2f }, { 0xffff, 0x0000, 0x0000, 0xffff }}, + } }, /* * colour.RGB_to_YCbCr(<rgb color in 16 bit form>, @@ -90,7 +90,7 @@ static struct yuv_u8_to_argb_u16_case yuv_u8_to_argb_u16_cases[] = { * in_bits = 16, * in_legal = False, * in_int = True, - * out_bits = 8, + * out_bits = 16, * out_legal = True, * out_int = True) * Tests cases for color conversion generated by converting RGB @@ -101,13 +101,13 @@ static struct yuv_u8_to_argb_u16_case yuv_u8_to_argb_u16_cases[] = { .range = DRM_COLOR_YCBCR_LIMITED_RANGE, .n_colors = 6, .colors = { - { "white", { 0xeb, 0x80, 0x80 }, { 0xffff, 0xffff, 0xffff, 0xffff }}, - { "gray", { 0x7e, 0x80, 0x80 }, { 0xffff, 0x8080, 0x8080, 0x8080 }}, - { "black", { 0x10, 0x80, 0x80 }, { 0xffff, 0x0000, 0x0000, 0x0000 }}, - { "red", { 0x51, 0x5a, 0xf0 }, { 0xffff, 
0xffff, 0x0000, 0x0000 }}, - { "green", { 0x91, 0x36, 0x22 }, { 0xffff, 0x0000, 0xffff, 0x0000 }}, - { "blue", { 0x29, 0xf0, 0x6e }, { 0xffff, 0x0000, 0x0000, 0xffff }}, - }, + { "white", { 0xeb00, 0x8000, 0x8000 }, { 0xffff, 0xffff, 0xffff, 0xffff }}, + { "gray", { 0x7dee, 0x8000, 0x8000 }, { 0xffff, 0x8080, 0x8080, 0x8080 }}, + { "black", { 0x1000, 0x8000, 0x8000 }, { 0xffff, 0x0000, 0x0000, 0x0000 }}, + { "red", { 0x517b, 0x5a34, 0xf000 }, { 0xffff, 0xffff, 0x0000, 0x0000 }}, + { "green", { 0x908e, 0x35cc, 0x2237 }, { 0xffff, 0x0000, 0xffff, 0x0000 }}, + { "blue", { 0x28f7, 0xf000, 0x6dc9 }, { 0xffff, 0x0000, 0x0000, 0xffff }}, + } }, /* * colour.RGB_to_YCbCr(<rgb color in 16 bit form>, @@ -115,7 +115,7 @@ static struct yuv_u8_to_argb_u16_case yuv_u8_to_argb_u16_cases[] = { * in_bits = 16, * in_legal = False, * in_int = True, - * out_bits = 8, + * out_bits = 16, * out_legal = False, * out_int = True) * Tests cases for color conversion generated by converting RGB @@ -126,21 +126,21 @@ static struct yuv_u8_to_argb_u16_case yuv_u8_to_argb_u16_cases[] = { .range = DRM_COLOR_YCBCR_FULL_RANGE, .n_colors = 6, .colors = { - { "white", { 0xff, 0x80, 0x80 }, { 0xffff, 0xffff, 0xffff, 0xffff }}, - { "gray", { 0x80, 0x80, 0x80 }, { 0xffff, 0x8080, 0x8080, 0x8080 }}, - { "black", { 0x00, 0x80, 0x80 }, { 0xffff, 0x0000, 0x0000, 0x0000 }}, - { "red", { 0x36, 0x63, 0xff }, { 0xffff, 0xffff, 0x0000, 0x0000 }}, - { "green", { 0xb6, 0x1e, 0x0c }, { 0xffff, 0x0000, 0xffff, 0x0000 }}, - { "blue", { 0x12, 0xff, 0x74 }, { 0xffff, 0x0000, 0x0000, 0xffff }}, - }, + { "white", { 0xffff, 0x8000, 0x8000 }, { 0xffff, 0xffff, 0xffff, 0xffff }}, + { "gray", { 0x8080, 0x8000, 0x8000 }, { 0xffff, 0x8080, 0x8080, 0x8080 }}, + { "black", { 0x0000, 0x8000, 0x8000 }, { 0xffff, 0x0000, 0x0000, 0x0000 }}, + { "red", { 0x366d, 0x62ac, 0xffff }, { 0xffff, 0xffff, 0x0000, 0x0000 }}, + { "green", { 0xb717, 0x1d55, 0x0bbd }, { 0xffff, 0x0000, 0xffff, 0x0000 }}, + { "blue", { 0x127c, 0xffff, 0x7443 }, { 0xffff, 0x0000, 0x0000, 0xffff }}, + } }, /* * colour.RGB_to_YCbCr(<rgb color in 16 bit form>, * K=colour.WEIGHTS_YCBCR["ITU-R BT.709"], * in_bits = 16, - * int_legal = False, + * in_legal = False, * in_int = True, - * out_bits = 8, + * out_bits = 16, * out_legal = True, * out_int = True) * Tests cases for color conversion generated by converting RGB @@ -151,13 +151,13 @@ static struct yuv_u8_to_argb_u16_case yuv_u8_to_argb_u16_cases[] = { .range = DRM_COLOR_YCBCR_LIMITED_RANGE, .n_colors = 6, .colors = { - { "white", { 0xeb, 0x80, 0x80 }, { 0xffff, 0xffff, 0xffff, 0xffff }}, - { "gray", { 0x7e, 0x80, 0x80 }, { 0xffff, 0x8080, 0x8080, 0x8080 }}, - { "black", { 0x10, 0x80, 0x80 }, { 0xffff, 0x0000, 0x0000, 0x0000 }}, - { "red", { 0x3f, 0x66, 0xf0 }, { 0xffff, 0xffff, 0x0000, 0x0000 }}, - { "green", { 0xad, 0x2a, 0x1a }, { 0xffff, 0x0000, 0xffff, 0x0000 }}, - { "blue", { 0x20, 0xf0, 0x76 }, { 0xffff, 0x0000, 0x0000, 0xffff }}, - }, + { "white", { 0xeb00, 0x8000, 0x8000 }, { 0xffff, 0xffff, 0xffff, 0xffff }}, + { "gray", { 0x7dee, 0x8000, 0x8000 }, { 0xffff, 0x8080, 0x8080, 0x8080 }}, + { "black", { 0x1000, 0x8000, 0x8000 }, { 0xffff, 0x0000, 0x0000, 0x0000 }}, + { "red", { 0x3e8f, 0x6656, 0xf000 }, { 0xffff, 0xffff, 0x0000, 0x0000 }}, + { "green", { 0xaca1, 0x29aa, 0x1a45 }, { 0xffff, 0x0000, 0xffff, 0x0000 }}, + { "blue", { 0x1fd0, 0xf000, 0x75bb }, { 0xffff, 0x0000, 0x0000, 0xffff }}, + } }, /* * colour.RGB_to_YCbCr(<rgb color in 16 bit form>, @@ -165,7 +165,7 @@ static struct yuv_u8_to_argb_u16_case yuv_u8_to_argb_u16_cases[] = { 
* in_bits = 16, * in_legal = False, * in_int = True, - * out_bits = 8, + * out_bits = 16, * out_legal = False, * out_int = True) * Tests cases for color conversion generated by converting RGB @@ -176,13 +176,13 @@ static struct yuv_u8_to_argb_u16_case yuv_u8_to_argb_u16_cases[] = { .range = DRM_COLOR_YCBCR_FULL_RANGE, .n_colors = 6, .colors = { - { "white", { 0xff, 0x80, 0x80 }, { 0xffff, 0xffff, 0xffff, 0xffff }}, - { "gray", { 0x80, 0x80, 0x80 }, { 0xffff, 0x8080, 0x8080, 0x8080 }}, - { "black", { 0x00, 0x80, 0x80 }, { 0xffff, 0x0000, 0x0000, 0x0000 }}, - { "red", { 0x43, 0x5c, 0xff }, { 0xffff, 0xffff, 0x0000, 0x0000 }}, - { "green", { 0xad, 0x24, 0x0b }, { 0xffff, 0x0000, 0xffff, 0x0000 }}, - { "blue", { 0x0f, 0xff, 0x76 }, { 0xffff, 0x0000, 0x0000, 0xffff }}, - }, + { "white", { 0xffff, 0x8000, 0x8000 }, { 0xffff, 0xffff, 0xffff, 0xffff }}, + { "gray", { 0x8080, 0x8000, 0x8000 }, { 0xffff, 0x8080, 0x8080, 0x8080 }}, + { "black", { 0x0000, 0x8000, 0x8000 }, { 0xffff, 0x0000, 0x0000, 0x0000 }}, + { "red", { 0x4340, 0x5c41, 0xffff }, { 0xffff, 0xffff, 0x0000, 0x0000 }}, + { "green", { 0xad91, 0x23bf, 0x0a4c }, { 0xffff, 0x0000, 0xffff, 0x0000 }}, + { "blue", { 0x0f2e, 0xffff, 0x75b5 }, { 0xffff, 0x0000, 0x0000, 0xffff }}, + } }, /* * colour.RGB_to_YCbCr(<rgb color in 16 bit form>, @@ -190,7 +190,7 @@ static struct yuv_u8_to_argb_u16_case yuv_u8_to_argb_u16_cases[] = { * in_bits = 16, * in_legal = False, * in_int = True, - * out_bits = 8, + * out_bits = 16, * out_legal = True, * out_int = True) * Tests cases for color conversion generated by converting RGB @@ -201,32 +201,30 @@ static struct yuv_u8_to_argb_u16_case yuv_u8_to_argb_u16_cases[] = { .range = DRM_COLOR_YCBCR_LIMITED_RANGE, .n_colors = 6, .colors = { - { "white", { 0xeb, 0x80, 0x80 }, { 0xffff, 0xffff, 0xffff, 0xffff }}, - { "gray", { 0x7e, 0x80, 0x80 }, { 0xffff, 0x8080, 0x8080, 0x8080 }}, - { "black", { 0x10, 0x80, 0x80 }, { 0xffff, 0x0000, 0x0000, 0x0000 }}, - { "red", { 0x4a, 0x61, 0xf0 }, { 0xffff, 0xffff, 0x0000, 0x0000 }}, - { "green", { 0xa4, 0x2f, 0x19 }, { 0xffff, 0x0000, 0xffff, 0x0000 }}, - { "blue", { 0x1d, 0xf0, 0x77 }, { 0xffff, 0x0000, 0x0000, 0xffff }}, - }, + { "white", { 0xeb00, 0x8000, 0x8000 }, { 0xffff, 0xffff, 0xffff, 0xffff }}, + { "gray", { 0x7dee, 0x8000, 0x8000 }, { 0xffff, 0x8080, 0x8080, 0x8080 }}, + { "black", { 0x1000, 0x8000, 0x8000 }, { 0xffff, 0x0000, 0x0000, 0x0000 }}, + { "red", { 0x4988, 0x60b9, 0xf000 }, { 0xffff, 0xffff, 0x0000, 0x0000 }}, + { "green", { 0xa47b, 0x2f47, 0x1902 }, { 0xffff, 0x0000, 0xffff, 0x0000 }}, + { "blue", { 0x1cfd, 0xf000, 0x76fe }, { 0xffff, 0x0000, 0x0000, 0xffff }}, + } }, }; /* - * vkms_format_test_yuv_u8_to_argb_u16 - Testing the conversion between YUV + * vkms_format_test_yuv_u16_to_argb_u16 - Testing the conversion between YUV * colors to ARGB colors in VKMS * * This test will use the functions get_conversion_matrix_to_argb_u16 and - * argb_u16_from_yuv888 to convert YUV colors (stored in - * yuv_u8_to_argb_u16_cases) into ARGB colors. + * argb_u16_from_yuv161616 to convert YUV colors (stored in + * yuv_u16_to_argb_u16_cases) into ARGB colors. * * The conversion between YUV and RGB is not totally reversible, so there may be * some difference between the expected value and the result. - * In addition, there may be some rounding error as the input color is 8 bits - * and output color is 16 bits. 
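As a quick sanity check of the regenerated 16-bit reference values: with out_bits = 16 and full range, the luma entry follows directly from the BT.601 weights,

\[
Y' = K_r R' + K_g G' + K_b B', \qquad
K_r = 0.299,\; K_b = 0.114,\; K_g = 1 - K_r - K_b = 0.587,
\]
\[
Y'_{\text{red}} = 0.299 \times 65535 \approx 19595 = \mathtt{0x4c8b},
\]

which matches the "red" entry in the BT.601 full-range table above. The chroma entries are the correspondingly scaled and offset (B' - Y') and (R' - Y') differences, so pure red saturates Cr at 0xffff while Cb sits below mid-scale.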
*/ -static void vkms_format_test_yuv_u8_to_argb_u16(struct kunit *test) +static void vkms_format_test_yuv_u16_to_argb_u16(struct kunit *test) { - const struct yuv_u8_to_argb_u16_case *param = test->param_value; + const struct yuv_u16_to_argb_u16_case *param = test->param_value; struct pixel_argb_u16 argb; for (size_t i = 0; i < param->n_colors; i++) { @@ -236,7 +234,8 @@ static void vkms_format_test_yuv_u8_to_argb_u16(struct kunit *test) get_conversion_matrix_to_argb_u16 (DRM_FORMAT_NV12, param->encoding, param->range, &matrix); - argb = argb_u16_from_yuv888(color->yuv.y, color->yuv.u, color->yuv.v, &matrix); + argb = argb_u16_from_yuv161616(&matrix, color->yuv.y, color->yuv.u, + color->yuv.v); KUNIT_EXPECT_LE_MSG(test, abs_diff(argb.a, color->argb.a), 0x1ff, "On the A channel of the color %s expected 0x%04x, got 0x%04x", @@ -253,19 +252,19 @@ static void vkms_format_test_yuv_u8_to_argb_u16(struct kunit *test) } } -static void vkms_format_test_yuv_u8_to_argb_u16_case_desc(struct yuv_u8_to_argb_u16_case *t, - char *desc) +static void vkms_format_test_yuv_u16_to_argb_u16_case_desc(struct yuv_u16_to_argb_u16_case *t, + char *desc) { snprintf(desc, KUNIT_PARAM_DESC_SIZE, "%s - %s", drm_get_color_encoding_name(t->encoding), drm_get_color_range_name(t->range)); } -KUNIT_ARRAY_PARAM(yuv_u8_to_argb_u16, yuv_u8_to_argb_u16_cases, - vkms_format_test_yuv_u8_to_argb_u16_case_desc +KUNIT_ARRAY_PARAM(yuv_u16_to_argb_u16, yuv_u16_to_argb_u16_cases, + vkms_format_test_yuv_u16_to_argb_u16_case_desc ); static struct kunit_case vkms_format_test_cases[] = { - KUNIT_CASE_PARAM(vkms_format_test_yuv_u8_to_argb_u16, yuv_u8_to_argb_u16_gen_params), + KUNIT_CASE_PARAM(vkms_format_test_yuv_u16_to_argb_u16, yuv_u16_to_argb_u16_gen_params), {} }; diff --git a/drivers/gpu/drm/vkms/vkms_crtc.c b/drivers/gpu/drm/vkms/vkms_crtc.c index 8c9898b9055d..e60573e0f3e9 100644 --- a/drivers/gpu/drm/vkms/vkms_crtc.c +++ b/drivers/gpu/drm/vkms/vkms_crtc.c @@ -302,8 +302,6 @@ struct vkms_output *vkms_crtc_init(struct drm_device *dev, struct drm_plane *pri vkms_out->composer_workq = drmm_alloc_ordered_workqueue(dev, "vkms_composer", 0); if (IS_ERR(vkms_out->composer_workq)) return ERR_CAST(vkms_out->composer_workq); - if (!vkms_out->composer_workq) - return ERR_PTR(-ENOMEM); return vkms_out; } diff --git a/drivers/gpu/drm/vkms/vkms_drv.c b/drivers/gpu/drm/vkms/vkms_drv.c index a24d1655f7b8..e8472d9b6e3b 100644 --- a/drivers/gpu/drm/vkms/vkms_drv.c +++ b/drivers/gpu/drm/vkms/vkms_drv.c @@ -10,7 +10,7 @@ */ #include <linux/module.h> -#include <linux/platform_device.h> +#include <linux/device/faux.h> #include <linux/dma-mapping.h> #include <drm/clients/drm_client_setup.h> @@ -149,27 +149,27 @@ static int vkms_modeset_init(struct vkms_device *vkmsdev) static int vkms_create(struct vkms_config *config) { int ret; - struct platform_device *pdev; + struct faux_device *fdev; struct vkms_device *vkms_device; const char *dev_name; dev_name = vkms_config_get_device_name(config); - pdev = platform_device_register_simple(dev_name, -1, NULL, 0); - if (IS_ERR(pdev)) - return PTR_ERR(pdev); + fdev = faux_device_create(dev_name, NULL, NULL); + if (!fdev) + return -ENODEV; - if (!devres_open_group(&pdev->dev, NULL, GFP_KERNEL)) { + if (!devres_open_group(&fdev->dev, NULL, GFP_KERNEL)) { ret = -ENOMEM; goto out_unregister; } - vkms_device = devm_drm_dev_alloc(&pdev->dev, &vkms_driver, + vkms_device = devm_drm_dev_alloc(&fdev->dev, &vkms_driver, struct vkms_device, drm); if (IS_ERR(vkms_device)) { ret = PTR_ERR(vkms_device); goto out_devres; } - 
vkms_device->platform = pdev; + vkms_device->faux_dev = fdev; vkms_device->config = config; config->dev = vkms_device; @@ -203,9 +203,9 @@ static int vkms_create(struct vkms_config *config) return 0; out_devres: - devres_release_group(&pdev->dev, NULL); + devres_release_group(&fdev->dev, NULL); out_unregister: - platform_device_unregister(pdev); + faux_device_destroy(fdev); return ret; } @@ -231,19 +231,19 @@ static int __init vkms_init(void) static void vkms_destroy(struct vkms_config *config) { - struct platform_device *pdev; + struct faux_device *fdev; if (!config->dev) { DRM_INFO("vkms_device is NULL.\n"); return; } - pdev = config->dev->platform; + fdev = config->dev->faux_dev; drm_dev_unregister(&config->dev->drm); drm_atomic_helper_shutdown(&config->dev->drm); - devres_release_group(&pdev->dev, NULL); - platform_device_unregister(pdev); + devres_release_group(&fdev->dev, NULL); + faux_device_destroy(fdev); config->dev = NULL; } diff --git a/drivers/gpu/drm/vkms/vkms_drv.h b/drivers/gpu/drm/vkms/vkms_drv.h index 68b5e3fce455..8013c31efe3b 100644 --- a/drivers/gpu/drm/vkms/vkms_drv.h +++ b/drivers/gpu/drm/vkms/vkms_drv.h @@ -232,13 +232,13 @@ struct vkms_config; * struct vkms_device - Description of a VKMS device * * @drm - Base device in DRM - * @platform - Associated platform device + * @faux_dev - Associated faux device * @output - Configuration and sub-components of the VKMS device * @config: Configuration used in this VKMS device */ struct vkms_device { struct drm_device drm; - struct platform_device *platform; + struct faux_device *faux_dev; const struct vkms_config *config; }; diff --git a/drivers/gpu/drm/vkms/vkms_formats.c b/drivers/gpu/drm/vkms/vkms_formats.c index 6d0227c6635a..dfb8e13cba87 100644 --- a/drivers/gpu/drm/vkms/vkms_formats.c +++ b/drivers/gpu/drm/vkms/vkms_formats.c @@ -259,16 +259,27 @@ static struct pixel_argb_u16 argb_u16_from_grayu16(u16 gray) return argb_u16_from_u16161616(0xFFFF, gray, gray, gray); } -VISIBLE_IF_KUNIT struct pixel_argb_u16 argb_u16_from_yuv888(u8 y, u8 channel_1, u8 channel_2, - const struct conversion_matrix *matrix) +static struct pixel_argb_u16 argb_u16_from_BGR565(const __le16 *pixel) +{ + struct pixel_argb_u16 out_pixel; + + out_pixel = argb_u16_from_RGB565(pixel); + swap(out_pixel.r, out_pixel.b); + + return out_pixel; +} + +VISIBLE_IF_KUNIT +struct pixel_argb_u16 argb_u16_from_yuv161616(const struct conversion_matrix *matrix, + u16 y, u16 channel_1, u16 channel_2) { u16 r, g, b; s64 fp_y, fp_channel_1, fp_channel_2; s64 fp_r, fp_g, fp_b; - fp_y = drm_int2fixp(((int)y - matrix->y_offset) * 257); - fp_channel_1 = drm_int2fixp(((int)channel_1 - 128) * 257); - fp_channel_2 = drm_int2fixp(((int)channel_2 - 128) * 257); + fp_y = drm_int2fixp((int)y - matrix->y_offset * 257); + fp_channel_1 = drm_int2fixp((int)channel_1 - 128 * 257); + fp_channel_2 = drm_int2fixp((int)channel_2 - 128 * 257); fp_r = drm_fixp_mul(matrix->matrix[0][0], fp_y) + drm_fixp_mul(matrix->matrix[0][1], fp_channel_1) + @@ -290,7 +301,65 @@ VISIBLE_IF_KUNIT struct pixel_argb_u16 argb_u16_from_yuv888(u8 y, u8 channel_1, return argb_u16_from_u16161616(0xffff, r, g, b); } -EXPORT_SYMBOL_IF_KUNIT(argb_u16_from_yuv888); +EXPORT_SYMBOL_IF_KUNIT(argb_u16_from_yuv161616); + +/** + * READ_LINE() - Generic generator for a read_line function which can be used for format with one + * plane and a block_h == block_w == 1. 
+ * + * @function_name: Function name to generate + * @pixel_name: Temporary pixel name used in the @__VA_ARGS__ parameters + * @pixel_type: Used to specify the type you want to cast the pixel pointer + * @callback: Callback to call for each pixels. This fonction should take @__VA_ARGS__ as parameter + * and return a pixel_argb_u16 + * __VA_ARGS__: Argument to pass inside the callback. You can use @pixel_name to access current + * pixel. + */ +#define READ_LINE(function_name, pixel_name, pixel_type, callback, ...) \ +static void function_name(const struct vkms_plane_state *plane, int x_start, \ + int y_start, enum pixel_read_direction direction, int count, \ + struct pixel_argb_u16 out_pixel[]) \ +{ \ + struct pixel_argb_u16 *end = out_pixel + count; \ + int step = get_block_step_bytes(plane->frame_info->fb, direction, 0); \ + u8 *src_pixels; \ + \ + packed_pixels_addr_1x1(plane->frame_info, x_start, y_start, 0, &src_pixels); \ + \ + while (out_pixel < end) { \ + pixel_type *(pixel_name) = (pixel_type *)src_pixels; \ + *out_pixel = (callback)(__VA_ARGS__); \ + out_pixel += 1; \ + src_pixels += step; \ + } \ +} + +/** + * READ_LINE_ARGB8888() - Generic generator for ARGB8888 formats. + * The pixel type used is u8, so pixel_name[0]..pixel_name[n] are the n components of the pixel. + * + * @function_name: Function name to generate + * @pixel_name: temporary pixel to use in @a, @r, @g and @b parameters + * @a: alpha value + * @r: red value + * @g: green value + * @b: blue value + */ +#define READ_LINE_ARGB8888(function_name, pixel_name, a, r, g, b) \ + READ_LINE(function_name, pixel_name, u8, argb_u16_from_u8888, a, r, g, b) +/** + * READ_LINE_le16161616() - Generic generator for ARGB16161616 formats. + * The pixel type used is u16, so pixel_name[0]..pixel_name[n] are the n components of the pixel. + * + * @function_name: Function name to generate + * @pixel_name: temporary pixel to use in @a, @r, @g and @b parameters + * @a: alpha value + * @r: red value + * @g: green value + * @b: blue value + */ +#define READ_LINE_le16161616(function_name, pixel_name, a, r, g, b) \ + READ_LINE(function_name, pixel_name, __le16, argb_u16_from_le16161616, a, r, g, b) /* * The following functions are read_line function for each pixel format supported by VKMS. 
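To make the generator concrete: READ_LINE_ARGB8888(XRGB8888_read_line, px, 0xFF, px[2], px[1], px[0]) expands to roughly the function below, equivalent to the hand-written XRGB8888_read_line removed in the following hunk (expansion shown informally, whitespace reflowed):

static void XRGB8888_read_line(const struct vkms_plane_state *plane, int x_start,
                               int y_start, enum pixel_read_direction direction, int count,
                               struct pixel_argb_u16 out_pixel[])
{
        struct pixel_argb_u16 *end = out_pixel + count;
        int step = get_block_step_bytes(plane->frame_info->fb, direction, 0);
        u8 *src_pixels;

        packed_pixels_addr_1x1(plane->frame_info, x_start, y_start, 0, &src_pixels);

        while (out_pixel < end) {
                u8 *px = (u8 *)src_pixels;
                /* Little-endian XRGB8888: bytes are B, G, R, X; X is ignored,
                 * alpha is forced opaque. */
                *out_pixel = argb_u16_from_u8888(0xFF, px[2], px[1], px[0]);
                out_pixel += 1;
                src_pixels += step;
        }
}

Only the pixel type, the conversion callback and the channel order passed through __VA_ARGS__ differ between formats, which is what lets the BGR/RGBA and 16-bit variants below collapse into one-line instantiations.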
@@ -378,138 +447,27 @@ static void R4_read_line(const struct vkms_plane_state *plane, int x_start, Rx_read_line(plane, x_start, y_start, direction, count, out_pixel); } -static void R8_read_line(const struct vkms_plane_state *plane, int x_start, - int y_start, enum pixel_read_direction direction, int count, - struct pixel_argb_u16 out_pixel[]) -{ - struct pixel_argb_u16 *end = out_pixel + count; - u8 *src_pixels; - int step = get_block_step_bytes(plane->frame_info->fb, direction, 0); - - packed_pixels_addr_1x1(plane->frame_info, x_start, y_start, 0, &src_pixels); - - while (out_pixel < end) { - *out_pixel = argb_u16_from_gray8(*src_pixels); - src_pixels += step; - out_pixel += 1; - } -} - -static void ARGB8888_read_line(const struct vkms_plane_state *plane, int x_start, int y_start, - enum pixel_read_direction direction, int count, - struct pixel_argb_u16 out_pixel[]) -{ - struct pixel_argb_u16 *end = out_pixel + count; - u8 *src_pixels; - - packed_pixels_addr_1x1(plane->frame_info, x_start, y_start, 0, &src_pixels); - - int step = get_block_step_bytes(plane->frame_info->fb, direction, 0); - - while (out_pixel < end) { - u8 *px = (u8 *)src_pixels; - *out_pixel = argb_u16_from_u8888(px[3], px[2], px[1], px[0]); - out_pixel += 1; - src_pixels += step; - } -} - -static void XRGB8888_read_line(const struct vkms_plane_state *plane, int x_start, int y_start, - enum pixel_read_direction direction, int count, - struct pixel_argb_u16 out_pixel[]) -{ - struct pixel_argb_u16 *end = out_pixel + count; - u8 *src_pixels; - - packed_pixels_addr_1x1(plane->frame_info, x_start, y_start, 0, &src_pixels); - - int step = get_block_step_bytes(plane->frame_info->fb, direction, 0); - - while (out_pixel < end) { - u8 *px = (u8 *)src_pixels; - *out_pixel = argb_u16_from_u8888(255, px[2], px[1], px[0]); - out_pixel += 1; - src_pixels += step; - } -} - -static void ABGR8888_read_line(const struct vkms_plane_state *plane, int x_start, int y_start, - enum pixel_read_direction direction, int count, - struct pixel_argb_u16 out_pixel[]) -{ - struct pixel_argb_u16 *end = out_pixel + count; - u8 *src_pixels; - - packed_pixels_addr_1x1(plane->frame_info, x_start, y_start, 0, &src_pixels); - - int step = get_block_step_bytes(plane->frame_info->fb, direction, 0); - - while (out_pixel < end) { - u8 *px = (u8 *)src_pixels; - /* Switch blue and red pixels. 
*/ - *out_pixel = argb_u16_from_u8888(px[3], px[0], px[1], px[2]); - out_pixel += 1; - src_pixels += step; - } -} - -static void ARGB16161616_read_line(const struct vkms_plane_state *plane, int x_start, - int y_start, enum pixel_read_direction direction, int count, - struct pixel_argb_u16 out_pixel[]) -{ - struct pixel_argb_u16 *end = out_pixel + count; - u8 *src_pixels; - - packed_pixels_addr_1x1(plane->frame_info, x_start, y_start, 0, &src_pixels); - - int step = get_block_step_bytes(plane->frame_info->fb, direction, 0); - while (out_pixel < end) { - u16 *px = (u16 *)src_pixels; - *out_pixel = argb_u16_from_u16161616(px[3], px[2], px[1], px[0]); - out_pixel += 1; - src_pixels += step; - } -} +READ_LINE_ARGB8888(XRGB8888_read_line, px, 0xFF, px[2], px[1], px[0]) +READ_LINE_ARGB8888(XBGR8888_read_line, px, 0xFF, px[0], px[1], px[2]) -static void XRGB16161616_read_line(const struct vkms_plane_state *plane, int x_start, - int y_start, enum pixel_read_direction direction, int count, - struct pixel_argb_u16 out_pixel[]) -{ - struct pixel_argb_u16 *end = out_pixel + count; - u8 *src_pixels; - - packed_pixels_addr_1x1(plane->frame_info, x_start, y_start, 0, &src_pixels); - - int step = get_block_step_bytes(plane->frame_info->fb, direction, 0); - - while (out_pixel < end) { - __le16 *px = (__le16 *)src_pixels; - *out_pixel = argb_u16_from_le16161616(cpu_to_le16(0xFFFF), px[2], px[1], px[0]); - out_pixel += 1; - src_pixels += step; - } -} - -static void RGB565_read_line(const struct vkms_plane_state *plane, int x_start, - int y_start, enum pixel_read_direction direction, int count, - struct pixel_argb_u16 out_pixel[]) -{ - struct pixel_argb_u16 *end = out_pixel + count; - u8 *src_pixels; +READ_LINE_ARGB8888(ARGB8888_read_line, px, px[3], px[2], px[1], px[0]) +READ_LINE_ARGB8888(ABGR8888_read_line, px, px[3], px[0], px[1], px[2]) +READ_LINE_ARGB8888(RGBA8888_read_line, px, px[0], px[3], px[2], px[1]) +READ_LINE_ARGB8888(BGRA8888_read_line, px, px[0], px[1], px[2], px[3]) - packed_pixels_addr_1x1(plane->frame_info, x_start, y_start, 0, &src_pixels); +READ_LINE_ARGB8888(RGB888_read_line, px, 0xFF, px[2], px[1], px[0]) +READ_LINE_ARGB8888(BGR888_read_line, px, 0xFF, px[0], px[1], px[2]) - int step = get_block_step_bytes(plane->frame_info->fb, direction, 0); +READ_LINE_le16161616(ARGB16161616_read_line, px, px[3], px[2], px[1], px[0]) +READ_LINE_le16161616(ABGR16161616_read_line, px, px[3], px[0], px[1], px[2]) +READ_LINE_le16161616(XRGB16161616_read_line, px, cpu_to_le16(0xFFFF), px[2], px[1], px[0]) +READ_LINE_le16161616(XBGR16161616_read_line, px, cpu_to_le16(0xFFFF), px[0], px[1], px[2]) - while (out_pixel < end) { - __le16 *px = (__le16 *)src_pixels; +READ_LINE(RGB565_read_line, px, __le16, argb_u16_from_RGB565, px) +READ_LINE(BGR565_read_line, px, __le16, argb_u16_from_BGR565, px) - *out_pixel = argb_u16_from_RGB565(px); - out_pixel += 1; - src_pixels += step; - } -} +READ_LINE(R8_read_line, px, u8, argb_u16_from_gray8, *px) /* * This callback can be used for YUV formats where U and V values are @@ -521,35 +479,57 @@ static void RGB565_read_line(const struct vkms_plane_state *plane, int x_start, * - Convert YUV and YVU with the same function (a column swap is needed when setting up * plane->conversion_matrix) */ -static void semi_planar_yuv_read_line(const struct vkms_plane_state *plane, int x_start, - int y_start, enum pixel_read_direction direction, int count, - struct pixel_argb_u16 out_pixel[]) -{ - u8 *y_plane; - u8 *uv_plane; - - packed_pixels_addr_1x1(plane->frame_info, x_start, y_start, 
0, - &y_plane); - packed_pixels_addr_1x1(plane->frame_info, - x_start / plane->frame_info->fb->format->hsub, - y_start / plane->frame_info->fb->format->vsub, 1, - &uv_plane); - int step_y = get_block_step_bytes(plane->frame_info->fb, direction, 0); - int step_uv = get_block_step_bytes(plane->frame_info->fb, direction, 1); - int subsampling = get_subsampling(plane->frame_info->fb->format, direction); - int subsampling_offset = get_subsampling_offset(direction, x_start, y_start); - const struct conversion_matrix *conversion_matrix = &plane->conversion_matrix; - - for (int i = 0; i < count; i++) { - *out_pixel = argb_u16_from_yuv888(y_plane[0], uv_plane[0], uv_plane[1], - conversion_matrix); - out_pixel += 1; - y_plane += step_y; - if ((i + subsampling_offset + 1) % subsampling == 0) - uv_plane += step_uv; - } -} +/** + * READ_LINE_YUV_SEMIPLANAR() - Generic generator for a read_line function which can be used for yuv + * formats with two planes and block_w == block_h == 1. + * + * @function_name: Function name to generate + * @pixel_1_name: temporary pixel name for the first plane used in the @__VA_ARGS__ parameters + * @pixel_2_name: temporary pixel name for the second plane used in the @__VA_ARGS__ parameters + * @pixel_1_type: Used to specify the type you want to cast the pixel pointer on the plane 1 + * @pixel_2_type: Used to specify the type you want to cast the pixel pointer on the plane 2 + * @callback: Callback to call for each pixels. This function should take + * (struct conversion_matrix*, @__VA_ARGS__) as parameter and return a pixel_argb_u16 + * __VA_ARGS__: Argument to pass inside the callback. You can use @pixel_1_name and @pixel_2_name + * to access current pixel values + */ +#define READ_LINE_YUV_SEMIPLANAR(function_name, pixel_1_name, pixel_2_name, pixel_1_type, \ + pixel_2_type, callback, ...) \ +static void function_name(const struct vkms_plane_state *plane, int x_start, \ + int y_start, enum pixel_read_direction direction, int count, \ + struct pixel_argb_u16 out_pixel[]) \ +{ \ + u8 *plane_1; \ + u8 *plane_2; \ + \ + packed_pixels_addr_1x1(plane->frame_info, x_start, y_start, 0, \ + &plane_1); \ + packed_pixels_addr_1x1(plane->frame_info, \ + x_start / plane->frame_info->fb->format->hsub, \ + y_start / plane->frame_info->fb->format->vsub, 1, \ + &plane_2); \ + int step_1 = get_block_step_bytes(plane->frame_info->fb, direction, 0); \ + int step_2 = get_block_step_bytes(plane->frame_info->fb, direction, 1); \ + int subsampling = get_subsampling(plane->frame_info->fb->format, direction); \ + int subsampling_offset = get_subsampling_offset(direction, x_start, y_start); \ + const struct conversion_matrix *conversion_matrix = &plane->conversion_matrix; \ + \ + for (int i = 0; i < count; i++) { \ + pixel_1_type *(pixel_1_name) = (pixel_1_type *)plane_1; \ + pixel_2_type *(pixel_2_name) = (pixel_2_type *)plane_2; \ + *out_pixel = (callback)(conversion_matrix, __VA_ARGS__); \ + out_pixel += 1; \ + plane_1 += step_1; \ + if ((i + subsampling_offset + 1) % subsampling == 0) \ + plane_2 += step_2; \ + } \ +} + +READ_LINE_YUV_SEMIPLANAR(YUV888_semiplanar_read_line, y, uv, u8, u8, argb_u16_from_yuv161616, + y[0] * 257, uv[0] * 257, uv[1] * 257) +READ_LINE_YUV_SEMIPLANAR(YUV161616_semiplanar_read_line, y, uv, u16, u16, argb_u16_from_yuv161616, + y[0], uv[0], uv[1]) /* * This callback can be used for YUV format where each color component is * stored in a different plane (often called planar formats). 
It will @@ -586,8 +566,9 @@ static void planar_yuv_read_line(const struct vkms_plane_state *plane, int x_sta const struct conversion_matrix *conversion_matrix = &plane->conversion_matrix; for (int i = 0; i < count; i++) { - *out_pixel = argb_u16_from_yuv888(*y_plane, *channel_1_plane, *channel_2_plane, - conversion_matrix); + *out_pixel = argb_u16_from_yuv161616(conversion_matrix, + *y_plane * 257, *channel_1_plane * 257, + *channel_2_plane * 257); out_pixel += 1; y_plane += step_y; if ((i + subsampling_offset + 1) % subsampling == 0) { @@ -712,23 +693,43 @@ pixel_read_line_t get_pixel_read_line_function(u32 format) switch (format) { case DRM_FORMAT_ARGB8888: return &ARGB8888_read_line; - case DRM_FORMAT_XRGB8888: - return &XRGB8888_read_line; case DRM_FORMAT_ABGR8888: return &ABGR8888_read_line; + case DRM_FORMAT_BGRA8888: + return &BGRA8888_read_line; + case DRM_FORMAT_RGBA8888: + return &RGBA8888_read_line; + case DRM_FORMAT_XRGB8888: + return &XRGB8888_read_line; + case DRM_FORMAT_XBGR8888: + return &XBGR8888_read_line; + case DRM_FORMAT_RGB888: + return &RGB888_read_line; + case DRM_FORMAT_BGR888: + return &BGR888_read_line; case DRM_FORMAT_ARGB16161616: return &ARGB16161616_read_line; + case DRM_FORMAT_ABGR16161616: + return &ABGR16161616_read_line; case DRM_FORMAT_XRGB16161616: return &XRGB16161616_read_line; + case DRM_FORMAT_XBGR16161616: + return &XBGR16161616_read_line; case DRM_FORMAT_RGB565: return &RGB565_read_line; + case DRM_FORMAT_BGR565: + return &BGR565_read_line; case DRM_FORMAT_NV12: case DRM_FORMAT_NV16: case DRM_FORMAT_NV24: case DRM_FORMAT_NV21: case DRM_FORMAT_NV61: case DRM_FORMAT_NV42: - return &semi_planar_yuv_read_line; + return &YUV888_semiplanar_read_line; + case DRM_FORMAT_P010: + case DRM_FORMAT_P012: + case DRM_FORMAT_P016: + return &YUV161616_semiplanar_read_line; case DRM_FORMAT_YUV420: case DRM_FORMAT_YUV422: case DRM_FORMAT_YUV444: diff --git a/drivers/gpu/drm/vkms/vkms_formats.h b/drivers/gpu/drm/vkms/vkms_formats.h index b4fe62ab9c65..eeb208cdd6b1 100644 --- a/drivers/gpu/drm/vkms/vkms_formats.h +++ b/drivers/gpu/drm/vkms/vkms_formats.h @@ -14,8 +14,8 @@ void get_conversion_matrix_to_argb_u16(u32 format, enum drm_color_encoding encod struct conversion_matrix *matrix); #if IS_ENABLED(CONFIG_KUNIT) -struct pixel_argb_u16 argb_u16_from_yuv888(u8 y, u8 channel_1, u8 channel_2, - const struct conversion_matrix *matrix); +struct pixel_argb_u16 argb_u16_from_yuv161616(const struct conversion_matrix *matrix, + u16 y, u16 channel_1, u16 channel_2); #endif #endif /* _VKMS_FORMATS_H_ */ diff --git a/drivers/gpu/drm/vkms/vkms_output.c b/drivers/gpu/drm/vkms/vkms_output.c index 8d7ca0cdd79f..2ee3749e2b28 100644 --- a/drivers/gpu/drm/vkms/vkms_output.c +++ b/drivers/gpu/drm/vkms/vkms_output.c @@ -77,9 +77,22 @@ int vkms_output_init(struct vkms_device *vkmsdev) return ret; } + encoder_cfg->encoder->possible_clones |= + drm_encoder_mask(encoder_cfg->encoder); + vkms_config_encoder_for_each_possible_crtc(encoder_cfg, idx, possible_crtc) { encoder_cfg->encoder->possible_crtcs |= drm_crtc_mask(&possible_crtc->crtc->crtc); + + if (vkms_config_crtc_get_writeback(possible_crtc)) { + struct drm_encoder *wb_encoder = + &possible_crtc->crtc->wb_encoder; + + encoder_cfg->encoder->possible_clones |= + drm_encoder_mask(wb_encoder); + wb_encoder->possible_clones |= + drm_encoder_mask(encoder_cfg->encoder); + } } } diff --git a/drivers/gpu/drm/vkms/vkms_plane.c b/drivers/gpu/drm/vkms/vkms_plane.c index e3fdd161d0f0..e592e47a5736 100644 --- a/drivers/gpu/drm/vkms/vkms_plane.c +++ 
b/drivers/gpu/drm/vkms/vkms_plane.c @@ -14,11 +14,19 @@ static const u32 vkms_formats[] = { DRM_FORMAT_ARGB8888, - DRM_FORMAT_XRGB8888, DRM_FORMAT_ABGR8888, + DRM_FORMAT_BGRA8888, + DRM_FORMAT_RGBA8888, + DRM_FORMAT_XRGB8888, + DRM_FORMAT_XBGR8888, + DRM_FORMAT_RGB888, + DRM_FORMAT_BGR888, DRM_FORMAT_XRGB16161616, + DRM_FORMAT_XBGR16161616, DRM_FORMAT_ARGB16161616, + DRM_FORMAT_ABGR16161616, DRM_FORMAT_RGB565, + DRM_FORMAT_BGR565, DRM_FORMAT_NV12, DRM_FORMAT_NV16, DRM_FORMAT_NV24, @@ -31,6 +39,9 @@ static const u32 vkms_formats[] = { DRM_FORMAT_YVU420, DRM_FORMAT_YVU422, DRM_FORMAT_YVU444, + DRM_FORMAT_P010, + DRM_FORMAT_P012, + DRM_FORMAT_P016, DRM_FORMAT_R1, DRM_FORMAT_R2, DRM_FORMAT_R4, diff --git a/drivers/gpu/drm/vkms/vkms_writeback.c b/drivers/gpu/drm/vkms/vkms_writeback.c index fe163271d5b5..45d69a3b85f6 100644 --- a/drivers/gpu/drm/vkms/vkms_writeback.c +++ b/drivers/gpu/drm/vkms/vkms_writeback.c @@ -174,6 +174,8 @@ int vkms_enable_writeback_connector(struct vkms_device *vkmsdev, if (ret) return ret; vkms_output->wb_encoder.possible_crtcs |= drm_crtc_mask(&vkms_output->crtc); + vkms_output->wb_encoder.possible_clones |= + drm_encoder_mask(&vkms_output->wb_encoder); drm_connector_helper_add(&wb->base, &vkms_wb_conn_helper_funcs); diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c index bc0342c58b4b..8ff958d119be 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c @@ -750,7 +750,7 @@ static int vmw_setup_pci_resources(struct vmw_private *dev, dev->fifo_mem = devm_memremap(dev->drm.dev, fifo_start, fifo_size, - MEMREMAP_WB); + MEMREMAP_WB | MEMREMAP_DEC); if (IS_ERR(dev->fifo_mem)) { drm_err(&dev->drm, diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c index 819704ac675d..d539f25b5fbe 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c @@ -1497,6 +1497,7 @@ static int vmw_cmd_dma(struct vmw_private *dev_priv, SVGA3dCmdHeader *header) { struct vmw_bo *vmw_bo = NULL; + struct vmw_resource *res; struct vmw_surface *srf = NULL; VMW_DECLARE_CMD_VAR(*cmd, SVGA3dCmdSurfaceDMA); int ret; @@ -1532,18 +1533,24 @@ static int vmw_cmd_dma(struct vmw_private *dev_priv, dirty = (cmd->body.transfer == SVGA3D_WRITE_HOST_VRAM) ? 
VMW_RES_DIRTY_SET : 0; - ret = vmw_cmd_res_check(dev_priv, sw_context, vmw_res_surface, - dirty, user_surface_converter, - &cmd->body.host.sid, NULL); + ret = vmw_cmd_res_check(dev_priv, sw_context, vmw_res_surface, dirty, + user_surface_converter, &cmd->body.host.sid, + NULL); if (unlikely(ret != 0)) { if (unlikely(ret != -ERESTARTSYS)) VMW_DEBUG_USER("could not find surface for DMA.\n"); return ret; } - srf = vmw_res_to_srf(sw_context->res_cache[vmw_res_surface].res); + res = sw_context->res_cache[vmw_res_surface].res; + if (!res) { + VMW_DEBUG_USER("Invalid DMA surface.\n"); + return -EINVAL; + } - vmw_kms_cursor_snoop(srf, sw_context->fp->tfile, &vmw_bo->tbo, header); + srf = vmw_res_to_srf(res); + vmw_kms_cursor_snoop(srf, sw_context->fp->tfile, &vmw_bo->tbo, + header); return 0; } diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c b/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c index c2294abbe753..00be92da5509 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c @@ -538,7 +538,7 @@ static void vmw_event_fence_action_seq_passed(struct dma_fence *f, if (likely(eaction->tv_sec != NULL)) { struct timespec64 ts; - ktime_to_timespec64(f->timestamp); + ts = ktime_to_timespec64(f->timestamp); /* monotonic time, so no y2038 overflow */ *eaction->tv_sec = ts.tv_sec; *eaction->tv_usec = ts.tv_nsec / NSEC_PER_USEC; diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_gem.c b/drivers/gpu/drm/vmwgfx/vmwgfx_gem.c index c55382167c1b..eedf1fe60be7 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_gem.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_gem.c @@ -85,10 +85,10 @@ static int vmw_gem_vmap(struct drm_gem_object *obj, struct iosys_map *map) int ret; if (drm_gem_is_imported(obj)) { - ret = dma_buf_vmap(obj->dma_buf, map); + ret = dma_buf_vmap(obj->import_attach->dmabuf, map); if (!ret) { if (drm_WARN_ON(obj->dev, map->is_iomem)) { - dma_buf_vunmap(obj->dma_buf, map); + dma_buf_vunmap(obj->import_attach->dmabuf, map); return -EIO; } } @@ -102,7 +102,7 @@ static int vmw_gem_vmap(struct drm_gem_object *obj, struct iosys_map *map) static void vmw_gem_vunmap(struct drm_gem_object *obj, struct iosys_map *map) { if (drm_gem_is_imported(obj)) - dma_buf_vunmap(obj->dma_buf, map); + dma_buf_vunmap(obj->import_attach->dmabuf, map); else drm_gem_ttm_vunmap(obj, map); } @@ -284,11 +284,10 @@ static void vmw_bo_print_info(int id, struct vmw_bo *bo, struct seq_file *m) seq_printf(m, "\t\t0x%08x: %12zu bytes %s, type = %s", id, bo->tbo.base.size, placement, type); - seq_printf(m, ", priority = %u, pin_count = %u, GEM refs = %d, TTM refs = %d", + seq_printf(m, ", priority = %u, pin_count = %u, GEM refs = %d", bo->tbo.priority, bo->tbo.pin_count, - kref_read(&bo->tbo.base.refcount), - kref_read(&bo->tbo.kref)); + kref_read(&bo->tbo.base.refcount)); seq_puts(m, "\n"); } diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c index 05b1c54a070c..54ea1b513950 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c @@ -500,6 +500,7 @@ static const struct drm_framebuffer_funcs vmw_framebuffer_surface_funcs = { static int vmw_kms_new_framebuffer_surface(struct vmw_private *dev_priv, struct vmw_user_object *uo, struct vmw_framebuffer **out, + const struct drm_format_info *info, const struct drm_mode_fb_cmd2 *mode_cmd) @@ -548,7 +549,7 @@ static int vmw_kms_new_framebuffer_surface(struct vmw_private *dev_priv, goto out_err1; } - drm_helper_mode_fill_fb_struct(dev, &vfbs->base.base, mode_cmd); + drm_helper_mode_fill_fb_struct(dev, &vfbs->base.base, 
info, mode_cmd); memcpy(&vfbs->uo, uo, sizeof(vfbs->uo)); vmw_user_object_ref(&vfbs->uo); @@ -602,6 +603,7 @@ static const struct drm_framebuffer_funcs vmw_framebuffer_bo_funcs = { static int vmw_kms_new_framebuffer_bo(struct vmw_private *dev_priv, struct vmw_bo *bo, struct vmw_framebuffer **out, + const struct drm_format_info *info, const struct drm_mode_fb_cmd2 *mode_cmd) @@ -634,7 +636,7 @@ static int vmw_kms_new_framebuffer_bo(struct vmw_private *dev_priv, } vfbd->base.base.obj[0] = &bo->tbo.base; - drm_helper_mode_fill_fb_struct(dev, &vfbd->base.base, mode_cmd); + drm_helper_mode_fill_fb_struct(dev, &vfbd->base.base, info, mode_cmd); vfbd->base.bo = true; vfbd->buffer = vmw_bo_reference(bo); *out = &vfbd->base; @@ -679,11 +681,13 @@ vmw_kms_srf_ok(struct vmw_private *dev_priv, uint32_t width, uint32_t height) * @dev_priv: Pointer to device private struct. * @uo: Pointer to user object to wrap the kms framebuffer around. * Either the buffer or surface inside the user object must be NULL. + * @info: pixel format information. * @mode_cmd: Frame-buffer metadata. */ struct vmw_framebuffer * vmw_kms_new_framebuffer(struct vmw_private *dev_priv, struct vmw_user_object *uo, + const struct drm_format_info *info, const struct drm_mode_fb_cmd2 *mode_cmd) { struct vmw_framebuffer *vfb = NULL; @@ -692,10 +696,10 @@ vmw_kms_new_framebuffer(struct vmw_private *dev_priv, /* Create the new framebuffer depending one what we have */ if (vmw_user_object_surface(uo)) { ret = vmw_kms_new_framebuffer_surface(dev_priv, uo, &vfb, - mode_cmd); + info, mode_cmd); } else if (uo->buffer) { ret = vmw_kms_new_framebuffer_bo(dev_priv, uo->buffer, &vfb, - mode_cmd); + info, mode_cmd); } else { BUG(); } @@ -712,6 +716,7 @@ vmw_kms_new_framebuffer(struct vmw_private *dev_priv, static struct drm_framebuffer *vmw_kms_fb_create(struct drm_device *dev, struct drm_file *file_priv, + const struct drm_format_info *info, const struct drm_mode_fb_cmd2 *mode_cmd) { struct vmw_private *dev_priv = vmw_priv(dev); @@ -741,7 +746,7 @@ static struct drm_framebuffer *vmw_kms_fb_create(struct drm_device *dev, } - vfb = vmw_kms_new_framebuffer(dev_priv, &uo, mode_cmd); + vfb = vmw_kms_new_framebuffer(dev_priv, &uo, info, mode_cmd); if (IS_ERR(vfb)) { ret = PTR_ERR(vfb); goto err_out; diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.h b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.h index 511e29cdb987..445471fe9be6 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.h +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.h @@ -399,6 +399,7 @@ int vmw_kms_readback(struct vmw_private *dev_priv, struct vmw_framebuffer * vmw_kms_new_framebuffer(struct vmw_private *dev_priv, struct vmw_user_object *uo, + const struct drm_format_info *info, const struct drm_mode_fb_cmd2 *mode_cmd); void vmw_guess_mode_timing(struct drm_display_mode *mode); void vmw_kms_update_implicit_fb(struct vmw_private *dev_priv); diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_validation.c b/drivers/gpu/drm/vmwgfx/vmwgfx_validation.c index 7ee93e7191c7..35dc94c3db39 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_validation.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_validation.c @@ -308,8 +308,10 @@ int vmw_validation_add_resource(struct vmw_validation_context *ctx, hash_add_rcu(ctx->sw_context->res_ht, &node->hash.head, node->hash.key); } node->res = vmw_resource_reference_unless_doomed(res); - if (!node->res) + if (!node->res) { + hash_del_rcu(&node->hash.head); return -ESRCH; + } node->first_usage = 1; if (!res->dev_priv->has_mob) { @@ -636,7 +638,7 @@ void vmw_validation_drop_ht(struct vmw_validation_context *ctx) 
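The vc4, virtio-gpu and vmwgfx hunks in this range follow the same interface change: the framebuffer-creation paths gain a const struct drm_format_info *info parameter, so the format description resolved by the caller is passed down instead of being re-derived from mode_cmd->pixel_format. A minimal sketch of an .fb_create callback under the new signature (driver name hypothetical, based only on the prototypes visible in these hunks):

static struct drm_framebuffer *
example_fb_create(struct drm_device *dev, struct drm_file *file_priv,
                  const struct drm_format_info *info,
                  const struct drm_mode_fb_cmd2 *mode_cmd)
{
        /* Forward the already-resolved format info together with mode_cmd. */
        return drm_gem_fb_create(dev, file_priv, info, mode_cmd);
}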
hash_del_rcu(&val->hash.head); list_for_each_entry(val, &ctx->resource_ctx_list, head) - hash_del_rcu(&entry->hash.head); + hash_del_rcu(&val->hash.head); ctx->sw_context = NULL; } diff --git a/drivers/gpu/drm/xe/Kconfig b/drivers/gpu/drm/xe/Kconfig index 30ed74ad29ab..7219f6b884b6 100644 --- a/drivers/gpu/drm/xe/Kconfig +++ b/drivers/gpu/drm/xe/Kconfig @@ -1,9 +1,11 @@ # SPDX-License-Identifier: GPL-2.0-only config DRM_XE tristate "Intel Xe2 Graphics" - depends on DRM && PCI && (m || (y && KUNIT=y)) + depends on DRM && PCI + depends on KUNIT || !KUNIT depends on INTEL_VSEC || !INTEL_VSEC depends on X86_PLATFORM_DEVICES || !(X86 && ACPI) + depends on PAGE_SIZE_4KB || COMPILE_TEST || BROKEN select INTERVAL_TREE # we need shmfs for the swappable backing store, and in particular # the shmem_readpage() which depends upon tmpfs @@ -38,12 +40,13 @@ config DRM_XE select DRM_TTM select DRM_TTM_HELPER select DRM_EXEC + select DRM_GPUSVM if !UML && DEVICE_PRIVATE select DRM_GPUVM select DRM_SCHED select MMU_NOTIFIER select WANT_DEV_COREDUMP select AUXILIARY_BUS - select HMM_MIRROR + select REGMAP if I2C help Driver for Intel Xe2 series GPUs and later. Experimental support for Xe series is also available. @@ -85,16 +88,18 @@ config DRM_XE_GPUSVM Enable this option if you want support for CPU to GPU address mirroring. - If in doubut say "Y". + If in doubt say "Y". -config DRM_XE_DEVMEM_MIRROR - bool "Enable device memory mirror" +config DRM_XE_PAGEMAP + bool "Enable device memory pool for SVM" depends on DRM_XE_GPUSVM select GET_FREE_REGION default y help - Disable this option only if you want to compile out without device - memory mirror. Will reduce KMD memory footprint when disabled. + Disable this option only if you don't want to expose local device + memory for SVM. Will reduce KMD memory footprint when disabled. + + If in doubt say "Y". config DRM_XE_FORCE_PROBE string "Force probe xe for selected Intel hardware IDs" diff --git a/drivers/gpu/drm/xe/Kconfig.debug b/drivers/gpu/drm/xe/Kconfig.debug index 01735c6ece8b..87902b4bd6d3 100644 --- a/drivers/gpu/drm/xe/Kconfig.debug +++ b/drivers/gpu/drm/xe/Kconfig.debug @@ -104,6 +104,7 @@ config DRM_XE_DEBUG_GUC config DRM_XE_USERPTR_INVAL_INJECT bool "Inject userptr invalidation -EINVAL errors" + depends on DRM_GPUSVM default n help Choose this option when debugging error paths that diff --git a/drivers/gpu/drm/xe/Makefile b/drivers/gpu/drm/xe/Makefile index f5f5775acdc0..d9c6cf0f189e 100644 --- a/drivers/gpu/drm/xe/Makefile +++ b/drivers/gpu/drm/xe/Makefile @@ -21,6 +21,13 @@ $(obj)/generated/%_wa_oob.c $(obj)/generated/%_wa_oob.h: $(obj)/xe_gen_wa_oob \ $(src)/xe_wa_oob.rules $(call cmd,wa_oob) +generated_device_oob := $(obj)/generated/xe_device_wa_oob.c $(obj)/generated/xe_device_wa_oob.h +quiet_cmd_device_wa_oob = GEN $(notdir $(generated_device_oob)) + cmd_device_wa_oob = mkdir -p $(@D); $^ $(generated_device_oob) +$(obj)/generated/%_device_wa_oob.c $(obj)/generated/%_device_wa_oob.h: $(obj)/xe_gen_wa_oob \ + $(src)/xe_device_wa_oob.rules + $(call cmd,device_wa_oob) + # Please keep these build lists sorted! 
# core driver code @@ -28,6 +35,7 @@ $(obj)/generated/%_wa_oob.c $(obj)/generated/%_wa_oob.h: $(obj)/xe_gen_wa_oob \ xe-y += xe_bb.o \ xe_bo.o \ xe_bo_evict.o \ + xe_dep_scheduler.o \ xe_devcoredump.o \ xe_device.o \ xe_device_sysfs.o \ @@ -53,7 +61,6 @@ xe-y += xe_bb.o \ xe_gt_pagefault.o \ xe_gt_sysfs.o \ xe_gt_throttle.o \ - xe_gt_tlb_invalidation.o \ xe_gt_topology.o \ xe_guc.o \ xe_guc_ads.o \ @@ -68,18 +75,23 @@ xe-y += xe_bb.o \ xe_guc_log.o \ xe_guc_pc.o \ xe_guc_submit.o \ + xe_guc_tlb_inval.o \ xe_heci_gsc.o \ xe_huc.o \ xe_hw_engine.o \ xe_hw_engine_class_sysfs.o \ xe_hw_engine_group.o \ + xe_hw_error.o \ xe_hw_fence.o \ xe_irq.o \ + xe_late_bind_fw.o \ xe_lrc.o \ xe_migrate.o \ xe_mmio.o \ + xe_mmio_gem.o \ xe_mocs.o \ xe_module.o \ + xe_nvm.o \ xe_oa.o \ xe_observation.o \ xe_pat.o \ @@ -87,6 +99,7 @@ xe-y += xe_bb.o \ xe_pcode.o \ xe_pm.o \ xe_preempt_fence.o \ + xe_psmi.o \ xe_pt.o \ xe_pt_walk.o \ xe_pxp.o \ @@ -106,6 +119,8 @@ xe-y += xe_bb.o \ xe_sync.o \ xe_tile.o \ xe_tile_sysfs.o \ + xe_tlb_inval.o \ + xe_tlb_inval_job.o \ xe_trace.o \ xe_trace_bo.o \ xe_trace_guc.o \ @@ -116,7 +131,9 @@ xe-y += xe_bb.o \ xe_tuning.o \ xe_uc.o \ xe_uc_fw.o \ + xe_validation.o \ xe_vm.o \ + xe_vm_madvise.o \ xe_vram.o \ xe_vram_freq.o \ xe_vsec.o \ @@ -124,8 +141,9 @@ xe-y += xe_bb.o \ xe_wait_user_fence.o \ xe_wopcm.o -xe-$(CONFIG_HMM_MIRROR) += xe_hmm.o +xe-$(CONFIG_I2C) += xe_i2c.o xe-$(CONFIG_DRM_XE_GPUSVM) += xe_svm.o +xe-$(CONFIG_DRM_GPUSVM) += xe_userptr.o # graphics hardware monitoring (HWMON) support xe-$(CONFIG_HWMON) += xe_hwmon.o @@ -140,6 +158,7 @@ xe-y += \ xe_memirq.o \ xe_sriov.o \ xe_sriov_vf.o \ + xe_sriov_vf_ccs.o \ xe_tile_sriov_vf.o xe-$(CONFIG_PCI_IOV) += \ @@ -154,7 +173,8 @@ xe-$(CONFIG_PCI_IOV) += \ xe_lmtt_2l.o \ xe_lmtt_ml.o \ xe_pci_sriov.o \ - xe_sriov_pf.o + xe_sriov_pf.o \ + xe_sriov_pf_service.o # include helpers for tests even when XE is built-in ifdef CONFIG_DRM_XE_KUNIT_TEST @@ -192,6 +212,7 @@ xe-$(CONFIG_DRM_XE_DISPLAY) += \ display/xe_dsb_buffer.o \ display/xe_fb_pin.o \ display/xe_hdcp_gsc.o \ + display/xe_panic.o \ display/xe_plane_initial.o \ display/xe_tdf.o @@ -205,7 +226,6 @@ xe-$(CONFIG_DRM_XE_DISPLAY) += \ i915-display/icl_dsi.o \ i915-display/intel_alpm.o \ i915-display/intel_atomic.o \ - i915-display/intel_atomic_plane.o \ i915-display/intel_audio.o \ i915-display/intel_backlight.o \ i915-display/intel_bios.o \ @@ -255,6 +275,7 @@ xe-$(CONFIG_DRM_XE_DISPLAY) += \ i915-display/intel_fbc.o \ i915-display/intel_fdi.o \ i915-display/intel_fifo_underrun.o \ + i915-display/intel_flipq.o \ i915-display/intel_frontbuffer.o \ i915-display/intel_global_state.o \ i915-display/intel_gmbus.o \ @@ -271,6 +292,7 @@ xe-$(CONFIG_DRM_XE_DISPLAY) += \ i915-display/intel_modeset_verify.o \ i915-display/intel_panel.o \ i915-display/intel_pfit.o \ + i915-display/intel_plane.o \ i915-display/intel_pmdemand.o \ i915-display/intel_pch.o \ i915-display/intel_pps.o \ @@ -306,6 +328,7 @@ ifeq ($(CONFIG_DEBUG_FS),y) xe_gt_stats.o \ xe_guc_debugfs.o \ xe_huc_debugfs.o \ + xe_tile_debugfs.o \ xe_uc_debugfs.o xe-$(CONFIG_PCI_IOV) += xe_gt_sriov_pf_debugfs.o @@ -338,4 +361,4 @@ $(obj)/%.hdrtest: $(src)/%.h FORCE $(call if_changed_dep,hdrtest) uses_generated_oob := $(addprefix $(obj)/, $(xe-y)) -$(uses_generated_oob): $(obj)/generated/xe_wa_oob.h +$(uses_generated_oob): $(obj)/generated/xe_wa_oob.h $(obj)/generated/xe_device_wa_oob.h diff --git a/drivers/gpu/drm/xe/abi/guc_actions_abi.h b/drivers/gpu/drm/xe/abi/guc_actions_abi.h index ff4f412c28d8..31090c69dfbe 100644 --- 
a/drivers/gpu/drm/xe/abi/guc_actions_abi.h +++ b/drivers/gpu/drm/xe/abi/guc_actions_abi.h @@ -117,6 +117,7 @@ enum xe_guc_action { XE_GUC_ACTION_ENTER_S_STATE = 0x501, XE_GUC_ACTION_EXIT_S_STATE = 0x502, XE_GUC_ACTION_GLOBAL_SCHED_POLICY_CHANGE = 0x506, + XE_GUC_ACTION_UPDATE_SCHEDULING_POLICIES_KLV = 0x509, XE_GUC_ACTION_SCHED_CONTEXT = 0x1000, XE_GUC_ACTION_SCHED_CONTEXT_MODE_SET = 0x1001, XE_GUC_ACTION_SCHED_CONTEXT_MODE_DONE = 0x1002, @@ -142,6 +143,7 @@ enum xe_guc_action { XE_GUC_ACTION_SET_ENG_UTIL_BUFF = 0x550A, XE_GUC_ACTION_SET_DEVICE_ENGINE_ACTIVITY_BUFFER = 0x550C, XE_GUC_ACTION_SET_FUNCTION_ENGINE_ACTIVITY_BUFFER = 0x550D, + XE_GUC_ACTION_OPT_IN_FEATURE_KLV = 0x550E, XE_GUC_ACTION_NOTIFY_MEMORY_CAT_ERROR = 0x6000, XE_GUC_ACTION_REPORT_PAGE_FAULT_REQ_DESC = 0x6002, XE_GUC_ACTION_PAGE_FAULT_RES_DESC = 0x6003, @@ -153,6 +155,8 @@ enum xe_guc_action { XE_GUC_ACTION_NOTIFY_FLUSH_LOG_BUFFER_TO_FILE = 0x8003, XE_GUC_ACTION_NOTIFY_CRASH_DUMP_POSTED = 0x8004, XE_GUC_ACTION_NOTIFY_EXCEPTION = 0x8005, + XE_GUC_ACTION_TEST_G2G_SEND = 0xF001, + XE_GUC_ACTION_TEST_G2G_RECV = 0xF002, XE_GUC_ACTION_LIMIT }; @@ -192,6 +196,14 @@ enum xe_guc_register_context_multi_lrc_param_offsets { XE_GUC_REGISTER_CONTEXT_MULTI_LRC_MSG_MIN_LEN = 11, }; +enum xe_guc_context_wq_item_offsets { + XE_GUC_CONTEXT_WQ_HEADER_DATA_0_TYPE_LEN = 0, + XE_GUC_CONTEXT_WQ_EL_INFO_DATA_1_CTX_DESC_LOW, + XE_GUC_CONTEXT_WQ_EL_INFO_DATA_2_GUCCTX_RINGTAIL_FREEZEPOCS, + XE_GUC_CONTEXT_WQ_EL_INFO_DATA_3_WI_FENCE_ID, + XE_GUC_CONTEXT_WQ_EL_CHILD_LIST_DATA_4_RINGTAIL, +}; + enum xe_guc_report_status { XE_GUC_REPORT_STATUS_UNKNOWN = 0x0, XE_GUC_REPORT_STATUS_ACKED = 0x1, @@ -271,4 +283,7 @@ enum xe_guc_g2g_type { #define XE_G2G_DEREGISTER_TILE REG_GENMASK(15, 12) #define XE_G2G_DEREGISTER_TYPE REG_GENMASK(11, 8) +/* invalid type for XE_GUC_ACTION_NOTIFY_MEMORY_CAT_ERROR */ +#define XE_GUC_CAT_ERR_TYPE_INVALID 0xdeadbeef + #endif diff --git a/drivers/gpu/drm/xe/abi/guc_actions_slpc_abi.h b/drivers/gpu/drm/xe/abi/guc_actions_slpc_abi.h index b28c8fa061f7..ce5c59517528 100644 --- a/drivers/gpu/drm/xe/abi/guc_actions_slpc_abi.h +++ b/drivers/gpu/drm/xe/abi/guc_actions_slpc_abi.h @@ -210,6 +210,11 @@ struct slpc_shared_data { u8 reserved_mode_definition[4096]; } __packed; +enum slpc_power_profile { + SLPC_POWER_PROFILE_BASE = 0x0, + SLPC_POWER_PROFILE_POWER_SAVING = 0x1 +}; + /** * DOC: SLPC H2G MESSAGE FORMAT * diff --git a/drivers/gpu/drm/xe/abi/guc_errors_abi.h b/drivers/gpu/drm/xe/abi/guc_errors_abi.h index ecf748fd87df..ad76b4baf42e 100644 --- a/drivers/gpu/drm/xe/abi/guc_errors_abi.h +++ b/drivers/gpu/drm/xe/abi/guc_errors_abi.h @@ -63,6 +63,7 @@ enum xe_guc_load_status { XE_GUC_LOAD_STATUS_HWCONFIG_START = 0x05, XE_GUC_LOAD_STATUS_HWCONFIG_DONE = 0x06, XE_GUC_LOAD_STATUS_HWCONFIG_ERROR = 0x07, + XE_GUC_LOAD_STATUS_BOOTROM_VERSION_MISMATCH = 0x08, XE_GUC_LOAD_STATUS_GDT_DONE = 0x10, XE_GUC_LOAD_STATUS_IDT_DONE = 0x20, XE_GUC_LOAD_STATUS_LAPIC_DONE = 0x30, @@ -75,6 +76,8 @@ enum xe_guc_load_status { XE_GUC_LOAD_STATUS_INVALID_INIT_DATA_RANGE_START, XE_GUC_LOAD_STATUS_MPU_DATA_INVALID = 0x73, XE_GUC_LOAD_STATUS_INIT_MMIO_SAVE_RESTORE_INVALID = 0x74, + XE_GUC_LOAD_STATUS_KLV_WORKAROUND_INIT_ERROR = 0x75, + XE_GUC_LOAD_STATUS_INVALID_FTR_FLAG = 0x76, XE_GUC_LOAD_STATUS_INVALID_INIT_DATA_RANGE_END, XE_GUC_LOAD_STATUS_READY = 0xF0, diff --git a/drivers/gpu/drm/xe/abi/guc_klvs_abi.h b/drivers/gpu/drm/xe/abi/guc_klvs_abi.h index 7de8f827281f..265a135e7061 100644 --- a/drivers/gpu/drm/xe/abi/guc_klvs_abi.h +++ 
b/drivers/gpu/drm/xe/abi/guc_klvs_abi.h @@ -16,6 +16,8 @@ * +===+=======+==============================================================+ * | 0 | 31:16 | **KEY** - KLV key identifier | * | | | - `GuC Self Config KLVs`_ | + * | | | - `GuC Opt In Feature KLVs`_ | + * | | | - `GuC Scheduling Policies KLVs`_ | * | | | - `GuC VGT Policy KLVs`_ | * | | | - `GuC VF Configuration KLVs`_ | * | | | | @@ -125,6 +127,57 @@ enum { }; /** + * DOC: GuC Opt In Feature KLVs + * + * `GuC KLV`_ keys available for use with OPT_IN_FEATURE_KLV + * + * _`GUC_KLV_OPT_IN_FEATURE_EXT_CAT_ERR_TYPE` : 0x4001 + * Adds an extra dword to the XE_GUC_ACTION_NOTIFY_MEMORY_CAT_ERROR G2H + * containing the type of the CAT error. On HW that does not support + * reporting the CAT error type, the extra dword is set to 0xdeadbeef. + * + * _`GUC_KLV_OPT_IN_FEATURE_DYNAMIC_INHIBIT_CONTEXT_SWITCH` : 0x4003 + * This KLV enables the Dynamic Inhibit Context Switch optimization, which + * consists in the GuC setting the CTX_CTRL_INHIBIT_SYN_CTX_SWITCH bit to + * zero in the CTX_CONTEXT_CONTROL register of LRCs that are submitted + * to an oversubscribed engine. This will cause those contexts to be + * switched out immediately if they hit an unsatisfied semaphore wait + * (instead of waiting the full timeslice duration). The bit is instead set + * to one if a single context is queued on the engine, to avoid it being + * switched out if there isn't another context that can run in its place. + */ + +#define GUC_KLV_OPT_IN_FEATURE_EXT_CAT_ERR_TYPE_KEY 0x4001 +#define GUC_KLV_OPT_IN_FEATURE_EXT_CAT_ERR_TYPE_LEN 0u + +#define GUC_KLV_OPT_IN_FEATURE_DYNAMIC_INHIBIT_CONTEXT_SWITCH_KEY 0x4003 +#define GUC_KLV_OPT_IN_FEATURE_DYNAMIC_INHIBIT_CONTEXT_SWITCH_LEN 0u + +/** + * DOC: GuC Scheduling Policies KLVs + * + * `GuC KLV`_ keys available for use with UPDATE_SCHEDULING_POLICIES_KLV. + * + * _`GUC_KLV_SCHEDULING_POLICIES_RENDER_COMPUTE_YIELD` : 0x1001 + * Some platforms do not allow concurrent execution of RCS and CCS + * workloads from different address spaces. By default, the GuC prioritizes + * RCS submissions over CCS ones, which can lead to CCS workloads being + * significantly (or completely) starved of execution time. This KLV allows + * the driver to specify a quantum (in ms) and a ratio (percentage value + * between 0 and 100), and the GuC will prioritize the CCS for that + * percentage of each quantum. For example, specifying 100ms and 30% will + * make the GuC prioritize the CCS for 30ms of every 100ms. + * Note that this does not necessarily mean that RCS and CCS engines will + * only be active for their percentage of the quantum, as the restriction + * only kicks in if both classes are fully busy with non-compatible address + * spaces; i.e., if one engine is idle or running the same address space, + * a pending job on the other engine will still be submitted to the HW no + * matter what the ratio is. + */ +#define GUC_KLV_SCHEDULING_POLICIES_RENDER_COMPUTE_YIELD_KEY 0x1001 +#define GUC_KLV_SCHEDULING_POLICIES_RENDER_COMPUTE_YIELD_LEN 2u + +/** + * DOC: GuC VGT Policy KLVs + * + * `GuC KLV`_ keys available for use with PF2GUC_UPDATE_VGT_POLICY.
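As a rough illustration of the KLV layout documented above, the sketch below assembles the three-dword payload for the RENDER_COMPUTE_YIELD policy (key 0x1001, length 2) that would accompany an XE_GUC_ACTION_UPDATE_SCHEDULING_POLICIES_KLV (0x509) H2G message. The KEY[31:16]/LEN[15:0] packing follows the KLV header table above; the ordering of the two value dwords (quantum in ms, then yield percentage) and the helper names are assumptions for illustration, not the driver's actual encoding.

#include <stdint.h>

#define YIELD_KLV_KEY	0x1001u	/* GUC_KLV_SCHEDULING_POLICIES_RENDER_COMPUTE_YIELD_KEY */
#define YIELD_KLV_LEN	2u	/* two value dwords follow the header */

/* Pack a KLV header dword: KEY in bits 31:16, LEN in bits 15:0. */
static inline uint32_t klv_header(uint32_t key, uint32_t len)
{
	return (key << 16) | (len & 0xffffu);
}

/*
 * Fill a 3-dword KLV: header, scheduling quantum in ms, CCS yield
 * percentage (0-100). The value ordering is an assumption for
 * illustration only.
 */
static void build_rcs_ccs_yield_klv(uint32_t klv[3], uint32_t quantum_ms,
				     uint32_t yield_pct)
{
	klv[0] = klv_header(YIELD_KLV_KEY, YIELD_KLV_LEN);
	klv[1] = quantum_ms;
	klv[2] = yield_pct;
}

Calling build_rcs_ccs_yield_klv(klv, 100, 30) reproduces the 100 ms / 30% example from the DOC comment: the GuC would prioritize the CCS for 30 ms of every 100 ms.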
@@ -362,12 +415,14 @@ enum { */ enum xe_guc_klv_ids { GUC_WORKAROUND_KLV_BLOCK_INTERRUPTS_WHEN_MGSR_BLOCKED = 0x9002, + GUC_WORKAROUND_KLV_DISABLE_PSMI_INTERRUPTS_AT_C6_ENTRY_RESTORE_AT_EXIT = 0x9004, GUC_WORKAROUND_KLV_ID_GAM_PFQ_SHADOW_TAIL_POLLING = 0x9005, GUC_WORKAROUND_KLV_ID_DISABLE_MTP_DURING_ASYNC_COMPUTE = 0x9007, GUC_WA_KLV_NP_RD_WRITE_TO_CLEAR_RCSM_AT_CGP_LATE_RESTORE = 0x9008, GUC_WORKAROUND_KLV_ID_BACK_TO_BACK_RCS_ENGINE_RESET = 0x9009, GUC_WA_KLV_WAKE_POWER_DOMAINS_FOR_OUTBOUND_MMIO = 0x900a, GUC_WA_KLV_RESET_BB_STACK_PTR_ON_VF_SWITCH = 0x900b, + GUC_WA_KLV_RESTORE_UNSAVED_MEDIA_CONTROL_REG = 0x900c, }; #endif diff --git a/drivers/gpu/drm/xe/compat-i915-headers/gem/i915_gem_stolen.h b/drivers/gpu/drm/xe/compat-i915-headers/gem/i915_gem_stolen.h index 41d39d67817a..f097fc6d5127 100644 --- a/drivers/gpu/drm/xe/compat-i915-headers/gem/i915_gem_stolen.h +++ b/drivers/gpu/drm/xe/compat-i915-headers/gem/i915_gem_stolen.h @@ -8,6 +8,7 @@ #include "xe_ttm_stolen_mgr.h" #include "xe_res_cursor.h" +#include "xe_validation.h" struct xe_bo; @@ -21,7 +22,7 @@ static inline int i915_gem_stolen_insert_node_in_range(struct xe_device *xe, u32 start, u32 end) { struct xe_bo *bo; - int err; + int err = 0; u32 flags = XE_BO_FLAG_PINNED | XE_BO_FLAG_STOLEN; if (start < SZ_4K) @@ -32,21 +33,13 @@ static inline int i915_gem_stolen_insert_node_in_range(struct xe_device *xe, start = ALIGN(start, align); } - bo = xe_bo_create_locked_range(xe, xe_device_get_root_tile(xe), - NULL, size, start, end, - ttm_bo_type_kernel, flags, 0); + bo = xe_bo_create_pin_range_novm(xe, xe_device_get_root_tile(xe), + size, start, end, ttm_bo_type_kernel, flags); if (IS_ERR(bo)) { err = PTR_ERR(bo); bo = NULL; return err; } - err = xe_bo_pin(bo); - xe_bo_unlock_vm_held(bo); - - if (err) { - xe_bo_put(fb->bo); - bo = NULL; - } fb->bo = bo; diff --git a/drivers/gpu/drm/xe/compat-i915-headers/i915_drv.h b/drivers/gpu/drm/xe/compat-i915-headers/i915_drv.h index 9b7572e06f34..b8269391bc69 100644 --- a/drivers/gpu/drm/xe/compat-i915-headers/i915_drv.h +++ b/drivers/gpu/drm/xe/compat-i915-headers/i915_drv.h @@ -12,7 +12,6 @@ #include <drm/drm_drv.h> -#include "i915_utils.h" #include "xe_device.h" /* for xe_device_has_flat_ccs() */ #include "xe_device_types.h" @@ -26,34 +25,13 @@ static inline struct drm_i915_private *to_i915(const struct drm_device *dev) #define IS_I915G(dev_priv) (dev_priv && 0) #define IS_I915GM(dev_priv) (dev_priv && 0) #define IS_PINEVIEW(dev_priv) (dev_priv && 0) -#define IS_IVYBRIDGE(dev_priv) (dev_priv && 0) #define IS_VALLEYVIEW(dev_priv) (dev_priv && 0) #define IS_CHERRYVIEW(dev_priv) (dev_priv && 0) #define IS_HASWELL(dev_priv) (dev_priv && 0) #define IS_BROADWELL(dev_priv) (dev_priv && 0) -#define IS_SKYLAKE(dev_priv) (dev_priv && 0) #define IS_BROXTON(dev_priv) (dev_priv && 0) -#define IS_KABYLAKE(dev_priv) (dev_priv && 0) #define IS_GEMINILAKE(dev_priv) (dev_priv && 0) -#define IS_COFFEELAKE(dev_priv) (dev_priv && 0) -#define IS_COMETLAKE(dev_priv) (dev_priv && 0) -#define IS_ICELAKE(dev_priv) (dev_priv && 0) -#define IS_JASPERLAKE(dev_priv) (dev_priv && 0) -#define IS_ELKHARTLAKE(dev_priv) (dev_priv && 0) -#define IS_TIGERLAKE(dev_priv) IS_PLATFORM(dev_priv, XE_TIGERLAKE) -#define IS_ROCKETLAKE(dev_priv) IS_PLATFORM(dev_priv, XE_ROCKETLAKE) -#define IS_DG1(dev_priv) IS_PLATFORM(dev_priv, XE_DG1) -#define IS_ALDERLAKE_S(dev_priv) IS_PLATFORM(dev_priv, XE_ALDERLAKE_S) -#define IS_ALDERLAKE_P(dev_priv) (IS_PLATFORM(dev_priv, XE_ALDERLAKE_P) || \ - IS_PLATFORM(dev_priv, XE_ALDERLAKE_N)) #define 
IS_DG2(dev_priv) IS_PLATFORM(dev_priv, XE_DG2) -#define IS_METEORLAKE(dev_priv) IS_PLATFORM(dev_priv, XE_METEORLAKE) -#define IS_LUNARLAKE(dev_priv) IS_PLATFORM(dev_priv, XE_LUNARLAKE) -#define IS_BATTLEMAGE(dev_priv) IS_PLATFORM(dev_priv, XE_BATTLEMAGE) -#define IS_PANTHERLAKE(dev_priv) IS_PLATFORM(dev_priv, XE_PANTHERLAKE) - -#define IS_HASWELL_ULT(dev_priv) (dev_priv && 0) -#define IS_BROADWELL_ULT(dev_priv) (dev_priv && 0) #define IS_MOBILE(xe) (xe && 0) diff --git a/drivers/gpu/drm/xe/compat-i915-headers/intel_pcode.h b/drivers/gpu/drm/xe/compat-i915-headers/intel_pcode.h index a473aa6697d0..4fcd3bf6b76f 100644 --- a/drivers/gpu/drm/xe/compat-i915-headers/intel_pcode.h +++ b/drivers/gpu/drm/xe/compat-i915-headers/intel_pcode.h @@ -6,37 +6,6 @@ #ifndef __INTEL_PCODE_H__ #define __INTEL_PCODE_H__ -#include "intel_uncore.h" #include "xe_pcode.h" -static inline int -snb_pcode_write_timeout(struct intel_uncore *uncore, u32 mbox, u32 val, - int fast_timeout_us, int slow_timeout_ms) -{ - return xe_pcode_write_timeout(__compat_uncore_to_tile(uncore), mbox, val, - slow_timeout_ms ?: 1); -} - -static inline int -snb_pcode_write(struct intel_uncore *uncore, u32 mbox, u32 val) -{ - - return xe_pcode_write(__compat_uncore_to_tile(uncore), mbox, val); -} - -static inline int -snb_pcode_read(struct intel_uncore *uncore, u32 mbox, u32 *val, u32 *val1) -{ - return xe_pcode_read(__compat_uncore_to_tile(uncore), mbox, val, val1); -} - -static inline int -skl_pcode_request(struct intel_uncore *uncore, u32 mbox, - u32 request, u32 reply_mask, u32 reply, - int timeout_base_ms) -{ - return xe_pcode_request(__compat_uncore_to_tile(uncore), mbox, request, reply_mask, reply, - timeout_base_ms); -} - #endif /* __INTEL_PCODE_H__ */ diff --git a/drivers/gpu/drm/xe/compat-i915-headers/intel_uncore.h b/drivers/gpu/drm/xe/compat-i915-headers/intel_uncore.h index 797091cf1c99..d012f02bc84f 100644 --- a/drivers/gpu/drm/xe/compat-i915-headers/intel_uncore.h +++ b/drivers/gpu/drm/xe/compat-i915-headers/intel_uncore.h @@ -24,13 +24,6 @@ static inline struct xe_mmio *__compat_uncore_to_mmio(struct intel_uncore *uncor return xe_root_tile_mmio(xe); } -static inline struct xe_tile *__compat_uncore_to_tile(struct intel_uncore *uncore) -{ - struct xe_device *xe = container_of(uncore, struct xe_device, uncore); - - return xe_device_get_root_tile(xe); -} - static inline u32 intel_uncore_read(struct intel_uncore *uncore, i915_reg_t i915_reg) { diff --git a/drivers/gpu/drm/xe/display/ext/i915_utils.c b/drivers/gpu/drm/xe/display/ext/i915_utils.c index 43b10a2cc508..1421c2a7b64d 100644 --- a/drivers/gpu/drm/xe/display/ext/i915_utils.c +++ b/drivers/gpu/drm/xe/display/ext/i915_utils.c @@ -4,6 +4,7 @@ */ #include "i915_drv.h" +#include "i915_utils.h" bool i915_vtd_active(struct drm_i915_private *i915) { diff --git a/drivers/gpu/drm/xe/display/intel_fbdev_fb.c b/drivers/gpu/drm/xe/display/intel_fbdev_fb.c index b28a94df824f..8ea9a472113c 100644 --- a/drivers/gpu/drm/xe/display/intel_fbdev_fb.c +++ b/drivers/gpu/drm/xe/display/intel_fbdev_fb.c @@ -41,12 +41,12 @@ struct intel_framebuffer *intel_fbdev_fb_alloc(struct drm_fb_helper *helper, size = PAGE_ALIGN(size); obj = ERR_PTR(-ENODEV); - if (!IS_DGFX(xe) && !XE_WA(xe_root_mmio_gt(xe), 22019338487_display)) { - obj = xe_bo_create_pin_map(xe, xe_device_get_root_tile(xe), - NULL, size, - ttm_bo_type_kernel, XE_BO_FLAG_SCANOUT | - XE_BO_FLAG_STOLEN | - XE_BO_FLAG_GGTT); + if (!IS_DGFX(xe) && !XE_GT_WA(xe_root_mmio_gt(xe), 22019338487_display)) { + obj = xe_bo_create_pin_map_novm(xe, 
xe_device_get_root_tile(xe), + size, + ttm_bo_type_kernel, XE_BO_FLAG_SCANOUT | + XE_BO_FLAG_STOLEN | + XE_BO_FLAG_GGTT, false); if (!IS_ERR(obj)) drm_info(&xe->drm, "Allocated fbdev into stolen\n"); else @@ -54,10 +54,10 @@ struct intel_framebuffer *intel_fbdev_fb_alloc(struct drm_fb_helper *helper, } if (IS_ERR(obj)) { - obj = xe_bo_create_pin_map(xe, xe_device_get_root_tile(xe), NULL, size, - ttm_bo_type_kernel, XE_BO_FLAG_SCANOUT | - XE_BO_FLAG_VRAM_IF_DGFX(xe_device_get_root_tile(xe)) | - XE_BO_FLAG_GGTT); + obj = xe_bo_create_pin_map_novm(xe, xe_device_get_root_tile(xe), size, + ttm_bo_type_kernel, XE_BO_FLAG_SCANOUT | + XE_BO_FLAG_VRAM_IF_DGFX(xe_device_get_root_tile(xe)) | + XE_BO_FLAG_GGTT, false); } if (IS_ERR(obj)) { @@ -66,7 +66,11 @@ struct intel_framebuffer *intel_fbdev_fb_alloc(struct drm_fb_helper *helper, goto err; } - fb = intel_framebuffer_create(&obj->ttm.base, &mode_cmd); + fb = intel_framebuffer_create(&obj->ttm.base, + drm_get_format_info(dev, + mode_cmd.pixel_format, + mode_cmd.modifier[0]), + &mode_cmd); if (IS_ERR(fb)) { xe_bo_unpin_map_no_vm(obj); goto err; diff --git a/drivers/gpu/drm/xe/display/xe_display.c b/drivers/gpu/drm/xe/display/xe_display.c index e2e0771cf274..19e691fccf8c 100644 --- a/drivers/gpu/drm/xe/display/xe_display.c +++ b/drivers/gpu/drm/xe/display/xe_display.c @@ -20,7 +20,7 @@ #include "intel_audio.h" #include "intel_bw.h" #include "intel_display.h" -#include "intel_display_core.h" +#include "intel_display_device.h" #include "intel_display_driver.h" #include "intel_display_irq.h" #include "intel_display_types.h" @@ -37,13 +37,6 @@ /* Xe device functions */ -static bool has_display(struct xe_device *xe) -{ - struct intel_display *display = xe->display; - - return HAS_DISPLAY(display); -} - /** * xe_display_driver_probe_defer - Detect if we need to wait for other drivers * early on @@ -96,6 +89,7 @@ static void xe_display_fini_early(void *arg) if (!xe->info.probe_display) return; + intel_hpd_cancel_work(display); intel_display_driver_remove_nogem(display); intel_display_driver_remove_noirq(display); intel_opregion_cleanup(display); @@ -289,7 +283,7 @@ static void xe_display_enable_d3cold(struct xe_device *xe) intel_dmc_suspend(display); - if (has_display(xe)) + if (intel_display_device_present(display)) intel_hpd_poll_enable(display); } @@ -302,14 +296,14 @@ static void xe_display_disable_d3cold(struct xe_device *xe) intel_dmc_resume(display); - if (has_display(xe)) + if (intel_display_device_present(display)) drm_mode_config_reset(&xe->drm); intel_display_driver_init_hw(display); intel_hpd_init(display); - if (has_display(xe)) + if (intel_display_device_present(display)) intel_hpd_poll_disable(display); intel_opregion_resume(display); @@ -332,7 +326,7 @@ void xe_display_pm_suspend(struct xe_device *xe) intel_power_domains_disable(display); drm_client_dev_suspend(&xe->drm, false); - if (has_display(xe)) { + if (intel_display_device_present(display)) { drm_kms_helper_poll_disable(&xe->drm); intel_display_driver_disable_user_access(display); intel_display_driver_suspend(display); @@ -340,9 +334,11 @@ void xe_display_pm_suspend(struct xe_device *xe) xe_display_flush_cleanup_work(xe); + intel_encoder_block_all_hpds(display); + intel_hpd_cancel_work(display); - if (has_display(xe)) { + if (intel_display_device_present(display)) { intel_display_driver_suspend_access(display); intel_encoder_suspend_all(display); } @@ -362,7 +358,7 @@ void xe_display_pm_shutdown(struct xe_device *xe) intel_power_domains_disable(display); 
drm_client_dev_suspend(&xe->drm, false); - if (has_display(xe)) { + if (intel_display_device_present(display)) { drm_kms_helper_poll_disable(&xe->drm); intel_display_driver_disable_user_access(display); intel_display_driver_suspend(display); @@ -370,9 +366,10 @@ void xe_display_pm_shutdown(struct xe_device *xe) xe_display_flush_cleanup_work(xe); intel_dp_mst_suspend(display); + intel_encoder_block_all_hpds(display); intel_hpd_cancel_work(display); - if (has_display(xe)) + if (intel_display_device_present(display)) intel_display_driver_suspend_access(display); intel_encoder_suspend_all(display); @@ -461,23 +458,25 @@ void xe_display_pm_resume(struct xe_device *xe) intel_dmc_resume(display); - if (has_display(xe)) + if (intel_display_device_present(display)) drm_mode_config_reset(&xe->drm); intel_display_driver_init_hw(display); - if (has_display(xe)) + if (intel_display_device_present(display)) intel_display_driver_resume_access(display); intel_hpd_init(display); - if (has_display(xe)) { + intel_encoder_unblock_all_hpds(display); + + if (intel_display_device_present(display)) { intel_display_driver_resume(display); drm_kms_helper_poll_enable(&xe->drm); intel_display_driver_enable_user_access(display); } - if (has_display(xe)) + if (intel_display_device_present(display)) intel_hpd_poll_disable(display); intel_opregion_resume(display); @@ -542,7 +541,7 @@ int xe_display_probe(struct xe_device *xe) xe->display = display; - if (has_display(xe)) + if (intel_display_device_present(display)) return 0; no_display: diff --git a/drivers/gpu/drm/xe/display/xe_display_wa.c b/drivers/gpu/drm/xe/display/xe_display_wa.c index 68d1387d81a0..8ada1cbcb16c 100644 --- a/drivers/gpu/drm/xe/display/xe_display_wa.c +++ b/drivers/gpu/drm/xe/display/xe_display_wa.c @@ -14,5 +14,5 @@ bool intel_display_needs_wa_16023588340(struct intel_display *display) { struct xe_device *xe = to_xe_device(display->drm); - return XE_WA(xe_root_mmio_gt(xe), 16023588340); + return XE_GT_WA(xe_root_mmio_gt(xe), 16023588340); } diff --git a/drivers/gpu/drm/xe/display/xe_dsb_buffer.c b/drivers/gpu/drm/xe/display/xe_dsb_buffer.c index f95375451e2f..58581d7aaae6 100644 --- a/drivers/gpu/drm/xe/display/xe_dsb_buffer.c +++ b/drivers/gpu/drm/xe/display/xe_dsb_buffer.c @@ -17,10 +17,7 @@ u32 intel_dsb_buffer_ggtt_offset(struct intel_dsb_buffer *dsb_buf) void intel_dsb_buffer_write(struct intel_dsb_buffer *dsb_buf, u32 idx, u32 val) { - struct xe_device *xe = dsb_buf->vma->bo->tile->xe; - iosys_map_wr(&dsb_buf->vma->bo->vmap, idx * 4, u32, val); - xe_device_l2_flush(xe); } u32 intel_dsb_buffer_read(struct intel_dsb_buffer *dsb_buf, u32 idx) @@ -30,12 +27,9 @@ u32 intel_dsb_buffer_read(struct intel_dsb_buffer *dsb_buf, u32 idx) void intel_dsb_buffer_memset(struct intel_dsb_buffer *dsb_buf, u32 idx, u32 val, size_t size) { - struct xe_device *xe = dsb_buf->vma->bo->tile->xe; - WARN_ON(idx > (dsb_buf->buf_size - size) / sizeof(*dsb_buf->cmd_buf)); iosys_map_memset(&dsb_buf->vma->bo->vmap, idx * 4, val, size); - xe_device_l2_flush(xe); } bool intel_dsb_buffer_create(struct intel_crtc *crtc, struct intel_dsb_buffer *dsb_buf, size_t size) @@ -49,11 +43,11 @@ bool intel_dsb_buffer_create(struct intel_crtc *crtc, struct intel_dsb_buffer *d return false; /* Set scanout flag for WC mapping */ - obj = xe_bo_create_pin_map(xe, xe_device_get_root_tile(xe), - NULL, PAGE_ALIGN(size), - ttm_bo_type_kernel, - XE_BO_FLAG_VRAM_IF_DGFX(xe_device_get_root_tile(xe)) | - XE_BO_FLAG_SCANOUT | XE_BO_FLAG_GGTT); + obj = xe_bo_create_pin_map_novm(xe, 
xe_device_get_root_tile(xe), + PAGE_ALIGN(size), + ttm_bo_type_kernel, + XE_BO_FLAG_VRAM_IF_DGFX(xe_device_get_root_tile(xe)) | + XE_BO_FLAG_SCANOUT | XE_BO_FLAG_GGTT, false); if (IS_ERR(obj)) { kfree(vma); return false; @@ -74,9 +68,12 @@ void intel_dsb_buffer_cleanup(struct intel_dsb_buffer *dsb_buf) void intel_dsb_buffer_flush_map(struct intel_dsb_buffer *dsb_buf) { + struct xe_device *xe = dsb_buf->vma->bo->tile->xe; + /* * The memory barrier here is to ensure coherency of DSB vs MMIO, * both for weak ordering archs and discrete cards. */ - xe_device_wmb(dsb_buf->vma->bo->tile->xe); + xe_device_wmb(xe); + xe_device_l2_flush(xe); } diff --git a/drivers/gpu/drm/xe/display/xe_fb_pin.c b/drivers/gpu/drm/xe/display/xe_fb_pin.c index 6b362695d6b6..1fd4a815e784 100644 --- a/drivers/gpu/drm/xe/display/xe_fb_pin.c +++ b/drivers/gpu/drm/xe/display/xe_fb_pin.c @@ -16,6 +16,7 @@ #include "xe_device.h" #include "xe_ggtt.h" #include "xe_pm.h" +#include "xe_vram_types.h" static void write_dpt_rotated(struct xe_bo *bo, struct iosys_map *map, u32 *dpt_ofs, u32 bo_ofs, @@ -101,29 +102,29 @@ static int __xe_pin_fb_vma_dpt(const struct intel_framebuffer *fb, XE_PAGE_SIZE); if (IS_DGFX(xe)) - dpt = xe_bo_create_pin_map_at_aligned(xe, tile0, NULL, - dpt_size, ~0ull, - ttm_bo_type_kernel, - XE_BO_FLAG_VRAM0 | - XE_BO_FLAG_GGTT | - XE_BO_FLAG_PAGETABLE, - alignment); + dpt = xe_bo_create_pin_map_at_novm(xe, tile0, + dpt_size, ~0ull, + ttm_bo_type_kernel, + XE_BO_FLAG_VRAM0 | + XE_BO_FLAG_GGTT | + XE_BO_FLAG_PAGETABLE, + alignment, false); else - dpt = xe_bo_create_pin_map_at_aligned(xe, tile0, NULL, - dpt_size, ~0ull, - ttm_bo_type_kernel, - XE_BO_FLAG_STOLEN | - XE_BO_FLAG_GGTT | - XE_BO_FLAG_PAGETABLE, - alignment); + dpt = xe_bo_create_pin_map_at_novm(xe, tile0, + dpt_size, ~0ull, + ttm_bo_type_kernel, + XE_BO_FLAG_STOLEN | + XE_BO_FLAG_GGTT | + XE_BO_FLAG_PAGETABLE, + alignment, false); if (IS_ERR(dpt)) - dpt = xe_bo_create_pin_map_at_aligned(xe, tile0, NULL, - dpt_size, ~0ull, - ttm_bo_type_kernel, - XE_BO_FLAG_SYSTEM | - XE_BO_FLAG_GGTT | - XE_BO_FLAG_PAGETABLE, - alignment); + dpt = xe_bo_create_pin_map_at_novm(xe, tile0, + dpt_size, ~0ull, + ttm_bo_type_kernel, + XE_BO_FLAG_SYSTEM | + XE_BO_FLAG_GGTT | + XE_BO_FLAG_PAGETABLE, + alignment, false); if (IS_ERR(dpt)) return PTR_ERR(dpt); @@ -163,6 +164,9 @@ static int __xe_pin_fb_vma_dpt(const struct intel_framebuffer *fb, vma->dpt = dpt; vma->node = dpt->ggtt_node[tile0->id]; + + /* Ensure DPT writes are flushed */ + xe_device_l2_flush(xe); return 0; } @@ -224,7 +228,7 @@ static int __xe_pin_fb_vma_ggtt(const struct intel_framebuffer *fb, goto out_unlock; } - ret = xe_ggtt_node_insert_locked(vma->node, bo->size, align, 0); + ret = xe_ggtt_node_insert_locked(vma->node, xe_bo_size(bo), align, 0); if (ret) { xe_ggtt_node_fini(vma->node); goto out_unlock; @@ -277,7 +281,9 @@ static struct i915_vma *__xe_pin_fb_vma(const struct intel_framebuffer *fb, struct i915_vma *vma = kzalloc(sizeof(*vma), GFP_KERNEL); struct drm_gem_object *obj = intel_fb_bo(&fb->base); struct xe_bo *bo = gem_to_xe_bo(obj); - int ret; + struct xe_validation_ctx ctx; + struct drm_exec exec; + int ret = 0; if (!vma) return ERR_PTR(-ENODEV); @@ -286,7 +292,7 @@ static struct i915_vma *__xe_pin_fb_vma(const struct intel_framebuffer *fb, if (IS_DGFX(to_xe_device(bo->ttm.base.dev)) && intel_fb_rc_ccs_cc_plane(&fb->base) >= 0 && !(bo->flags & XE_BO_FLAG_NEEDS_CPU_ACCESS)) { - struct xe_tile *tile = xe_device_get_root_tile(xe); + struct xe_vram_region *vram = 
xe_device_get_root_tile(xe)->mem.vram; /* * If we need to able to access the clear-color value stored in @@ -294,7 +300,7 @@ static struct i915_vma *__xe_pin_fb_vma(const struct intel_framebuffer *fb, * accessible. This is important on small-bar systems where * only some subset of VRAM is CPU accessible. */ - if (tile->mem.vram.io_size < tile->mem.vram.usable_size) { + if (xe_vram_region_io_size(vram) < xe_vram_region_usable_size(vram)) { ret = -EINVAL; goto err; } @@ -304,17 +310,22 @@ static struct i915_vma *__xe_pin_fb_vma(const struct intel_framebuffer *fb, * Pin the framebuffer, we can't use xe_bo_(un)pin functions as the * assumptions are incorrect for framebuffers */ - ret = ttm_bo_reserve(&bo->ttm, false, false, NULL); - if (ret) - goto err; - - if (IS_DGFX(xe)) - ret = xe_bo_migrate(bo, XE_PL_VRAM0); - else - ret = xe_bo_validate(bo, NULL, true); - if (!ret) - ttm_bo_pin(&bo->ttm); - ttm_bo_unreserve(&bo->ttm); + xe_validation_guard(&ctx, &xe->val, &exec, (struct xe_val_flags) {.interruptible = true}, + ret) { + ret = drm_exec_lock_obj(&exec, &bo->ttm.base); + drm_exec_retry_on_contention(&exec); + if (ret) + break; + + if (IS_DGFX(xe)) + ret = xe_bo_migrate(bo, XE_PL_VRAM0, NULL, &exec); + else + ret = xe_bo_validate(bo, NULL, true, &exec); + drm_exec_retry_on_contention(&exec); + xe_validation_retry_on_oom(&ctx, &ret); + if (!ret) + ttm_bo_pin(&bo->ttm); + } if (ret) goto err; @@ -326,8 +337,6 @@ static struct i915_vma *__xe_pin_fb_vma(const struct intel_framebuffer *fb, if (ret) goto err_unpin; - /* Ensure DPT writes are flushed */ - xe_device_l2_flush(xe); return vma; err_unpin: @@ -381,6 +390,7 @@ static bool reuse_vma(struct intel_plane_state *new_plane_state, const struct intel_plane_state *old_plane_state) { struct intel_framebuffer *fb = to_intel_framebuffer(new_plane_state->hw.fb); + struct intel_plane *plane = to_intel_plane(new_plane_state->uapi.plane); struct xe_device *xe = to_xe_device(fb->base.dev); struct intel_display *display = xe->display; struct i915_vma *vma; @@ -404,6 +414,10 @@ static bool reuse_vma(struct intel_plane_state *new_plane_state, found: refcount_inc(&vma->ref); new_plane_state->ggtt_vma = vma; + + new_plane_state->surf = i915_ggtt_offset(new_plane_state->ggtt_vma) + + plane->surf_offset(new_plane_state); + return true; } @@ -430,6 +444,10 @@ int intel_plane_pin_fb(struct intel_plane_state *new_plane_state, return PTR_ERR(vma); new_plane_state->ggtt_vma = vma; + + new_plane_state->surf = i915_ggtt_offset(new_plane_state->ggtt_vma) + + plane->surf_offset(new_plane_state); + return 0; } @@ -457,3 +475,8 @@ u64 intel_dpt_offset(struct i915_vma *dpt_vma) { return 0; } + +void intel_fb_get_map(struct i915_vma *vma, struct iosys_map *map) +{ + *map = vma->bo->vmap; +} diff --git a/drivers/gpu/drm/xe/display/xe_hdcp_gsc.c b/drivers/gpu/drm/xe/display/xe_hdcp_gsc.c index b35a6f201d4a..4ae847b628e2 100644 --- a/drivers/gpu/drm/xe/display/xe_hdcp_gsc.c +++ b/drivers/gpu/drm/xe/display/xe_hdcp_gsc.c @@ -72,10 +72,10 @@ static int intel_hdcp_gsc_initialize_message(struct xe_device *xe, int ret = 0; /* allocate object of two page for HDCP command memory and store it */ - bo = xe_bo_create_pin_map(xe, xe_device_get_root_tile(xe), NULL, PAGE_SIZE * 2, - ttm_bo_type_kernel, - XE_BO_FLAG_SYSTEM | - XE_BO_FLAG_GGTT); + bo = xe_bo_create_pin_map_novm(xe, xe_device_get_root_tile(xe), PAGE_SIZE * 2, + ttm_bo_type_kernel, + XE_BO_FLAG_SYSTEM | + XE_BO_FLAG_GGTT, false); if (IS_ERR(bo)) { drm_err(&xe->drm, "Failed to allocate bo for HDCP streaming command!\n"); @@ 
-85,7 +85,7 @@ static int intel_hdcp_gsc_initialize_message(struct xe_device *xe, cmd_in = xe_bo_ggtt_addr(bo); cmd_out = cmd_in + PAGE_SIZE; - xe_map_memset(xe, &bo->vmap, 0, 0, bo->size); + xe_map_memset(xe, &bo->vmap, 0, 0, xe_bo_size(bo)); gsc_context->hdcp_bo = bo; gsc_context->hdcp_cmd_in = cmd_in; diff --git a/drivers/gpu/drm/xe/display/xe_panic.c b/drivers/gpu/drm/xe/display/xe_panic.c new file mode 100644 index 000000000000..f32b23338331 --- /dev/null +++ b/drivers/gpu/drm/xe/display/xe_panic.c @@ -0,0 +1,80 @@ +// SPDX-License-Identifier: MIT +/* Copyright © 2025 Intel Corporation */ + +#include <drm/drm_cache.h> +#include <drm/drm_panic.h> + +#include "intel_display_types.h" +#include "intel_fb.h" +#include "intel_panic.h" +#include "xe_bo.h" + +struct intel_panic { + struct page **pages; + int page; + void *vaddr; +}; + +static void xe_panic_kunmap(struct intel_panic *panic) +{ + if (panic->vaddr) { + drm_clflush_virt_range(panic->vaddr, PAGE_SIZE); + kunmap_local(panic->vaddr); + panic->vaddr = NULL; + } +} + +/* + * The scanout buffer pages are not mapped, so for each pixel, + * use kmap_local_page_try_from_panic() to map the page, and write the pixel. + * Try to keep the map from the previous pixel, to avoid too much map/unmap. + */ +static void xe_panic_page_set_pixel(struct drm_scanout_buffer *sb, unsigned int x, + unsigned int y, u32 color) +{ + struct intel_framebuffer *fb = (struct intel_framebuffer *)sb->private; + struct intel_panic *panic = fb->panic; + struct xe_bo *bo = gem_to_xe_bo(intel_fb_bo(&fb->base)); + unsigned int new_page; + unsigned int offset; + + if (fb->panic_tiling) + offset = fb->panic_tiling(sb->width, x, y); + else + offset = y * sb->pitch[0] + x * sb->format->cpp[0]; + + new_page = offset >> PAGE_SHIFT; + offset = offset % PAGE_SIZE; + if (new_page != panic->page) { + xe_panic_kunmap(panic); + panic->page = new_page; + panic->vaddr = ttm_bo_kmap_try_from_panic(&bo->ttm, + panic->page); + } + if (panic->vaddr) { + u32 *pix = panic->vaddr + offset; + *pix = color; + } +} + +struct intel_panic *intel_panic_alloc(void) +{ + struct intel_panic *panic; + + panic = kzalloc(sizeof(*panic), GFP_KERNEL); + + return panic; +} + +int intel_panic_setup(struct intel_panic *panic, struct drm_scanout_buffer *sb) +{ + panic->page = -1; + sb->set_pixel = xe_panic_page_set_pixel; + return 0; +} + +void intel_panic_finish(struct intel_panic *panic) +{ + xe_panic_kunmap(panic); + panic->page = -1; +} diff --git a/drivers/gpu/drm/xe/display/xe_plane_initial.c b/drivers/gpu/drm/xe/display/xe_plane_initial.c index af47ce34102c..94f00def811b 100644 --- a/drivers/gpu/drm/xe/display/xe_plane_initial.c +++ b/drivers/gpu/drm/xe/display/xe_plane_initial.c @@ -10,7 +10,7 @@ #include "xe_ggtt.h" #include "xe_mmio.h" -#include "intel_atomic_plane.h" +#include "i915_vma.h" #include "intel_crtc.h" #include "intel_display.h" #include "intel_display_core.h" @@ -19,8 +19,10 @@ #include "intel_fb.h" #include "intel_fb_pin.h" #include "intel_frontbuffer.h" +#include "intel_plane.h" #include "intel_plane_initial.h" #include "xe_bo.h" +#include "xe_vram_types.h" #include "xe_wa.h" #include <generated/xe_wa_oob.h> @@ -103,7 +105,7 @@ initial_plane_bo(struct xe_device *xe, * We don't currently expect this to ever be placed in the * stolen portion. 
*/ - if (phys_base >= tile0->mem.vram.usable_size) { + if (phys_base >= xe_vram_region_usable_size(tile0->mem.vram)) { drm_err(&xe->drm, "Initial plane programming using invalid range, phys_base=%pa\n", &phys_base); @@ -121,7 +123,7 @@ initial_plane_bo(struct xe_device *xe, phys_base = base; flags |= XE_BO_FLAG_STOLEN; - if (XE_WA(xe_root_mmio_gt(xe), 22019338487_display)) + if (XE_GT_WA(xe_root_mmio_gt(xe), 22019338487_display)) return NULL; /* @@ -138,8 +140,8 @@ initial_plane_bo(struct xe_device *xe, page_size); size -= base; - bo = xe_bo_create_pin_map_at(xe, tile0, NULL, size, phys_base, - ttm_bo_type_kernel, flags); + bo = xe_bo_create_pin_map_at_novm(xe, tile0, size, phys_base, + ttm_bo_type_kernel, flags, 0, false); if (IS_ERR(bo)) { drm_dbg(&xe->drm, "Failed to create bo phys_base=%pa size %u with flags %x: %li\n", @@ -184,7 +186,7 @@ intel_alloc_initial_plane_obj(struct intel_crtc *crtc, return false; if (intel_framebuffer_init(to_intel_framebuffer(fb), - &bo->ttm.base, &mode_cmd)) { + &bo->ttm.base, fb->format, &mode_cmd)) { drm_dbg_kms(&xe->drm, "intel fb init failed\n"); goto err_bo; } @@ -234,6 +236,9 @@ intel_find_initial_plane_obj(struct intel_crtc *crtc, goto nofb; plane_state->ggtt_vma = vma; + + plane_state->surf = i915_ggtt_offset(plane_state->ggtt_vma); + plane_state->uapi.src_x = 0; plane_state->uapi.src_y = 0; plane_state->uapi.src_w = fb->width << 16; diff --git a/drivers/gpu/drm/xe/instructions/xe_mi_commands.h b/drivers/gpu/drm/xe/instructions/xe_mi_commands.h index e3f5e8bb3ebc..c47b290e0e9f 100644 --- a/drivers/gpu/drm/xe/instructions/xe_mi_commands.h +++ b/drivers/gpu/drm/xe/instructions/xe_mi_commands.h @@ -65,6 +65,7 @@ #define MI_LOAD_REGISTER_MEM (__MI_INSTR(0x29) | XE_INSTR_NUM_DW(4)) #define MI_LRM_USE_GGTT REG_BIT(22) +#define MI_LRM_ASYNC REG_BIT(21) #define MI_LOAD_REGISTER_REG (__MI_INSTR(0x2a) | XE_INSTR_NUM_DW(3)) #define MI_LRR_DST_CS_MMIO REG_BIT(19) diff --git a/drivers/gpu/drm/xe/regs/xe_bars.h b/drivers/gpu/drm/xe/regs/xe_bars.h index ce05b6ae832f..880140d6ccdc 100644 --- a/drivers/gpu/drm/xe/regs/xe_bars.h +++ b/drivers/gpu/drm/xe/regs/xe_bars.h @@ -7,5 +7,6 @@ #define GTTMMADR_BAR 0 /* MMIO + GTT */ #define LMEM_BAR 2 /* VRAM */ +#define VF_LMEM_BAR 9 /* VF VRAM */ #endif diff --git a/drivers/gpu/drm/xe/regs/xe_engine_regs.h b/drivers/gpu/drm/xe/regs/xe_engine_regs.h index 7ade41e2b7b3..f4c3e1187a00 100644 --- a/drivers/gpu/drm/xe/regs/xe_engine_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_engine_regs.h @@ -111,6 +111,9 @@ #define PPHWSP_CSB_AND_TIMESTAMP_REPORT_DIS REG_BIT(14) #define CS_PRIORITY_MEM_READ REG_BIT(7) +#define CS_DEBUG_MODE2(base) XE_REG((base) + 0xd8, XE_REG_OPTION_MASKED) +#define INSTRUCTION_STATE_CACHE_INVALIDATE REG_BIT(6) + #define FF_SLICE_CS_CHICKEN1(base) XE_REG((base) + 0xe0, XE_REG_OPTION_MASKED) #define FFSC_PERCTX_PREEMPT_CTRL REG_BIT(14) diff --git a/drivers/gpu/drm/xe/regs/xe_gsc_regs.h b/drivers/gpu/drm/xe/regs/xe_gsc_regs.h index 7702364b65f1..180be82672ab 100644 --- a/drivers/gpu/drm/xe/regs/xe_gsc_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_gsc_regs.h @@ -13,9 +13,15 @@ /* Definitions of GSC H/W registers, bits, etc */ +#define BMG_GSC_HECI1_BASE 0x373000 + #define MTL_GSC_HECI1_BASE 0x00116000 #define MTL_GSC_HECI2_BASE 0x00117000 +#define DG1_GSC_HECI2_BASE 0x00259000 +#define PVC_GSC_HECI2_BASE 0x00285000 +#define DG2_GSC_HECI2_BASE 0x00374000 + #define HECI_H_CSR(base) XE_REG((base) + 0x4) #define HECI_H_CSR_IE REG_BIT(0) #define HECI_H_CSR_IS REG_BIT(1) diff --git a/drivers/gpu/drm/xe/regs/xe_gt_regs.h 
b/drivers/gpu/drm/xe/regs/xe_gt_regs.h index 5cd5ab8529c5..51f2a03847f9 100644 --- a/drivers/gpu/drm/xe/regs/xe_gt_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_gt_regs.h @@ -42,7 +42,7 @@ #define FORCEWAKE_ACK_GSC XE_REG(0xdf8) #define FORCEWAKE_ACK_GT_MTL XE_REG(0xdfc) -#define MCFG_MCR_SELECTOR XE_REG(0xfd0) +#define STEER_SEMAPHORE XE_REG(0xfd0) #define MTL_MCR_SELECTOR XE_REG(0xfd4) #define SF_MCR_SELECTOR XE_REG(0xfd8) #define MCR_SELECTOR XE_REG(0xfdc) @@ -342,6 +342,7 @@ #define POWERGATE_ENABLE XE_REG(0xa210) #define RENDER_POWERGATE_ENABLE REG_BIT(0) #define MEDIA_POWERGATE_ENABLE REG_BIT(1) +#define MEDIA_SAMPLERS_POWERGATE_ENABLE REG_BIT(2) #define VDN_HCP_POWERGATE_ENABLE(n) REG_BIT(3 + 2 * (n)) #define VDN_MFXVDENC_POWERGATE_ENABLE(n) REG_BIT(4 + 2 * (n)) @@ -522,6 +523,7 @@ #define TDL_CHICKEN XE_REG_MCR(0xe5f4, XE_REG_OPTION_MASKED) #define QID_WAIT_FOR_THREAD_NOT_RUN_DISABLE REG_BIT(12) +#define EUSTALL_PERF_SAMPLING_DISABLE REG_BIT(5) #define LSC_CHICKEN_BIT_0 XE_REG_MCR(0xe7c8) #define DISABLE_D8_D16_COASLESCE REG_BIT(30) diff --git a/drivers/gpu/drm/xe/regs/xe_hw_error_regs.h b/drivers/gpu/drm/xe/regs/xe_hw_error_regs.h new file mode 100644 index 000000000000..c146b9ef44eb --- /dev/null +++ b/drivers/gpu/drm/xe/regs/xe_hw_error_regs.h @@ -0,0 +1,20 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2025 Intel Corporation + */ + +#ifndef _XE_HW_ERROR_REGS_H_ +#define _XE_HW_ERROR_REGS_H_ + +#define HEC_UNCORR_ERR_STATUS(base) XE_REG((base) + 0x118) +#define UNCORR_FW_REPORTED_ERR BIT(6) + +#define HEC_UNCORR_FW_ERR_DW0(base) XE_REG((base) + 0x124) + +#define DEV_ERR_STAT_NONFATAL 0x100178 +#define DEV_ERR_STAT_CORRECTABLE 0x10017c +#define DEV_ERR_STAT_REG(x) XE_REG(_PICK_EVEN((x), \ + DEV_ERR_STAT_CORRECTABLE, \ + DEV_ERR_STAT_NONFATAL)) +#define XE_CSC_ERROR BIT(17) +#endif diff --git a/drivers/gpu/drm/xe/regs/xe_i2c_regs.h b/drivers/gpu/drm/xe/regs/xe_i2c_regs.h new file mode 100644 index 000000000000..af781c8e4a80 --- /dev/null +++ b/drivers/gpu/drm/xe/regs/xe_i2c_regs.h @@ -0,0 +1,20 @@ +/* SPDX-License-Identifier: MIT */ +#ifndef _XE_I2C_REGS_H_ +#define _XE_I2C_REGS_H_ + +#include <linux/pci_regs.h> + +#include "xe_reg_defs.h" +#include "xe_regs.h" + +#define I2C_BRIDGE_OFFSET (SOC_BASE + 0xd9000) +#define I2C_CONFIG_SPACE_OFFSET (SOC_BASE + 0xf6000) +#define I2C_MEM_SPACE_OFFSET (SOC_BASE + 0xf7400) + +#define REG_SG_REMAP_ADDR_PREFIX XE_REG(SOC_BASE + 0x0164) +#define REG_SG_REMAP_ADDR_POSTFIX XE_REG(SOC_BASE + 0x0168) + +#define I2C_CONFIG_CMD XE_REG(I2C_CONFIG_SPACE_OFFSET + PCI_COMMAND) +#define I2C_CONFIG_PMCSR XE_REG(I2C_CONFIG_SPACE_OFFSET + 0x84) + +#endif /* _XE_I2C_REGS_H_ */ diff --git a/drivers/gpu/drm/xe/regs/xe_irq_regs.h b/drivers/gpu/drm/xe/regs/xe_irq_regs.h index f0ecfcac4003..7c2a3a140142 100644 --- a/drivers/gpu/drm/xe/regs/xe_irq_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_irq_regs.h @@ -18,7 +18,9 @@ #define GFX_MSTR_IRQ XE_REG(0x190010, XE_REG_OPTION_VF) #define MASTER_IRQ REG_BIT(31) #define GU_MISC_IRQ REG_BIT(29) +#define ERROR_IRQ(x) REG_BIT(26 + (x)) #define DISPLAY_IRQ REG_BIT(16) +#define I2C_IRQ REG_BIT(12) #define GT_DW_IRQ(x) REG_BIT(x) /* diff --git a/drivers/gpu/drm/xe/regs/xe_lrc_layout.h b/drivers/gpu/drm/xe/regs/xe_lrc_layout.h index 994af591a2e8..b5eff383902c 100644 --- a/drivers/gpu/drm/xe/regs/xe_lrc_layout.h +++ b/drivers/gpu/drm/xe/regs/xe_lrc_layout.h @@ -12,9 +12,13 @@ #define CTX_RING_START (0x08 + 1) #define CTX_RING_CTL (0x0a + 1) #define CTX_BB_PER_CTX_PTR (0x12 + 1) +#define CTX_CS_INDIRECT_CTX (0x14 + 1) +#define 
CTX_CS_INDIRECT_CTX_OFFSET (0x16 + 1) #define CTX_TIMESTAMP (0x22 + 1) #define CTX_TIMESTAMP_UDW (0x24 + 1) #define CTX_INDIRECT_RING_STATE (0x26 + 1) +#define CTX_ACC_CTR_THOLD (0x2a + 1) +#define CTX_ASID (0x2e + 1) #define CTX_PDP0_UDW (0x30 + 1) #define CTX_PDP0_LDW (0x32 + 1) diff --git a/drivers/gpu/drm/xe/regs/xe_pmt.h b/drivers/gpu/drm/xe/regs/xe_pmt.h index b0efd9b48d1e..264e9baf949c 100644 --- a/drivers/gpu/drm/xe/regs/xe_pmt.h +++ b/drivers/gpu/drm/xe/regs/xe_pmt.h @@ -5,7 +5,7 @@ #ifndef _XE_PMT_H_ #define _XE_PMT_H_ -#define SOC_BASE 0x280000 +#include "xe_regs.h" #define BMG_PMT_BASE_OFFSET 0xDB000 #define BMG_DISCOVERY_OFFSET (SOC_BASE + BMG_PMT_BASE_OFFSET) @@ -21,4 +21,14 @@ #define SG_REMAP_INDEX1 XE_REG(SOC_BASE + 0x08) #define SG_REMAP_BITS REG_GENMASK(31, 24) +#define BMG_MODS_RESIDENCY_OFFSET (0x4D0) +#define BMG_G2_RESIDENCY_OFFSET (0x530) +#define BMG_G6_RESIDENCY_OFFSET (0x538) +#define BMG_G8_RESIDENCY_OFFSET (0x540) +#define BMG_G10_RESIDENCY_OFFSET (0x548) + +#define BMG_PCIE_LINK_L0_RESIDENCY_OFFSET (0x570) +#define BMG_PCIE_LINK_L1_RESIDENCY_OFFSET (0x578) +#define BMG_PCIE_LINK_L1_2_RESIDENCY_OFFSET (0x580) + #endif diff --git a/drivers/gpu/drm/xe/regs/xe_regs.h b/drivers/gpu/drm/xe/regs/xe_regs.h index 3abb17d2ca33..1926b4044314 100644 --- a/drivers/gpu/drm/xe/regs/xe_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_regs.h @@ -7,6 +7,8 @@ #include "regs/xe_reg_defs.h" +#define SOC_BASE 0x280000 + #define GU_CNTL_PROTECTED XE_REG(0x10100C) #define DRIVERINT_FLR_DIS REG_BIT(31) diff --git a/drivers/gpu/drm/xe/tests/xe_bo.c b/drivers/gpu/drm/xe/tests/xe_bo.c index 77ca1ab527ec..2294cf89f3e1 100644 --- a/drivers/gpu/drm/xe/tests/xe_bo.c +++ b/drivers/gpu/drm/xe/tests/xe_bo.c @@ -23,7 +23,7 @@ static int ccs_test_migrate(struct xe_tile *tile, struct xe_bo *bo, bool clear, u64 get_val, u64 assign_val, - struct kunit *test) + struct kunit *test, struct drm_exec *exec) { struct dma_fence *fence; struct ttm_tt *ttm; @@ -35,7 +35,7 @@ static int ccs_test_migrate(struct xe_tile *tile, struct xe_bo *bo, u32 offset; /* Move bo to VRAM if not already there. */ - ret = xe_bo_validate(bo, NULL, false); + ret = xe_bo_validate(bo, NULL, false, exec); if (ret) { KUNIT_FAIL(test, "Failed to validate bo.\n"); return ret; @@ -60,7 +60,7 @@ static int ccs_test_migrate(struct xe_tile *tile, struct xe_bo *bo, } /* Evict to system. CCS data should be copied. */ - ret = xe_bo_evict(bo); + ret = xe_bo_evict(bo, exec); if (ret) { KUNIT_FAIL(test, "Failed to evict bo.\n"); return ret; @@ -106,7 +106,7 @@ static int ccs_test_migrate(struct xe_tile *tile, struct xe_bo *bo, } /* Check last CCS value, or at least last value in page. 
*/ - offset = xe_device_ccs_bytes(tile_to_xe(tile), bo->size); + offset = xe_device_ccs_bytes(tile_to_xe(tile), xe_bo_size(bo)); offset = min_t(u32, offset, PAGE_SIZE) / sizeof(u64) - 1; if (cpu_map[offset] != get_val) { KUNIT_FAIL(test, @@ -132,14 +132,15 @@ static void ccs_test_run_tile(struct xe_device *xe, struct xe_tile *tile, /* TODO: Sanity check */ unsigned int bo_flags = XE_BO_FLAG_VRAM_IF_DGFX(tile); + struct drm_exec *exec = XE_VALIDATION_OPT_OUT; if (IS_DGFX(xe)) kunit_info(test, "Testing vram id %u\n", tile->id); else kunit_info(test, "Testing system memory\n"); - bo = xe_bo_create_user(xe, NULL, NULL, SZ_1M, DRM_XE_GEM_CPU_CACHING_WC, - bo_flags); + bo = xe_bo_create_user(xe, NULL, SZ_1M, DRM_XE_GEM_CPU_CACHING_WC, + bo_flags, exec); if (IS_ERR(bo)) { KUNIT_FAIL(test, "Failed to create bo.\n"); return; @@ -149,18 +150,18 @@ static void ccs_test_run_tile(struct xe_device *xe, struct xe_tile *tile, kunit_info(test, "Verifying that CCS data is cleared on creation.\n"); ret = ccs_test_migrate(tile, bo, false, 0ULL, 0xdeadbeefdeadbeefULL, - test); + test, exec); if (ret) goto out_unlock; kunit_info(test, "Verifying that CCS data survives migration.\n"); ret = ccs_test_migrate(tile, bo, false, 0xdeadbeefdeadbeefULL, - 0xdeadbeefdeadbeefULL, test); + 0xdeadbeefdeadbeefULL, test, exec); if (ret) goto out_unlock; kunit_info(test, "Verifying that CCS data can be properly cleared.\n"); - ret = ccs_test_migrate(tile, bo, true, 0ULL, 0ULL, test); + ret = ccs_test_migrate(tile, bo, true, 0ULL, 0ULL, test, exec); out_unlock: xe_bo_unlock(bo); @@ -210,6 +211,7 @@ static int evict_test_run_tile(struct xe_device *xe, struct xe_tile *tile, struc struct xe_bo *bo, *external; unsigned int bo_flags = XE_BO_FLAG_VRAM_IF_DGFX(tile); struct xe_vm *vm = xe_migrate_get_vm(xe_device_get_root_tile(xe)->migrate); + struct drm_exec *exec = XE_VALIDATION_OPT_OUT; struct xe_gt *__gt; int err, i, id; @@ -218,25 +220,25 @@ static int evict_test_run_tile(struct xe_device *xe, struct xe_tile *tile, struc for (i = 0; i < 2; ++i) { xe_vm_lock(vm, false); - bo = xe_bo_create_user(xe, NULL, vm, 0x10000, + bo = xe_bo_create_user(xe, vm, 0x10000, DRM_XE_GEM_CPU_CACHING_WC, - bo_flags); + bo_flags, exec); xe_vm_unlock(vm); if (IS_ERR(bo)) { KUNIT_FAIL(test, "bo create err=%pe\n", bo); break; } - external = xe_bo_create_user(xe, NULL, NULL, 0x10000, + external = xe_bo_create_user(xe, NULL, 0x10000, DRM_XE_GEM_CPU_CACHING_WC, - bo_flags); + bo_flags, NULL); if (IS_ERR(external)) { KUNIT_FAIL(test, "external bo create err=%pe\n", external); goto cleanup_bo; } xe_bo_lock(external, false); - err = xe_bo_pin_external(external); + err = xe_bo_pin_external(external, false, exec); xe_bo_unlock(external); if (err) { KUNIT_FAIL(test, "external bo pin err=%pe\n", @@ -294,7 +296,7 @@ static int evict_test_run_tile(struct xe_device *xe, struct xe_tile *tile, struc if (i) { down_read(&vm->lock); xe_vm_lock(vm, false); - err = xe_bo_validate(bo, bo->vm, false); + err = xe_bo_validate(bo, bo->vm, false, exec); xe_vm_unlock(vm); up_read(&vm->lock); if (err) { @@ -303,7 +305,7 @@ static int evict_test_run_tile(struct xe_device *xe, struct xe_tile *tile, struc goto cleanup_all; } xe_bo_lock(external, false); - err = xe_bo_validate(external, NULL, false); + err = xe_bo_validate(external, NULL, false, exec); xe_bo_unlock(external); if (err) { KUNIT_FAIL(test, "external bo valid err=%pe\n", @@ -495,9 +497,9 @@ static int shrink_test_run_device(struct xe_device *xe) INIT_LIST_HEAD(&link->link); /* We can create bos using WC caching here. 
But it is slower. */ - bo = xe_bo_create_user(xe, NULL, NULL, XE_BO_SHRINK_SIZE, + bo = xe_bo_create_user(xe, NULL, XE_BO_SHRINK_SIZE, DRM_XE_GEM_CPU_CACHING_WB, - XE_BO_FLAG_SYSTEM); + XE_BO_FLAG_SYSTEM, NULL); if (IS_ERR(bo)) { if (bo != ERR_PTR(-ENOMEM) && bo != ERR_PTR(-ENOSPC) && bo != ERR_PTR(-EINTR) && bo != ERR_PTR(-ERESTARTSYS)) diff --git a/drivers/gpu/drm/xe/tests/xe_dma_buf.c b/drivers/gpu/drm/xe/tests/xe_dma_buf.c index c53f67ce4b0a..a7e548a2bdfb 100644 --- a/drivers/gpu/drm/xe/tests/xe_dma_buf.c +++ b/drivers/gpu/drm/xe/tests/xe_dma_buf.c @@ -27,7 +27,8 @@ static bool is_dynamic(struct dma_buf_test_params *params) } static void check_residency(struct kunit *test, struct xe_bo *exported, - struct xe_bo *imported, struct dma_buf *dmabuf) + struct xe_bo *imported, struct dma_buf *dmabuf, + struct drm_exec *exec) { struct dma_buf_test_params *params = to_dma_buf_test_params(test->priv); u32 mem_type; @@ -57,16 +58,12 @@ static void check_residency(struct kunit *test, struct xe_bo *exported, return; /* - * Evict exporter. Note that the gem object dma_buf member isn't - * set from xe_gem_prime_export(), and it's needed for the move_notify() - * functionality, so hack that up here. Evicting the exported bo will + * Evict exporter. Evicting the exported bo will * evict also the imported bo through the move_notify() functionality if * importer is on a different device. If they're on the same device, * the exporter and the importer should be the same bo. */ - swap(exported->ttm.base.dma_buf, dmabuf); - ret = xe_bo_evict(exported); - swap(exported->ttm.base.dma_buf, dmabuf); + ret = xe_bo_evict(exported, exec); if (ret) { if (ret != -EINTR && ret != -ERESTARTSYS) KUNIT_FAIL(test, "Evicting exporter failed with err=%d.\n", @@ -81,7 +78,7 @@ static void check_residency(struct kunit *test, struct xe_bo *exported, } /* Re-validate the importer. This should move also exporter in. */ - ret = xe_bo_validate(imported, NULL, false); + ret = xe_bo_validate(imported, NULL, false, exec); if (ret) { if (ret != -EINTR && ret != -ERESTARTSYS) KUNIT_FAIL(test, "Validating importer failed with err=%d.\n", @@ -89,15 +86,7 @@ static void check_residency(struct kunit *test, struct xe_bo *exported, return; } - /* - * If on different devices, the exporter is kept in system if - * possible, saving a migration step as the transfer is just - * likely as fast from system memory. 
- */ - if (params->mem_mask & XE_BO_FLAG_SYSTEM) - KUNIT_EXPECT_TRUE(test, xe_bo_is_mem_type(exported, XE_PL_TT)); - else - KUNIT_EXPECT_TRUE(test, xe_bo_is_mem_type(exported, mem_type)); + KUNIT_EXPECT_TRUE(test, xe_bo_is_mem_type(exported, mem_type)); if (params->force_different_devices) KUNIT_EXPECT_TRUE(test, xe_bo_is_mem_type(imported, XE_PL_TT)); @@ -125,8 +114,8 @@ static void xe_test_dmabuf_import_same_driver(struct xe_device *xe) size = SZ_64K; kunit_info(test, "running %s\n", __func__); - bo = xe_bo_create_user(xe, NULL, NULL, size, DRM_XE_GEM_CPU_CACHING_WC, - params->mem_mask); + bo = xe_bo_create_user(xe, NULL, size, DRM_XE_GEM_CPU_CACHING_WC, + params->mem_mask, NULL); if (IS_ERR(bo)) { KUNIT_FAIL(test, "xe_bo_create() failed with err=%ld\n", PTR_ERR(bo)); @@ -139,6 +128,7 @@ static void xe_test_dmabuf_import_same_driver(struct xe_device *xe) PTR_ERR(dmabuf)); goto out; } + bo->ttm.base.dma_buf = dmabuf; import = xe_gem_prime_import(&xe->drm, dmabuf); if (!IS_ERR(import)) { @@ -153,11 +143,12 @@ static void xe_test_dmabuf_import_same_driver(struct xe_device *xe) KUNIT_FAIL(test, "xe_gem_prime_import() succeeded when it shouldn't have\n"); } else { + struct drm_exec *exec = XE_VALIDATION_OPT_OUT; int err; /* Is everything where we expect it to be? */ xe_bo_lock(import_bo, false); - err = xe_bo_validate(import_bo, NULL, false); + err = xe_bo_validate(import_bo, NULL, false, exec); /* Pinning in VRAM is not allowed. */ if (!is_dynamic(params) && @@ -170,7 +161,7 @@ static void xe_test_dmabuf_import_same_driver(struct xe_device *xe) err == -ERESTARTSYS); if (!err) - check_residency(test, bo, import_bo, dmabuf); + check_residency(test, bo, import_bo, dmabuf, exec); xe_bo_unlock(import_bo); } drm_gem_object_put(import); @@ -186,6 +177,7 @@ static void xe_test_dmabuf_import_same_driver(struct xe_device *xe) KUNIT_FAIL(test, "dynamic p2p attachment failed with err=%ld\n", PTR_ERR(import)); } + bo->ttm.base.dma_buf = NULL; dma_buf_put(dmabuf); out: drm_gem_object_put(&bo->ttm.base); @@ -206,7 +198,7 @@ static const struct dma_buf_attach_ops nop2p_attach_ops = { static const struct dma_buf_test_params test_params[] = { {.mem_mask = XE_BO_FLAG_VRAM0, .attach_ops = &xe_dma_buf_attach_ops}, - {.mem_mask = XE_BO_FLAG_VRAM0, + {.mem_mask = XE_BO_FLAG_VRAM0 | XE_BO_FLAG_NEEDS_CPU_ACCESS, .attach_ops = &xe_dma_buf_attach_ops, .force_different_devices = true}, @@ -238,7 +230,8 @@ static const struct dma_buf_test_params test_params[] = { {.mem_mask = XE_BO_FLAG_SYSTEM | XE_BO_FLAG_VRAM0, .attach_ops = &xe_dma_buf_attach_ops}, - {.mem_mask = XE_BO_FLAG_SYSTEM | XE_BO_FLAG_VRAM0, + {.mem_mask = XE_BO_FLAG_SYSTEM | XE_BO_FLAG_VRAM0 | + XE_BO_FLAG_NEEDS_CPU_ACCESS, .attach_ops = &xe_dma_buf_attach_ops, .force_different_devices = true}, diff --git a/drivers/gpu/drm/xe/tests/xe_gt_sriov_pf_service_test.c b/drivers/gpu/drm/xe/tests/xe_gt_sriov_pf_service_test.c deleted file mode 100644 index b683585db852..000000000000 --- a/drivers/gpu/drm/xe/tests/xe_gt_sriov_pf_service_test.c +++ /dev/null @@ -1,232 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 AND MIT -/* - * Copyright © 2024 Intel Corporation - */ - -#include <kunit/test.h> - -#include "xe_device.h" -#include "xe_kunit_helpers.h" -#include "xe_pci_test.h" - -static int pf_service_test_init(struct kunit *test) -{ - struct xe_pci_fake_data fake = { - .sriov_mode = XE_SRIOV_MODE_PF, - .platform = XE_TIGERLAKE, /* some random platform */ - .subplatform = XE_SUBPLATFORM_NONE, - }; - struct xe_device *xe; - struct xe_gt *gt; - - test->priv = &fake; - 
xe_kunit_helper_xe_device_test_init(test); - - xe = test->priv; - KUNIT_ASSERT_EQ(test, xe_sriov_init(xe), 0); - - gt = xe_device_get_gt(xe, 0); - pf_init_versions(gt); - - /* - * sanity check: - * - all supported platforms VF/PF ABI versions must be defined - * - base version can't be newer than latest - */ - KUNIT_ASSERT_NE(test, 0, gt->sriov.pf.service.version.base.major); - KUNIT_ASSERT_NE(test, 0, gt->sriov.pf.service.version.latest.major); - KUNIT_ASSERT_LE(test, gt->sriov.pf.service.version.base.major, - gt->sriov.pf.service.version.latest.major); - if (gt->sriov.pf.service.version.base.major == gt->sriov.pf.service.version.latest.major) - KUNIT_ASSERT_LE(test, gt->sriov.pf.service.version.base.minor, - gt->sriov.pf.service.version.latest.minor); - - test->priv = gt; - return 0; -} - -static void pf_negotiate_any(struct kunit *test) -{ - struct xe_gt *gt = test->priv; - u32 major, minor; - - KUNIT_ASSERT_EQ(test, 0, - pf_negotiate_version(gt, VF2PF_HANDSHAKE_MAJOR_ANY, - VF2PF_HANDSHAKE_MINOR_ANY, - &major, &minor)); - KUNIT_ASSERT_EQ(test, major, gt->sriov.pf.service.version.latest.major); - KUNIT_ASSERT_EQ(test, minor, gt->sriov.pf.service.version.latest.minor); -} - -static void pf_negotiate_base_match(struct kunit *test) -{ - struct xe_gt *gt = test->priv; - u32 major, minor; - - KUNIT_ASSERT_EQ(test, 0, - pf_negotiate_version(gt, - gt->sriov.pf.service.version.base.major, - gt->sriov.pf.service.version.base.minor, - &major, &minor)); - KUNIT_ASSERT_EQ(test, major, gt->sriov.pf.service.version.base.major); - KUNIT_ASSERT_EQ(test, minor, gt->sriov.pf.service.version.base.minor); -} - -static void pf_negotiate_base_newer(struct kunit *test) -{ - struct xe_gt *gt = test->priv; - u32 major, minor; - - KUNIT_ASSERT_EQ(test, 0, - pf_negotiate_version(gt, - gt->sriov.pf.service.version.base.major, - gt->sriov.pf.service.version.base.minor + 1, - &major, &minor)); - KUNIT_ASSERT_EQ(test, major, gt->sriov.pf.service.version.base.major); - KUNIT_ASSERT_GE(test, minor, gt->sriov.pf.service.version.base.minor); - if (gt->sriov.pf.service.version.base.major == gt->sriov.pf.service.version.latest.major) - KUNIT_ASSERT_LE(test, minor, gt->sriov.pf.service.version.latest.minor); - else - KUNIT_FAIL(test, "FIXME: don't know how to test multi-version yet!\n"); -} - -static void pf_negotiate_base_next(struct kunit *test) -{ - struct xe_gt *gt = test->priv; - u32 major, minor; - - KUNIT_ASSERT_EQ(test, 0, - pf_negotiate_version(gt, - gt->sriov.pf.service.version.base.major + 1, 0, - &major, &minor)); - KUNIT_ASSERT_GE(test, major, gt->sriov.pf.service.version.base.major); - KUNIT_ASSERT_LE(test, major, gt->sriov.pf.service.version.latest.major); - if (major == gt->sriov.pf.service.version.latest.major) - KUNIT_ASSERT_LE(test, minor, gt->sriov.pf.service.version.latest.minor); - else - KUNIT_FAIL(test, "FIXME: don't know how to test multi-version yet!\n"); -} - -static void pf_negotiate_base_older(struct kunit *test) -{ - struct xe_gt *gt = test->priv; - u32 major, minor; - - if (!gt->sriov.pf.service.version.base.minor) - kunit_skip(test, "no older minor\n"); - - KUNIT_ASSERT_NE(test, 0, - pf_negotiate_version(gt, - gt->sriov.pf.service.version.base.major, - gt->sriov.pf.service.version.base.minor - 1, - &major, &minor)); -} - -static void pf_negotiate_base_prev(struct kunit *test) -{ - struct xe_gt *gt = test->priv; - u32 major, minor; - - KUNIT_ASSERT_NE(test, 0, - pf_negotiate_version(gt, - gt->sriov.pf.service.version.base.major - 1, 1, - &major, &minor)); -} - -static void 
pf_negotiate_latest_match(struct kunit *test) -{ - struct xe_gt *gt = test->priv; - u32 major, minor; - - KUNIT_ASSERT_EQ(test, 0, - pf_negotiate_version(gt, - gt->sriov.pf.service.version.latest.major, - gt->sriov.pf.service.version.latest.minor, - &major, &minor)); - KUNIT_ASSERT_EQ(test, major, gt->sriov.pf.service.version.latest.major); - KUNIT_ASSERT_EQ(test, minor, gt->sriov.pf.service.version.latest.minor); -} - -static void pf_negotiate_latest_newer(struct kunit *test) -{ - struct xe_gt *gt = test->priv; - u32 major, minor; - - KUNIT_ASSERT_EQ(test, 0, - pf_negotiate_version(gt, - gt->sriov.pf.service.version.latest.major, - gt->sriov.pf.service.version.latest.minor + 1, - &major, &minor)); - KUNIT_ASSERT_EQ(test, major, gt->sriov.pf.service.version.latest.major); - KUNIT_ASSERT_EQ(test, minor, gt->sriov.pf.service.version.latest.minor); -} - -static void pf_negotiate_latest_next(struct kunit *test) -{ - struct xe_gt *gt = test->priv; - u32 major, minor; - - KUNIT_ASSERT_EQ(test, 0, - pf_negotiate_version(gt, - gt->sriov.pf.service.version.latest.major + 1, 0, - &major, &minor)); - KUNIT_ASSERT_EQ(test, major, gt->sriov.pf.service.version.latest.major); - KUNIT_ASSERT_EQ(test, minor, gt->sriov.pf.service.version.latest.minor); -} - -static void pf_negotiate_latest_older(struct kunit *test) -{ - struct xe_gt *gt = test->priv; - u32 major, minor; - - if (!gt->sriov.pf.service.version.latest.minor) - kunit_skip(test, "no older minor\n"); - - KUNIT_ASSERT_EQ(test, 0, - pf_negotiate_version(gt, - gt->sriov.pf.service.version.latest.major, - gt->sriov.pf.service.version.latest.minor - 1, - &major, &minor)); - KUNIT_ASSERT_EQ(test, major, gt->sriov.pf.service.version.latest.major); - KUNIT_ASSERT_EQ(test, minor, gt->sriov.pf.service.version.latest.minor - 1); -} - -static void pf_negotiate_latest_prev(struct kunit *test) -{ - struct xe_gt *gt = test->priv; - u32 major, minor; - - if (gt->sriov.pf.service.version.base.major == gt->sriov.pf.service.version.latest.major) - kunit_skip(test, "no prev major"); - - KUNIT_ASSERT_EQ(test, 0, - pf_negotiate_version(gt, - gt->sriov.pf.service.version.latest.major - 1, - gt->sriov.pf.service.version.base.minor + 1, - &major, &minor)); - KUNIT_ASSERT_EQ(test, major, gt->sriov.pf.service.version.latest.major - 1); - KUNIT_ASSERT_GE(test, major, gt->sriov.pf.service.version.base.major); -} - -static struct kunit_case pf_service_test_cases[] = { - KUNIT_CASE(pf_negotiate_any), - KUNIT_CASE(pf_negotiate_base_match), - KUNIT_CASE(pf_negotiate_base_newer), - KUNIT_CASE(pf_negotiate_base_next), - KUNIT_CASE(pf_negotiate_base_older), - KUNIT_CASE(pf_negotiate_base_prev), - KUNIT_CASE(pf_negotiate_latest_match), - KUNIT_CASE(pf_negotiate_latest_newer), - KUNIT_CASE(pf_negotiate_latest_next), - KUNIT_CASE(pf_negotiate_latest_older), - KUNIT_CASE(pf_negotiate_latest_prev), - {} -}; - -static struct kunit_suite pf_service_suite = { - .name = "pf_service", - .test_cases = pf_service_test_cases, - .init = pf_service_test_init, -}; - -kunit_test_suite(pf_service_suite); diff --git a/drivers/gpu/drm/xe/tests/xe_guc_buf_kunit.c b/drivers/gpu/drm/xe/tests/xe_guc_buf_kunit.c index 537766cdd882..d266882adc0e 100644 --- a/drivers/gpu/drm/xe/tests/xe_guc_buf_kunit.c +++ b/drivers/gpu/drm/xe/tests/xe_guc_buf_kunit.c @@ -32,7 +32,7 @@ static struct xe_bo *replacement_xe_managed_bo_create_pin_map(struct xe_device * bo->tile = tile; bo->ttm.bdev = &xe->ttm; - bo->size = size; + bo->ttm.base.size = size; iosys_map_set_vaddr(&bo->vmap, buf); if (flags & XE_BO_FLAG_GGTT) { @@ 
-43,7 +43,7 @@ static struct xe_bo *replacement_xe_managed_bo_create_pin_map(struct xe_device * KUNIT_ASSERT_EQ(test, 0, xe_ggtt_node_insert(bo->ggtt_node[tile->id], - bo->size, SZ_4K)); + xe_bo_size(bo), SZ_4K)); } return bo; diff --git a/drivers/gpu/drm/xe/tests/xe_guc_g2g_test.c b/drivers/gpu/drm/xe/tests/xe_guc_g2g_test.c new file mode 100644 index 000000000000..3b213fcae916 --- /dev/null +++ b/drivers/gpu/drm/xe/tests/xe_guc_g2g_test.c @@ -0,0 +1,776 @@ +// SPDX-License-Identifier: GPL-2.0 AND MIT +/* + * Copyright © 2025 Intel Corporation + */ + +#include <linux/delay.h> + +#include <kunit/test.h> +#include <kunit/visibility.h> + +#include "tests/xe_kunit_helpers.h" +#include "tests/xe_pci_test.h" +#include "tests/xe_test.h" + +#include "xe_bo.h" +#include "xe_device.h" +#include "xe_pm.h" + +/* + * There are different ways to allocate the G2G buffers. The plan for this test + * is to make sure that all the possible options work. The particular option + * chosen by the driver may vary from one platform to another, it may also change + * with time. So to ensure consistency of testing, the relevant driver code is + * replicated here to guarantee it won't change without the test being updated + * to keep testing the other options. + * + * In order to test the actual code being used by the driver, there is also the + * 'default' scheme. That will use the official driver routines to test whatever + * method the driver is using on the current platform at the current time. + */ +enum { + /* Driver defined allocation scheme */ + G2G_CTB_TYPE_DEFAULT, + /* Single buffer in host memory */ + G2G_CTB_TYPE_HOST, + /* Single buffer in a specific tile, loops across all tiles */ + G2G_CTB_TYPE_TILE, +}; + +/* + * Payload is opaque to GuC. So KMD can define any structure or size it wants. + */ +struct g2g_test_payload { + u32 tx_dev; + u32 tx_tile; + u32 rx_dev; + u32 rx_tile; + u32 seqno; +}; + +static void g2g_test_send(struct kunit *test, struct xe_guc *guc, + u32 far_tile, u32 far_dev, + struct g2g_test_payload *payload) +{ + struct xe_device *xe = guc_to_xe(guc); + struct xe_gt *gt = guc_to_gt(guc); + u32 *action, total; + size_t payload_len; + int ret; + + static_assert(IS_ALIGNED(sizeof(*payload), sizeof(u32))); + payload_len = sizeof(*payload) / sizeof(u32); + + total = 4 + payload_len; + action = kunit_kmalloc_array(test, total, sizeof(*action), GFP_KERNEL); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, action); + + action[0] = XE_GUC_ACTION_TEST_G2G_SEND; + action[1] = far_tile; + action[2] = far_dev; + action[3] = payload_len; + memcpy(action + 4, payload, payload_len * sizeof(u32)); + + atomic_inc(&xe->g2g_test_count); + + /* + * Should specify the expected response notification here. Problem is that + * the response will be coming from a different GuC. By the end, it should + * all add up as long as an equal number of messages are sent from each GuC + * and to each GuC. However, in the middle negative reservation space errors + * and such like can occur. Rather than add intrusive changes to the CT layer + * it is simpler to just not bother counting it at all. The system should be + * idle when running the selftest, and the selftest's notification total size + * is well within the G2H allocation size. So there should be no issues with + * needing to block for space, which is all the tracking code is really for. 
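For reference, a minimal standalone sketch (not driver code) of the dword layout that g2g_test_send() builds: a four-dword header carrying the action id, destination tile, destination device and payload length, followed by the payload copied in as raw dwords. The ACTION_TEST_G2G_SEND value below is a placeholder, not the real H2G opcode.

/*
 * Hypothetical, userspace-compilable illustration of the G2G test action
 * layout. Only the header/payload packing mirrors g2g_test_send(); the
 * opcode value is made up.
 */
#include <assert.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define ACTION_TEST_G2G_SEND	0x1234	/* placeholder opcode */

struct g2g_test_payload {
	uint32_t tx_dev, tx_tile, rx_dev, rx_tile, seqno;
};

int main(void)
{
	struct g2g_test_payload p = {
		.tx_tile = 0, .tx_dev = 0, .rx_tile = 1, .rx_dev = 0, .seqno = 7,
	};
	size_t payload_len = sizeof(p) / sizeof(uint32_t);
	uint32_t action[4 + sizeof(p) / sizeof(uint32_t)];

	assert(sizeof(p) % sizeof(uint32_t) == 0);	/* same check as the static_assert */

	/* Header: opcode, far tile, far device, payload length in dwords */
	action[0] = ACTION_TEST_G2G_SEND;
	action[1] = p.rx_tile;
	action[2] = p.rx_dev;
	action[3] = (uint32_t)payload_len;
	memcpy(&action[4], &p, sizeof(p));	/* payload is opaque to GuC */

	printf("sending %zu dwords total\n", 4 + payload_len);
	return 0;
}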
+ */ + ret = xe_guc_ct_send(&guc->ct, action, total, 0, 0); + kunit_kfree(test, action); + KUNIT_ASSERT_EQ_MSG(test, 0, ret, "G2G send failed: %d [%d:%d -> %d:%d]\n", ret, + gt_to_tile(gt)->id, G2G_DEV(gt), far_tile, far_dev); +} + +/* + * NB: Can't use KUNIT_ASSERT and friends in here as this is called asynchronously + * from the G2H notification handler. Need that to actually complete rather than + * thread-abort in order to keep the rest of the driver alive! + */ +int xe_guc_g2g_test_notification(struct xe_guc *guc, u32 *msg, u32 len) +{ + struct xe_device *xe = guc_to_xe(guc); + struct xe_gt *rx_gt = guc_to_gt(guc), *test_gt, *tx_gt = NULL; + u32 tx_tile, tx_dev, rx_tile, rx_dev, idx, got_len; + struct g2g_test_payload *payload; + size_t payload_len; + int ret = 0, i; + + payload_len = sizeof(*payload) / sizeof(u32); + + if (unlikely(len != (G2H_LEN_DW_G2G_NOTIFY_MIN + payload_len))) { + xe_gt_err(rx_gt, "G2G test notification invalid length %u", len); + ret = -EPROTO; + goto done; + } + + tx_tile = msg[0]; + tx_dev = msg[1]; + got_len = msg[2]; + payload = (struct g2g_test_payload *)(msg + 3); + + rx_tile = gt_to_tile(rx_gt)->id; + rx_dev = G2G_DEV(rx_gt); + + if (got_len != payload_len) { + xe_gt_err(rx_gt, "G2G: Invalid payload length: %u vs %zu\n", got_len, payload_len); + ret = -EPROTO; + goto done; + } + + if (payload->tx_dev != tx_dev || payload->tx_tile != tx_tile || + payload->rx_dev != rx_dev || payload->rx_tile != rx_tile) { + xe_gt_err(rx_gt, "G2G: Invalid payload: %d:%d -> %d:%d vs %d:%d -> %d:%d! [%d]\n", + payload->tx_tile, payload->tx_dev, payload->rx_tile, payload->rx_dev, + tx_tile, tx_dev, rx_tile, rx_dev, payload->seqno); + ret = -EPROTO; + goto done; + } + + if (!xe->g2g_test_array) { + xe_gt_err(rx_gt, "G2G: Missing test array!\n"); + ret = -ENOMEM; + goto done; + } + + for_each_gt(test_gt, xe, i) { + if (gt_to_tile(test_gt)->id != tx_tile) + continue; + + if (G2G_DEV(test_gt) != tx_dev) + continue; + + if (tx_gt) { + xe_gt_err(rx_gt, "G2G: Got duplicate TX GTs: %d vs %d for %d:%d!\n", + tx_gt->info.id, test_gt->info.id, tx_tile, tx_dev); + ret = -EINVAL; + goto done; + } + + tx_gt = test_gt; + } + if (!tx_gt) { + xe_gt_err(rx_gt, "G2G: Failed to find a TX GT for %d:%d!\n", tx_tile, tx_dev); + ret = -EINVAL; + goto done; + } + + idx = (tx_gt->info.id * xe->info.gt_count) + rx_gt->info.id; + + if (xe->g2g_test_array[idx] != payload->seqno - 1) { + xe_gt_err(rx_gt, "G2G: Seqno mismatch %d vs %d for %d:%d -> %d:%d!\n", + xe->g2g_test_array[idx], payload->seqno - 1, + tx_tile, tx_dev, rx_tile, rx_dev); + ret = -EINVAL; + goto done; + } + + xe->g2g_test_array[idx] = payload->seqno; + +done: + atomic_dec(&xe->g2g_test_count); + return ret; +} + +/* + * Send the given seqno from all GuCs to all other GuCs in tile/GT order + */ +static void g2g_test_in_order(struct kunit *test, struct xe_device *xe, u32 seqno) +{ + struct xe_gt *near_gt, *far_gt; + int i, j; + + for_each_gt(near_gt, xe, i) { + u32 near_tile = gt_to_tile(near_gt)->id; + u32 near_dev = G2G_DEV(near_gt); + + for_each_gt(far_gt, xe, j) { + u32 far_tile = gt_to_tile(far_gt)->id; + u32 far_dev = G2G_DEV(far_gt); + struct g2g_test_payload payload; + + if (far_gt->info.id == near_gt->info.id) + continue; + + payload.tx_dev = near_dev; + payload.tx_tile = near_tile; + payload.rx_dev = far_dev; + payload.rx_tile = far_tile; + payload.seqno = seqno; + g2g_test_send(test, &near_gt->uc.guc, far_tile, far_dev, &payload); + } + } +} + +#define WAIT_TIME_MS 100 +#define WAIT_COUNT (1000 / WAIT_TIME_MS) + +static void 
g2g_wait_for_complete(void *_xe) +{ + struct xe_device *xe = (struct xe_device *)_xe; + struct kunit *test = kunit_get_current_test(); + int wait = 0; + + /* Wait for all G2H messages to be received */ + while (atomic_read(&xe->g2g_test_count)) { + if (++wait > WAIT_COUNT) + break; + + msleep(WAIT_TIME_MS); + } + + KUNIT_ASSERT_EQ_MSG(test, 0, atomic_read(&xe->g2g_test_count), + "Timed out waiting for notifications\n"); + kunit_info(test, "Got all notifications back\n"); +} + +#undef WAIT_TIME_MS +#undef WAIT_COUNT + +static void g2g_clean_array(void *_xe) +{ + struct xe_device *xe = (struct xe_device *)_xe; + + xe->g2g_test_array = NULL; +} + +#define NUM_LOOPS 16 + +static void g2g_run_test(struct kunit *test, struct xe_device *xe) +{ + u32 seqno, max_array; + int ret, i, j; + + max_array = xe->info.gt_count * xe->info.gt_count; + xe->g2g_test_array = kunit_kcalloc(test, max_array, sizeof(u32), GFP_KERNEL); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, xe->g2g_test_array); + + ret = kunit_add_action_or_reset(test, g2g_clean_array, xe); + KUNIT_ASSERT_EQ_MSG(test, 0, ret, "Failed to register clean up action\n"); + + /* + * Send incrementing seqnos from all GuCs to all other GuCs in tile/GT order. + * Tile/GT order doesn't really mean anything to the hardware but it is going + * to be a fixed sequence every time. + * + * Verify that each one comes back having taken the correct route. + */ + ret = kunit_add_action(test, g2g_wait_for_complete, xe); + KUNIT_ASSERT_EQ_MSG(test, 0, ret, "Failed to register clean up action\n"); + for (seqno = 1; seqno < NUM_LOOPS; seqno++) + g2g_test_in_order(test, xe, seqno); + seqno--; + + kunit_release_action(test, &g2g_wait_for_complete, xe); + + /* Check for the final seqno in each slot */ + for (i = 0; i < xe->info.gt_count; i++) { + for (j = 0; j < xe->info.gt_count; j++) { + u32 idx = (j * xe->info.gt_count) + i; + + if (i == j) + KUNIT_ASSERT_EQ_MSG(test, 0, xe->g2g_test_array[idx], + "identity seqno modified: %d for %dx%d!\n", + xe->g2g_test_array[idx], i, j); + else + KUNIT_ASSERT_EQ_MSG(test, seqno, xe->g2g_test_array[idx], + "invalid seqno: %d vs %d for %dx%d!\n", + xe->g2g_test_array[idx], seqno, i, j); + } + } + + kunit_kfree(test, xe->g2g_test_array); + kunit_release_action(test, &g2g_clean_array, xe); + + kunit_info(test, "Test passed\n"); +} + +#undef NUM_LOOPS + +static void g2g_ct_stop(struct xe_guc *guc) +{ + struct xe_gt *remote_gt, *gt = guc_to_gt(guc); + struct xe_device *xe = gt_to_xe(gt); + int i, t; + + for_each_gt(remote_gt, xe, i) { + u32 tile, dev; + + if (remote_gt->info.id == gt->info.id) + continue; + + tile = gt_to_tile(remote_gt)->id; + dev = G2G_DEV(remote_gt); + + for (t = 0; t < XE_G2G_TYPE_LIMIT; t++) + guc_g2g_deregister(guc, tile, dev, t); + } +} + +/* Size of a single allocation that contains all G2G CTBs across all GTs */ +static u32 g2g_ctb_size(struct kunit *test, struct xe_device *xe) +{ + unsigned int count = xe->info.gt_count; + u32 num_channels = (count * (count - 1)) / 2; + + kunit_info(test, "Size: (%d * %d / 2) * %d * 0x%08X + 0x%08X => 0x%08X [%d]\n", + count, count - 1, XE_G2G_TYPE_LIMIT, G2G_BUFFER_SIZE, G2G_DESC_AREA_SIZE, + num_channels * XE_G2G_TYPE_LIMIT * G2G_BUFFER_SIZE + G2G_DESC_AREA_SIZE, + num_channels * XE_G2G_TYPE_LIMIT); + + return num_channels * XE_G2G_TYPE_LIMIT * G2G_BUFFER_SIZE + G2G_DESC_AREA_SIZE; +} + +/* + * Use the driver's regular CTB allocation scheme. 
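As a rough, standalone illustration of how g2g_ctb_size() above scales with GT count: one entry per unordered pair of GTs, each pair carrying XE_G2G_TYPE_LIMIT CTBs, plus a shared descriptor area. The three #define values below are placeholders, not the driver's real constants; only the pair-counting arithmetic is taken from the test.

#include <stdint.h>
#include <stdio.h>

#define XE_G2G_TYPE_LIMIT	2	/* placeholder */
#define G2G_BUFFER_SIZE		0x1000	/* placeholder */
#define G2G_DESC_AREA_SIZE	0x1000	/* placeholder */

static uint32_t ctb_size(unsigned int gt_count)
{
	/* One entry per unordered pair of GTs */
	uint32_t num_channels = (gt_count * (gt_count - 1)) / 2;

	return num_channels * XE_G2G_TYPE_LIMIT * G2G_BUFFER_SIZE +
	       G2G_DESC_AREA_SIZE;
}

int main(void)
{
	unsigned int count;

	for (count = 2; count <= 4; count++)
		printf("%u GTs -> 0x%08x bytes\n", count,
		       (unsigned int)ctb_size(count));
	return 0;
}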
+ */ +static void g2g_alloc_default(struct kunit *test, struct xe_device *xe) +{ + struct xe_gt *gt; + int i; + + kunit_info(test, "Default [tiles = %d, GTs = %d]\n", + xe->info.tile_count, xe->info.gt_count); + + for_each_gt(gt, xe, i) { + struct xe_guc *guc = >->uc.guc; + int ret; + + ret = guc_g2g_alloc(guc); + KUNIT_ASSERT_EQ_MSG(test, 0, ret, "G2G alloc failed: %pe", ERR_PTR(ret)); + continue; + } +} + +static void g2g_distribute(struct kunit *test, struct xe_device *xe, struct xe_bo *bo) +{ + struct xe_gt *root_gt, *gt; + int i; + + root_gt = xe_device_get_gt(xe, 0); + root_gt->uc.guc.g2g.bo = bo; + root_gt->uc.guc.g2g.owned = true; + kunit_info(test, "[%d.%d] Assigned 0x%p\n", gt_to_tile(root_gt)->id, root_gt->info.id, bo); + + for_each_gt(gt, xe, i) { + if (gt->info.id != 0) { + gt->uc.guc.g2g.owned = false; + gt->uc.guc.g2g.bo = xe_bo_get(bo); + kunit_info(test, "[%d.%d] Pinned 0x%p\n", + gt_to_tile(gt)->id, gt->info.id, gt->uc.guc.g2g.bo); + } + + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, gt->uc.guc.g2g.bo); + } +} + +/* + * Allocate a single blob on the host and split between all G2G CTBs. + */ +static void g2g_alloc_host(struct kunit *test, struct xe_device *xe) +{ + struct xe_bo *bo; + u32 g2g_size; + + kunit_info(test, "Host [tiles = %d, GTs = %d]\n", xe->info.tile_count, xe->info.gt_count); + + g2g_size = g2g_ctb_size(test, xe); + bo = xe_managed_bo_create_pin_map(xe, xe_device_get_root_tile(xe), g2g_size, + XE_BO_FLAG_SYSTEM | + XE_BO_FLAG_GGTT | + XE_BO_FLAG_GGTT_ALL | + XE_BO_FLAG_GGTT_INVALIDATE); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, bo); + kunit_info(test, "[HST] G2G buffer create: 0x%p\n", bo); + + xe_map_memset(xe, &bo->vmap, 0, 0, g2g_size); + + g2g_distribute(test, xe, bo); +} + +/* + * Allocate a single blob on the given tile and split between all G2G CTBs. + */ +static void g2g_alloc_tile(struct kunit *test, struct xe_device *xe, struct xe_tile *tile) +{ + struct xe_bo *bo; + u32 g2g_size; + + KUNIT_ASSERT_TRUE(test, IS_DGFX(xe)); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, tile); + + kunit_info(test, "Tile %d [tiles = %d, GTs = %d]\n", + tile->id, xe->info.tile_count, xe->info.gt_count); + + g2g_size = g2g_ctb_size(test, xe); + bo = xe_managed_bo_create_pin_map(xe, tile, g2g_size, + XE_BO_FLAG_VRAM_IF_DGFX(tile) | + XE_BO_FLAG_GGTT | + XE_BO_FLAG_GGTT_ALL | + XE_BO_FLAG_GGTT_INVALIDATE); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, bo); + kunit_info(test, "[%d.*] G2G buffer create: 0x%p\n", tile->id, bo); + + xe_map_memset(xe, &bo->vmap, 0, 0, g2g_size); + + g2g_distribute(test, xe, bo); +} + +static void g2g_free(struct kunit *test, struct xe_device *xe) +{ + struct xe_gt *gt; + struct xe_bo *bo; + int i; + + for_each_gt(gt, xe, i) { + bo = gt->uc.guc.g2g.bo; + if (!bo) + continue; + + if (gt->uc.guc.g2g.owned) { + xe_managed_bo_unpin_map_no_vm(bo); + kunit_info(test, "[%d.%d] Unmapped 0x%p\n", + gt_to_tile(gt)->id, gt->info.id, bo); + } else { + xe_bo_put(bo); + kunit_info(test, "[%d.%d] Unpinned 0x%p\n", + gt_to_tile(gt)->id, gt->info.id, bo); + } + + gt->uc.guc.g2g.bo = NULL; + } +} + +static void g2g_stop(struct kunit *test, struct xe_device *xe) +{ + struct xe_gt *gt; + int i; + + for_each_gt(gt, xe, i) { + struct xe_guc *guc = >->uc.guc; + + if (!guc->g2g.bo) + continue; + + g2g_ct_stop(guc); + } + + g2g_free(test, xe); +} + +/* + * Generate a unique id for each bi-directional CTB for each pair of + * near and far tiles/devices. The id can then be used as an index into + * a single allocation that is sub-divided into multiple CTBs. 
+ * + * For example, with two devices per tile and two tiles, the table should + * look like: + * Far <tile>.<dev> + * 0.0 0.1 1.0 1.1 + * N 0.0 --/-- 00/01 02/03 04/05 + * e 0.1 01/00 --/-- 06/07 08/09 + * a 1.0 03/02 07/06 --/-- 10/11 + * r 1.1 05/04 09/08 11/10 --/-- + * + * Where each entry is Rx/Tx channel id. + * + * So GuC #3 (tile 1, dev 1) talking to GuC #2 (tile 1, dev 0) would + * be reading from channel #11 and writing to channel #10. Whereas, + * GuC #2 talking to GuC #3 would be read on #10 and write to #11. + */ +static int g2g_slot_flat(u32 near_tile, u32 near_dev, u32 far_tile, u32 far_dev, + u32 type, u32 max_inst, bool have_dev) +{ + u32 near = near_tile, far = far_tile; + u32 idx = 0, x, y, direction; + int i; + + if (have_dev) { + near = (near << 1) | near_dev; + far = (far << 1) | far_dev; + } + + /* No need to send to one's self */ + if (far == near) + return -1; + + if (far > near) { + /* Top right table half */ + x = far; + y = near; + + /* T/R is 'forwards' direction */ + direction = type; + } else { + /* Bottom left table half */ + x = near; + y = far; + + /* B/L is 'backwards' direction */ + direction = (1 - type); + } + + /* Count the rows prior to the target */ + for (i = y; i > 0; i--) + idx += max_inst - i; + + /* Count this row up to the target */ + idx += (x - 1 - y); + + /* Slots are in Rx/Tx pairs */ + idx *= 2; + + /* Pick Rx/Tx direction */ + idx += direction; + + return idx; +} + +static int g2g_register_flat(struct xe_guc *guc, u32 far_tile, u32 far_dev, u32 type, bool have_dev) +{ + struct xe_gt *gt = guc_to_gt(guc); + struct xe_device *xe = gt_to_xe(gt); + u32 near_tile = gt_to_tile(gt)->id; + u32 near_dev = G2G_DEV(gt); + u32 max = xe->info.gt_count; + int idx; + u32 base, desc, buf; + + if (!guc->g2g.bo) + return -ENODEV; + + idx = g2g_slot_flat(near_tile, near_dev, far_tile, far_dev, type, max, have_dev); + xe_assert(xe, idx >= 0); + + base = guc_bo_ggtt_addr(guc, guc->g2g.bo); + desc = base + idx * G2G_DESC_SIZE; + buf = base + idx * G2G_BUFFER_SIZE + G2G_DESC_AREA_SIZE; + + xe_assert(xe, (desc - base + G2G_DESC_SIZE) <= G2G_DESC_AREA_SIZE); + xe_assert(xe, (buf - base + G2G_BUFFER_SIZE) <= xe_bo_size(guc->g2g.bo)); + + return guc_action_register_g2g_buffer(guc, type, far_tile, far_dev, + desc, buf, G2G_BUFFER_SIZE); +} + +static void g2g_start(struct kunit *test, struct xe_guc *guc) +{ + struct xe_gt *remote_gt, *gt = guc_to_gt(guc); + struct xe_device *xe = gt_to_xe(gt); + unsigned int i; + int t, ret; + bool have_dev; + + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, guc->g2g.bo); + + /* GuC interface will need extending if more GT device types are ever created. 
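To make the table above easy to verify, here is a standalone re-derivation of the same numbering (it is not the driver's g2g_slot_flat(), just the arithmetic that the comment describes). With two tiles of two GTs each it prints the 00/01 .. 11/10 layout from the comment; mapping type 0 to the table's Rx column and type 1 to its Tx column is an assumption taken from the GuC #3 / GuC #2 example given above.

#include <stdio.h>

/* Flattened near/far ids: 0.0 -> 0, 0.1 -> 1, 1.0 -> 2, 1.1 -> 3 */
static int slot(int near, int far, int type, int max_inst)
{
	int x, y, direction, idx = 0, i;

	if (far == near)
		return -1;	/* no channel to one's self */

	if (far > near) {
		x = far;	/* top-right half of the table, 'forwards' */
		y = near;
		direction = type;
	} else {
		x = near;	/* bottom-left half, 'backwards' */
		y = far;
		direction = 1 - type;
	}

	for (i = y; i > 0; i--)	/* rows before the target row */
		idx += max_inst - i;
	idx += x - 1 - y;	/* column within that row */
	idx *= 2;		/* slots come in Rx/Tx pairs */
	return idx + direction;
}

int main(void)
{
	int near, far;

	for (near = 0; near < 4; near++) {
		for (far = 0; far < 4; far++) {
			if (near == far)
				printf("--/-- ");
			else
				printf("%02d/%02d ", slot(near, far, 0, 4),
				       slot(near, far, 1, 4));
		}
		printf("\n");
	}
	return 0;
}

Running this reproduces the matrix in the comment row by row, which is a quick way to sanity-check the triangular indexing before trusting it for real descriptor and buffer offsets.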
*/ + KUNIT_ASSERT_TRUE(test, + (gt->info.type == XE_GT_TYPE_MAIN) || + (gt->info.type == XE_GT_TYPE_MEDIA)); + + /* Channel numbering depends on whether there are multiple GTs per tile */ + have_dev = xe->info.gt_count > xe->info.tile_count; + + for_each_gt(remote_gt, xe, i) { + u32 tile, dev; + + if (remote_gt->info.id == gt->info.id) + continue; + + tile = gt_to_tile(remote_gt)->id; + dev = G2G_DEV(remote_gt); + + for (t = 0; t < XE_G2G_TYPE_LIMIT; t++) { + ret = g2g_register_flat(guc, tile, dev, t, have_dev); + KUNIT_ASSERT_EQ_MSG(test, 0, ret, "G2G register failed: %pe", ERR_PTR(ret)); + } + } +} + +static void g2g_reinit(struct kunit *test, struct xe_device *xe, int ctb_type, struct xe_tile *tile) +{ + struct xe_gt *gt; + int i, found = 0; + + g2g_stop(test, xe); + + for_each_gt(gt, xe, i) { + struct xe_guc *guc = >->uc.guc; + + KUNIT_ASSERT_NULL(test, guc->g2g.bo); + } + + switch (ctb_type) { + case G2G_CTB_TYPE_DEFAULT: + g2g_alloc_default(test, xe); + break; + + case G2G_CTB_TYPE_HOST: + g2g_alloc_host(test, xe); + break; + + case G2G_CTB_TYPE_TILE: + g2g_alloc_tile(test, xe, tile); + break; + + default: + KUNIT_ASSERT_TRUE(test, false); + } + + for_each_gt(gt, xe, i) { + struct xe_guc *guc = >->uc.guc; + + if (!guc->g2g.bo) + continue; + + if (ctb_type == G2G_CTB_TYPE_DEFAULT) + guc_g2g_start(guc); + else + g2g_start(test, guc); + found++; + } + + KUNIT_ASSERT_GT_MSG(test, found, 1, "insufficient G2G channels running: %d", found); + + kunit_info(test, "Testing across %d GTs\n", found); +} + +static void g2g_recreate_ctb(void *_xe) +{ + struct xe_device *xe = (struct xe_device *)_xe; + struct kunit *test = kunit_get_current_test(); + + g2g_stop(test, xe); + + if (xe_guc_g2g_wanted(xe)) + g2g_reinit(test, xe, G2G_CTB_TYPE_DEFAULT, NULL); +} + +static void g2g_pm_runtime_put(void *_xe) +{ + struct xe_device *xe = (struct xe_device *)_xe; + + xe_pm_runtime_put(xe); +} + +static void g2g_pm_runtime_get(struct kunit *test) +{ + struct xe_device *xe = test->priv; + int ret; + + xe_pm_runtime_get(xe); + ret = kunit_add_action_or_reset(test, g2g_pm_runtime_put, xe); + KUNIT_ASSERT_EQ_MSG(test, 0, ret, "Failed to register runtime PM action\n"); +} + +static void g2g_check_skip(struct kunit *test) +{ + struct xe_device *xe = test->priv; + struct xe_gt *gt; + int i; + + if (IS_SRIOV_VF(xe)) + kunit_skip(test, "not supported from a VF"); + + if (xe->info.gt_count <= 1) + kunit_skip(test, "not enough GTs"); + + for_each_gt(gt, xe, i) { + struct xe_guc *guc = >->uc.guc; + + if (guc->fw.build_type == CSS_UKERNEL_INFO_BUILDTYPE_PROD) + kunit_skip(test, + "G2G test interface not available in production firmware builds\n"); + } +} + +/* + * Simple test that does not try to recreate the CTBs. + * Requires that the platform already enables G2G comms + * but has no risk of leaving the system in a broken state + * afterwards. + */ +static void xe_live_guc_g2g_kunit_default(struct kunit *test) +{ + struct xe_device *xe = test->priv; + + if (!xe_guc_g2g_wanted(xe)) + kunit_skip(test, "G2G not enabled"); + + g2g_check_skip(test); + + g2g_pm_runtime_get(test); + + kunit_info(test, "Testing default CTBs\n"); + g2g_run_test(test, xe); + + kunit_release_action(test, &g2g_pm_runtime_put, xe); +} + +/* + * More complex test that re-creates the CTBs in various location to + * test access to each location from each GuC. Can be run even on + * systems that do not enable G2G by default. On the other hand, + * because it recreates the CTBs, if something goes wrong it could + * leave the system with broken G2G comms. 
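The tests below lean on KUnit's deferred actions (kunit_add_action_or_reset() and kunit_release_action()) so that runtime PM references are dropped and the CTBs are re-created even when an assertion aborts the test part-way through. A minimal sketch of that pattern, with a hypothetical fake_device and restore_state() standing in for the real cleanup:

#include <kunit/test.h>

struct fake_device {
	bool reconfigured;
};

/* Hypothetical cleanup; runs automatically if the test aborts early */
static void restore_state(void *ctx)
{
	struct fake_device *dev = ctx;

	dev->reconfigured = false;
}

static void example_deferred_cleanup(struct kunit *test)
{
	struct fake_device *dev = kunit_kzalloc(test, sizeof(*dev), GFP_KERNEL);
	int ret;

	KUNIT_ASSERT_NOT_ERR_OR_NULL(test, dev);

	/* Register the undo action before making the risky change */
	ret = kunit_add_action_or_reset(test, restore_state, dev);
	KUNIT_ASSERT_EQ(test, 0, ret);

	dev->reconfigured = true;

	/* ... assertions that may abort the test would go here ... */

	/* Happy path: run the cleanup now and drop the deferred action */
	kunit_release_action(test, restore_state, dev);
	KUNIT_EXPECT_FALSE(test, dev->reconfigured);
}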
+ */ +static void xe_live_guc_g2g_kunit_allmem(struct kunit *test) +{ + struct xe_device *xe = test->priv; + int ret; + + g2g_check_skip(test); + + g2g_pm_runtime_get(test); + + /* Make sure to leave the system as we found it */ + ret = kunit_add_action_or_reset(test, g2g_recreate_ctb, xe); + KUNIT_ASSERT_EQ_MSG(test, 0, ret, "Failed to register CTB re-creation action\n"); + + kunit_info(test, "Testing CTB type 'default'...\n"); + g2g_reinit(test, xe, G2G_CTB_TYPE_DEFAULT, NULL); + g2g_run_test(test, xe); + + kunit_info(test, "Testing CTB type 'host'...\n"); + g2g_reinit(test, xe, G2G_CTB_TYPE_HOST, NULL); + g2g_run_test(test, xe); + + if (IS_DGFX(xe)) { + struct xe_tile *tile; + int id; + + for_each_tile(tile, xe, id) { + kunit_info(test, "Testing CTB type 'tile: #%d'...\n", id); + + g2g_reinit(test, xe, G2G_CTB_TYPE_TILE, tile); + g2g_run_test(test, xe); + } + } else { + kunit_info(test, "Skipping local memory on integrated platform\n"); + } + + kunit_release_action(test, g2g_recreate_ctb, xe); + kunit_release_action(test, g2g_pm_runtime_put, xe); +} + +static struct kunit_case xe_guc_g2g_tests[] = { + KUNIT_CASE_PARAM(xe_live_guc_g2g_kunit_default, xe_pci_live_device_gen_param), + KUNIT_CASE_PARAM(xe_live_guc_g2g_kunit_allmem, xe_pci_live_device_gen_param), + {} +}; + +VISIBLE_IF_KUNIT +struct kunit_suite xe_guc_g2g_test_suite = { + .name = "xe_guc_g2g", + .test_cases = xe_guc_g2g_tests, + .init = xe_kunit_helper_xe_device_live_test_init, +}; +EXPORT_SYMBOL_IF_KUNIT(xe_guc_g2g_test_suite); diff --git a/drivers/gpu/drm/xe/tests/xe_live_test_mod.c b/drivers/gpu/drm/xe/tests/xe_live_test_mod.c index 81277c77016d..c55e46f1ae92 100644 --- a/drivers/gpu/drm/xe/tests/xe_live_test_mod.c +++ b/drivers/gpu/drm/xe/tests/xe_live_test_mod.c @@ -10,12 +10,14 @@ extern struct kunit_suite xe_bo_shrink_test_suite; extern struct kunit_suite xe_dma_buf_test_suite; extern struct kunit_suite xe_migrate_test_suite; extern struct kunit_suite xe_mocs_test_suite; +extern struct kunit_suite xe_guc_g2g_test_suite; kunit_test_suite(xe_bo_test_suite); kunit_test_suite(xe_bo_shrink_test_suite); kunit_test_suite(xe_dma_buf_test_suite); kunit_test_suite(xe_migrate_test_suite); kunit_test_suite(xe_mocs_test_suite); +kunit_test_suite(xe_guc_g2g_test_suite); MODULE_AUTHOR("Intel Corporation"); MODULE_LICENSE("GPL"); diff --git a/drivers/gpu/drm/xe/tests/xe_migrate.c b/drivers/gpu/drm/xe/tests/xe_migrate.c index 4a65e3103f77..5904d658d1f2 100644 --- a/drivers/gpu/drm/xe/tests/xe_migrate.c +++ b/drivers/gpu/drm/xe/tests/xe_migrate.c @@ -70,28 +70,29 @@ static int run_sanity_job(struct xe_migrate *m, struct xe_device *xe, } } while (0) static void test_copy(struct xe_migrate *m, struct xe_bo *bo, - struct kunit *test, u32 region) + struct kunit *test, u32 region, struct drm_exec *exec) { struct xe_device *xe = tile_to_xe(m->tile); u64 retval, expected = 0; - bool big = bo->size >= SZ_2M; + bool big = xe_bo_size(bo) >= SZ_2M; struct dma_fence *fence; const char *str = big ? 
"Copying big bo" : "Copying small bo"; int err; struct xe_bo *remote = xe_bo_create_locked(xe, m->tile, NULL, - bo->size, + xe_bo_size(bo), ttm_bo_type_kernel, region | XE_BO_FLAG_NEEDS_CPU_ACCESS | - XE_BO_FLAG_PINNED); + XE_BO_FLAG_PINNED, + exec); if (IS_ERR(remote)) { KUNIT_FAIL(test, "Failed to allocate remote bo for %s: %pe\n", str, remote); return; } - err = xe_bo_validate(remote, NULL, false); + err = xe_bo_validate(remote, NULL, false, exec); if (err) { KUNIT_FAIL(test, "Failed to validate system bo for %s: %i\n", str, err); @@ -105,7 +106,7 @@ static void test_copy(struct xe_migrate *m, struct xe_bo *bo, goto out_unlock; } - xe_map_memset(xe, &remote->vmap, 0, 0xd0, remote->size); + xe_map_memset(xe, &remote->vmap, 0, 0xd0, xe_bo_size(remote)); fence = xe_migrate_clear(m, remote, remote->ttm.resource, XE_MIGRATE_CLEAR_FLAG_FULL); if (!sanity_fence_failed(xe, fence, big ? "Clearing remote big bo" : @@ -113,15 +114,15 @@ static void test_copy(struct xe_migrate *m, struct xe_bo *bo, retval = xe_map_rd(xe, &remote->vmap, 0, u64); check(retval, expected, "remote first offset should be cleared", test); - retval = xe_map_rd(xe, &remote->vmap, remote->size - 8, u64); + retval = xe_map_rd(xe, &remote->vmap, xe_bo_size(remote) - 8, u64); check(retval, expected, "remote last offset should be cleared", test); } dma_fence_put(fence); /* Try to copy 0xc0 from remote to vram with 2MB or 64KiB/4KiB pages */ - xe_map_memset(xe, &remote->vmap, 0, 0xc0, remote->size); - xe_map_memset(xe, &bo->vmap, 0, 0xd0, bo->size); + xe_map_memset(xe, &remote->vmap, 0, 0xc0, xe_bo_size(remote)); + xe_map_memset(xe, &bo->vmap, 0, 0xd0, xe_bo_size(bo)); expected = 0xc0c0c0c0c0c0c0c0; fence = xe_migrate_copy(m, remote, bo, remote->ttm.resource, @@ -131,15 +132,15 @@ static void test_copy(struct xe_migrate *m, struct xe_bo *bo, retval = xe_map_rd(xe, &bo->vmap, 0, u64); check(retval, expected, "remote -> vram bo first offset should be copied", test); - retval = xe_map_rd(xe, &bo->vmap, bo->size - 8, u64); + retval = xe_map_rd(xe, &bo->vmap, xe_bo_size(bo) - 8, u64); check(retval, expected, "remote -> vram bo offset should be copied", test); } dma_fence_put(fence); /* And other way around.. slightly hacky.. 
*/ - xe_map_memset(xe, &remote->vmap, 0, 0xd0, remote->size); - xe_map_memset(xe, &bo->vmap, 0, 0xc0, bo->size); + xe_map_memset(xe, &remote->vmap, 0, 0xd0, xe_bo_size(remote)); + xe_map_memset(xe, &bo->vmap, 0, 0xc0, xe_bo_size(bo)); fence = xe_migrate_copy(m, bo, remote, bo->ttm.resource, remote->ttm.resource, false); @@ -148,7 +149,7 @@ static void test_copy(struct xe_migrate *m, struct xe_bo *bo, retval = xe_map_rd(xe, &remote->vmap, 0, u64); check(retval, expected, "vram -> remote bo first offset should be copied", test); - retval = xe_map_rd(xe, &remote->vmap, bo->size - 8, u64); + retval = xe_map_rd(xe, &remote->vmap, xe_bo_size(bo) - 8, u64); check(retval, expected, "vram -> remote bo last offset should be copied", test); } @@ -161,13 +162,13 @@ out_unlock: } static void test_copy_sysmem(struct xe_migrate *m, struct xe_bo *bo, - struct kunit *test) + struct drm_exec *exec, struct kunit *test) { - test_copy(m, bo, test, XE_BO_FLAG_SYSTEM); + test_copy(m, bo, test, XE_BO_FLAG_SYSTEM, exec); } static void test_copy_vram(struct xe_migrate *m, struct xe_bo *bo, - struct kunit *test) + struct drm_exec *exec, struct kunit *test) { u32 region; @@ -178,10 +179,11 @@ static void test_copy_vram(struct xe_migrate *m, struct xe_bo *bo, region = XE_BO_FLAG_VRAM1; else region = XE_BO_FLAG_VRAM0; - test_copy(m, bo, test, region); + test_copy(m, bo, test, region, exec); } -static void xe_migrate_sanity_test(struct xe_migrate *m, struct kunit *test) +static void xe_migrate_sanity_test(struct xe_migrate *m, struct kunit *test, + struct drm_exec *exec) { struct xe_tile *tile = m->tile; struct xe_device *xe = tile_to_xe(tile); @@ -202,7 +204,8 @@ static void xe_migrate_sanity_test(struct xe_migrate *m, struct kunit *test) big = xe_bo_create_pin_map(xe, tile, m->q->vm, SZ_4M, ttm_bo_type_kernel, - XE_BO_FLAG_VRAM_IF_DGFX(tile)); + XE_BO_FLAG_VRAM_IF_DGFX(tile), + exec); if (IS_ERR(big)) { KUNIT_FAIL(test, "Failed to allocate bo: %li\n", PTR_ERR(big)); goto vunmap; @@ -210,7 +213,8 @@ static void xe_migrate_sanity_test(struct xe_migrate *m, struct kunit *test) pt = xe_bo_create_pin_map(xe, tile, m->q->vm, XE_PAGE_SIZE, ttm_bo_type_kernel, - XE_BO_FLAG_VRAM_IF_DGFX(tile)); + XE_BO_FLAG_VRAM_IF_DGFX(tile), + exec); if (IS_ERR(pt)) { KUNIT_FAIL(test, "Failed to allocate fake pt: %li\n", PTR_ERR(pt)); @@ -220,7 +224,8 @@ static void xe_migrate_sanity_test(struct xe_migrate *m, struct kunit *test) tiny = xe_bo_create_pin_map(xe, tile, m->q->vm, 2 * SZ_4K, ttm_bo_type_kernel, - XE_BO_FLAG_VRAM_IF_DGFX(tile)); + XE_BO_FLAG_VRAM_IF_DGFX(tile), + exec); if (IS_ERR(tiny)) { KUNIT_FAIL(test, "Failed to allocate tiny fake pt: %li\n", PTR_ERR(tiny)); @@ -245,9 +250,9 @@ static void xe_migrate_sanity_test(struct xe_migrate *m, struct kunit *test) if (m->q->vm->flags & XE_VM_FLAG_64K) expected |= XE_PTE_PS64; if (xe_bo_is_vram(pt)) - xe_res_first(pt->ttm.resource, 0, pt->size, &src_it); + xe_res_first(pt->ttm.resource, 0, xe_bo_size(pt), &src_it); else - xe_res_first_sg(xe_bo_sg(pt), 0, pt->size, &src_it); + xe_res_first_sg(xe_bo_sg(pt), 0, xe_bo_size(pt), &src_it); emit_pte(m, bb, NUM_KERNEL_PDE - 1, xe_bo_is_vram(pt), false, &src_it, XE_PAGE_SIZE, pt->ttm.resource); @@ -276,7 +281,7 @@ static void xe_migrate_sanity_test(struct xe_migrate *m, struct kunit *test) /* Clear a small bo */ kunit_info(test, "Clearing small buffer object\n"); - xe_map_memset(xe, &tiny->vmap, 0, 0x22, tiny->size); + xe_map_memset(xe, &tiny->vmap, 0, 0x22, xe_bo_size(tiny)); expected = 0; fence = xe_migrate_clear(m, tiny, tiny->ttm.resource, 
XE_MIGRATE_CLEAR_FLAG_FULL); @@ -286,19 +291,19 @@ static void xe_migrate_sanity_test(struct xe_migrate *m, struct kunit *test) dma_fence_put(fence); retval = xe_map_rd(xe, &tiny->vmap, 0, u32); check(retval, expected, "Command clear small first value", test); - retval = xe_map_rd(xe, &tiny->vmap, tiny->size - 4, u32); + retval = xe_map_rd(xe, &tiny->vmap, xe_bo_size(tiny) - 4, u32); check(retval, expected, "Command clear small last value", test); kunit_info(test, "Copying small buffer object to system\n"); - test_copy_sysmem(m, tiny, test); + test_copy_sysmem(m, tiny, exec, test); if (xe->info.tile_count > 1) { kunit_info(test, "Copying small buffer object to other vram\n"); - test_copy_vram(m, tiny, test); + test_copy_vram(m, tiny, exec, test); } /* Clear a big bo */ kunit_info(test, "Clearing big buffer object\n"); - xe_map_memset(xe, &big->vmap, 0, 0x11, big->size); + xe_map_memset(xe, &big->vmap, 0, 0x11, xe_bo_size(big)); expected = 0; fence = xe_migrate_clear(m, big, big->ttm.resource, XE_MIGRATE_CLEAR_FLAG_FULL); @@ -308,14 +313,14 @@ static void xe_migrate_sanity_test(struct xe_migrate *m, struct kunit *test) dma_fence_put(fence); retval = xe_map_rd(xe, &big->vmap, 0, u32); check(retval, expected, "Command clear big first value", test); - retval = xe_map_rd(xe, &big->vmap, big->size - 4, u32); + retval = xe_map_rd(xe, &big->vmap, xe_bo_size(big) - 4, u32); check(retval, expected, "Command clear big last value", test); kunit_info(test, "Copying big buffer object to system\n"); - test_copy_sysmem(m, big, test); + test_copy_sysmem(m, big, exec, test); if (xe->info.tile_count > 1) { kunit_info(test, "Copying big buffer object to other vram\n"); - test_copy_vram(m, big, test); + test_copy_vram(m, big, exec, test); } out: @@ -343,10 +348,11 @@ static int migrate_test_run_device(struct xe_device *xe) for_each_tile(tile, xe, id) { struct xe_migrate *m = tile->migrate; + struct drm_exec *exec = XE_VALIDATION_OPT_OUT; kunit_info(test, "Testing tile id %d.\n", id); xe_vm_lock(m->q->vm, false); - xe_migrate_sanity_test(m, test); + xe_migrate_sanity_test(m, test, exec); xe_vm_unlock(m->q->vm); } @@ -370,7 +376,7 @@ static struct dma_fence *blt_copy(struct xe_tile *tile, struct xe_migrate *m = tile->migrate; struct xe_device *xe = gt_to_xe(gt); struct dma_fence *fence = NULL; - u64 size = src_bo->size; + u64 size = xe_bo_size(src_bo); struct xe_res_cursor src_it, dst_it; struct ttm_resource *src = src_bo->ttm.resource, *dst = dst_bo->ttm.resource; u64 src_L0_ofs, dst_L0_ofs; @@ -490,7 +496,7 @@ err_sync: static void test_migrate(struct xe_device *xe, struct xe_tile *tile, struct xe_bo *sys_bo, struct xe_bo *vram_bo, struct xe_bo *ccs_bo, - struct kunit *test) + struct drm_exec *exec, struct kunit *test) { struct dma_fence *fence; u64 expected, retval; @@ -498,7 +504,7 @@ static void test_migrate(struct xe_device *xe, struct xe_tile *tile, long ret; expected = 0xd0d0d0d0d0d0d0d0; - xe_map_memset(xe, &sys_bo->vmap, 0, 0xd0, sys_bo->size); + xe_map_memset(xe, &sys_bo->vmap, 0, 0xd0, xe_bo_size(sys_bo)); fence = blt_copy(tile, sys_bo, vram_bo, false, "Blit copy from sysmem to vram", test); if (!sanity_fence_failed(xe, fence, "Blit copy from sysmem to vram", test)) { @@ -509,7 +515,7 @@ static void test_migrate(struct xe_device *xe, struct xe_tile *tile, dma_fence_put(fence); kunit_info(test, "Evict vram buffer object\n"); - ret = xe_bo_evict(vram_bo); + ret = xe_bo_evict(vram_bo, exec); if (ret) { KUNIT_FAIL(test, "Failed to evict bo.\n"); return; @@ -523,7 +529,7 @@ static void test_migrate(struct 
xe_device *xe, struct xe_tile *tile, retval = xe_map_rd(xe, &vram_bo->vmap, 0, u64); check(retval, expected, "Clear evicted vram data first value", test); - retval = xe_map_rd(xe, &vram_bo->vmap, vram_bo->size - 8, u64); + retval = xe_map_rd(xe, &vram_bo->vmap, xe_bo_size(vram_bo) - 8, u64); check(retval, expected, "Clear evicted vram data last value", test); fence = blt_copy(tile, vram_bo, ccs_bo, @@ -532,13 +538,13 @@ static void test_migrate(struct xe_device *xe, struct xe_tile *tile, retval = xe_map_rd(xe, &ccs_bo->vmap, 0, u64); check(retval, 0, "Clear ccs data first value", test); - retval = xe_map_rd(xe, &ccs_bo->vmap, ccs_bo->size - 8, u64); + retval = xe_map_rd(xe, &ccs_bo->vmap, xe_bo_size(ccs_bo) - 8, u64); check(retval, 0, "Clear ccs data last value", test); } dma_fence_put(fence); kunit_info(test, "Restore vram buffer object\n"); - ret = xe_bo_validate(vram_bo, NULL, false); + ret = xe_bo_validate(vram_bo, NULL, false, exec); if (ret) { KUNIT_FAIL(test, "Failed to validate vram bo for: %li\n", ret); return; @@ -562,7 +568,7 @@ static void test_migrate(struct xe_device *xe, struct xe_tile *tile, retval = xe_map_rd(xe, &vram_bo->vmap, 0, u64); check(retval, expected, "Restored value must be equal to initial value", test); - retval = xe_map_rd(xe, &vram_bo->vmap, vram_bo->size - 8, u64); + retval = xe_map_rd(xe, &vram_bo->vmap, xe_bo_size(vram_bo) - 8, u64); check(retval, expected, "Restored value must be equal to initial value", test); fence = blt_copy(tile, vram_bo, ccs_bo, @@ -570,7 +576,7 @@ static void test_migrate(struct xe_device *xe, struct xe_tile *tile, if (!sanity_fence_failed(xe, fence, "Clear ccs buffer data", test)) { retval = xe_map_rd(xe, &ccs_bo->vmap, 0, u64); check(retval, 0, "Clear ccs data first value", test); - retval = xe_map_rd(xe, &ccs_bo->vmap, ccs_bo->size - 8, u64); + retval = xe_map_rd(xe, &ccs_bo->vmap, xe_bo_size(ccs_bo) - 8, u64); check(retval, 0, "Clear ccs data last value", test); } dma_fence_put(fence); @@ -583,7 +589,7 @@ static void test_clear(struct xe_device *xe, struct xe_tile *tile, u64 expected, retval; expected = 0xd0d0d0d0d0d0d0d0; - xe_map_memset(xe, &sys_bo->vmap, 0, 0xd0, sys_bo->size); + xe_map_memset(xe, &sys_bo->vmap, 0, 0xd0, xe_bo_size(sys_bo)); fence = blt_copy(tile, sys_bo, vram_bo, false, "Blit copy from sysmem to vram", test); if (!sanity_fence_failed(xe, fence, "Blit copy from sysmem to vram", test)) { @@ -597,7 +603,7 @@ static void test_clear(struct xe_device *xe, struct xe_tile *tile, if (!sanity_fence_failed(xe, fence, "Blit copy from vram to sysmem", test)) { retval = xe_map_rd(xe, &sys_bo->vmap, 0, u64); check(retval, expected, "Decompressed value must be equal to initial value", test); - retval = xe_map_rd(xe, &sys_bo->vmap, sys_bo->size - 8, u64); + retval = xe_map_rd(xe, &sys_bo->vmap, xe_bo_size(sys_bo) - 8, u64); check(retval, expected, "Decompressed value must be equal to initial value", test); } dma_fence_put(fence); @@ -615,7 +621,7 @@ static void test_clear(struct xe_device *xe, struct xe_tile *tile, if (!sanity_fence_failed(xe, fence, "Clear main buffer data", test)) { retval = xe_map_rd(xe, &sys_bo->vmap, 0, u64); check(retval, expected, "Clear main buffer first value", test); - retval = xe_map_rd(xe, &sys_bo->vmap, sys_bo->size - 8, u64); + retval = xe_map_rd(xe, &sys_bo->vmap, xe_bo_size(sys_bo) - 8, u64); check(retval, expected, "Clear main buffer last value", test); } dma_fence_put(fence); @@ -625,7 +631,7 @@ static void test_clear(struct xe_device *xe, struct xe_tile *tile, if (!sanity_fence_failed(xe, 
fence, "Clear ccs buffer data", test)) { retval = xe_map_rd(xe, &sys_bo->vmap, 0, u64); check(retval, expected, "Clear ccs data first value", test); - retval = xe_map_rd(xe, &sys_bo->vmap, sys_bo->size - 8, u64); + retval = xe_map_rd(xe, &sys_bo->vmap, xe_bo_size(sys_bo) - 8, u64); check(retval, expected, "Clear ccs data last value", test); } dma_fence_put(fence); @@ -636,13 +642,14 @@ static void validate_ccs_test_run_tile(struct xe_device *xe, struct xe_tile *til { struct xe_bo *sys_bo, *vram_bo = NULL, *ccs_bo = NULL; unsigned int bo_flags = XE_BO_FLAG_VRAM_IF_DGFX(tile); + struct drm_exec *exec; long ret; - sys_bo = xe_bo_create_user(xe, NULL, NULL, SZ_4M, + sys_bo = xe_bo_create_user(xe, NULL, SZ_4M, DRM_XE_GEM_CPU_CACHING_WC, XE_BO_FLAG_SYSTEM | XE_BO_FLAG_NEEDS_CPU_ACCESS | - XE_BO_FLAG_PINNED); + XE_BO_FLAG_PINNED, NULL); if (IS_ERR(sys_bo)) { KUNIT_FAIL(test, "xe_bo_create() failed with err=%ld\n", @@ -650,8 +657,9 @@ static void validate_ccs_test_run_tile(struct xe_device *xe, struct xe_tile *til return; } + exec = XE_VALIDATION_OPT_OUT; xe_bo_lock(sys_bo, false); - ret = xe_bo_validate(sys_bo, NULL, false); + ret = xe_bo_validate(sys_bo, NULL, false, exec); if (ret) { KUNIT_FAIL(test, "Failed to validate system bo for: %li\n", ret); goto free_sysbo; @@ -664,10 +672,10 @@ static void validate_ccs_test_run_tile(struct xe_device *xe, struct xe_tile *til } xe_bo_unlock(sys_bo); - ccs_bo = xe_bo_create_user(xe, NULL, NULL, SZ_4M, + ccs_bo = xe_bo_create_user(xe, NULL, SZ_4M, DRM_XE_GEM_CPU_CACHING_WC, bo_flags | XE_BO_FLAG_NEEDS_CPU_ACCESS | - XE_BO_FLAG_PINNED); + XE_BO_FLAG_PINNED, NULL); if (IS_ERR(ccs_bo)) { KUNIT_FAIL(test, "xe_bo_create() failed with err=%ld\n", @@ -676,7 +684,7 @@ static void validate_ccs_test_run_tile(struct xe_device *xe, struct xe_tile *til } xe_bo_lock(ccs_bo, false); - ret = xe_bo_validate(ccs_bo, NULL, false); + ret = xe_bo_validate(ccs_bo, NULL, false, exec); if (ret) { KUNIT_FAIL(test, "Failed to validate system bo for: %li\n", ret); goto free_ccsbo; @@ -689,10 +697,10 @@ static void validate_ccs_test_run_tile(struct xe_device *xe, struct xe_tile *til } xe_bo_unlock(ccs_bo); - vram_bo = xe_bo_create_user(xe, NULL, NULL, SZ_4M, + vram_bo = xe_bo_create_user(xe, NULL, SZ_4M, DRM_XE_GEM_CPU_CACHING_WC, bo_flags | XE_BO_FLAG_NEEDS_CPU_ACCESS | - XE_BO_FLAG_PINNED); + XE_BO_FLAG_PINNED, NULL); if (IS_ERR(vram_bo)) { KUNIT_FAIL(test, "xe_bo_create() failed with err=%ld\n", PTR_ERR(vram_bo)); @@ -700,7 +708,7 @@ static void validate_ccs_test_run_tile(struct xe_device *xe, struct xe_tile *til } xe_bo_lock(vram_bo, false); - ret = xe_bo_validate(vram_bo, NULL, false); + ret = xe_bo_validate(vram_bo, NULL, false, exec); if (ret) { KUNIT_FAIL(test, "Failed to validate vram bo for: %li\n", ret); goto free_vrambo; @@ -713,7 +721,7 @@ static void validate_ccs_test_run_tile(struct xe_device *xe, struct xe_tile *til } test_clear(xe, tile, sys_bo, vram_bo, test); - test_migrate(xe, tile, sys_bo, vram_bo, ccs_bo, test); + test_migrate(xe, tile, sys_bo, vram_bo, ccs_bo, exec, test); xe_bo_unlock(vram_bo); xe_bo_lock(vram_bo, false); diff --git a/drivers/gpu/drm/xe/tests/xe_pci.c b/drivers/gpu/drm/xe/tests/xe_pci.c index baccb657bd05..663a79ec960d 100644 --- a/drivers/gpu/drm/xe/tests/xe_pci.c +++ b/drivers/gpu/drm/xe/tests/xe_pci.c @@ -12,17 +12,238 @@ #include <kunit/test-bug.h> #include <kunit/visibility.h> +#define PLATFORM_CASE(platform__, graphics_step__) \ + { \ + .platform = XE_ ## platform__, \ + .subplatform = XE_SUBPLATFORM_NONE, \ + .step = { .graphics = STEP_ 
## graphics_step__ } \ + } + +#define SUBPLATFORM_CASE(platform__, subplatform__, graphics_step__) \ + { \ + .platform = XE_ ## platform__, \ + .subplatform = XE_SUBPLATFORM_ ## platform__ ## _ ## subplatform__, \ + .step = { .graphics = STEP_ ## graphics_step__ } \ + } + +#define GMDID_CASE(platform__, graphics_verx100__, graphics_step__, \ + media_verx100__, media_step__) \ + { \ + .platform = XE_ ## platform__, \ + .subplatform = XE_SUBPLATFORM_NONE, \ + .graphics_verx100 = graphics_verx100__, \ + .media_verx100 = media_verx100__, \ + .step = { .graphics = STEP_ ## graphics_step__, \ + .media = STEP_ ## media_step__ } \ + } + +static const struct xe_pci_fake_data cases[] = { + PLATFORM_CASE(TIGERLAKE, B0), + PLATFORM_CASE(DG1, A0), + PLATFORM_CASE(DG1, B0), + PLATFORM_CASE(ALDERLAKE_S, A0), + PLATFORM_CASE(ALDERLAKE_S, B0), + PLATFORM_CASE(ALDERLAKE_S, C0), + PLATFORM_CASE(ALDERLAKE_S, D0), + PLATFORM_CASE(ALDERLAKE_P, A0), + PLATFORM_CASE(ALDERLAKE_P, B0), + PLATFORM_CASE(ALDERLAKE_P, C0), + SUBPLATFORM_CASE(ALDERLAKE_S, RPLS, D0), + SUBPLATFORM_CASE(ALDERLAKE_P, RPLU, E0), + SUBPLATFORM_CASE(DG2, G10, C0), + SUBPLATFORM_CASE(DG2, G11, B1), + SUBPLATFORM_CASE(DG2, G12, A1), + GMDID_CASE(METEORLAKE, 1270, A0, 1300, A0), + GMDID_CASE(METEORLAKE, 1271, A0, 1300, A0), + GMDID_CASE(METEORLAKE, 1274, A0, 1300, A0), + GMDID_CASE(LUNARLAKE, 2004, A0, 2000, A0), + GMDID_CASE(LUNARLAKE, 2004, B0, 2000, A0), + GMDID_CASE(BATTLEMAGE, 2001, A0, 1301, A1), + GMDID_CASE(PANTHERLAKE, 3000, A0, 3000, A0), +}; + +KUNIT_ARRAY_PARAM(platform, cases, xe_pci_fake_data_desc); + +/** + * xe_pci_fake_data_gen_params - Generate struct xe_pci_fake_data parameters + * @test: test context object + * @prev: the pointer to the previous parameter to iterate from or NULL + * @desc: output buffer with minimum size of KUNIT_PARAM_DESC_SIZE + * + * This function prepares struct xe_pci_fake_data parameter. + * + * To be used only as a parameter generator function in &KUNIT_CASE_PARAM. + * + * Return: pointer to the next parameter or NULL if no more parameters + */ +const void *xe_pci_fake_data_gen_params(struct kunit *test, const void *prev, char *desc) +{ + return platform_gen_params(test, prev, desc); +} +EXPORT_SYMBOL_IF_KUNIT(xe_pci_fake_data_gen_params); + +static const struct xe_device_desc *lookup_desc(enum xe_platform p) +{ + const struct xe_device_desc *desc; + const struct pci_device_id *ids; + + for (ids = pciidlist; ids->driver_data; ids++) { + desc = (const void *)ids->driver_data; + if (desc->platform == p) + return desc; + } + return NULL; +} + +static const struct xe_subplatform_desc *lookup_sub_desc(enum xe_platform p, enum xe_subplatform s) +{ + const struct xe_device_desc *desc = lookup_desc(p); + const struct xe_subplatform_desc *spd; + + if (desc && desc->subplatforms) + for (spd = desc->subplatforms; spd->subplatform; spd++) + if (spd->subplatform == s) + return spd; + return NULL; +} + +static const char *lookup_platform_name(enum xe_platform p) +{ + const struct xe_device_desc *desc = lookup_desc(p); + + return desc ? desc->platform_name : "INVALID"; +} + +static const char *__lookup_subplatform_name(enum xe_platform p, enum xe_subplatform s) +{ + const struct xe_subplatform_desc *desc = lookup_sub_desc(p, s); + + return desc ? desc->name : "INVALID"; +} + +static const char *lookup_subplatform_name(enum xe_platform p, enum xe_subplatform s) +{ + return s == XE_SUBPLATFORM_NONE ? 
"" : __lookup_subplatform_name(p, s); +} + +static const char *subplatform_prefix(enum xe_subplatform s) +{ + return s == XE_SUBPLATFORM_NONE ? "" : " "; +} + +static const char *step_prefix(enum xe_step step) +{ + return step == STEP_NONE ? "" : " "; +} + +static const char *step_name(enum xe_step step) +{ + return step == STEP_NONE ? "" : xe_step_name(step); +} + +static const char *sriov_prefix(enum xe_sriov_mode mode) +{ + return mode <= XE_SRIOV_MODE_NONE ? "" : " "; +} + +static const char *sriov_name(enum xe_sriov_mode mode) +{ + return mode <= XE_SRIOV_MODE_NONE ? "" : xe_sriov_mode_to_string(mode); +} + +static const char *lookup_graphics_name(unsigned int verx100) +{ + const struct xe_ip *ip = find_graphics_ip(verx100); + + return ip ? ip->name : ""; +} + +static const char *lookup_media_name(unsigned int verx100) +{ + const struct xe_ip *ip = find_media_ip(verx100); + + return ip ? ip->name : ""; +} + +/** + * xe_pci_fake_data_desc - Describe struct xe_pci_fake_data parameter + * @param: the &struct xe_pci_fake_data parameter to describe + * @desc: output buffer with minimum size of KUNIT_PARAM_DESC_SIZE + * + * This function prepares description of the struct xe_pci_fake_data parameter. + * + * It is tailored for use in parameterized KUnit tests where parameter generator + * is based on the struct xe_pci_fake_data arrays. + */ +void xe_pci_fake_data_desc(const struct xe_pci_fake_data *param, char *desc) +{ + if (param->graphics_verx100 || param->media_verx100) + snprintf(desc, KUNIT_PARAM_DESC_SIZE, "%s%s%s %u.%02u(%s)%s%s %u.%02u(%s)%s%s%s%s", + lookup_platform_name(param->platform), + subplatform_prefix(param->subplatform), + lookup_subplatform_name(param->platform, param->subplatform), + param->graphics_verx100 / 100, param->graphics_verx100 % 100, + lookup_graphics_name(param->graphics_verx100), + step_prefix(param->step.graphics), step_name(param->step.graphics), + param->media_verx100 / 100, param->media_verx100 % 100, + lookup_media_name(param->media_verx100), + step_prefix(param->step.media), step_name(param->step.media), + sriov_prefix(param->sriov_mode), sriov_name(param->sriov_mode)); + else + snprintf(desc, KUNIT_PARAM_DESC_SIZE, "%s%s%s%s%s%s%s", + lookup_platform_name(param->platform), + subplatform_prefix(param->subplatform), + lookup_subplatform_name(param->platform, param->subplatform), + step_prefix(param->step.graphics), step_name(param->step.graphics), + sriov_prefix(param->sriov_mode), sriov_name(param->sriov_mode)); +} +EXPORT_SYMBOL_IF_KUNIT(xe_pci_fake_data_desc); + static void xe_ip_kunit_desc(const struct xe_ip *param, char *desc) { snprintf(desc, KUNIT_PARAM_DESC_SIZE, "%u.%02u %s", param->verx100 / 100, param->verx100 % 100, param->name); } +/* + * Pre-GMDID Graphics and Media IPs definitions. 
+ * + * Mimic the way GMDID IPs are declared so the same + * param generator can be used for both + */ +static const struct xe_ip pre_gmdid_graphics_ips[] = { + { 1200, "Xe_LP", &graphics_xelp }, + { 1210, "Xe_LP+", &graphics_xelp }, + { 1255, "Xe_HPG", &graphics_xehpg }, + { 1260, "Xe_HPC", &graphics_xehpc }, +}; + +static const struct xe_ip pre_gmdid_media_ips[] = { + { 1200, "Xe_M", &media_xem }, + { 1255, "Xe_HPM", &media_xem }, +}; + +KUNIT_ARRAY_PARAM(pre_gmdid_graphics_ip, pre_gmdid_graphics_ips, xe_ip_kunit_desc); +KUNIT_ARRAY_PARAM(pre_gmdid_media_ip, pre_gmdid_media_ips, xe_ip_kunit_desc); + KUNIT_ARRAY_PARAM(graphics_ip, graphics_ips, xe_ip_kunit_desc); KUNIT_ARRAY_PARAM(media_ip, media_ips, xe_ip_kunit_desc); +static void xe_pci_id_kunit_desc(const struct pci_device_id *param, char *desc) +{ + const struct xe_device_desc *dev_desc = + (const struct xe_device_desc *)param->driver_data; + + if (dev_desc) + snprintf(desc, KUNIT_PARAM_DESC_SIZE, "0x%X (%s)", + param->device, dev_desc->platform_name); +} + +KUNIT_ARRAY_PARAM(pci_id, pciidlist, xe_pci_id_kunit_desc); + /** * xe_pci_graphics_ip_gen_param - Generate graphics struct xe_ip parameters + * @test: test context object * @prev: the pointer to the previous parameter to iterate from or NULL * @desc: output buffer with minimum size of KUNIT_PARAM_DESC_SIZE * @@ -32,14 +253,22 @@ KUNIT_ARRAY_PARAM(media_ip, media_ips, xe_ip_kunit_desc); * * Return: pointer to the next parameter or NULL if no more parameters */ -const void *xe_pci_graphics_ip_gen_param(const void *prev, char *desc) +const void *xe_pci_graphics_ip_gen_param(struct kunit *test, const void *prev, char *desc) { - return graphics_ip_gen_params(prev, desc); + const void *next = pre_gmdid_graphics_ip_gen_params(test, prev, desc); + + if (next) + return next; + if (is_insidevar(prev, pre_gmdid_graphics_ips)) + prev = NULL; + + return graphics_ip_gen_params(test, prev, desc); } EXPORT_SYMBOL_IF_KUNIT(xe_pci_graphics_ip_gen_param); /** * xe_pci_media_ip_gen_param - Generate media struct xe_ip parameters + * @test: test context object * @prev: the pointer to the previous parameter to iterate from or NULL * @desc: output buffer with minimum size of KUNIT_PARAM_DESC_SIZE * @@ -49,12 +278,39 @@ EXPORT_SYMBOL_IF_KUNIT(xe_pci_graphics_ip_gen_param); * * Return: pointer to the next parameter or NULL if no more parameters */ -const void *xe_pci_media_ip_gen_param(const void *prev, char *desc) +const void *xe_pci_media_ip_gen_param(struct kunit *test, const void *prev, char *desc) { - return media_ip_gen_params(prev, desc); + const void *next = pre_gmdid_media_ip_gen_params(test, prev, desc); + + if (next) + return next; + if (is_insidevar(prev, pre_gmdid_media_ips)) + prev = NULL; + + return media_ip_gen_params(test, prev, desc); } EXPORT_SYMBOL_IF_KUNIT(xe_pci_media_ip_gen_param); +/** + * xe_pci_id_gen_param - Generate struct pci_device_id parameters + * @test: test context object + * @prev: the pointer to the previous parameter to iterate from or NULL + * @desc: output buffer with minimum size of KUNIT_PARAM_DESC_SIZE + * + * This function prepares struct pci_device_id parameter. + * + * To be used only as a parameter generator function in &KUNIT_CASE_PARAM. + * + * Return: pointer to the next parameter or NULL if no more parameters + */ +const void *xe_pci_id_gen_param(struct kunit *test, const void *prev, char *desc) +{ + const struct pci_device_id *pci = pci_id_gen_params(test, prev, desc); + + return pci->driver_data ? 
pci : NULL; +} +EXPORT_SYMBOL_IF_KUNIT(xe_pci_id_gen_param); + static void fake_read_gmdid(struct xe_device *xe, enum xe_gmdid_type type, u32 *ver, u32 *revid) { @@ -63,13 +319,18 @@ static void fake_read_gmdid(struct xe_device *xe, enum xe_gmdid_type type, if (type == GMDID_MEDIA) { *ver = data->media_verx100; - *revid = xe_step_to_gmdid(data->media_step); + *revid = xe_step_to_gmdid(data->step.media); } else { *ver = data->graphics_verx100; - *revid = xe_step_to_gmdid(data->graphics_step); + *revid = xe_step_to_gmdid(data->step.graphics); } } +static void fake_xe_info_probe_tile_count(struct xe_device *xe) +{ + /* Nothing to do, just use the statically defined value. */ +} + int xe_pci_fake_device_init(struct xe_device *xe) { struct kunit *test = kunit_get_current_test(); @@ -107,6 +368,8 @@ done: data->sriov_mode : XE_SRIOV_MODE_NONE; kunit_activate_static_stub(test, read_gmdid, fake_read_gmdid); + kunit_activate_static_stub(test, xe_info_probe_tile_count, + fake_xe_info_probe_tile_count); xe_info_init_early(xe, desc, subplatform_desc); xe_info_init(xe, desc); @@ -117,6 +380,7 @@ EXPORT_SYMBOL_IF_KUNIT(xe_pci_fake_device_init); /** * xe_pci_live_device_gen_param - Helper to iterate Xe devices as KUnit parameters + * @test: test context object * @prev: the previously returned value, or NULL for the first iteration * @desc: the buffer for a parameter name * @@ -128,7 +392,7 @@ EXPORT_SYMBOL_IF_KUNIT(xe_pci_fake_device_init); * Return: pointer to the next &struct xe_device ready to be used as a parameter * or NULL if there are no more Xe devices on the system. */ -const void *xe_pci_live_device_gen_param(const void *prev, char *desc) +const void *xe_pci_live_device_gen_param(struct kunit *test, const void *prev, char *desc) { const struct xe_device *xe = prev; struct device *dev = xe ? 
xe->drm.dev : NULL; diff --git a/drivers/gpu/drm/xe/tests/xe_pci_test.c b/drivers/gpu/drm/xe/tests/xe_pci_test.c index 95fed41f7ff2..37b344df2dc3 100644 --- a/drivers/gpu/drm/xe/tests/xe_pci_test.c +++ b/drivers/gpu/drm/xe/tests/xe_pci_test.c @@ -44,9 +44,21 @@ static void check_media_ip(struct kunit *test) KUNIT_ASSERT_EQ(test, mask, 0); } +static void check_platform_gt_count(struct kunit *test) +{ + const struct pci_device_id *pci = test->param_value; + const struct xe_device_desc *desc = + (const struct xe_device_desc *)pci->driver_data; + int max_gt = desc->max_gt_per_tile; + + KUNIT_ASSERT_GT(test, max_gt, 0); + KUNIT_ASSERT_LE(test, max_gt, XE_MAX_GT_PER_TILE); +} + static struct kunit_case xe_pci_tests[] = { KUNIT_CASE_PARAM(check_graphics_ip, xe_pci_graphics_ip_gen_param), KUNIT_CASE_PARAM(check_media_ip, xe_pci_media_ip_gen_param), + KUNIT_CASE_PARAM(check_platform_gt_count, xe_pci_id_gen_param), {} }; diff --git a/drivers/gpu/drm/xe/tests/xe_pci_test.h b/drivers/gpu/drm/xe/tests/xe_pci_test.h index 3a1df7a5e291..30505d1cbefc 100644 --- a/drivers/gpu/drm/xe/tests/xe_pci_test.h +++ b/drivers/gpu/drm/xe/tests/xe_pci_test.h @@ -7,9 +7,11 @@ #define _XE_PCI_TEST_H_ #include <linux/types.h> +#include <kunit/test.h> #include "xe_platform_types.h" #include "xe_sriov_types.h" +#include "xe_step_types.h" struct xe_device; @@ -17,16 +19,18 @@ struct xe_pci_fake_data { enum xe_sriov_mode sriov_mode; enum xe_platform platform; enum xe_subplatform subplatform; + struct xe_step_info step; u32 graphics_verx100; u32 media_verx100; - u32 graphics_step; - u32 media_step; }; int xe_pci_fake_device_init(struct xe_device *xe); +const void *xe_pci_fake_data_gen_params(struct kunit *test, const void *prev, char *desc); +void xe_pci_fake_data_desc(const struct xe_pci_fake_data *param, char *desc); -const void *xe_pci_graphics_ip_gen_param(const void *prev, char *desc); -const void *xe_pci_media_ip_gen_param(const void *prev, char *desc); -const void *xe_pci_live_device_gen_param(const void *prev, char *desc); +const void *xe_pci_graphics_ip_gen_param(struct kunit *test, const void *prev, char *desc); +const void *xe_pci_media_ip_gen_param(struct kunit *test, const void *prev, char *desc); +const void *xe_pci_id_gen_param(struct kunit *test, const void *prev, char *desc); +const void *xe_pci_live_device_gen_param(struct kunit *test, const void *prev, char *desc); #endif diff --git a/drivers/gpu/drm/xe/tests/xe_sriov_pf_service_kunit.c b/drivers/gpu/drm/xe/tests/xe_sriov_pf_service_kunit.c new file mode 100644 index 000000000000..ba95e29b597d --- /dev/null +++ b/drivers/gpu/drm/xe/tests/xe_sriov_pf_service_kunit.c @@ -0,0 +1,227 @@ +// SPDX-License-Identifier: GPL-2.0 AND MIT +/* + * Copyright © 2024-2025 Intel Corporation + */ + +#include <kunit/test.h> + +#include "xe_device.h" +#include "xe_kunit_helpers.h" +#include "xe_pci_test.h" + +static int pf_service_test_init(struct kunit *test) +{ + struct xe_pci_fake_data fake = { + .sriov_mode = XE_SRIOV_MODE_PF, + .platform = XE_TIGERLAKE, /* some random platform */ + .subplatform = XE_SUBPLATFORM_NONE, + }; + struct xe_device *xe; + + test->priv = &fake; + xe_kunit_helper_xe_device_test_init(test); + + xe = test->priv; + KUNIT_ASSERT_EQ(test, xe_sriov_init(xe), 0); + + xe_sriov_pf_service_init(xe); + /* + * sanity check: + * - all supported platforms VF/PF ABI versions must be defined + * - base version can't be newer than latest + */ + KUNIT_ASSERT_NE(test, 0, xe->sriov.pf.service.version.base.major); + KUNIT_ASSERT_NE(test, 0, 
xe->sriov.pf.service.version.latest.major); + KUNIT_ASSERT_LE(test, xe->sriov.pf.service.version.base.major, + xe->sriov.pf.service.version.latest.major); + if (xe->sriov.pf.service.version.base.major == xe->sriov.pf.service.version.latest.major) + KUNIT_ASSERT_LE(test, xe->sriov.pf.service.version.base.minor, + xe->sriov.pf.service.version.latest.minor); + return 0; +} + +static void pf_negotiate_any(struct kunit *test) +{ + struct xe_device *xe = test->priv; + u32 major, minor; + + KUNIT_ASSERT_EQ(test, 0, + pf_negotiate_version(xe, VF2PF_HANDSHAKE_MAJOR_ANY, + VF2PF_HANDSHAKE_MINOR_ANY, + &major, &minor)); + KUNIT_ASSERT_EQ(test, major, xe->sriov.pf.service.version.latest.major); + KUNIT_ASSERT_EQ(test, minor, xe->sriov.pf.service.version.latest.minor); +} + +static void pf_negotiate_base_match(struct kunit *test) +{ + struct xe_device *xe = test->priv; + u32 major, minor; + + KUNIT_ASSERT_EQ(test, 0, + pf_negotiate_version(xe, + xe->sriov.pf.service.version.base.major, + xe->sriov.pf.service.version.base.minor, + &major, &minor)); + KUNIT_ASSERT_EQ(test, major, xe->sriov.pf.service.version.base.major); + KUNIT_ASSERT_EQ(test, minor, xe->sriov.pf.service.version.base.minor); +} + +static void pf_negotiate_base_newer(struct kunit *test) +{ + struct xe_device *xe = test->priv; + u32 major, minor; + + KUNIT_ASSERT_EQ(test, 0, + pf_negotiate_version(xe, + xe->sriov.pf.service.version.base.major, + xe->sriov.pf.service.version.base.minor + 1, + &major, &minor)); + KUNIT_ASSERT_EQ(test, major, xe->sriov.pf.service.version.base.major); + KUNIT_ASSERT_GE(test, minor, xe->sriov.pf.service.version.base.minor); + if (xe->sriov.pf.service.version.base.major == xe->sriov.pf.service.version.latest.major) + KUNIT_ASSERT_LE(test, minor, xe->sriov.pf.service.version.latest.minor); + else + KUNIT_FAIL(test, "FIXME: don't know how to test multi-version yet!\n"); +} + +static void pf_negotiate_base_next(struct kunit *test) +{ + struct xe_device *xe = test->priv; + u32 major, minor; + + KUNIT_ASSERT_EQ(test, 0, + pf_negotiate_version(xe, + xe->sriov.pf.service.version.base.major + 1, 0, + &major, &minor)); + KUNIT_ASSERT_GE(test, major, xe->sriov.pf.service.version.base.major); + KUNIT_ASSERT_LE(test, major, xe->sriov.pf.service.version.latest.major); + if (major == xe->sriov.pf.service.version.latest.major) + KUNIT_ASSERT_LE(test, minor, xe->sriov.pf.service.version.latest.minor); + else + KUNIT_FAIL(test, "FIXME: don't know how to test multi-version yet!\n"); +} + +static void pf_negotiate_base_older(struct kunit *test) +{ + struct xe_device *xe = test->priv; + u32 major, minor; + + if (!xe->sriov.pf.service.version.base.minor) + kunit_skip(test, "no older minor\n"); + + KUNIT_ASSERT_NE(test, 0, + pf_negotiate_version(xe, + xe->sriov.pf.service.version.base.major, + xe->sriov.pf.service.version.base.minor - 1, + &major, &minor)); +} + +static void pf_negotiate_base_prev(struct kunit *test) +{ + struct xe_device *xe = test->priv; + u32 major, minor; + + KUNIT_ASSERT_NE(test, 0, + pf_negotiate_version(xe, + xe->sriov.pf.service.version.base.major - 1, 1, + &major, &minor)); +} + +static void pf_negotiate_latest_match(struct kunit *test) +{ + struct xe_device *xe = test->priv; + u32 major, minor; + + KUNIT_ASSERT_EQ(test, 0, + pf_negotiate_version(xe, + xe->sriov.pf.service.version.latest.major, + xe->sriov.pf.service.version.latest.minor, + &major, &minor)); + KUNIT_ASSERT_EQ(test, major, xe->sriov.pf.service.version.latest.major); + KUNIT_ASSERT_EQ(test, minor, xe->sriov.pf.service.version.latest.minor); 
+} + +static void pf_negotiate_latest_newer(struct kunit *test) +{ + struct xe_device *xe = test->priv; + u32 major, minor; + + KUNIT_ASSERT_EQ(test, 0, + pf_negotiate_version(xe, + xe->sriov.pf.service.version.latest.major, + xe->sriov.pf.service.version.latest.minor + 1, + &major, &minor)); + KUNIT_ASSERT_EQ(test, major, xe->sriov.pf.service.version.latest.major); + KUNIT_ASSERT_EQ(test, minor, xe->sriov.pf.service.version.latest.minor); +} + +static void pf_negotiate_latest_next(struct kunit *test) +{ + struct xe_device *xe = test->priv; + u32 major, minor; + + KUNIT_ASSERT_EQ(test, 0, + pf_negotiate_version(xe, + xe->sriov.pf.service.version.latest.major + 1, 0, + &major, &minor)); + KUNIT_ASSERT_EQ(test, major, xe->sriov.pf.service.version.latest.major); + KUNIT_ASSERT_EQ(test, minor, xe->sriov.pf.service.version.latest.minor); +} + +static void pf_negotiate_latest_older(struct kunit *test) +{ + struct xe_device *xe = test->priv; + u32 major, minor; + + if (!xe->sriov.pf.service.version.latest.minor) + kunit_skip(test, "no older minor\n"); + + KUNIT_ASSERT_EQ(test, 0, + pf_negotiate_version(xe, + xe->sriov.pf.service.version.latest.major, + xe->sriov.pf.service.version.latest.minor - 1, + &major, &minor)); + KUNIT_ASSERT_EQ(test, major, xe->sriov.pf.service.version.latest.major); + KUNIT_ASSERT_EQ(test, minor, xe->sriov.pf.service.version.latest.minor - 1); +} + +static void pf_negotiate_latest_prev(struct kunit *test) +{ + struct xe_device *xe = test->priv; + u32 major, minor; + + if (xe->sriov.pf.service.version.base.major == xe->sriov.pf.service.version.latest.major) + kunit_skip(test, "no prev major"); + + KUNIT_ASSERT_EQ(test, 0, + pf_negotiate_version(xe, + xe->sriov.pf.service.version.latest.major - 1, + xe->sriov.pf.service.version.base.minor + 1, + &major, &minor)); + KUNIT_ASSERT_EQ(test, major, xe->sriov.pf.service.version.latest.major - 1); + KUNIT_ASSERT_GE(test, major, xe->sriov.pf.service.version.base.major); +} + +static struct kunit_case pf_service_test_cases[] = { + KUNIT_CASE(pf_negotiate_any), + KUNIT_CASE(pf_negotiate_base_match), + KUNIT_CASE(pf_negotiate_base_newer), + KUNIT_CASE(pf_negotiate_base_next), + KUNIT_CASE(pf_negotiate_base_older), + KUNIT_CASE(pf_negotiate_base_prev), + KUNIT_CASE(pf_negotiate_latest_match), + KUNIT_CASE(pf_negotiate_latest_newer), + KUNIT_CASE(pf_negotiate_latest_next), + KUNIT_CASE(pf_negotiate_latest_older), + KUNIT_CASE(pf_negotiate_latest_prev), + {} +}; + +static struct kunit_suite pf_service_suite = { + .name = "pf_service", + .test_cases = pf_service_test_cases, + .init = pf_service_test_init, +}; + +kunit_test_suite(pf_service_suite); diff --git a/drivers/gpu/drm/xe/tests/xe_wa_test.c b/drivers/gpu/drm/xe/tests/xe_wa_test.c index c96d1fe34151..49d191043dfa 100644 --- a/drivers/gpu/drm/xe/tests/xe_wa_test.c +++ b/drivers/gpu/drm/xe/tests/xe_wa_test.c @@ -15,86 +15,10 @@ #include "xe_tuning.h" #include "xe_wa.h" -struct platform_test_case { - const char *name; - enum xe_platform platform; - enum xe_subplatform subplatform; - u32 graphics_verx100; - u32 media_verx100; - struct xe_step_info step; -}; - -#define PLATFORM_CASE(platform__, graphics_step__) \ - { \ - .name = #platform__ " (" #graphics_step__ ")", \ - .platform = XE_ ## platform__, \ - .subplatform = XE_SUBPLATFORM_NONE, \ - .step = { .graphics = STEP_ ## graphics_step__ } \ - } - - -#define SUBPLATFORM_CASE(platform__, subplatform__, graphics_step__) \ - { \ - .name = #platform__ "_" #subplatform__ " (" #graphics_step__ ")", \ - .platform = XE_ ## platform__, \ - 
.subplatform = XE_SUBPLATFORM_ ## platform__ ## _ ## subplatform__, \ - .step = { .graphics = STEP_ ## graphics_step__ } \ - } - -#define GMDID_CASE(platform__, graphics_verx100__, graphics_step__, \ - media_verx100__, media_step__) \ - { \ - .name = #platform__ " (g:" #graphics_step__ ", m:" #media_step__ ")",\ - .platform = XE_ ## platform__, \ - .subplatform = XE_SUBPLATFORM_NONE, \ - .graphics_verx100 = graphics_verx100__, \ - .media_verx100 = media_verx100__, \ - .step = { .graphics = STEP_ ## graphics_step__, \ - .media = STEP_ ## media_step__ } \ - } - -static const struct platform_test_case cases[] = { - PLATFORM_CASE(TIGERLAKE, B0), - PLATFORM_CASE(DG1, A0), - PLATFORM_CASE(DG1, B0), - PLATFORM_CASE(ALDERLAKE_S, A0), - PLATFORM_CASE(ALDERLAKE_S, B0), - PLATFORM_CASE(ALDERLAKE_S, C0), - PLATFORM_CASE(ALDERLAKE_S, D0), - PLATFORM_CASE(ALDERLAKE_P, A0), - PLATFORM_CASE(ALDERLAKE_P, B0), - PLATFORM_CASE(ALDERLAKE_P, C0), - SUBPLATFORM_CASE(ALDERLAKE_S, RPLS, D0), - SUBPLATFORM_CASE(ALDERLAKE_P, RPLU, E0), - SUBPLATFORM_CASE(DG2, G10, C0), - SUBPLATFORM_CASE(DG2, G11, B1), - SUBPLATFORM_CASE(DG2, G12, A1), - GMDID_CASE(METEORLAKE, 1270, A0, 1300, A0), - GMDID_CASE(METEORLAKE, 1271, A0, 1300, A0), - GMDID_CASE(METEORLAKE, 1274, A0, 1300, A0), - GMDID_CASE(LUNARLAKE, 2004, A0, 2000, A0), - GMDID_CASE(LUNARLAKE, 2004, B0, 2000, A0), - GMDID_CASE(BATTLEMAGE, 2001, A0, 1301, A1), -}; - -static void platform_desc(const struct platform_test_case *t, char *desc) -{ - strscpy(desc, t->name, KUNIT_PARAM_DESC_SIZE); -} - -KUNIT_ARRAY_PARAM(platform, cases, platform_desc); - static int xe_wa_test_init(struct kunit *test) { - const struct platform_test_case *param = test->param_value; - struct xe_pci_fake_data data = { - .platform = param->platform, - .subplatform = param->subplatform, - .graphics_verx100 = param->graphics_verx100, - .media_verx100 = param->media_verx100, - .graphics_step = param->step.graphics, - .media_step = param->step.media, - }; + const struct xe_pci_fake_data *param = test->param_value; + struct xe_pci_fake_data data = *param; struct xe_device *xe; struct device *dev; int ret; @@ -119,13 +43,6 @@ static int xe_wa_test_init(struct kunit *test) return 0; } -static void xe_wa_test_exit(struct kunit *test) -{ - struct xe_device *xe = test->priv; - - drm_kunit_helper_free_device(test, xe->drm.dev); -} - static void xe_wa_gt(struct kunit *test) { struct xe_device *xe = test->priv; @@ -143,14 +60,13 @@ static void xe_wa_gt(struct kunit *test) } static struct kunit_case xe_wa_tests[] = { - KUNIT_CASE_PARAM(xe_wa_gt, platform_gen_params), + KUNIT_CASE_PARAM(xe_wa_gt, xe_pci_fake_data_gen_params), {} }; static struct kunit_suite xe_rtp_test_suite = { .name = "xe_wa", .init = xe_wa_test_init, - .exit = xe_wa_test_exit, .test_cases = xe_wa_tests, }; diff --git a/drivers/gpu/drm/xe/xe_assert.h b/drivers/gpu/drm/xe/xe_assert.h index 68fe70ce2be3..a818eaa05b7d 100644 --- a/drivers/gpu/drm/xe/xe_assert.h +++ b/drivers/gpu/drm/xe/xe_assert.h @@ -12,6 +12,7 @@ #include "xe_gt_types.h" #include "xe_step.h" +#include "xe_vram.h" /** * DOC: Xe Asserts @@ -145,7 +146,8 @@ const struct xe_tile *__tile = (tile); \ char __buf[10] __maybe_unused; \ xe_assert_msg(tile_to_xe(__tile), condition, "tile: %u VRAM %s\n" msg, \ - __tile->id, ({ string_get_size(__tile->mem.vram.actual_physical_size, 1, \ + __tile->id, ({ string_get_size( \ + xe_vram_region_actual_physical_size(__tile->mem.vram), 1, \ STRING_UNITS_2, __buf, sizeof(__buf)); __buf; }), ## arg); \ }) diff --git a/drivers/gpu/drm/xe/xe_bb.c 
b/drivers/gpu/drm/xe/xe_bb.c index 9570672fce33..6d20229c11de 100644 --- a/drivers/gpu/drm/xe/xe_bb.c +++ b/drivers/gpu/drm/xe/xe_bb.c @@ -19,7 +19,7 @@ static int bb_prefetch(struct xe_gt *gt) { struct xe_device *xe = gt_to_xe(gt); - if (GRAPHICS_VERx100(xe) >= 1250 && !xe_gt_is_media_type(gt)) + if (GRAPHICS_VERx100(xe) >= 1250 && xe_gt_is_main_type(gt)) /* * RCS and CCS require 1K, although other engines would be * okay with 512. @@ -60,6 +60,41 @@ err: return ERR_PTR(err); } +struct xe_bb *xe_bb_ccs_new(struct xe_gt *gt, u32 dwords, + enum xe_sriov_vf_ccs_rw_ctxs ctx_id) +{ + struct xe_bb *bb = kmalloc(sizeof(*bb), GFP_KERNEL); + struct xe_device *xe = gt_to_xe(gt); + struct xe_sa_manager *bb_pool; + int err; + + if (!bb) + return ERR_PTR(-ENOMEM); + /* + * We need to allocate space for the requested number of dwords & + * one additional MI_BATCH_BUFFER_END dword. Since the whole SA + * is submitted to HW, we need to make sure that the last instruction + * is not over written when the last chunk of SA is allocated for BB. + * So, this extra DW acts as a guard here. + */ + + bb_pool = xe->sriov.vf.ccs.contexts[ctx_id].mem.ccs_bb_pool; + bb->bo = xe_sa_bo_new(bb_pool, 4 * (dwords + 1)); + + if (IS_ERR(bb->bo)) { + err = PTR_ERR(bb->bo); + goto err; + } + + bb->cs = xe_sa_bo_cpu_addr(bb->bo); + bb->len = 0; + + return bb; +err: + kfree(bb); + return ERR_PTR(err); +} + static struct xe_sched_job * __xe_bb_create_job(struct xe_exec_queue *q, struct xe_bb *bb, u64 *addr) { diff --git a/drivers/gpu/drm/xe/xe_bb.h b/drivers/gpu/drm/xe/xe_bb.h index fafacd73dcc3..2a8adc9a6dee 100644 --- a/drivers/gpu/drm/xe/xe_bb.h +++ b/drivers/gpu/drm/xe/xe_bb.h @@ -13,8 +13,11 @@ struct dma_fence; struct xe_gt; struct xe_exec_queue; struct xe_sched_job; +enum xe_sriov_vf_ccs_rw_ctxs; -struct xe_bb *xe_bb_new(struct xe_gt *gt, u32 size, bool usm); +struct xe_bb *xe_bb_new(struct xe_gt *gt, u32 dwords, bool usm); +struct xe_bb *xe_bb_ccs_new(struct xe_gt *gt, u32 dwords, + enum xe_sriov_vf_ccs_rw_ctxs ctx_id); struct xe_sched_job *xe_bb_create_job(struct xe_exec_queue *q, struct xe_bb *bb); struct xe_sched_job *xe_bb_create_migration_job(struct xe_exec_queue *q, diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c index 4e39188a021a..4410e28dee54 100644 --- a/drivers/gpu/drm/xe/xe_bo.c +++ b/drivers/gpu/drm/xe/xe_bo.c @@ -19,6 +19,8 @@ #include <kunit/static_stub.h> +#include <trace/events/gpu_mem.h> + #include "xe_device.h" #include "xe_dma_buf.h" #include "xe_drm_client.h" @@ -31,9 +33,11 @@ #include "xe_pxp.h" #include "xe_res_cursor.h" #include "xe_shrinker.h" +#include "xe_sriov_vf_ccs.h" #include "xe_trace_bo.h" #include "xe_ttm_stolen_mgr.h" #include "xe_vm.h" +#include "xe_vram_types.h" const char *const xe_mem_type_to_name[TTM_NUM_MEM_TYPES] = { [XE_PL_SYSTEM] = "system", @@ -184,6 +188,8 @@ static void try_add_system(struct xe_device *xe, struct xe_bo *bo, bo->placements[*c] = (struct ttm_place) { .mem_type = XE_PL_TT, + .flags = (bo_flags & XE_BO_FLAG_VRAM_MASK) ? 
+ TTM_PL_FLAG_FALLBACK : 0, }; *c += 1; } @@ -196,6 +202,8 @@ static bool force_contiguous(u32 bo_flags) else if (bo_flags & XE_BO_FLAG_PINNED && !(bo_flags & XE_BO_FLAG_PINNED_LATE_RESTORE)) return true; /* needs vmap */ + else if (bo_flags & XE_BO_FLAG_CPU_ADDR_MIRROR) + return true; /* * For eviction / restore on suspend / resume objects pinned in VRAM @@ -418,6 +426,19 @@ static void xe_ttm_tt_account_subtract(struct xe_device *xe, struct ttm_tt *tt) xe_shrinker_mod_pages(xe->mem.shrinker, -(long)tt->num_pages, 0); } +static void update_global_total_pages(struct ttm_device *ttm_dev, + long num_pages) +{ +#if IS_ENABLED(CONFIG_TRACE_GPU_MEM) + struct xe_device *xe = ttm_to_xe_device(ttm_dev); + u64 global_total_pages = + atomic64_add_return(num_pages, &xe->global_total_pages); + + trace_gpu_mem_total(xe->drm.primary->index, 0, + global_total_pages << PAGE_SHIFT); +#endif +} + static struct ttm_tt *xe_ttm_tt_create(struct ttm_buffer_object *ttm_bo, u32 page_flags) { @@ -437,7 +458,7 @@ static struct ttm_tt *xe_ttm_tt_create(struct ttm_buffer_object *ttm_bo, extra_pages = 0; if (xe_bo_needs_ccs_pages(bo)) - extra_pages = DIV_ROUND_UP(xe_device_ccs_bytes(xe, bo->size), + extra_pages = DIV_ROUND_UP(xe_device_ccs_bytes(xe, xe_bo_size(bo)), PAGE_SIZE); /* @@ -525,6 +546,7 @@ static int xe_ttm_tt_populate(struct ttm_device *ttm_dev, struct ttm_tt *tt, xe_tt->purgeable = false; xe_ttm_tt_account_add(ttm_to_xe_device(ttm_dev), tt); + update_global_total_pages(ttm_dev, tt->num_pages); return 0; } @@ -541,6 +563,7 @@ static void xe_ttm_tt_unpopulate(struct ttm_device *ttm_dev, struct ttm_tt *tt) ttm_pool_free(&ttm_dev->pool, tt); xe_ttm_tt_account_subtract(xe, tt); + update_global_total_pages(ttm_dev, -(long)tt->num_pages); } static void xe_ttm_tt_destroy(struct ttm_device *ttm_dev, struct ttm_tt *tt) @@ -795,14 +818,14 @@ static int xe_bo_move(struct ttm_buffer_object *ttm_bo, bool evict, } if (ttm_bo->type == ttm_bo_type_sg) { - ret = xe_bo_move_notify(bo, ctx); + if (new_mem->mem_type == XE_PL_SYSTEM) + ret = xe_bo_move_notify(bo, ctx); if (!ret) ret = xe_bo_move_dmabuf(ttm_bo, new_mem); return ret; } - tt_has_data = ttm && (ttm_tt_is_populated(ttm) || - (ttm->page_flags & TTM_TT_FLAG_SWAPPED)); + tt_has_data = ttm && (ttm_tt_is_populated(ttm) || ttm_tt_is_swapped(ttm)); move_lacks_source = !old_mem || (handle_system_ccs ? (!bo->ccs_cleared) : (!mem_type_is_vram(old_mem_type) && !tt_has_data)); @@ -947,6 +970,20 @@ static int xe_bo_move(struct ttm_buffer_object *ttm_bo, bool evict, dma_fence_put(fence); xe_pm_runtime_put(xe); + /* + * CCS meta data is migrated from TT -> SMEM. So, let us detach the + * BBs from BO as it is no longer needed. 
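+	 * If the BO is later moved back into TT with CCS backing, the BBs are
+	 * re-attached by the xe_sriov_vf_ccs_attach_bo() call just below.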
+ */ + if (IS_VF_CCS_READY(xe) && old_mem_type == XE_PL_TT && + new_mem->mem_type == XE_PL_SYSTEM) + xe_sriov_vf_ccs_detach_bo(bo); + + if (IS_VF_CCS_READY(xe) && + ((move_lacks_source && new_mem->mem_type == XE_PL_TT) || + (old_mem_type == XE_PL_SYSTEM && new_mem->mem_type == XE_PL_TT)) && + handle_system_ccs) + ret = xe_sriov_vf_ccs_attach_bo(bo); + out: if ((!ttm_bo->resource || ttm_bo->resource->mem_type == XE_PL_SYSTEM) && ttm_bo->ttm) { @@ -957,6 +994,9 @@ out: if (timeout < 0) ret = timeout; + if (IS_VF_CCS_READY(xe)) + xe_sriov_vf_ccs_detach_bo(bo); + xe_tt_unmap_sg(xe, ttm_bo->ttm); } @@ -1101,42 +1141,47 @@ out_unref: int xe_bo_notifier_prepare_pinned(struct xe_bo *bo) { struct xe_device *xe = ttm_to_xe_device(bo->ttm.bdev); + struct xe_validation_ctx ctx; + struct drm_exec exec; struct xe_bo *backup; int ret = 0; - xe_bo_lock(bo, false); + xe_validation_guard(&ctx, &xe->val, &exec, (struct xe_val_flags) {.exclusive = true}, ret) { + ret = drm_exec_lock_obj(&exec, &bo->ttm.base); + drm_exec_retry_on_contention(&exec); + xe_assert(xe, !ret); + xe_assert(xe, !bo->backup_obj); - xe_assert(xe, !bo->backup_obj); + /* + * Since this is called from the PM notifier we might have raced with + * someone unpinning this after we dropped the pinned list lock and + * grabbing the above bo lock. + */ + if (!xe_bo_is_pinned(bo)) + break; - /* - * Since this is called from the PM notifier we might have raced with - * someone unpinning this after we dropped the pinned list lock and - * grabbing the above bo lock. - */ - if (!xe_bo_is_pinned(bo)) - goto out_unlock_bo; + if (!xe_bo_is_vram(bo)) + break; - if (!xe_bo_is_vram(bo)) - goto out_unlock_bo; + if (bo->flags & XE_BO_FLAG_PINNED_NORESTORE) + break; - if (bo->flags & XE_BO_FLAG_PINNED_NORESTORE) - goto out_unlock_bo; + backup = xe_bo_init_locked(xe, NULL, NULL, bo->ttm.base.resv, NULL, xe_bo_size(bo), + DRM_XE_GEM_CPU_CACHING_WB, ttm_bo_type_kernel, + XE_BO_FLAG_SYSTEM | XE_BO_FLAG_NEEDS_CPU_ACCESS | + XE_BO_FLAG_PINNED, &exec); + if (IS_ERR(backup)) { + drm_exec_retry_on_contention(&exec); + ret = PTR_ERR(backup); + xe_validation_retry_on_oom(&ctx, &ret); + break; + } - backup = ___xe_bo_create_locked(xe, NULL, NULL, bo->ttm.base.resv, NULL, bo->size, - DRM_XE_GEM_CPU_CACHING_WB, ttm_bo_type_kernel, - XE_BO_FLAG_SYSTEM | XE_BO_FLAG_NEEDS_CPU_ACCESS | - XE_BO_FLAG_PINNED); - if (IS_ERR(backup)) { - ret = PTR_ERR(backup); - goto out_unlock_bo; + backup->parent_obj = xe_bo_get(bo); /* Released by bo_destroy */ + ttm_bo_pin(&backup->ttm); + bo->backup_obj = backup; } - backup->parent_obj = xe_bo_get(bo); /* Released by bo_destroy */ - ttm_bo_pin(&backup->ttm); - bo->backup_obj = backup; - -out_unlock_bo: - xe_bo_unlock(bo); return ret; } @@ -1162,56 +1207,12 @@ int xe_bo_notifier_unprepare_pinned(struct xe_bo *bo) return 0; } -/** - * xe_bo_evict_pinned() - Evict a pinned VRAM object to system memory - * @bo: The buffer object to move. - * - * On successful completion, the object memory will be moved to system memory. - * - * This is needed to for special handling of pinned VRAM object during - * suspend-resume. - * - * Return: 0 on success. Negative error code on failure. 
- */ -int xe_bo_evict_pinned(struct xe_bo *bo) +static int xe_bo_evict_pinned_copy(struct xe_bo *bo, struct xe_bo *backup) { - struct xe_device *xe = ttm_to_xe_device(bo->ttm.bdev); - struct xe_bo *backup = bo->backup_obj; - bool backup_created = false; + struct xe_device *xe = xe_bo_device(bo); bool unmap = false; int ret = 0; - xe_bo_lock(bo, false); - - if (WARN_ON(!bo->ttm.resource)) { - ret = -EINVAL; - goto out_unlock_bo; - } - - if (WARN_ON(!xe_bo_is_pinned(bo))) { - ret = -EINVAL; - goto out_unlock_bo; - } - - if (!xe_bo_is_vram(bo)) - goto out_unlock_bo; - - if (bo->flags & XE_BO_FLAG_PINNED_NORESTORE) - goto out_unlock_bo; - - if (!backup) { - backup = ___xe_bo_create_locked(xe, NULL, NULL, bo->ttm.base.resv, NULL, bo->size, - DRM_XE_GEM_CPU_CACHING_WB, ttm_bo_type_kernel, - XE_BO_FLAG_SYSTEM | XE_BO_FLAG_NEEDS_CPU_ACCESS | - XE_BO_FLAG_PINNED); - if (IS_ERR(backup)) { - ret = PTR_ERR(backup); - goto out_unlock_bo; - } - backup->parent_obj = xe_bo_get(bo); /* Released by bo_destroy */ - backup_created = true; - } - if (xe_bo_is_user(bo) || (bo->flags & XE_BO_FLAG_PINNED_LATE_RESTORE)) { struct xe_migrate *migrate; struct dma_fence *fence; @@ -1221,14 +1222,11 @@ int xe_bo_evict_pinned(struct xe_bo *bo) else migrate = mem_type_to_migrate(xe, bo->ttm.resource->mem_type); + xe_assert(xe, bo->ttm.base.resv == backup->ttm.base.resv); ret = dma_resv_reserve_fences(bo->ttm.base.resv, 1); if (ret) goto out_backup; - ret = dma_resv_reserve_fences(backup->ttm.base.resv, 1); - if (ret) - goto out_backup; - fence = xe_migrate_copy(migrate, bo, backup, bo->ttm.resource, backup->ttm.resource, false); if (IS_ERR(fence)) { @@ -1238,8 +1236,6 @@ int xe_bo_evict_pinned(struct xe_bo *bo) dma_resv_add_fence(bo->ttm.base.resv, fence, DMA_RESV_USAGE_KERNEL); - dma_resv_add_fence(backup->ttm.base.resv, fence, - DMA_RESV_USAGE_KERNEL); dma_fence_put(fence); } else { ret = xe_bo_vmap(backup); @@ -1249,25 +1245,88 @@ int xe_bo_evict_pinned(struct xe_bo *bo) if (iosys_map_is_null(&bo->vmap)) { ret = xe_bo_vmap(bo); if (ret) - goto out_backup; + goto out_vunmap; unmap = true; } xe_map_memcpy_from(xe, backup->vmap.vaddr, &bo->vmap, 0, - bo->size); + xe_bo_size(bo)); } if (!bo->backup_obj) bo->backup_obj = backup; - -out_backup: +out_vunmap: xe_bo_vunmap(backup); - if (ret && backup_created) - xe_bo_put(backup); -out_unlock_bo: +out_backup: if (unmap) xe_bo_vunmap(bo); - xe_bo_unlock(bo); + + return ret; +} + +/** + * xe_bo_evict_pinned() - Evict a pinned VRAM object to system memory + * @bo: The buffer object to move. + * + * On successful completion, the object memory will be moved to system memory. + * + * This is needed to for special handling of pinned VRAM object during + * suspend-resume. + * + * Return: 0 on success. Negative error code on failure. 
+ */ +int xe_bo_evict_pinned(struct xe_bo *bo) +{ + struct xe_device *xe = ttm_to_xe_device(bo->ttm.bdev); + struct xe_validation_ctx ctx; + struct drm_exec exec; + struct xe_bo *backup = bo->backup_obj; + bool backup_created = false; + int ret = 0; + + xe_validation_guard(&ctx, &xe->val, &exec, (struct xe_val_flags) {.exclusive = true}, ret) { + ret = drm_exec_lock_obj(&exec, &bo->ttm.base); + drm_exec_retry_on_contention(&exec); + xe_assert(xe, !ret); + + if (WARN_ON(!bo->ttm.resource)) { + ret = -EINVAL; + break; + } + + if (WARN_ON(!xe_bo_is_pinned(bo))) { + ret = -EINVAL; + break; + } + + if (!xe_bo_is_vram(bo)) + break; + + if (bo->flags & XE_BO_FLAG_PINNED_NORESTORE) + break; + + if (!backup) { + backup = xe_bo_init_locked(xe, NULL, NULL, bo->ttm.base.resv, NULL, + xe_bo_size(bo), + DRM_XE_GEM_CPU_CACHING_WB, ttm_bo_type_kernel, + XE_BO_FLAG_SYSTEM | XE_BO_FLAG_NEEDS_CPU_ACCESS | + XE_BO_FLAG_PINNED, &exec); + if (IS_ERR(backup)) { + drm_exec_retry_on_contention(&exec); + ret = PTR_ERR(backup); + xe_validation_retry_on_oom(&ctx, &ret); + break; + } + backup->parent_obj = xe_bo_get(bo); /* Released by bo_destroy */ + backup_created = true; + } + + ret = xe_bo_evict_pinned_copy(bo, backup); + } + + if (ret && backup_created) + xe_bo_put(backup); + return ret; } @@ -1317,10 +1376,6 @@ int xe_bo_restore_pinned(struct xe_bo *bo) if (ret) goto out_unlock_bo; - ret = dma_resv_reserve_fences(backup->ttm.base.resv, 1); - if (ret) - goto out_unlock_bo; - fence = xe_migrate_copy(migrate, backup, bo, backup->ttm.resource, bo->ttm.resource, false); @@ -1331,8 +1386,6 @@ int xe_bo_restore_pinned(struct xe_bo *bo) dma_resv_add_fence(bo->ttm.base.resv, fence, DMA_RESV_USAGE_KERNEL); - dma_resv_add_fence(backup->ttm.base.resv, fence, - DMA_RESV_USAGE_KERNEL); dma_fence_put(fence); } else { ret = xe_bo_vmap(backup); @@ -1347,7 +1400,7 @@ int xe_bo_restore_pinned(struct xe_bo *bo) } xe_map_memcpy_to(xe, &bo->vmap, 0, backup->vmap.vaddr, - bo->size); + xe_bo_size(bo)); } bo->backup_obj = NULL; @@ -1483,9 +1536,14 @@ static void xe_ttm_bo_release_notify(struct ttm_buffer_object *ttm_bo) static void xe_ttm_bo_delete_mem_notify(struct ttm_buffer_object *ttm_bo) { + struct xe_bo *bo = ttm_to_xe_bo(ttm_bo); + if (!xe_bo_is_xe_bo(ttm_bo)) return; + if (IS_VF_CCS_READY(ttm_to_xe_device(ttm_bo->bdev))) + xe_sriov_vf_ccs_detach_bo(bo); + /* * Object is idle and about to be destroyed. Release the * dma-buf attachment. @@ -1558,7 +1616,7 @@ static int xe_ttm_access_memory(struct ttm_buffer_object *ttm_bo, vram = res_to_mem_region(ttm_bo->resource); xe_res_first(ttm_bo->resource, offset & PAGE_MASK, - bo->size - (offset & PAGE_MASK), &cursor); + xe_bo_size(bo) - (offset & PAGE_MASK), &cursor); do { unsigned long page_offset = (offset & ~PAGE_MASK); @@ -1667,50 +1725,258 @@ static void xe_gem_object_close(struct drm_gem_object *obj, } } -static vm_fault_t xe_gem_fault(struct vm_fault *vmf) +static bool should_migrate_to_smem(struct xe_bo *bo) +{ + /* + * NOTE: The following atomic checks are platform-specific. For example, + * if a device supports CXL atomics, these may not be necessary or + * may behave differently. + */ + + return bo->attr.atomic_access == DRM_XE_ATOMIC_GLOBAL || + bo->attr.atomic_access == DRM_XE_ATOMIC_CPU; +} + +static int xe_bo_wait_usage_kernel(struct xe_bo *bo, struct ttm_operation_ctx *ctx) +{ + long lerr; + + if (ctx->no_wait_gpu) + return dma_resv_test_signaled(bo->ttm.base.resv, DMA_RESV_USAGE_KERNEL) ? 
+ 0 : -EBUSY; + + lerr = dma_resv_wait_timeout(bo->ttm.base.resv, DMA_RESV_USAGE_KERNEL, + ctx->interruptible, MAX_SCHEDULE_TIMEOUT); + if (lerr < 0) + return lerr; + if (lerr == 0) + return -EBUSY; + + return 0; +} + +/* Populate the bo if swapped out, or migrate if the access mode requires that. */ +static int xe_bo_fault_migrate(struct xe_bo *bo, struct ttm_operation_ctx *ctx, + struct drm_exec *exec) +{ + struct ttm_buffer_object *tbo = &bo->ttm; + int err = 0; + + if (ttm_manager_type(tbo->bdev, tbo->resource->mem_type)->use_tt) { + err = xe_bo_wait_usage_kernel(bo, ctx); + if (!err) + err = ttm_bo_populate(&bo->ttm, ctx); + } else if (should_migrate_to_smem(bo)) { + xe_assert(xe_bo_device(bo), bo->flags & XE_BO_FLAG_SYSTEM); + err = xe_bo_migrate(bo, XE_PL_TT, ctx, exec); + } + + return err; +} + +/* Call into TTM to populate PTEs, and register bo for PTE removal on runtime suspend. */ +static vm_fault_t __xe_bo_cpu_fault(struct vm_fault *vmf, struct xe_device *xe, struct xe_bo *bo) +{ + vm_fault_t ret; + + trace_xe_bo_cpu_fault(bo); + + ret = ttm_bo_vm_fault_reserved(vmf, vmf->vma->vm_page_prot, + TTM_BO_VM_NUM_PREFAULT); + /* + * When TTM is actually called to insert PTEs, ensure no blocking conditions + * remain, in which case TTM may drop locks and return VM_FAULT_RETRY. + */ + xe_assert(xe, ret != VM_FAULT_RETRY); + + if (ret == VM_FAULT_NOPAGE && + mem_type_is_vram(bo->ttm.resource->mem_type)) { + mutex_lock(&xe->mem_access.vram_userfault.lock); + if (list_empty(&bo->vram_userfault_link)) + list_add(&bo->vram_userfault_link, + &xe->mem_access.vram_userfault.list); + mutex_unlock(&xe->mem_access.vram_userfault.lock); + } + + return ret; +} + +static vm_fault_t xe_err_to_fault_t(int err) +{ + switch (err) { + case 0: + case -EINTR: + case -ERESTARTSYS: + case -EAGAIN: + return VM_FAULT_NOPAGE; + case -ENOMEM: + case -ENOSPC: + return VM_FAULT_OOM; + default: + break; + } + return VM_FAULT_SIGBUS; +} + +static bool xe_ttm_bo_is_imported(struct ttm_buffer_object *tbo) +{ + dma_resv_assert_held(tbo->base.resv); + + return tbo->ttm && + (tbo->ttm->page_flags & (TTM_TT_FLAG_EXTERNAL | TTM_TT_FLAG_EXTERNAL_MAPPABLE)) == + TTM_TT_FLAG_EXTERNAL; +} + +static vm_fault_t xe_bo_cpu_fault_fastpath(struct vm_fault *vmf, struct xe_device *xe, + struct xe_bo *bo, bool needs_rpm) +{ + struct ttm_buffer_object *tbo = &bo->ttm; + vm_fault_t ret = VM_FAULT_RETRY; + struct xe_validation_ctx ctx; + struct ttm_operation_ctx tctx = { + .interruptible = true, + .no_wait_gpu = true, + .gfp_retry_mayfail = true, + + }; + int err; + + if (needs_rpm && !xe_pm_runtime_get_if_active(xe)) + return VM_FAULT_RETRY; + + err = xe_validation_ctx_init(&ctx, &xe->val, NULL, + (struct xe_val_flags) { + .interruptible = true, + .no_block = true + }); + if (err) + goto out_pm; + + if (!dma_resv_trylock(tbo->base.resv)) + goto out_validation; + + if (xe_ttm_bo_is_imported(tbo)) { + ret = VM_FAULT_SIGBUS; + drm_dbg(&xe->drm, "CPU trying to access an imported buffer object.\n"); + goto out_unlock; + } + + err = xe_bo_fault_migrate(bo, &tctx, NULL); + if (err) { + /* Return VM_FAULT_RETRY on these errors. 
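+		 * (-ENOMEM, -ENOSPC and -EBUSY), so the slowpath, where blocking
+		 * waits are allowed, gets a chance to resolve them.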
*/ + if (err != -ENOMEM && err != -ENOSPC && err != -EBUSY) + ret = xe_err_to_fault_t(err); + goto out_unlock; + } + + if (dma_resv_test_signaled(bo->ttm.base.resv, DMA_RESV_USAGE_KERNEL)) + ret = __xe_bo_cpu_fault(vmf, xe, bo); + +out_unlock: + dma_resv_unlock(tbo->base.resv); +out_validation: + xe_validation_ctx_fini(&ctx); +out_pm: + if (needs_rpm) + xe_pm_runtime_put(xe); + + return ret; +} + +static vm_fault_t xe_bo_cpu_fault(struct vm_fault *vmf) { struct ttm_buffer_object *tbo = vmf->vma->vm_private_data; struct drm_device *ddev = tbo->base.dev; struct xe_device *xe = to_xe_device(ddev); struct xe_bo *bo = ttm_to_xe_bo(tbo); bool needs_rpm = bo->flags & XE_BO_FLAG_VRAM_MASK; + bool retry_after_wait = false; + struct xe_validation_ctx ctx; + struct drm_exec exec; vm_fault_t ret; + int err = 0; int idx; - if (needs_rpm) - xe_pm_runtime_get(xe); + if (!drm_dev_enter(&xe->drm, &idx)) + return ttm_bo_vm_dummy_page(vmf, vmf->vma->vm_page_prot); - ret = ttm_bo_vm_reserve(tbo, vmf); - if (ret) + ret = xe_bo_cpu_fault_fastpath(vmf, xe, bo, needs_rpm); + if (ret != VM_FAULT_RETRY) goto out; - if (drm_dev_enter(ddev, &idx)) { - trace_xe_bo_cpu_fault(bo); - - ret = ttm_bo_vm_fault_reserved(vmf, vmf->vma->vm_page_prot, - TTM_BO_VM_NUM_PREFAULT); - drm_dev_exit(idx); + if (fault_flag_allow_retry_first(vmf->flags)) { + if (vmf->flags & FAULT_FLAG_RETRY_NOWAIT) + goto out; + retry_after_wait = true; + xe_bo_get(bo); + mmap_read_unlock(vmf->vma->vm_mm); } else { - ret = ttm_bo_vm_dummy_page(vmf, vmf->vma->vm_page_prot); + ret = VM_FAULT_NOPAGE; } - if (ret == VM_FAULT_RETRY && !(vmf->flags & FAULT_FLAG_RETRY_NOWAIT)) - goto out; /* - * ttm_bo_vm_reserve() already has dma_resv_lock. + * The fastpath failed and we were not required to return and retry immediately. + * We're now running in one of two modes: + * + * 1) retry_after_wait == true: The mmap_read_lock() is dropped, and we're trying + * to resolve blocking waits. But we can't resolve the fault since the + * mmap_read_lock() is dropped. After retrying the fault, the aim is that the fastpath + * should succeed. But it may fail since we drop the bo lock. + * + * 2) retry_after_wait == false: The fastpath failed, typically even after + * a retry. Do whatever's necessary to resolve the fault. + * + * This construct is recommended to avoid excessive waits under the mmap_lock. */ - if (ret == VM_FAULT_NOPAGE && mem_type_is_vram(tbo->resource->mem_type)) { - mutex_lock(&xe->mem_access.vram_userfault.lock); - if (list_empty(&bo->vram_userfault_link)) - list_add(&bo->vram_userfault_link, &xe->mem_access.vram_userfault.list); - mutex_unlock(&xe->mem_access.vram_userfault.lock); + + if (needs_rpm) + xe_pm_runtime_get(xe); + + xe_validation_guard(&ctx, &xe->val, &exec, (struct xe_val_flags) {.interruptible = true}, + err) { + struct ttm_operation_ctx tctx = { + .interruptible = true, + .no_wait_gpu = false, + .gfp_retry_mayfail = retry_after_wait, + }; + + err = drm_exec_lock_obj(&exec, &tbo->base); + drm_exec_retry_on_contention(&exec); + if (err) + break; + + if (xe_ttm_bo_is_imported(tbo)) { + err = -EFAULT; + drm_dbg(&xe->drm, "CPU trying to access an imported buffer object.\n"); + break; + } + + err = xe_bo_fault_migrate(bo, &tctx, &exec); + if (err) { + drm_exec_retry_on_contention(&exec); + xe_validation_retry_on_oom(&ctx, &err); + break; + } + + err = xe_bo_wait_usage_kernel(bo, &tctx); + if (err) + break; + + if (!retry_after_wait) + ret = __xe_bo_cpu_fault(vmf, xe, bo); } + /* if retry_after_wait == true, we *must* return VM_FAULT_RETRY. 
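+	 * The mmap_read_lock() was dropped in that path, so the core MM
+	 * expects the fault to be retried rather than completed here.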
*/ + if (err && !retry_after_wait) + ret = xe_err_to_fault_t(err); - dma_resv_unlock(tbo->base.resv); -out: if (needs_rpm) xe_pm_runtime_put(xe); + if (retry_after_wait) + xe_bo_put(bo); +out: + drm_dev_exit(idx); + return ret; } @@ -1754,7 +2020,7 @@ int xe_bo_read(struct xe_bo *bo, u64 offset, void *dst, int size) } static const struct vm_operations_struct xe_gem_vm_ops = { - .fault = xe_gem_fault, + .fault = xe_bo_cpu_fault, .open = ttm_bo_vm_open, .close = ttm_bo_vm_close, .access = xe_bo_vm_access, @@ -1802,11 +2068,32 @@ void xe_bo_free(struct xe_bo *bo) kfree(bo); } -struct xe_bo *___xe_bo_create_locked(struct xe_device *xe, struct xe_bo *bo, - struct xe_tile *tile, struct dma_resv *resv, - struct ttm_lru_bulk_move *bulk, size_t size, - u16 cpu_caching, enum ttm_bo_type type, - u32 flags) +/** + * xe_bo_init_locked() - Initialize or create an xe_bo. + * @xe: The xe device. + * @bo: An already allocated buffer object or NULL + * if the function should allocate a new one. + * @tile: The tile to select for migration of this bo, and the tile used for + * GGTT binding if any. Only to be non-NULL for ttm_bo_type_kernel bos. + * @resv: Pointer to a locked shared reservation object to use fo this bo, + * or NULL for the xe_bo to use its own. + * @bulk: The bulk move to use for LRU bumping, or NULL for external bos. + * @size: The storage size to use for the bo. + * @cpu_caching: The cpu caching used for system memory backing store. + * @type: The TTM buffer object type. + * @flags: XE_BO_FLAG_ flags. + * @exec: The drm_exec transaction to use for exhaustive eviction. + * + * Initialize or create an xe buffer object. On failure, any allocated buffer + * object passed in @bo will have been unreferenced. + * + * Return: The buffer object on success. Negative error pointer on failure. + */ +struct xe_bo *xe_bo_init_locked(struct xe_device *xe, struct xe_bo *bo, + struct xe_tile *tile, struct dma_resv *resv, + struct ttm_lru_bulk_move *bulk, size_t size, + u16 cpu_caching, enum ttm_bo_type type, + u32 flags, struct drm_exec *exec) { struct ttm_operation_ctx ctx = { .interruptible = true, @@ -1858,7 +2145,6 @@ struct xe_bo *___xe_bo_create_locked(struct xe_device *xe, struct xe_bo *bo, bo->ccs_cleared = false; bo->tile = tile; - bo->size = size; bo->flags = flags; bo->cpu_caching = cpu_caching; bo->ttm.base.funcs = &xe_gem_object_funcs; @@ -1876,6 +2162,7 @@ struct xe_bo *___xe_bo_create_locked(struct xe_device *xe, struct xe_bo *bo, ctx.resv = resv; } + xe_validation_assert_exec(xe, exec, &bo->ttm.base); if (!(flags & XE_BO_FLAG_FIXED_PLACEMENT)) { err = __xe_bo_placement_for_flags(xe, bo, bo->flags); if (WARN_ON(err)) { @@ -1977,7 +2264,7 @@ __xe_bo_create_locked(struct xe_device *xe, struct xe_tile *tile, struct xe_vm *vm, size_t size, u64 start, u64 end, u16 cpu_caching, enum ttm_bo_type type, u32 flags, - u64 alignment) + u64 alignment, struct drm_exec *exec) { struct xe_bo *bo = NULL; int err; @@ -1998,11 +2285,11 @@ __xe_bo_create_locked(struct xe_device *xe, } } - bo = ___xe_bo_create_locked(xe, bo, tile, vm ? xe_vm_resv(vm) : NULL, - vm && !xe_vm_in_fault_mode(vm) && - flags & XE_BO_FLAG_USER ? - &vm->lru_bulk_move : NULL, size, - cpu_caching, type, flags); + bo = xe_bo_init_locked(xe, bo, tile, vm ? xe_vm_resv(vm) : NULL, + vm && !xe_vm_in_fault_mode(vm) && + flags & XE_BO_FLAG_USER ? 
+ &vm->lru_bulk_move : NULL, size, + cpu_caching, type, flags, exec); if (IS_ERR(bo)) return bo; @@ -2036,9 +2323,10 @@ __xe_bo_create_locked(struct xe_device *xe, if (flags & XE_BO_FLAG_FIXED_PLACEMENT) { err = xe_ggtt_insert_bo_at(t->mem.ggtt, bo, - start + bo->size, U64_MAX); + start + xe_bo_size(bo), U64_MAX, + exec); } else { - err = xe_ggtt_insert_bo(t->mem.ggtt, bo); + err = xe_ggtt_insert_bo(t->mem.ggtt, bo, exec); } if (err) goto err_unlock_put_bo; @@ -2055,82 +2343,166 @@ err_unlock_put_bo: return ERR_PTR(err); } -struct xe_bo * -xe_bo_create_locked_range(struct xe_device *xe, - struct xe_tile *tile, struct xe_vm *vm, - size_t size, u64 start, u64 end, - enum ttm_bo_type type, u32 flags, u64 alignment) -{ - return __xe_bo_create_locked(xe, tile, vm, size, start, end, 0, type, - flags, alignment); -} - +/** + * xe_bo_create_locked() - Create a BO + * @xe: The xe device. + * @tile: The tile to select for migration of this bo, and the tile used for + * GGTT binding if any. Only to be non-NULL for ttm_bo_type_kernel bos. + * @vm: The local vm or NULL for external objects. + * @size: The storage size to use for the bo. + * @type: The TTM buffer object type. + * @flags: XE_BO_FLAG_ flags. + * @exec: The drm_exec transaction to use for exhaustive eviction. + * + * Create a locked xe BO with no range- nor alignment restrictions. + * + * Return: The buffer object on success. Negative error pointer on failure. + */ struct xe_bo *xe_bo_create_locked(struct xe_device *xe, struct xe_tile *tile, struct xe_vm *vm, size_t size, - enum ttm_bo_type type, u32 flags) + enum ttm_bo_type type, u32 flags, + struct drm_exec *exec) { return __xe_bo_create_locked(xe, tile, vm, size, 0, ~0ULL, 0, type, - flags, 0); + flags, 0, exec); } -struct xe_bo *xe_bo_create_user(struct xe_device *xe, struct xe_tile *tile, - struct xe_vm *vm, size_t size, - u16 cpu_caching, - u32 flags) +static struct xe_bo *xe_bo_create_novm(struct xe_device *xe, struct xe_tile *tile, + size_t size, u16 cpu_caching, + enum ttm_bo_type type, u32 flags, + u64 alignment, bool intr) { - struct xe_bo *bo = __xe_bo_create_locked(xe, tile, vm, size, 0, ~0ULL, - cpu_caching, ttm_bo_type_device, - flags | XE_BO_FLAG_USER, 0); - if (!IS_ERR(bo)) - xe_bo_unlock_vm_held(bo); + struct xe_validation_ctx ctx; + struct drm_exec exec; + struct xe_bo *bo; + int ret = 0; - return bo; + xe_validation_guard(&ctx, &xe->val, &exec, (struct xe_val_flags) {.interruptible = intr}, + ret) { + bo = __xe_bo_create_locked(xe, tile, NULL, size, 0, ~0ULL, + cpu_caching, type, flags, alignment, &exec); + drm_exec_retry_on_contention(&exec); + if (IS_ERR(bo)) { + ret = PTR_ERR(bo); + xe_validation_retry_on_oom(&ctx, &ret); + } else { + xe_bo_unlock(bo); + } + } + + return ret ? ERR_PTR(ret) : bo; } -struct xe_bo *xe_bo_create(struct xe_device *xe, struct xe_tile *tile, - struct xe_vm *vm, size_t size, - enum ttm_bo_type type, u32 flags) +/** + * xe_bo_create_user() - Create a user BO + * @xe: The xe device. + * @vm: The local vm or NULL for external objects. + * @size: The storage size to use for the bo. + * @cpu_caching: The caching mode to be used for system backing store. + * @flags: XE_BO_FLAG_ flags. + * @exec: The drm_exec transaction to use for exhaustive eviction, or NULL + * if such a transaction should be initiated by the call. + * + * Create a bo on behalf of user-space. + * + * Return: The buffer object on success. Negative error pointer on failure. 
+ */ +struct xe_bo *xe_bo_create_user(struct xe_device *xe, + struct xe_vm *vm, size_t size, + u16 cpu_caching, + u32 flags, struct drm_exec *exec) { - struct xe_bo *bo = xe_bo_create_locked(xe, tile, vm, size, type, flags); + struct xe_bo *bo; + + flags |= XE_BO_FLAG_USER; - if (!IS_ERR(bo)) - xe_bo_unlock_vm_held(bo); + if (vm || exec) { + xe_assert(xe, exec); + bo = __xe_bo_create_locked(xe, NULL, vm, size, 0, ~0ULL, + cpu_caching, ttm_bo_type_device, + flags, 0, exec); + if (!IS_ERR(bo)) + xe_bo_unlock_vm_held(bo); + } else { + bo = xe_bo_create_novm(xe, NULL, size, cpu_caching, + ttm_bo_type_device, flags, 0, true); + } return bo; } -struct xe_bo *xe_bo_create_pin_map_at(struct xe_device *xe, struct xe_tile *tile, - struct xe_vm *vm, - size_t size, u64 offset, - enum ttm_bo_type type, u32 flags) +/** + * xe_bo_create_pin_range_novm() - Create and pin a BO with range options. + * @xe: The xe device. + * @tile: The tile to select for migration of this bo, and the tile used for + * GGTT binding if any. Only to be non-NULL for ttm_bo_type_kernel bos. + * @size: The storage size to use for the bo. + * @start: Start of fixed VRAM range or 0. + * @end: End of fixed VRAM range or ~0ULL. + * @type: The TTM buffer object type. + * @flags: XE_BO_FLAG_ flags. + * + * Create an Xe BO with range- and options. If @start and @end indicate + * a fixed VRAM range, this must be a ttm_bo_type_kernel bo with VRAM placement + * only. + * + * Return: The buffer object on success. Negative error pointer on failure. + */ +struct xe_bo *xe_bo_create_pin_range_novm(struct xe_device *xe, struct xe_tile *tile, + size_t size, u64 start, u64 end, + enum ttm_bo_type type, u32 flags) { - return xe_bo_create_pin_map_at_aligned(xe, tile, vm, size, offset, - type, flags, 0); + struct xe_validation_ctx ctx; + struct drm_exec exec; + struct xe_bo *bo; + int err = 0; + + xe_validation_guard(&ctx, &xe->val, &exec, (struct xe_val_flags) {}, err) { + bo = __xe_bo_create_locked(xe, tile, NULL, size, start, end, + 0, type, flags, 0, &exec); + if (IS_ERR(bo)) { + drm_exec_retry_on_contention(&exec); + err = PTR_ERR(bo); + xe_validation_retry_on_oom(&ctx, &err); + break; + } + + err = xe_bo_pin(bo, &exec); + xe_bo_unlock(bo); + if (err) { + xe_bo_put(bo); + drm_exec_retry_on_contention(&exec); + xe_validation_retry_on_oom(&ctx, &err); + break; + } + } + + return err ? ERR_PTR(err) : bo; } -struct xe_bo *xe_bo_create_pin_map_at_aligned(struct xe_device *xe, - struct xe_tile *tile, - struct xe_vm *vm, - size_t size, u64 offset, - enum ttm_bo_type type, u32 flags, - u64 alignment) +static struct xe_bo *xe_bo_create_pin_map_at_aligned(struct xe_device *xe, + struct xe_tile *tile, + struct xe_vm *vm, + size_t size, u64 offset, + enum ttm_bo_type type, u32 flags, + u64 alignment, struct drm_exec *exec) { struct xe_bo *bo; int err; u64 start = offset == ~0ull ? 0 : offset; - u64 end = offset == ~0ull ? offset : start + size; + u64 end = offset == ~0ull ? 
~0ull : start + size; if (flags & XE_BO_FLAG_STOLEN && xe_ttm_stolen_cpu_access_needs_ggtt(xe)) flags |= XE_BO_FLAG_GGTT; - bo = xe_bo_create_locked_range(xe, tile, vm, size, start, end, type, - flags | XE_BO_FLAG_NEEDS_CPU_ACCESS | XE_BO_FLAG_PINNED, - alignment); + bo = __xe_bo_create_locked(xe, tile, vm, size, start, end, 0, type, + flags | XE_BO_FLAG_NEEDS_CPU_ACCESS | XE_BO_FLAG_PINNED, + alignment, exec); if (IS_ERR(bo)) return bo; - err = xe_bo_pin(bo); + err = xe_bo_pin(bo, exec); if (err) goto err_put; @@ -2150,26 +2522,100 @@ err_put: return ERR_PTR(err); } +/** + * xe_bo_create_pin_map_at_novm() - Create pinned and mapped bo at optional VRAM offset + * @xe: The xe device. + * @tile: The tile to select for migration of this bo, and the tile used for + * GGTT binding if any. Only to be non-NULL for ttm_bo_type_kernel bos. + * @size: The storage size to use for the bo. + * @offset: Optional VRAM offset or %~0ull for don't care. + * @type: The TTM buffer object type. + * @flags: XE_BO_FLAG_ flags. + * @alignment: GGTT alignment. + * @intr: Whether to execute any waits for backing store interruptible. + * + * Create a pinned and optionally mapped bo with VRAM offset and GGTT alignment + * options. The bo will be external and not associated with a VM. + * + * Return: The buffer object on success. Negative error pointer on failure. + * In particular, the function may return ERR_PTR(%-EINTR) if @intr was set + * to true on entry. + */ +struct xe_bo * +xe_bo_create_pin_map_at_novm(struct xe_device *xe, struct xe_tile *tile, + size_t size, u64 offset, enum ttm_bo_type type, u32 flags, + u64 alignment, bool intr) +{ + struct xe_validation_ctx ctx; + struct drm_exec exec; + struct xe_bo *bo; + int ret = 0; + + xe_validation_guard(&ctx, &xe->val, &exec, (struct xe_val_flags) {.interruptible = intr}, + ret) { + bo = xe_bo_create_pin_map_at_aligned(xe, tile, NULL, size, offset, + type, flags, alignment, &exec); + if (IS_ERR(bo)) { + drm_exec_retry_on_contention(&exec); + ret = PTR_ERR(bo); + xe_validation_retry_on_oom(&ctx, &ret); + } + } + + return ret ? ERR_PTR(ret) : bo; +} + +/** + * xe_bo_create_pin_map() - Create pinned and mapped bo + * @xe: The xe device. + * @tile: The tile to select for migration of this bo, and the tile used for + * @vm: The vm to associate the buffer object with. The vm's resv must be locked + * with the transaction represented by @exec. + * GGTT binding if any. Only to be non-NULL for ttm_bo_type_kernel bos. + * @size: The storage size to use for the bo. + * @type: The TTM buffer object type. + * @flags: XE_BO_FLAG_ flags. + * @exec: The drm_exec transaction to use for exhaustive eviction, and + * previously used for locking @vm's resv. + * + * Create a pinned and mapped bo. The bo will be external and not associated + * with a VM. + * + * Return: The buffer object on success. Negative error pointer on failure. + * In particular, the function may return ERR_PTR(%-EINTR) if @exec was + * configured for interruptible locking. 
+ */ struct xe_bo *xe_bo_create_pin_map(struct xe_device *xe, struct xe_tile *tile, struct xe_vm *vm, size_t size, - enum ttm_bo_type type, u32 flags) + enum ttm_bo_type type, u32 flags, + struct drm_exec *exec) { - return xe_bo_create_pin_map_at(xe, tile, vm, size, ~0ull, type, flags); + return xe_bo_create_pin_map_at_aligned(xe, tile, vm, size, ~0ull, type, flags, + 0, exec); } -struct xe_bo *xe_bo_create_from_data(struct xe_device *xe, struct xe_tile *tile, - const void *data, size_t size, - enum ttm_bo_type type, u32 flags) +/** + * xe_bo_create_pin_map_novm() - Create pinned and mapped bo + * @xe: The xe device. + * @tile: The tile to select for migration of this bo, and the tile used for + * GGTT binding if any. Only to be non-NULL for ttm_bo_type_kernel bos. + * @size: The storage size to use for the bo. + * @type: The TTM buffer object type. + * @flags: XE_BO_FLAG_ flags. + * @intr: Whether to execut any waits for backing store interruptible. + * + * Create a pinned and mapped bo. The bo will be external and not associated + * with a VM. + * + * Return: The buffer object on success. Negative error pointer on failure. + * In particular, the function may return ERR_PTR(%-EINTR) if @intr was set + * to true on entry. + */ +struct xe_bo *xe_bo_create_pin_map_novm(struct xe_device *xe, struct xe_tile *tile, + size_t size, enum ttm_bo_type type, u32 flags, + bool intr) { - struct xe_bo *bo = xe_bo_create_pin_map(xe, tile, NULL, - ALIGN(size, PAGE_SIZE), - type, flags); - if (IS_ERR(bo)) - return bo; - - xe_map_memcpy_to(xe, &bo->vmap, 0, data, size); - - return bo; + return xe_bo_create_pin_map_at_novm(xe, tile, size, ~0ull, type, flags, 0, intr); } static void __xe_bo_unpin_map_no_vm(void *arg) @@ -2184,8 +2630,7 @@ struct xe_bo *xe_managed_bo_create_pin_map(struct xe_device *xe, struct xe_tile int ret; KUNIT_STATIC_STUB_REDIRECT(xe_managed_bo_create_pin_map, xe, tile, size, flags); - - bo = xe_bo_create_pin_map(xe, tile, NULL, size, ttm_bo_type_kernel, flags); + bo = xe_bo_create_pin_map_novm(xe, tile, size, ttm_bo_type_kernel, flags, true); if (IS_ERR(bo)) return bo; @@ -2196,6 +2641,11 @@ struct xe_bo *xe_managed_bo_create_pin_map(struct xe_device *xe, struct xe_tile return bo; } +void xe_managed_bo_unpin_map_no_vm(struct xe_bo *bo) +{ + devm_release_action(xe_bo_device(bo)->drm.dev, __xe_bo_unpin_map_no_vm, bo); +} + struct xe_bo *xe_managed_bo_create_from_data(struct xe_device *xe, struct xe_tile *tile, const void *data, size_t size, u32 flags) { @@ -2234,7 +2684,7 @@ int xe_managed_bo_reinit_in_vram(struct xe_device *xe, struct xe_tile *tile, str xe_assert(xe, !(*src)->vmap.is_iomem); bo = xe_managed_bo_create_from_data(xe, tile, (*src)->vmap.vaddr, - (*src)->size, dst_flags); + xe_bo_size(*src), dst_flags); if (IS_ERR(bo)) return PTR_ERR(bo); @@ -2267,6 +2717,8 @@ uint64_t vram_region_gpu_offset(struct ttm_resource *res) /** * xe_bo_pin_external - pin an external BO * @bo: buffer object to be pinned + * @in_place: Pin in current placement, don't attempt to migrate. + * @exec: The drm_exec transaction to use for exhaustive eviction. * * Pin an external (not tied to a VM, can be exported via dma-buf / prime FD) * BO. Unique call compared to xe_bo_pin as this function has it own set of @@ -2274,7 +2726,7 @@ uint64_t vram_region_gpu_offset(struct ttm_resource *res) * * Returns 0 for success, negative error code otherwise. 
*/ -int xe_bo_pin_external(struct xe_bo *bo) +int xe_bo_pin_external(struct xe_bo *bo, bool in_place, struct drm_exec *exec) { struct xe_device *xe = xe_bo_device(bo); int err; @@ -2283,9 +2735,11 @@ int xe_bo_pin_external(struct xe_bo *bo) xe_assert(xe, xe_bo_is_user(bo)); if (!xe_bo_is_pinned(bo)) { - err = xe_bo_validate(bo, NULL, false); - if (err) - return err; + if (!in_place) { + err = xe_bo_validate(bo, NULL, false, exec); + if (err) + return err; + } spin_lock(&xe->pinned.lock); list_add_tail(&bo->pinned_link, &xe->pinned.late.external); @@ -2305,7 +2759,17 @@ int xe_bo_pin_external(struct xe_bo *bo) return 0; } -int xe_bo_pin(struct xe_bo *bo) +/** + * xe_bo_pin() - Pin a kernel bo after potentially migrating it + * @bo: The kernel bo to pin. + * @exec: The drm_exec transaction to use for exhaustive eviction. + * + * Attempts to migrate a bo to @bo->placement. If that succeeds, + * pins the bo. + * + * Return: %0 on success, negative error code on migration failure. + */ +int xe_bo_pin(struct xe_bo *bo, struct drm_exec *exec) { struct ttm_place *place = &bo->placements[0]; struct xe_device *xe = xe_bo_device(bo); @@ -2327,7 +2791,7 @@ int xe_bo_pin(struct xe_bo *bo) /* We only expect at most 1 pin */ xe_assert(xe, !xe_bo_is_pinned(bo)); - err = xe_bo_validate(bo, NULL, false); + err = xe_bo_validate(bo, NULL, false, exec); if (err) return err; @@ -2420,6 +2884,7 @@ void xe_bo_unpin(struct xe_bo *bo) * NULL. Used together with @allow_res_evict. * @allow_res_evict: Whether it's allowed to evict bos sharing @vm's * reservation object. + * @exec: The drm_exec transaction to use for exhaustive eviction. * * Make sure the bo is in allowed placement, migrating it if necessary. If * needed, other bos will be evicted. If bos selected for eviction shares @@ -2429,16 +2894,19 @@ void xe_bo_unpin(struct xe_bo *bo) * Return: 0 on success, negative error code on failure. May return * -EINTR or -ERESTARTSYS if internal waits are interrupted by a signal. */ -int xe_bo_validate(struct xe_bo *bo, struct xe_vm *vm, bool allow_res_evict) +int xe_bo_validate(struct xe_bo *bo, struct xe_vm *vm, bool allow_res_evict, + struct drm_exec *exec) { struct ttm_operation_ctx ctx = { .interruptible = true, .no_wait_gpu = false, .gfp_retry_mayfail = true, }; - struct pin_cookie cookie; int ret; + if (xe_bo_is_pinned(bo)) + return 0; + if (vm) { lockdep_assert_held(&vm->lock); xe_vm_assert_held(vm); @@ -2447,10 +2915,11 @@ int xe_bo_validate(struct xe_bo *bo, struct xe_vm *vm, bool allow_res_evict) ctx.resv = xe_vm_resv(vm); } - cookie = xe_vm_set_validating(vm, allow_res_evict); + xe_vm_set_validating(vm, allow_res_evict); trace_xe_bo_validate(bo); + xe_validation_assert_exec(xe_bo_device(bo), exec, &bo->ttm.base); ret = ttm_bo_validate(&bo->ttm, &bo->placement, &ctx); - xe_vm_clear_validating(vm, allow_res_evict, cookie); + xe_vm_clear_validating(vm, allow_res_evict); return ret; } @@ -2524,7 +2993,7 @@ int xe_bo_vmap(struct xe_bo *bo) * TODO: Fix up ttm_bo_vmap to do that, or fix up ttm_bo_kmap * to use struct iosys_map. 
*/ - ret = ttm_bo_kmap(&bo->ttm, 0, bo->size >> PAGE_SHIFT, &bo->kmap); + ret = ttm_bo_kmap(&bo->ttm, 0, xe_bo_size(bo) >> PAGE_SHIFT, &bo->kmap); if (ret) return ret; @@ -2644,8 +3113,9 @@ int xe_gem_create_ioctl(struct drm_device *dev, void *data, struct xe_device *xe = to_xe_device(dev); struct xe_file *xef = to_xe_file(file); struct drm_xe_gem_create *args = data; + struct xe_validation_ctx ctx; + struct drm_exec exec; struct xe_vm *vm = NULL; - ktime_t end = 0; struct xe_bo *bo; unsigned int bo_flags; u32 handle; @@ -2719,25 +3189,26 @@ int xe_gem_create_ioctl(struct drm_device *dev, void *data, return -ENOENT; } -retry: - if (vm) { - err = xe_vm_lock(vm, true); - if (err) - goto out_vm; + err = 0; + xe_validation_guard(&ctx, &xe->val, &exec, (struct xe_val_flags) {.interruptible = true}, + err) { + if (vm) { + err = xe_vm_drm_exec_lock(vm, &exec); + drm_exec_retry_on_contention(&exec); + if (err) + break; + } + bo = xe_bo_create_user(xe, vm, args->size, args->cpu_caching, + bo_flags, &exec); + drm_exec_retry_on_contention(&exec); + if (IS_ERR(bo)) { + err = PTR_ERR(bo); + xe_validation_retry_on_oom(&ctx, &err); + break; + } } - - bo = xe_bo_create_user(xe, NULL, vm, args->size, args->cpu_caching, - bo_flags); - - if (vm) - xe_vm_unlock(vm); - - if (IS_ERR(bo)) { - err = PTR_ERR(bo); - if (xe_vm_validate_should_retry(NULL, err, &end)) - goto retry; + if (err) goto out_vm; - } if (args->extensions) { err = gem_create_user_extensions(xe, bo, args->extensions, 0); @@ -2886,6 +3357,9 @@ static void xe_place_from_ttm_type(u32 mem_type, struct ttm_place *place) * xe_bo_migrate - Migrate an object to the desired region id * @bo: The buffer object to migrate. * @mem_type: The TTM region type to migrate to. + * @tctx: A pointer to a struct ttm_operation_ctx or NULL if + * a default interruptibe ctx is to be used. + * @exec: The drm_exec transaction to use for exhaustive eviction. * * Attempt to migrate the buffer object to the desired memory region. The * buffer object may not be pinned, and must be locked. @@ -2897,7 +3371,8 @@ static void xe_place_from_ttm_type(u32 mem_type, struct ttm_place *place) * Return: 0 on success. Negative error code on failure. In particular may * return -EINTR or -ERESTARTSYS if signal pending. */ -int xe_bo_migrate(struct xe_bo *bo, u32 mem_type) +int xe_bo_migrate(struct xe_bo *bo, u32 mem_type, struct ttm_operation_ctx *tctx, + struct drm_exec *exec) { struct xe_device *xe = ttm_to_xe_device(bo->ttm.bdev); struct ttm_operation_ctx ctx = { @@ -2909,6 +3384,7 @@ int xe_bo_migrate(struct xe_bo *bo, u32 mem_type) struct ttm_place requested; xe_bo_assert_held(bo); + tctx = tctx ? tctx : &ctx; if (bo->ttm.resource->mem_type == mem_type) return 0; @@ -2935,19 +3411,22 @@ int xe_bo_migrate(struct xe_bo *bo, u32 mem_type) add_vram(xe, bo, &requested, bo->flags, mem_type, &c); } - return ttm_bo_validate(&bo->ttm, &placement, &ctx); + if (!tctx->no_wait_gpu) + xe_validation_assert_exec(xe_bo_device(bo), exec, &bo->ttm.base); + return ttm_bo_validate(&bo->ttm, &placement, tctx); } /** * xe_bo_evict - Evict an object to evict placement * @bo: The buffer object to migrate. + * @exec: The drm_exec transaction to use for exhaustive eviction. * * On successful completion, the object memory will be moved to evict * placement. This function blocks until the object has been fully moved. * * Return: 0 on success. Negative error code on failure. 
*/ -int xe_bo_evict(struct xe_bo *bo) +int xe_bo_evict(struct xe_bo *bo, struct drm_exec *exec) { struct ttm_operation_ctx ctx = { .interruptible = false, @@ -3107,11 +3586,11 @@ int xe_bo_dumb_create(struct drm_file *file_priv, args->size = ALIGN(mul_u32_u32(args->pitch, args->height), page_size); - bo = xe_bo_create_user(xe, NULL, NULL, args->size, + bo = xe_bo_create_user(xe, NULL, args->size, DRM_XE_GEM_CPU_CACHING_WC, XE_BO_FLAG_VRAM_IF_DGFX(xe_device_get_root_tile(xe)) | XE_BO_FLAG_SCANOUT | - XE_BO_FLAG_NEEDS_CPU_ACCESS); + XE_BO_FLAG_NEEDS_CPU_ACCESS, NULL); if (IS_ERR(bo)) return PTR_ERR(bo); diff --git a/drivers/gpu/drm/xe/xe_bo.h b/drivers/gpu/drm/xe/xe_bo.h index 02ada1fb8a23..a77af42b5f9e 100644 --- a/drivers/gpu/drm/xe/xe_bo.h +++ b/drivers/gpu/drm/xe/xe_bo.h @@ -10,8 +10,10 @@ #include "xe_bo_types.h" #include "xe_macros.h" +#include "xe_validation.h" #include "xe_vm_types.h" #include "xe_vm.h" +#include "xe_vram_types.h" #define XE_DEFAULT_GTT_SIZE_MB 3072ULL /* 3GB by default */ @@ -23,8 +25,9 @@ #define XE_BO_FLAG_VRAM_MASK (XE_BO_FLAG_VRAM0 | XE_BO_FLAG_VRAM1) /* -- */ #define XE_BO_FLAG_STOLEN BIT(4) +#define XE_BO_FLAG_VRAM(vram) (XE_BO_FLAG_VRAM0 << ((vram)->id)) #define XE_BO_FLAG_VRAM_IF_DGFX(tile) (IS_DGFX(tile_to_xe(tile)) ? \ - XE_BO_FLAG_VRAM0 << (tile)->id : \ + XE_BO_FLAG_VRAM((tile)->mem.vram) : \ XE_BO_FLAG_SYSTEM) #define XE_BO_FLAG_GGTT BIT(5) #define XE_BO_FLAG_IGNORE_MIN_PAGE_SIZE BIT(6) @@ -86,43 +89,34 @@ struct sg_table; struct xe_bo *xe_bo_alloc(void); void xe_bo_free(struct xe_bo *bo); -struct xe_bo *___xe_bo_create_locked(struct xe_device *xe, struct xe_bo *bo, - struct xe_tile *tile, struct dma_resv *resv, - struct ttm_lru_bulk_move *bulk, size_t size, - u16 cpu_caching, enum ttm_bo_type type, - u32 flags); -struct xe_bo * -xe_bo_create_locked_range(struct xe_device *xe, - struct xe_tile *tile, struct xe_vm *vm, - size_t size, u64 start, u64 end, - enum ttm_bo_type type, u32 flags, u64 alignment); +struct xe_bo *xe_bo_init_locked(struct xe_device *xe, struct xe_bo *bo, + struct xe_tile *tile, struct dma_resv *resv, + struct ttm_lru_bulk_move *bulk, size_t size, + u16 cpu_caching, enum ttm_bo_type type, + u32 flags, struct drm_exec *exec); struct xe_bo *xe_bo_create_locked(struct xe_device *xe, struct xe_tile *tile, struct xe_vm *vm, size_t size, - enum ttm_bo_type type, u32 flags); -struct xe_bo *xe_bo_create(struct xe_device *xe, struct xe_tile *tile, - struct xe_vm *vm, size_t size, - enum ttm_bo_type type, u32 flags); -struct xe_bo *xe_bo_create_user(struct xe_device *xe, struct xe_tile *tile, - struct xe_vm *vm, size_t size, - u16 cpu_caching, - u32 flags); + enum ttm_bo_type type, u32 flags, + struct drm_exec *exec); +struct xe_bo *xe_bo_create_user(struct xe_device *xe, struct xe_vm *vm, size_t size, + u16 cpu_caching, u32 flags, struct drm_exec *exec); struct xe_bo *xe_bo_create_pin_map(struct xe_device *xe, struct xe_tile *tile, struct xe_vm *vm, size_t size, - enum ttm_bo_type type, u32 flags); -struct xe_bo *xe_bo_create_pin_map_at(struct xe_device *xe, struct xe_tile *tile, - struct xe_vm *vm, size_t size, u64 offset, - enum ttm_bo_type type, u32 flags); -struct xe_bo *xe_bo_create_pin_map_at_aligned(struct xe_device *xe, - struct xe_tile *tile, - struct xe_vm *vm, - size_t size, u64 offset, - enum ttm_bo_type type, u32 flags, - u64 alignment); -struct xe_bo *xe_bo_create_from_data(struct xe_device *xe, struct xe_tile *tile, - const void *data, size_t size, - enum ttm_bo_type type, u32 flags); + enum ttm_bo_type type, u32 flags, + 
struct drm_exec *exec); +struct xe_bo *xe_bo_create_pin_map_novm(struct xe_device *xe, struct xe_tile *tile, + size_t size, enum ttm_bo_type type, u32 flags, + bool intr); +struct xe_bo *xe_bo_create_pin_range_novm(struct xe_device *xe, struct xe_tile *tile, + size_t size, u64 start, u64 end, + enum ttm_bo_type type, u32 flags); +struct xe_bo * +xe_bo_create_pin_map_at_novm(struct xe_device *xe, struct xe_tile *tile, + size_t size, u64 offset, enum ttm_bo_type type, + u32 flags, u64 alignment, bool intr); struct xe_bo *xe_managed_bo_create_pin_map(struct xe_device *xe, struct xe_tile *tile, size_t size, u32 flags); +void xe_managed_bo_unpin_map_no_vm(struct xe_bo *bo); struct xe_bo *xe_managed_bo_create_from_data(struct xe_device *xe, struct xe_tile *tile, const void *data, size_t size, u32 flags); int xe_managed_bo_reinit_in_vram(struct xe_device *xe, struct xe_tile *tile, struct xe_bo **src); @@ -201,11 +195,12 @@ static inline void xe_bo_unlock_vm_held(struct xe_bo *bo) } } -int xe_bo_pin_external(struct xe_bo *bo); -int xe_bo_pin(struct xe_bo *bo); +int xe_bo_pin_external(struct xe_bo *bo, bool in_place, struct drm_exec *exec); +int xe_bo_pin(struct xe_bo *bo, struct drm_exec *exec); void xe_bo_unpin_external(struct xe_bo *bo); void xe_bo_unpin(struct xe_bo *bo); -int xe_bo_validate(struct xe_bo *bo, struct xe_vm *vm, bool allow_res_evict); +int xe_bo_validate(struct xe_bo *bo, struct xe_vm *vm, bool allow_res_evict, + struct drm_exec *exec); static inline bool xe_bo_is_pinned(struct xe_bo *bo) { @@ -238,6 +233,19 @@ xe_bo_main_addr(struct xe_bo *bo, size_t page_size) return xe_bo_addr(bo, 0, page_size); } +/** + * xe_bo_size() - Xe BO size + * @bo: The bo object. + * + * Simple helper to return Xe BO's size. + * + * Return: Xe BO's size + */ +static inline size_t xe_bo_size(struct xe_bo *bo) +{ + return bo->ttm.base.size; +} + static inline u32 __xe_bo_ggtt_addr(struct xe_bo *bo, u8 tile_id) { @@ -246,7 +254,7 @@ __xe_bo_ggtt_addr(struct xe_bo *bo, u8 tile_id) if (XE_WARN_ON(!ggtt_node)) return 0; - XE_WARN_ON(ggtt_node->base.size > bo->size); + XE_WARN_ON(ggtt_node->base.size > xe_bo_size(bo)); XE_WARN_ON(ggtt_node->base.start + ggtt_node->base.size > (1ull << 32)); return ggtt_node->base.start; } @@ -273,8 +281,9 @@ uint64_t vram_region_gpu_offset(struct ttm_resource *res); bool xe_bo_can_migrate(struct xe_bo *bo, u32 mem_type); -int xe_bo_migrate(struct xe_bo *bo, u32 mem_type); -int xe_bo_evict(struct xe_bo *bo); +int xe_bo_migrate(struct xe_bo *bo, u32 mem_type, struct ttm_operation_ctx *ctc, + struct drm_exec *exec); +int xe_bo_evict(struct xe_bo *bo, struct drm_exec *exec); int xe_bo_evict_pinned(struct xe_bo *bo); int xe_bo_notifier_prepare_pinned(struct xe_bo *bo); @@ -300,7 +309,22 @@ bool xe_bo_needs_ccs_pages(struct xe_bo *bo); static inline size_t xe_bo_ccs_pages_start(struct xe_bo *bo) { - return PAGE_ALIGN(bo->ttm.base.size); + return PAGE_ALIGN(xe_bo_size(bo)); +} + +/** + * xe_bo_has_valid_ccs_bb - Check if CCS's BBs were setup for the BO. + * @bo: the &xe_bo to check + * + * The CCS's BBs should only be setup by the driver VF, but it is safe + * to call this function also by non-VF driver. + * + * Return: true iff the CCS's BBs are setup, false otherwise. 
+ */ +static inline bool xe_bo_has_valid_ccs_bb(struct xe_bo *bo) +{ + return bo->bb_ccs[XE_SRIOV_VF_CCS_READ_CTX] && + bo->bb_ccs[XE_SRIOV_VF_CCS_WRITE_CTX]; } static inline bool xe_bo_has_pages(struct xe_bo *bo) diff --git a/drivers/gpu/drm/xe/xe_bo_evict.c b/drivers/gpu/drm/xe/xe_bo_evict.c index 7484ce55a303..bc5b4c5fab81 100644 --- a/drivers/gpu/drm/xe/xe_bo_evict.c +++ b/drivers/gpu/drm/xe/xe_bo_evict.c @@ -158,8 +158,8 @@ int xe_bo_evict_all(struct xe_device *xe) if (ret) return ret; - ret = xe_bo_apply_to_pinned(xe, &xe->pinned.late.kernel_bo_present, - &xe->pinned.late.evicted, xe_bo_evict_pinned); + ret = xe_bo_apply_to_pinned(xe, &xe->pinned.late.external, + &xe->pinned.late.external, xe_bo_evict_pinned); if (!ret) ret = xe_bo_apply_to_pinned(xe, &xe->pinned.late.kernel_bo_present, @@ -182,7 +182,6 @@ int xe_bo_evict_all(struct xe_device *xe) static int xe_bo_restore_and_map_ggtt(struct xe_bo *bo) { - struct xe_device *xe = xe_bo_device(bo); int ret; ret = xe_bo_restore_pinned(bo); @@ -201,13 +200,6 @@ static int xe_bo_restore_and_map_ggtt(struct xe_bo *bo) } } - /* - * We expect validate to trigger a move VRAM and our move code - * should setup the iosys map. - */ - xe_assert(xe, !(bo->flags & XE_BO_FLAG_PINNED_LATE_RESTORE) || - !iosys_map_is_null(&bo->vmap)); - return 0; } diff --git a/drivers/gpu/drm/xe/xe_bo_types.h b/drivers/gpu/drm/xe/xe_bo_types.h index eb5e83c5f233..d4fe3c8dca5b 100644 --- a/drivers/gpu/drm/xe/xe_bo_types.h +++ b/drivers/gpu/drm/xe/xe_bo_types.h @@ -9,6 +9,7 @@ #include <linux/iosys-map.h> #include <drm/drm_gpusvm.h> +#include <drm/drm_pagemap.h> #include <drm/ttm/ttm_bo.h> #include <drm/ttm/ttm_device.h> #include <drm/ttm/ttm_placement.h> @@ -24,7 +25,9 @@ struct xe_vm; /* TODO: To be selected with VM_MADVISE */ #define XE_BO_PRIORITY_NORMAL 1 -/** @xe_bo: XE buffer object */ +/** + * struct xe_bo - Xe buffer object + */ struct xe_bo { /** @ttm: TTM base buffer object */ struct ttm_buffer_object ttm; @@ -32,8 +35,6 @@ struct xe_bo { struct xe_bo *backup_obj; /** @parent_obj: Ref to parent bo if this a backup_obj */ struct xe_bo *parent_obj; - /** @size: Size of this buffer object */ - size_t size; /** @flags: flags for this buffer object */ u32 flags; /** @vm: VM this BO is attached to, for extobj this will be NULL */ @@ -48,7 +49,7 @@ struct xe_bo { struct xe_ggtt_node *ggtt_node[XE_MAX_TILES_PER_DEVICE]; /** @vmap: iosys map of this buffer */ struct iosys_map vmap; - /** @ttm_kmap: TTM bo kmap object for internal use only. Keep off. */ + /** @kmap: TTM bo kmap object for internal use only. Keep off. */ struct ttm_bo_kmap_obj kmap; /** @pinned_link: link to present / evicted list of pinned BO */ struct list_head pinned_link; @@ -62,6 +63,14 @@ struct xe_bo { */ struct list_head client_link; #endif + /** @attr: User controlled attributes for bo */ + struct { + /** + * @atomic_access: type of atomic access bo needs + * protected by bo dma-resv lock + */ + u32 atomic_access; + } attr; /** * @pxp_key_instance: PXP key instance this BO was created against. A * 0 in this variable indicates that the BO does not use PXP encryption. @@ -75,9 +84,12 @@ struct xe_bo { /** @created: Whether the bo has passed initial creation */ bool created; - /** @ccs_cleared */ + /** @ccs_cleared: true means that CCS region of BO is already cleared */ bool ccs_cleared; + /** @bb_ccs: BB instructions of CCS read/write. Valid only for VF */ + struct xe_bb *bb_ccs[XE_SRIOV_VF_CCS_CTX_COUNT]; + /** * @cpu_caching: CPU caching mode. Currently only used for userspace * objects. 
Exceptions are system memory on DGFX, which is always @@ -86,12 +98,13 @@ struct xe_bo { u16 cpu_caching; /** @devmem_allocation: SVM device memory allocation */ - struct drm_gpusvm_devmem devmem_allocation; + struct drm_pagemap_devmem devmem_allocation; /** @vram_userfault_link: Link into @mem_access.vram_userfault.list */ - struct list_head vram_userfault_link; + struct list_head vram_userfault_link; - /** @min_align: minimum alignment needed for this BO if different + /** + * @min_align: minimum alignment needed for this BO if different * from default */ u64 min_align; diff --git a/drivers/gpu/drm/xe/xe_configfs.c b/drivers/gpu/drm/xe/xe_configfs.c index 8ec1ff1e4e80..139663423185 100644 --- a/drivers/gpu/drm/xe/xe_configfs.c +++ b/drivers/gpu/drm/xe/xe_configfs.c @@ -4,42 +4,67 @@ */ #include <linux/bitops.h> +#include <linux/ctype.h> #include <linux/configfs.h> +#include <linux/cleanup.h> #include <linux/find.h> #include <linux/init.h> #include <linux/module.h> #include <linux/pci.h> #include <linux/string.h> +#include "instructions/xe_mi_commands.h" #include "xe_configfs.h" -#include "xe_module.h" - #include "xe_hw_engine_types.h" +#include "xe_module.h" +#include "xe_pci_types.h" /** * DOC: Xe Configfs * * Overview - * ========= + * ======== * * Configfs is a filesystem-based manager of kernel objects. XE KMD registers a - * configfs subsystem called ``'xe'`` that creates a directory in the mounted configfs directory - * The user can create devices under this directory and configure them as necessary - * See Documentation/filesystems/configfs.rst for more information about how configfs works. + * configfs subsystem called ``xe`` that creates a directory in the mounted + * configfs directory. The user can create devices under this directory and + * configure them as necessary. See Documentation/filesystems/configfs.rst for + * more information about how configfs works. * * Create devices - * =============== + * ============== + * + * To create a device, the ``xe`` module should already be loaded, but some + * attributes can only be set before binding the device. It can be accomplished + * by blocking the driver autoprobe:: * - * In order to create a device, the user has to create a directory inside ``'xe'``:: + * # echo 0 > /sys/bus/pci/drivers_autoprobe + * # modprobe xe * - * mkdir /sys/kernel/config/xe/0000:03:00.0/ + * In order to create a device, the user has to create a directory inside ``xe``:: + * + * # mkdir /sys/kernel/config/xe/0000:03:00.0/ * * Every device created is populated by the driver with entries that can be * used to configure it:: * * /sys/kernel/config/xe/ - * .. 0000:03:00.0/ - * ... survivability_mode + * ├── 0000:00:02.0 + * │  └── ... + * ├── 0000:00:02.1 + * │  └── ... + * : + * └── 0000:03:00.0 + * ├── survivability_mode + * ├── engines_allowed + * └── enable_psmi + * + * After configuring the attributes as per next section, the device can be + * probed with:: + * + * # echo 0000:03:00.0 > /sys/bus/pci/drivers/xe/bind + * # # or + * # echo 0000:03:00.0 > /sys/bus/pci/drivers_probe * * Configure Attributes * ==================== @@ -51,7 +76,8 @@ * effect when probing the device. Example to enable it:: * * # echo 1 > /sys/kernel/config/xe/0000:03:00.0/survivability_mode - * # echo 0000:03:00.0 > /sys/bus/pci/drivers/xe/bind (Enters survivability mode if supported) + * + * This attribute can only be set before binding to the device. * * Allowed engines: * ---------------- @@ -77,27 +103,118 @@ * available for migrations, but it's disabled. 
This is intended for debugging * purposes only. * + * This attribute can only be set before binding to the device. + * + * PSMI + * ---- + * + * Enable extra debugging capabilities to trace engine execution. Only useful + * during early platform enabling and requires additional hardware connected. + * Once it's enabled, additionals WAs are added and runtime configuration is + * done via debugfs. Example to enable it:: + * + * # echo 1 > /sys/kernel/config/xe/0000:03:00.0/enable_psmi + * + * This attribute can only be set before binding to the device. + * + * Context restore BB + * ------------------ + * + * Allow to execute a batch buffer during any context switches. When the + * GPU is restoring the context, it executes additional commands. It's useful + * for testing additional workarounds and validating certain HW behaviors: it's + * not intended for normal execution and will taint the kernel with TAINT_TEST + * when used. + * + * The syntax allows to pass straight instructions to be executed by the engine + * in a batch buffer or set specific registers. + * + * #. Generic instruction:: + * + * <engine-class> cmd <instr> [[dword0] [dword1] [...]] + * + * #. Simple register setting:: + * + * <engine-class> reg <address> <value> + * + * Commands are saved per engine class: all instances of that class will execute + * those commands during context switch. The instruction, dword arguments, + * addresses and values are in hex format like in the examples below. + * + * #. Execute a LRI command to write 0xDEADBEEF to register 0x4f10 after the + * normal context restore:: + * + * # echo 'rcs cmd 11000001 4F100 DEADBEEF' \ + * > /sys/kernel/config/xe/0000:03:00.0/ctx_restore_post_bb + * + * #. Execute a LRI command to write 0xDEADBEEF to register 0x4f10 at the + * beginning of the context restore:: + * + * # echo 'rcs cmd 11000001 4F100 DEADBEEF' \ + * > /sys/kernel/config/xe/0000:03:00.0/ctx_restore_mid_bb + + * #. Load certain values in a couple of registers (it can be used as a simpler + * alternative to the `cmd`) action:: + * + * # cat > /sys/kernel/config/xe/0000:03:00.0/ctx_restore_post_bb <<EOF + * rcs reg 4F100 DEADBEEF + * rcs reg 4F104 FFFFFFFF + * EOF + * + * .. note:: + * + * When using multiple lines, make sure to use a command that is + * implemented with a single write syscall, like HEREDOC. + * + * Currently this is implemented only for post and mid context restore and + * these attributes can only be set before binding to the device. 
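A note on the encoding: judging from the parser added later in this file, a "reg" line is stored as the same three dwords as the explicit LRI "cmd" form shown earlier, so the two styles produce identical batch buffer contents for a single register load:

	/* "rcs reg 4F100 DEADBEEF" is saved for the render class as: */
	u32 bb[] = {
		MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS(1),	/* 0x11000001 */
		0x4f100,					/* register offset */
		0xdeadbeef,					/* value to write */
	};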
+ * * Remove devices * ============== * * The created device directories can be removed using ``rmdir``:: * - * rmdir /sys/kernel/config/xe/0000:03:00.0/ + * # rmdir /sys/kernel/config/xe/0000:03:00.0/ */ -struct xe_config_device { +/* Similar to struct xe_bb, but not tied to HW (yet) */ +struct wa_bb { + u32 *cs; + u32 len; /* in dwords */ +}; + +struct xe_config_group_device { struct config_group group; - bool survivability_mode; - u64 engines_allowed; + struct xe_config_device { + u64 engines_allowed; + struct wa_bb ctx_restore_post_bb[XE_ENGINE_CLASS_MAX]; + struct wa_bb ctx_restore_mid_bb[XE_ENGINE_CLASS_MAX]; + bool survivability_mode; + bool enable_psmi; + } config; /* protects attributes */ struct mutex lock; + /* matching descriptor */ + const struct xe_device_desc *desc; +}; + +static const struct xe_config_device device_defaults = { + .engines_allowed = U64_MAX, + .survivability_mode = false, + .enable_psmi = false, }; +static void set_device_defaults(struct xe_config_device *config) +{ + *config = device_defaults; +} + struct engine_info { const char *cls; u64 mask; + enum xe_engine_class engine_class; }; /* Some helpful macros to aid on the sizing of buffer allocation when parsing */ @@ -105,17 +222,48 @@ struct engine_info { #define MAX_ENGINE_INSTANCE_CHARS 2 static const struct engine_info engine_info[] = { - { .cls = "rcs", .mask = XE_HW_ENGINE_RCS_MASK }, - { .cls = "bcs", .mask = XE_HW_ENGINE_BCS_MASK }, - { .cls = "vcs", .mask = XE_HW_ENGINE_VCS_MASK }, - { .cls = "vecs", .mask = XE_HW_ENGINE_VECS_MASK }, - { .cls = "ccs", .mask = XE_HW_ENGINE_CCS_MASK }, - { .cls = "gsccs", .mask = XE_HW_ENGINE_GSCCS_MASK }, + { .cls = "rcs", .mask = XE_HW_ENGINE_RCS_MASK, .engine_class = XE_ENGINE_CLASS_RENDER }, + { .cls = "bcs", .mask = XE_HW_ENGINE_BCS_MASK, .engine_class = XE_ENGINE_CLASS_COPY }, + { .cls = "vcs", .mask = XE_HW_ENGINE_VCS_MASK, .engine_class = XE_ENGINE_CLASS_VIDEO_DECODE }, + { .cls = "vecs", .mask = XE_HW_ENGINE_VECS_MASK, .engine_class = XE_ENGINE_CLASS_VIDEO_ENHANCE }, + { .cls = "ccs", .mask = XE_HW_ENGINE_CCS_MASK, .engine_class = XE_ENGINE_CLASS_COMPUTE }, + { .cls = "gsccs", .mask = XE_HW_ENGINE_GSCCS_MASK, .engine_class = XE_ENGINE_CLASS_OTHER }, }; +static struct xe_config_group_device *to_xe_config_group_device(struct config_item *item) +{ + return container_of(to_config_group(item), struct xe_config_group_device, group); +} + static struct xe_config_device *to_xe_config_device(struct config_item *item) { - return container_of(to_config_group(item), struct xe_config_device, group); + return &to_xe_config_group_device(item)->config; +} + +static bool is_bound(struct xe_config_group_device *dev) +{ + unsigned int domain, bus, slot, function; + struct pci_dev *pdev; + const char *name; + bool ret; + + lockdep_assert_held(&dev->lock); + + name = dev->group.cg_item.ci_name; + if (sscanf(name, "%x:%x:%x.%x", &domain, &bus, &slot, &function) != 4) + return false; + + pdev = pci_get_domain_bus_and_slot(domain, bus, PCI_DEVFN(slot, function)); + if (!pdev) + return false; + + ret = pci_get_drvdata(pdev); + pci_dev_put(pdev); + + if (ret) + pci_dbg(pdev, "Already bound to driver\n"); + + return ret; } static ssize_t survivability_mode_show(struct config_item *item, char *page) @@ -127,7 +275,7 @@ static ssize_t survivability_mode_show(struct config_item *item, char *page) static ssize_t survivability_mode_store(struct config_item *item, const char *page, size_t len) { - struct xe_config_device *dev = to_xe_config_device(item); + struct xe_config_group_device *dev 
= to_xe_config_group_device(item); bool survivability_mode; int ret; @@ -135,9 +283,11 @@ static ssize_t survivability_mode_store(struct config_item *item, const char *pa if (ret) return ret; - mutex_lock(&dev->lock); - dev->survivability_mode = survivability_mode; - mutex_unlock(&dev->lock); + guard(mutex)(&dev->lock); + if (is_bound(dev)) + return -EBUSY; + + dev->config.survivability_mode = survivability_mode; return len; } @@ -166,7 +316,18 @@ static ssize_t engines_allowed_show(struct config_item *item, char *page) return p - page; } -static bool lookup_engine_mask(const char *pattern, u64 *mask) +/* + * Lookup engine_info. If @mask is not NULL, reduce the mask according to the + * instance in @pattern. + * + * Examples of inputs: + * - lookup_engine_info("rcs0", &mask): return "rcs" entry from @engine_info and + * mask == BIT_ULL(XE_HW_ENGINE_RCS0) + * - lookup_engine_info("rcs*", &mask): return "rcs" entry from @engine_info and + * mask == XE_HW_ENGINE_RCS_MASK + * - lookup_engine_info("rcs", NULL): return "rcs" entry from @engine_info + */ +static const struct engine_info *lookup_engine_info(const char *pattern, u64 *mask) { for (size_t i = 0; i < ARRAY_SIZE(engine_info); i++) { u8 instance; @@ -176,70 +337,359 @@ static bool lookup_engine_mask(const char *pattern, u64 *mask) continue; pattern += strlen(engine_info[i].cls); + if (!mask) + return *pattern ? NULL : &engine_info[i]; if (!strcmp(pattern, "*")) { *mask = engine_info[i].mask; - return true; + return &engine_info[i]; } if (kstrtou8(pattern, 10, &instance)) - return false; + return NULL; bit = __ffs64(engine_info[i].mask) + instance; if (bit >= fls64(engine_info[i].mask)) - return false; + return NULL; *mask = BIT_ULL(bit); - return true; + return &engine_info[i]; } - return false; + return NULL; +} + +static int parse_engine(const char *s, const char *end_chars, u64 *mask, + const struct engine_info **pinfo) +{ + char buf[MAX_ENGINE_CLASS_CHARS + MAX_ENGINE_INSTANCE_CHARS + 1]; + const struct engine_info *info; + size_t len; + + len = strcspn(s, end_chars); + if (len >= sizeof(buf)) + return -EINVAL; + + memcpy(buf, s, len); + buf[len] = '\0'; + + info = lookup_engine_info(buf, mask); + if (!info) + return -ENOENT; + + if (pinfo) + *pinfo = info; + + return len; } static ssize_t engines_allowed_store(struct config_item *item, const char *page, size_t len) { - struct xe_config_device *dev = to_xe_config_device(item); - size_t patternlen, p; + struct xe_config_group_device *dev = to_xe_config_group_device(item); + ssize_t patternlen, p; u64 mask, val = 0; for (p = 0; p < len; p += patternlen + 1) { - char buf[MAX_ENGINE_CLASS_CHARS + MAX_ENGINE_INSTANCE_CHARS + 1]; - - patternlen = strcspn(page + p, ",\n"); - if (patternlen >= sizeof(buf)) + patternlen = parse_engine(page + p, ",\n", &mask, NULL); + if (patternlen < 0) return -EINVAL; - memcpy(buf, page + p, patternlen); - buf[patternlen] = '\0'; + val |= mask; + } + + guard(mutex)(&dev->lock); + if (is_bound(dev)) + return -EBUSY; + + dev->config.engines_allowed = val; + + return len; +} + +static ssize_t enable_psmi_show(struct config_item *item, char *page) +{ + struct xe_config_device *dev = to_xe_config_device(item); + + return sprintf(page, "%d\n", dev->enable_psmi); +} + +static ssize_t enable_psmi_store(struct config_item *item, const char *page, size_t len) +{ + struct xe_config_group_device *dev = to_xe_config_group_device(item); + bool val; + int ret; + + ret = kstrtobool(page, &val); + if (ret) + return ret; + + guard(mutex)(&dev->lock); + if (is_bound(dev)) + 
return -EBUSY; + + dev->config.enable_psmi = val; + + return len; +} + +static bool wa_bb_read_advance(bool dereference, char **p, + const char *append, size_t len, + size_t *max_size) +{ + if (dereference) { + if (len >= *max_size) + return false; + *max_size -= len; + if (append) + memcpy(*p, append, len); + } + + *p += len; + + return true; +} + +static ssize_t wa_bb_show(struct xe_config_group_device *dev, + struct wa_bb wa_bb[static XE_ENGINE_CLASS_MAX], + char *data, size_t sz) +{ + char *p = data; + + guard(mutex)(&dev->lock); + + for (size_t i = 0; i < ARRAY_SIZE(engine_info); i++) { + enum xe_engine_class ec = engine_info[i].engine_class; + size_t len; + + if (!wa_bb[ec].len) + continue; + + len = snprintf(p, sz, "%s:", engine_info[i].cls); + if (!wa_bb_read_advance(data, &p, NULL, len, &sz)) + return -ENOBUFS; + + for (size_t j = 0; j < wa_bb[ec].len; j++) { + len = snprintf(p, sz, " %08x", wa_bb[ec].cs[j]); + if (!wa_bb_read_advance(data, &p, NULL, len, &sz)) + return -ENOBUFS; + } + + if (!wa_bb_read_advance(data, &p, "\n", 1, &sz)) + return -ENOBUFS; + } + + if (!wa_bb_read_advance(data, &p, "", 1, &sz)) + return -ENOBUFS; + + /* Reserve one more to match check for '\0' */ + if (!data) + p++; + + return p - data; +} + +static ssize_t ctx_restore_mid_bb_show(struct config_item *item, char *page) +{ + struct xe_config_group_device *dev = to_xe_config_group_device(item); + + return wa_bb_show(dev, dev->config.ctx_restore_mid_bb, page, SZ_4K); +} + +static ssize_t ctx_restore_post_bb_show(struct config_item *item, char *page) +{ + struct xe_config_group_device *dev = to_xe_config_group_device(item); - if (!lookup_engine_mask(buf, &mask)) + return wa_bb_show(dev, dev->config.ctx_restore_post_bb, page, SZ_4K); +} + +static void wa_bb_append(struct wa_bb *wa_bb, u32 val) +{ + if (wa_bb->cs) + wa_bb->cs[wa_bb->len] = val; + + wa_bb->len++; +} + +static ssize_t parse_hex(const char *line, u32 *pval) +{ + char numstr[12]; + const char *p; + ssize_t numlen; + + p = line + strspn(line, " \t"); + if (!*p || *p == '\n') + return 0; + + numlen = strcspn(p, " \t\n"); + if (!numlen || numlen >= sizeof(numstr) - 1) + return -EINVAL; + + memcpy(numstr, p, numlen); + numstr[numlen] = '\0'; + p += numlen; + + if (kstrtou32(numstr, 16, pval)) + return -EINVAL; + + return p - line; +} + +/* + * Parse lines with the format + * + * <engine-class> cmd <u32> <u32...> + * <engine-class> reg <u32_addr> <u32_val> + * + * and optionally save them in @wa_bb[i].cs is non-NULL. + * + * Return the number of dwords parsed. 
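Note that parse_wa_bb_lines() relies on wa_bb_append() doing double duty: while ->cs is still NULL it only counts, and once the backing array has been allocated the same parse stores the dwords. This is the dword analogue of the snprintf(NULL, 0, ...) sizing idiom; a generic sketch of the pattern, independent of the wa_bb structures:

	static void emit_dword(u32 *buf, u32 *len, u32 val)
	{
		if (buf)
			buf[*len] = val;	/* second pass: store */
		(*len)++;			/* both passes: count */
	}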
+ */ +static ssize_t parse_wa_bb_lines(const char *lines, + struct wa_bb wa_bb[static XE_ENGINE_CLASS_MAX]) +{ + ssize_t dwords = 0, ret; + const char *p; + + for (p = lines; *p; p++) { + const struct engine_info *info = NULL; + u32 val, val2; + + /* Also allow empty lines */ + p += strspn(p, " \t\n"); + if (!*p) + break; + + ret = parse_engine(p, " \t\n", NULL, &info); + if (ret < 0) + return ret; + + p += ret; + p += strspn(p, " \t"); + + if (str_has_prefix(p, "cmd")) { + for (p += strlen("cmd"); *p;) { + ret = parse_hex(p, &val); + if (ret < 0) + return -EINVAL; + if (!ret) + break; + + p += ret; + dwords++; + wa_bb_append(&wa_bb[info->engine_class], val); + } + } else if (str_has_prefix(p, "reg")) { + p += strlen("reg"); + ret = parse_hex(p, &val); + if (ret <= 0) + return -EINVAL; + + p += ret; + ret = parse_hex(p, &val2); + if (ret <= 0) + return -EINVAL; + + p += ret; + dwords += 3; + wa_bb_append(&wa_bb[info->engine_class], + MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS(1)); + wa_bb_append(&wa_bb[info->engine_class], val); + wa_bb_append(&wa_bb[info->engine_class], val2); + } else { return -EINVAL; + } + } - val |= mask; + return dwords; +} + +static ssize_t wa_bb_store(struct wa_bb wa_bb[static XE_ENGINE_CLASS_MAX], + struct xe_config_group_device *dev, + const char *page, size_t len) +{ + /* tmp_wa_bb must match wa_bb's size */ + struct wa_bb tmp_wa_bb[XE_ENGINE_CLASS_MAX] = { }; + ssize_t count, class; + u32 *tmp; + + /* 1. Count dwords - wa_bb[i].cs is NULL for all classes */ + count = parse_wa_bb_lines(page, tmp_wa_bb); + if (count < 0) + return count; + + guard(mutex)(&dev->lock); + + if (is_bound(dev)) + return -EBUSY; + + /* + * 2. Allocate a u32 array and set the pointers to the right positions + * according to the length of each class' wa_bb + */ + tmp = krealloc(wa_bb[0].cs, count * sizeof(u32), GFP_KERNEL); + if (!tmp) + return -ENOMEM; + + if (!count) { + memset(wa_bb, 0, sizeof(tmp_wa_bb)); + return len; + } + + for (class = 0, count = 0; class < XE_ENGINE_CLASS_MAX; ++class) { + tmp_wa_bb[class].cs = tmp + count; + count += tmp_wa_bb[class].len; + tmp_wa_bb[class].len = 0; } - mutex_lock(&dev->lock); - dev->engines_allowed = val; - mutex_unlock(&dev->lock); + /* 3. 
Parse wa_bb lines again, this time saving the values */ + count = parse_wa_bb_lines(page, tmp_wa_bb); + if (count < 0) + return count; + + memcpy(wa_bb, tmp_wa_bb, sizeof(tmp_wa_bb)); return len; } -CONFIGFS_ATTR(, survivability_mode); +static ssize_t ctx_restore_mid_bb_store(struct config_item *item, + const char *data, size_t sz) +{ + struct xe_config_group_device *dev = to_xe_config_group_device(item); + + return wa_bb_store(dev->config.ctx_restore_mid_bb, dev, data, sz); +} + +static ssize_t ctx_restore_post_bb_store(struct config_item *item, + const char *data, size_t sz) +{ + struct xe_config_group_device *dev = to_xe_config_group_device(item); + + return wa_bb_store(dev->config.ctx_restore_post_bb, dev, data, sz); +} + +CONFIGFS_ATTR(, ctx_restore_mid_bb); +CONFIGFS_ATTR(, ctx_restore_post_bb); +CONFIGFS_ATTR(, enable_psmi); CONFIGFS_ATTR(, engines_allowed); +CONFIGFS_ATTR(, survivability_mode); static struct configfs_attribute *xe_config_device_attrs[] = { - &attr_survivability_mode, + &attr_ctx_restore_mid_bb, + &attr_ctx_restore_post_bb, + &attr_enable_psmi, &attr_engines_allowed, + &attr_survivability_mode, NULL, }; static void xe_config_device_release(struct config_item *item) { - struct xe_config_device *dev = to_xe_config_device(item); + struct xe_config_group_device *dev = to_xe_config_group_device(item); mutex_destroy(&dev->lock); + + kfree(dev->config.ctx_restore_post_bb[0].cs); kfree(dev); } @@ -247,34 +697,106 @@ static struct configfs_item_operations xe_config_device_ops = { .release = xe_config_device_release, }; +static bool xe_config_device_is_visible(struct config_item *item, + struct configfs_attribute *attr, int n) +{ + struct xe_config_group_device *dev = to_xe_config_group_device(item); + + if (attr == &attr_survivability_mode) { + if (!dev->desc->is_dgfx || dev->desc->platform < XE_BATTLEMAGE) + return false; + } + + return true; +} + +static struct configfs_group_operations xe_config_device_group_ops = { + .is_visible = xe_config_device_is_visible, +}; + static const struct config_item_type xe_config_device_type = { .ct_item_ops = &xe_config_device_ops, + .ct_group_ops = &xe_config_device_group_ops, .ct_attrs = xe_config_device_attrs, .ct_owner = THIS_MODULE, }; +static const struct xe_device_desc *xe_match_desc(struct pci_dev *pdev) +{ + struct device_driver *driver = driver_find("xe", &pci_bus_type); + struct pci_driver *drv = to_pci_driver(driver); + const struct pci_device_id *ids = drv ? drv->id_table : NULL; + const struct pci_device_id *found = pci_match_id(ids, pdev); + + return found ? 
(const void *)found->driver_data : NULL; +} + +static struct pci_dev *get_physfn_instead(struct pci_dev *virtfn) +{ + struct pci_dev *physfn = pci_physfn(virtfn); + + pci_dev_get(physfn); + pci_dev_put(virtfn); + return physfn; +} + static struct config_group *xe_config_make_device_group(struct config_group *group, const char *name) { unsigned int domain, bus, slot, function; - struct xe_config_device *dev; + struct xe_config_group_device *dev; + const struct xe_device_desc *match; struct pci_dev *pdev; + char canonical[16]; + int vfnumber = 0; int ret; - ret = sscanf(name, "%04x:%02x:%02x.%x", &domain, &bus, &slot, &function); + ret = sscanf(name, "%x:%x:%x.%x", &domain, &bus, &slot, &function); if (ret != 4) return ERR_PTR(-EINVAL); + ret = scnprintf(canonical, sizeof(canonical), "%04x:%02x:%02x.%d", domain, bus, + PCI_SLOT(PCI_DEVFN(slot, function)), + PCI_FUNC(PCI_DEVFN(slot, function))); + if (ret != 12 || strcmp(name, canonical)) + return ERR_PTR(-EINVAL); + pdev = pci_get_domain_bus_and_slot(domain, bus, PCI_DEVFN(slot, function)); + if (!pdev && function) + pdev = pci_get_domain_bus_and_slot(domain, bus, PCI_DEVFN(slot, 0)); + if (!pdev && slot) + pdev = pci_get_domain_bus_and_slot(domain, bus, PCI_DEVFN(0, 0)); if (!pdev) - return ERR_PTR(-EINVAL); + return ERR_PTR(-ENODEV); + + if (PCI_DEVFN(slot, function) != pdev->devfn) { + pdev = get_physfn_instead(pdev); + vfnumber = PCI_DEVFN(slot, function) - pdev->devfn; + if (!dev_is_pf(&pdev->dev) || vfnumber > pci_sriov_get_totalvfs(pdev)) { + pci_dev_put(pdev); + return ERR_PTR(-ENODEV); + } + } + + match = xe_match_desc(pdev); + if (match && vfnumber && !match->has_sriov) { + pci_info(pdev, "xe driver does not support VFs on this device\n"); + match = NULL; + } else if (!match) { + pci_info(pdev, "xe driver does not support configuration of this device\n"); + } + + pci_dev_put(pdev); + + if (!match) + return ERR_PTR(-ENOENT); dev = kzalloc(sizeof(*dev), GFP_KERNEL); if (!dev) return ERR_PTR(-ENOMEM); - /* Default values */ - dev->engines_allowed = U64_MAX; + dev->desc = match; + set_device_defaults(&dev->config); config_group_init_type_name(&dev->group, name, &xe_config_device_type); @@ -283,12 +805,12 @@ static struct config_group *xe_config_make_device_group(struct config_group *gro return &dev->group; } -static struct configfs_group_operations xe_config_device_group_ops = { +static struct configfs_group_operations xe_config_group_ops = { .make_group = xe_config_make_device_group, }; static const struct config_item_type xe_configfs_type = { - .ct_group_ops = &xe_config_device_group_ops, + .ct_group_ops = &xe_config_group_ops, .ct_owner = THIS_MODULE, }; @@ -301,110 +823,188 @@ static struct configfs_subsystem xe_configfs = { }, }; -static struct xe_config_device *configfs_find_group(struct pci_dev *pdev) +static struct xe_config_group_device *find_xe_config_group_device(struct pci_dev *pdev) { struct config_item *item; - char name[64]; - - snprintf(name, sizeof(name), "%04x:%02x:%02x.%x", pci_domain_nr(pdev->bus), - pdev->bus->number, PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn)); mutex_lock(&xe_configfs.su_mutex); - item = config_group_find_item(&xe_configfs.su_group, name); + item = config_group_find_item(&xe_configfs.su_group, pci_name(pdev)); mutex_unlock(&xe_configfs.su_mutex); if (!item) return NULL; - return to_xe_config_device(item); + return to_xe_config_group_device(item); +} + +static void dump_custom_dev_config(struct pci_dev *pdev, + struct xe_config_group_device *dev) +{ +#define PRI_CUSTOM_ATTR(fmt_, attr_) do { \ + 
if (dev->config.attr_ != device_defaults.attr_) \ + pci_info(pdev, "configfs: " __stringify(attr_) " = " fmt_ "\n", \ + dev->config.attr_); \ + } while (0) + + PRI_CUSTOM_ATTR("%llx", engines_allowed); + PRI_CUSTOM_ATTR("%d", enable_psmi); + PRI_CUSTOM_ATTR("%d", survivability_mode); + +#undef PRI_CUSTOM_ATTR +} + +/** + * xe_configfs_check_device() - Test if device was configured by configfs + * @pdev: the &pci_dev device to test + * + * Try to find the configfs group that belongs to the specified pci device + * and print a diagnostic message if different than the default value. + */ +void xe_configfs_check_device(struct pci_dev *pdev) +{ + struct xe_config_group_device *dev = find_xe_config_group_device(pdev); + + if (!dev) + return; + + /* memcmp here is safe as both are zero-initialized */ + if (memcmp(&dev->config, &device_defaults, sizeof(dev->config))) { + pci_info(pdev, "Found custom settings in configfs\n"); + dump_custom_dev_config(pdev, dev); + } + + config_group_put(&dev->group); } /** * xe_configfs_get_survivability_mode - get configfs survivability mode attribute * @pdev: pci device * - * find the configfs group that belongs to the pci device and return - * the survivability mode attribute - * - * Return: survivability mode if config group is found, false otherwise + * Return: survivability_mode attribute in configfs */ bool xe_configfs_get_survivability_mode(struct pci_dev *pdev) { - struct xe_config_device *dev = configfs_find_group(pdev); + struct xe_config_group_device *dev = find_xe_config_group_device(pdev); bool mode; if (!dev) - return false; + return device_defaults.survivability_mode; - mode = dev->survivability_mode; - config_item_put(&dev->group.cg_item); + mode = dev->config.survivability_mode; + config_group_put(&dev->group); return mode; } /** - * xe_configfs_clear_survivability_mode - clear configfs survivability mode attribute + * xe_configfs_get_engines_allowed - get engine allowed mask from configfs + * @pdev: pci device + * + * Return: engine mask with allowed engines set in configfs + */ +u64 xe_configfs_get_engines_allowed(struct pci_dev *pdev) +{ + struct xe_config_group_device *dev = find_xe_config_group_device(pdev); + u64 engines_allowed; + + if (!dev) + return device_defaults.engines_allowed; + + engines_allowed = dev->config.engines_allowed; + config_group_put(&dev->group); + + return engines_allowed; +} + +/** + * xe_configfs_get_psmi_enabled - get configfs enable_psmi setting * @pdev: pci device * - * find the configfs group that belongs to the pci device and clear survivability - * mode attribute + * Return: enable_psmi setting in configfs */ -void xe_configfs_clear_survivability_mode(struct pci_dev *pdev) +bool xe_configfs_get_psmi_enabled(struct pci_dev *pdev) { - struct xe_config_device *dev = configfs_find_group(pdev); + struct xe_config_group_device *dev = find_xe_config_group_device(pdev); + bool ret; if (!dev) - return; + return false; - mutex_lock(&dev->lock); - dev->survivability_mode = 0; - mutex_unlock(&dev->lock); + ret = dev->config.enable_psmi; + config_group_put(&dev->group); - config_item_put(&dev->group.cg_item); + return ret; } /** - * xe_configfs_get_engines_allowed - get engine allowed mask from configfs + * xe_configfs_get_ctx_restore_mid_bb - get configfs ctx_restore_mid_bb setting * @pdev: pci device + * @class: hw engine class + * @cs: pointer to the bb to use - only valid during probe * - * Find the configfs group that belongs to the pci device and return - * the mask of engines allowed to be used. 
+ * Return: Number of dwords used in the mid_ctx_restore setting in configfs + */ +u32 xe_configfs_get_ctx_restore_mid_bb(struct pci_dev *pdev, + enum xe_engine_class class, + const u32 **cs) +{ + struct xe_config_group_device *dev = find_xe_config_group_device(pdev); + u32 len; + + if (!dev) + return 0; + + if (cs) + *cs = dev->config.ctx_restore_mid_bb[class].cs; + + len = dev->config.ctx_restore_mid_bb[class].len; + config_group_put(&dev->group); + + return len; +} + +/** + * xe_configfs_get_ctx_restore_post_bb - get configfs ctx_restore_post_bb setting + * @pdev: pci device + * @class: hw engine class + * @cs: pointer to the bb to use - only valid during probe * - * Return: engine mask with allowed engines + * Return: Number of dwords used in the post_ctx_restore setting in configfs */ -u64 xe_configfs_get_engines_allowed(struct pci_dev *pdev) +u32 xe_configfs_get_ctx_restore_post_bb(struct pci_dev *pdev, + enum xe_engine_class class, + const u32 **cs) { - struct xe_config_device *dev = configfs_find_group(pdev); - u64 engines_allowed; + struct xe_config_group_device *dev = find_xe_config_group_device(pdev); + u32 len; if (!dev) - return U64_MAX; + return 0; - engines_allowed = dev->engines_allowed; - config_item_put(&dev->group.cg_item); + *cs = dev->config.ctx_restore_post_bb[class].cs; + len = dev->config.ctx_restore_post_bb[class].len; + config_group_put(&dev->group); - return engines_allowed; + return len; } int __init xe_configfs_init(void) { - struct config_group *root = &xe_configfs.su_group; int ret; - config_group_init(root); + config_group_init(&xe_configfs.su_group); mutex_init(&xe_configfs.su_mutex); ret = configfs_register_subsystem(&xe_configfs); if (ret) { - pr_err("Error %d while registering %s subsystem\n", - ret, root->cg_item.ci_namebuf); + mutex_destroy(&xe_configfs.su_mutex); return ret; } return 0; } -void __exit xe_configfs_exit(void) +void xe_configfs_exit(void) { configfs_unregister_subsystem(&xe_configfs); + mutex_destroy(&xe_configfs.su_mutex); } - diff --git a/drivers/gpu/drm/xe/xe_configfs.h b/drivers/gpu/drm/xe/xe_configfs.h index fb8764008089..c61e0e47ed94 100644 --- a/drivers/gpu/drm/xe/xe_configfs.h +++ b/drivers/gpu/drm/xe/xe_configfs.h @@ -8,20 +8,32 @@ #include <linux/limits.h> #include <linux/types.h> +#include <xe_hw_engine_types.h> + struct pci_dev; #if IS_ENABLED(CONFIG_CONFIGFS_FS) int xe_configfs_init(void); void xe_configfs_exit(void); +void xe_configfs_check_device(struct pci_dev *pdev); bool xe_configfs_get_survivability_mode(struct pci_dev *pdev); -void xe_configfs_clear_survivability_mode(struct pci_dev *pdev); u64 xe_configfs_get_engines_allowed(struct pci_dev *pdev); +bool xe_configfs_get_psmi_enabled(struct pci_dev *pdev); +u32 xe_configfs_get_ctx_restore_mid_bb(struct pci_dev *pdev, enum xe_engine_class, + const u32 **cs); +u32 xe_configfs_get_ctx_restore_post_bb(struct pci_dev *pdev, enum xe_engine_class, + const u32 **cs); #else static inline int xe_configfs_init(void) { return 0; } static inline void xe_configfs_exit(void) { } +static inline void xe_configfs_check_device(struct pci_dev *pdev) { } static inline bool xe_configfs_get_survivability_mode(struct pci_dev *pdev) { return false; } -static inline void xe_configfs_clear_survivability_mode(struct pci_dev *pdev) { } static inline u64 xe_configfs_get_engines_allowed(struct pci_dev *pdev) { return U64_MAX; } +static inline bool xe_configfs_get_psmi_enabled(struct pci_dev *pdev) { return false; } +static inline u32 xe_configfs_get_ctx_restore_mid_bb(struct pci_dev *pdev, enum 
xe_engine_class, + const u32 **cs) { return 0; } +static inline u32 xe_configfs_get_ctx_restore_post_bb(struct pci_dev *pdev, enum xe_engine_class, + const u32 **cs) { return 0; } #endif #endif diff --git a/drivers/gpu/drm/xe/xe_debugfs.c b/drivers/gpu/drm/xe/xe_debugfs.c index d83cd6ed3fa8..cd977dbd1ef6 100644 --- a/drivers/gpu/drm/xe/xe_debugfs.c +++ b/drivers/gpu/drm/xe/xe_debugfs.c @@ -11,16 +11,24 @@ #include <drm/drm_debugfs.h> +#include "regs/xe_pmt.h" #include "xe_bo.h" #include "xe_device.h" #include "xe_force_wake.h" #include "xe_gt_debugfs.h" #include "xe_gt_printk.h" #include "xe_guc_ads.h" +#include "xe_mmio.h" #include "xe_pm.h" +#include "xe_psmi.h" #include "xe_pxp_debugfs.h" #include "xe_sriov.h" +#include "xe_sriov_pf.h" +#include "xe_sriov_vf.h" #include "xe_step.h" +#include "xe_tile_debugfs.h" +#include "xe_wa.h" +#include "xe_vsec.h" #ifdef CONFIG_DRM_XE_DEBUG #include "xe_bo_evict.h" @@ -29,6 +37,24 @@ #endif DECLARE_FAULT_ATTR(gt_reset_failure); +DECLARE_FAULT_ATTR(inject_csc_hw_error); + +static void read_residency_counter(struct xe_device *xe, struct xe_mmio *mmio, + u32 offset, const char *name, struct drm_printer *p) +{ + u64 residency = 0; + int ret; + + ret = xe_pmt_telem_read(to_pci_dev(xe->drm.dev), + xe_mmio_read32(mmio, PUNIT_TELEMETRY_GUID), + &residency, offset, sizeof(residency)); + if (ret != sizeof(residency)) { + drm_warn(&xe->drm, "%s counter failed to read, ret %d\n", name, ret); + return; + } + + drm_printf(p, "%s : %llu\n", name, residency); +} static struct xe_device *node_to_xe(struct drm_info_node *node) { @@ -82,9 +108,88 @@ static int sriov_info(struct seq_file *m, void *data) return 0; } +static int workarounds(struct xe_device *xe, struct drm_printer *p) +{ + xe_pm_runtime_get(xe); + xe_wa_device_dump(xe, p); + xe_pm_runtime_put(xe); + + return 0; +} + +static int workaround_info(struct seq_file *m, void *data) +{ + struct xe_device *xe = node_to_xe(m->private); + struct drm_printer p = drm_seq_file_printer(m); + + workarounds(xe, &p); + return 0; +} + +static int dgfx_pkg_residencies_show(struct seq_file *m, void *data) +{ + struct xe_device *xe; + struct xe_mmio *mmio; + struct drm_printer p; + + xe = node_to_xe(m->private); + p = drm_seq_file_printer(m); + xe_pm_runtime_get(xe); + mmio = xe_root_tile_mmio(xe); + static const struct { + u32 offset; + const char *name; + } residencies[] = { + {BMG_G2_RESIDENCY_OFFSET, "Package G2"}, + {BMG_G6_RESIDENCY_OFFSET, "Package G6"}, + {BMG_G8_RESIDENCY_OFFSET, "Package G8"}, + {BMG_G10_RESIDENCY_OFFSET, "Package G10"}, + {BMG_MODS_RESIDENCY_OFFSET, "Package ModS"} + }; + + for (int i = 0; i < ARRAY_SIZE(residencies); i++) + read_residency_counter(xe, mmio, residencies[i].offset, residencies[i].name, &p); + + xe_pm_runtime_put(xe); + return 0; +} + +static int dgfx_pcie_link_residencies_show(struct seq_file *m, void *data) +{ + struct xe_device *xe; + struct xe_mmio *mmio; + struct drm_printer p; + + xe = node_to_xe(m->private); + p = drm_seq_file_printer(m); + xe_pm_runtime_get(xe); + mmio = xe_root_tile_mmio(xe); + + static const struct { + u32 offset; + const char *name; + } residencies[] = { + {BMG_PCIE_LINK_L0_RESIDENCY_OFFSET, "PCIE LINK L0 RESIDENCY"}, + {BMG_PCIE_LINK_L1_RESIDENCY_OFFSET, "PCIE LINK L1 RESIDENCY"}, + {BMG_PCIE_LINK_L1_2_RESIDENCY_OFFSET, "PCIE LINK L1.2 RESIDENCY"} + }; + + for (int i = 0; i < ARRAY_SIZE(residencies); i++) + read_residency_counter(xe, mmio, residencies[i].offset, residencies[i].name, &p); + + xe_pm_runtime_put(xe); + return 0; +} + static const struct 
drm_info_list debugfs_list[] = { {"info", info, 0}, { .name = "sriov_info", .show = sriov_info, }, + { .name = "workarounds", .show = workaround_info, }, +}; + +static const struct drm_info_list debugfs_residencies[] = { + { .name = "dgfx_pkg_residencies", .show = dgfx_pkg_residencies_show, }, + { .name = "dgfx_pcie_link_residencies", .show = dgfx_pcie_link_residencies_show, }, }; static int forcewake_open(struct inode *inode, struct file *file) @@ -226,20 +331,68 @@ static const struct file_operations atomic_svm_timeslice_ms_fops = { .write = atomic_svm_timeslice_ms_set, }; +static ssize_t disable_late_binding_show(struct file *f, char __user *ubuf, + size_t size, loff_t *pos) +{ + struct xe_device *xe = file_inode(f)->i_private; + struct xe_late_bind *late_bind = &xe->late_bind; + char buf[32]; + int len; + + len = scnprintf(buf, sizeof(buf), "%d\n", late_bind->disable); + + return simple_read_from_buffer(ubuf, size, pos, buf, len); +} + +static ssize_t disable_late_binding_set(struct file *f, const char __user *ubuf, + size_t size, loff_t *pos) +{ + struct xe_device *xe = file_inode(f)->i_private; + struct xe_late_bind *late_bind = &xe->late_bind; + u32 uval; + ssize_t ret; + + ret = kstrtouint_from_user(ubuf, size, sizeof(uval), &uval); + if (ret) + return ret; + + if (uval > 1) + return -EINVAL; + + late_bind->disable = !!uval; + return size; +} + +static const struct file_operations disable_late_binding_fops = { + .owner = THIS_MODULE, + .read = disable_late_binding_show, + .write = disable_late_binding_set, +}; + void xe_debugfs_register(struct xe_device *xe) { struct ttm_device *bdev = &xe->ttm; struct drm_minor *minor = xe->drm.primary; struct dentry *root = minor->debugfs_root; struct ttm_resource_manager *man; + struct xe_tile *tile; struct xe_gt *gt; u32 mem_type; + u8 tile_id; u8 id; drm_debugfs_create_files(debugfs_list, ARRAY_SIZE(debugfs_list), root, minor); + if (xe->info.platform == XE_BATTLEMAGE && !IS_SRIOV_VF(xe)) { + drm_debugfs_create_files(debugfs_residencies, + ARRAY_SIZE(debugfs_residencies), + root, minor); + fault_create_debugfs_attr("inject_csc_hw_error", root, + &inject_csc_hw_error); + } + debugfs_create_file("forcewake_all", 0400, root, xe, &forcewake_all_fops); @@ -249,6 +402,9 @@ void xe_debugfs_register(struct xe_device *xe) debugfs_create_file("atomic_svm_timeslice_ms", 0600, root, xe, &atomic_svm_timeslice_ms_fops); + debugfs_create_file("disable_late_binding", 0600, root, xe, + &disable_late_binding_fops); + for (mem_type = XE_PL_VRAM0; mem_type <= XE_PL_VRAM1; ++mem_type) { man = ttm_manager_type(bdev, mem_type); @@ -267,10 +423,20 @@ void xe_debugfs_register(struct xe_device *xe) if (man) ttm_resource_manager_create_debugfs(man, root, "stolen_mm"); + for_each_tile(tile, xe, tile_id) + xe_tile_debugfs_register(tile); + for_each_gt(gt, xe, id) xe_gt_debugfs_register(gt); xe_pxp_debugfs_register(xe->pxp); + xe_psmi_debugfs_register(xe); + fault_create_debugfs_attr("fail_gt_reset", root, >_reset_failure); + + if (IS_SRIOV_PF(xe)) + xe_sriov_pf_debugfs_register(xe, root); + else if (IS_SRIOV_VF(xe)) + xe_sriov_vf_debugfs_register(xe, root); } diff --git a/drivers/gpu/drm/xe/xe_dep_job_types.h b/drivers/gpu/drm/xe/xe_dep_job_types.h new file mode 100644 index 000000000000..c6a484f24c8c --- /dev/null +++ b/drivers/gpu/drm/xe/xe_dep_job_types.h @@ -0,0 +1,29 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2025 Intel Corporation + */ + +#ifndef _XE_DEP_JOB_TYPES_H_ +#define _XE_DEP_JOB_TYPES_H_ + +#include <drm/gpu_scheduler.h> + +struct 
xe_dep_job; + +/** struct xe_dep_job_ops - Generic Xe dependency job operations */ +struct xe_dep_job_ops { + /** @run_job: Run generic Xe dependency job */ + struct dma_fence *(*run_job)(struct xe_dep_job *job); + /** @free_job: Free generic Xe dependency job */ + void (*free_job)(struct xe_dep_job *job); +}; + +/** struct xe_dep_job - Generic dependency Xe job */ +struct xe_dep_job { + /** @drm: base DRM scheduler job */ + struct drm_sched_job drm; + /** @ops: dependency job operations */ + const struct xe_dep_job_ops *ops; +}; + +#endif diff --git a/drivers/gpu/drm/xe/xe_dep_scheduler.c b/drivers/gpu/drm/xe/xe_dep_scheduler.c new file mode 100644 index 000000000000..9bd3bfd2e526 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_dep_scheduler.c @@ -0,0 +1,143 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2025 Intel Corporation + */ + +#include <linux/slab.h> + +#include <drm/gpu_scheduler.h> + +#include "xe_dep_job_types.h" +#include "xe_dep_scheduler.h" +#include "xe_device_types.h" + +/** + * DOC: Xe Dependency Scheduler + * + * The Xe dependency scheduler is a simple wrapper built around the DRM + * scheduler to execute jobs once their dependencies are resolved (i.e., all + * input fences specified as dependencies are signaled). The jobs that are + * executed contain virtual functions to run (execute) and free the job, + * allowing a single dependency scheduler to handle jobs performing different + * operations. + * + * Example use cases include deferred resource freeing, TLB invalidations after + * bind jobs, etc. + */ + +/** struct xe_dep_scheduler - Generic Xe dependency scheduler */ +struct xe_dep_scheduler { + /** @sched: DRM GPU scheduler */ + struct drm_gpu_scheduler sched; + /** @entity: DRM scheduler entity */ + struct drm_sched_entity entity; + /** @rcu: For safe freeing of exported dma fences */ + struct rcu_head rcu; +}; + +static struct dma_fence *xe_dep_scheduler_run_job(struct drm_sched_job *drm_job) +{ + struct xe_dep_job *dep_job = + container_of(drm_job, typeof(*dep_job), drm); + + return dep_job->ops->run_job(dep_job); +} + +static void xe_dep_scheduler_free_job(struct drm_sched_job *drm_job) +{ + struct xe_dep_job *dep_job = + container_of(drm_job, typeof(*dep_job), drm); + + dep_job->ops->free_job(dep_job); +} + +static const struct drm_sched_backend_ops sched_ops = { + .run_job = xe_dep_scheduler_run_job, + .free_job = xe_dep_scheduler_free_job, +}; + +/** + * xe_dep_scheduler_create() - Generic Xe dependency scheduler create + * @xe: Xe device + * @submit_wq: Submit workqueue struct (can be NULL) + * @name: Name of dependency scheduler + * @job_limit: Max dependency jobs that can be scheduled + * + * Create a generic Xe dependency scheduler and initialize internal DRM + * scheduler objects. 
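A minimal sketch of a client of this scheduler, assuming a hypothetical deferred-free use case (all example_* names are invented): a job embeds struct xe_dep_job, is initialized against the scheduler's entity, and its run_job() callback fires once all dependency fences have signalled.

struct example_job {
	struct xe_dep_job dep;
	void *payload;
};

static struct dma_fence *example_run_job(struct xe_dep_job *dep)
{
	struct example_job *job = container_of(dep, struct example_job, dep);

	kfree(job->payload);	/* dependencies have signalled; do the deferred work */
	return NULL;		/* no hardware fence to wait on */
}

static void example_free_job(struct xe_dep_job *dep)
{
	kfree(container_of(dep, struct example_job, dep));
}

static const struct xe_dep_job_ops example_job_ops = {
	.run_job = example_run_job,
	.free_job = example_free_job,
};

static struct xe_dep_scheduler *example_ds;

static int example_init(struct xe_device *xe)
{
	example_ds = xe_dep_scheduler_create(xe, NULL, "example-free", 64);
	return PTR_ERR_OR_ZERO(example_ds);
}

/*
 * Jobs are then set up against xe_dep_scheduler_entity(example_ds) with the
 * usual drm_sched_job_init()/drm_sched_job_arm()/drm_sched_entity_push_job()
 * sequence, with job->dep.ops pointing at example_job_ops and the fences to
 * wait for added as job dependencies first.
 */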
+ * + * Return: Generic Xe dependency scheduler object on success, ERR_PTR failure + */ +struct xe_dep_scheduler * +xe_dep_scheduler_create(struct xe_device *xe, + struct workqueue_struct *submit_wq, + const char *name, u32 job_limit) +{ + struct xe_dep_scheduler *dep_scheduler; + struct drm_gpu_scheduler *sched; + const struct drm_sched_init_args args = { + .ops = &sched_ops, + .submit_wq = submit_wq, + .num_rqs = 1, + .credit_limit = job_limit, + .timeout = MAX_SCHEDULE_TIMEOUT, + .name = name, + .dev = xe->drm.dev, + }; + int err; + + dep_scheduler = kzalloc(sizeof(*dep_scheduler), GFP_KERNEL); + if (!dep_scheduler) + return ERR_PTR(-ENOMEM); + + err = drm_sched_init(&dep_scheduler->sched, &args); + if (err) + goto err_free; + + sched = &dep_scheduler->sched; + err = drm_sched_entity_init(&dep_scheduler->entity, 0, &sched, 1, NULL); + if (err) + goto err_sched; + + init_rcu_head(&dep_scheduler->rcu); + + return dep_scheduler; + +err_sched: + drm_sched_fini(&dep_scheduler->sched); +err_free: + kfree(dep_scheduler); + + return ERR_PTR(err); +} + +/** + * xe_dep_scheduler_fini() - Generic Xe dependency scheduler finalize + * @dep_scheduler: Generic Xe dependency scheduler object + * + * Finalize internal DRM scheduler objects and free generic Xe dependency + * scheduler object + */ +void xe_dep_scheduler_fini(struct xe_dep_scheduler *dep_scheduler) +{ + drm_sched_entity_fini(&dep_scheduler->entity); + drm_sched_fini(&dep_scheduler->sched); + /* + * RCU free due sched being exported via DRM scheduler fences + * (timeline name). + */ + kfree_rcu(dep_scheduler, rcu); +} + +/** + * xe_dep_scheduler_entity() - Retrieve a generic Xe dependency scheduler + * DRM scheduler entity + * @dep_scheduler: Generic Xe dependency scheduler object + * + * Return: The generic Xe dependency scheduler's DRM scheduler entity + */ +struct drm_sched_entity * +xe_dep_scheduler_entity(struct xe_dep_scheduler *dep_scheduler) +{ + return &dep_scheduler->entity; +} diff --git a/drivers/gpu/drm/xe/xe_dep_scheduler.h b/drivers/gpu/drm/xe/xe_dep_scheduler.h new file mode 100644 index 000000000000..853961eec64b --- /dev/null +++ b/drivers/gpu/drm/xe/xe_dep_scheduler.h @@ -0,0 +1,21 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2025 Intel Corporation + */ + +#include <linux/types.h> + +struct drm_sched_entity; +struct workqueue_struct; +struct xe_dep_scheduler; +struct xe_device; + +struct xe_dep_scheduler * +xe_dep_scheduler_create(struct xe_device *xe, + struct workqueue_struct *submit_wq, + const char *name, u32 job_limit); + +void xe_dep_scheduler_fini(struct xe_dep_scheduler *dep_scheduler); + +struct drm_sched_entity * +xe_dep_scheduler_entity(struct xe_dep_scheduler *dep_scheduler); diff --git a/drivers/gpu/drm/xe/xe_devcoredump.c b/drivers/gpu/drm/xe/xe_devcoredump.c index 7a8af2311318..203e3038cc81 100644 --- a/drivers/gpu/drm/xe/xe_devcoredump.c +++ b/drivers/gpu/drm/xe/xe_devcoredump.c @@ -171,14 +171,32 @@ static void xe_devcoredump_snapshot_free(struct xe_devcoredump_snapshot *ss) #define XE_DEVCOREDUMP_CHUNK_MAX (SZ_512M + SZ_1G) +/** + * xe_devcoredump_read() - Read data from the Xe device coredump snapshot + * @buffer: Destination buffer to copy the coredump data into + * @offset: Offset in the coredump data to start reading from + * @count: Number of bytes to read + * @data: Pointer to the xe_devcoredump structure + * @datalen: Length of the data (unused) + * + * Reads a chunk of the coredump snapshot data into the provided buffer. 
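For a concrete feel for the chunking the body below implements with div_u64_rem(): with XE_DEVCOREDUMP_CHUNK_MAX at 1.5 GiB, a hypothetical read at offset 2 GiB resolves to the second chunk, half a GiB in:

	u32 chunk_offset;
	u64 chunk = div_u64_rem(2ULL * SZ_1G, XE_DEVCOREDUMP_CHUNK_MAX, &chunk_offset);
	/* chunk == 1 (second chunk), chunk_offset == SZ_512M */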
+ * If the devcoredump is smaller than 1.5 GB (XE_DEVCOREDUMP_CHUNK_MAX), + * it is read directly from a pre-written buffer. For larger devcoredumps, + * the pre-written buffer must be periodically repopulated from the snapshot + * state due to kmalloc size limitations. + * + * Return: Number of bytes copied on success, or a negative error code on failure. + */ static ssize_t xe_devcoredump_read(char *buffer, loff_t offset, size_t count, void *data, size_t datalen) { struct xe_devcoredump *coredump = data; struct xe_devcoredump_snapshot *ss; - ssize_t byte_copied; + ssize_t byte_copied = 0; u32 chunk_offset; ssize_t new_chunk_position; + bool pm_needed = false; + int ret = 0; if (!coredump) return -ENODEV; @@ -188,20 +206,19 @@ static ssize_t xe_devcoredump_read(char *buffer, loff_t offset, /* Ensure delayed work is captured before continuing */ flush_work(&ss->work); - if (ss->read.size > XE_DEVCOREDUMP_CHUNK_MAX) + pm_needed = ss->read.size > XE_DEVCOREDUMP_CHUNK_MAX; + if (pm_needed) xe_pm_runtime_get(gt_to_xe(ss->gt)); mutex_lock(&coredump->lock); if (!ss->read.buffer) { - mutex_unlock(&coredump->lock); - return -ENODEV; + ret = -ENODEV; + goto unlock; } - if (offset >= ss->read.size) { - mutex_unlock(&coredump->lock); - return 0; - } + if (offset >= ss->read.size) + goto unlock; new_chunk_position = div_u64_rem(offset, XE_DEVCOREDUMP_CHUNK_MAX, @@ -221,12 +238,13 @@ static ssize_t xe_devcoredump_read(char *buffer, loff_t offset, ss->read.size - offset; memcpy(buffer, ss->read.buffer + chunk_offset, byte_copied); +unlock: mutex_unlock(&coredump->lock); - if (ss->read.size > XE_DEVCOREDUMP_CHUNK_MAX) + if (pm_needed) xe_pm_runtime_put(gt_to_xe(ss->gt)); - return byte_copied; + return byte_copied ? byte_copied : ret; } static void xe_devcoredump_free(void *data) @@ -313,13 +331,9 @@ static void devcoredump_snapshot(struct xe_devcoredump *coredump, { struct xe_devcoredump_snapshot *ss = &coredump->snapshot; struct xe_guc *guc = exec_queue_to_guc(q); - u32 adj_logical_mask = q->logical_mask; - u32 width_mask = (0x1 << q->width) - 1; const char *process_name = "no process"; - unsigned int fw_ref; bool cookie; - int i; ss->snapshot_time = ktime_get_real(); ss->boot_time = ktime_get_boottime(); @@ -335,14 +349,6 @@ static void devcoredump_snapshot(struct xe_devcoredump *coredump, INIT_WORK(&ss->work, xe_devcoredump_deferred_snap_work); cookie = dma_fence_begin_signalling(); - for (i = 0; q->width > 1 && i < XE_HW_ENGINE_MAX_INSTANCE;) { - if (adj_logical_mask & BIT(i)) { - adj_logical_mask |= width_mask << i; - i += q->width; - } else { - ++i; - } - } /* keep going if fw fails as we still want to save the memory and SW data */ fw_ref = xe_force_wake_get(gt_to_fw(q->gt), XE_FORCEWAKE_ALL); diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c index 8cfcfff250ca..34d33965eac2 100644 --- a/drivers/gpu/drm/xe/xe_device.c +++ b/drivers/gpu/drm/xe/xe_device.c @@ -40,18 +40,22 @@ #include "xe_gt_printk.h" #include "xe_gt_sriov_vf.h" #include "xe_guc.h" +#include "xe_guc_pc.h" #include "xe_hw_engine_group.h" #include "xe_hwmon.h" +#include "xe_i2c.h" #include "xe_irq.h" -#include "xe_memirq.h" +#include "xe_late_bind_fw.h" #include "xe_mmio.h" #include "xe_module.h" +#include "xe_nvm.h" #include "xe_oa.h" #include "xe_observation.h" #include "xe_pat.h" #include "xe_pcode.h" #include "xe_pm.h" #include "xe_pmu.h" +#include "xe_psmi.h" #include "xe_pxp.h" #include "xe_query.h" #include "xe_shrinker.h" @@ -61,11 +65,14 @@ #include "xe_ttm_stolen_mgr.h" #include "xe_ttm_sys_mgr.h" 
#include "xe_vm.h" +#include "xe_vm_madvise.h" #include "xe_vram.h" +#include "xe_vram_types.h" #include "xe_vsec.h" #include "xe_wait_user_fence.h" #include "xe_wa.h" +#include <generated/xe_device_wa_oob.h> #include <generated/xe_wa_oob.h> static int xe_file_open(struct drm_device *dev, struct drm_file *file) @@ -197,6 +204,9 @@ static const struct drm_ioctl_desc xe_ioctls[] = { DRM_IOCTL_DEF_DRV(XE_WAIT_USER_FENCE, xe_wait_user_fence_ioctl, DRM_RENDER_ALLOW), DRM_IOCTL_DEF_DRV(XE_OBSERVATION, xe_observation_ioctl, DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(XE_MADVISE, xe_vm_madvise_ioctl, DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(XE_VM_QUERY_MEM_RANGE_ATTRS, xe_vm_query_vmas_attrs_ioctl, + DRM_RENDER_ALLOW), }; static long xe_drm_ioctl(struct file *file, unsigned int cmd, unsigned long arg) @@ -448,6 +458,8 @@ struct xe_device *xe_device_create(struct pci_dev *pdev, if (err) goto err; + xe_validation_device_init(&xe->val); + init_waitqueue_head(&xe->ufence_wq); init_rwsem(&xe->usm.lock); @@ -521,7 +533,7 @@ static bool xe_driver_flr_disabled(struct xe_device *xe) * re-init and saving/restoring (or re-populating) the wiped memory. Since we * perform the FLR as the very last action before releasing access to the HW * during the driver release flow, we don't attempt recovery at all, because - * if/when a new instance of i915 is bound to the device it will do a full + * if/when a new instance of Xe is bound to the device it will do a full * re-init anyway. */ static void __xe_driver_flr(struct xe_device *xe) @@ -673,15 +685,31 @@ static int wait_for_lmem_ready(struct xe_device *xe) } ALLOW_ERROR_INJECTION(wait_for_lmem_ready, ERRNO); /* See xe_pci_probe() */ -static void sriov_update_device_info(struct xe_device *xe) +static void vf_update_device_info(struct xe_device *xe) { + xe_assert(xe, IS_SRIOV_VF(xe)); /* disable features that are not available/applicable to VFs */ - if (IS_SRIOV_VF(xe)) { - xe->info.probe_display = 0; - xe->info.has_heci_gscfi = 0; - xe->info.skip_guc_pc = 1; - xe->info.skip_pcode = 1; - } + xe->info.probe_display = 0; + xe->info.has_heci_cscfi = 0; + xe->info.has_heci_gscfi = 0; + xe->info.has_late_bind = 0; + xe->info.skip_guc_pc = 1; + xe->info.skip_pcode = 1; +} + +static int xe_device_vram_alloc(struct xe_device *xe) +{ + struct xe_vram_region *vram; + + if (!IS_DGFX(xe)) + return 0; + + vram = drmm_kzalloc(&xe->drm, sizeof(*vram), GFP_KERNEL); + if (!vram) + return -ENOMEM; + + xe->mem.vram = vram; + return 0; } /** @@ -698,13 +726,17 @@ int xe_device_probe_early(struct xe_device *xe) { int err; + xe_wa_device_init(xe); + xe_wa_process_device_oob(xe); + err = xe_mmio_probe_early(xe); if (err) return err; xe_sriov_probe_early(xe); - sriov_update_device_info(xe); + if (IS_SRIOV_VF(xe)) + vf_update_device_info(xe); err = xe_pcode_probe_early(xe); if (err || xe_survivability_mode_is_requested(xe)) { @@ -715,7 +747,7 @@ int xe_device_probe_early(struct xe_device *xe) * possible, but still return the previous error for error * propagation */ - err = xe_survivability_mode_enable(xe); + err = xe_survivability_mode_boot_enable(xe); if (err) return err; @@ -728,6 +760,10 @@ int xe_device_probe_early(struct xe_device *xe) xe->wedged.mode = xe_modparam.wedged_mode; + err = xe_device_vram_alloc(xe); + if (err) + return err; + return 0; } ALLOW_ERROR_INJECTION(xe_device_probe_early, ERRNO); /* See xe_pci_probe() */ @@ -783,52 +819,18 @@ int xe_device_probe(struct xe_device *xe) if (err) return err; - err = xe_ttm_sys_mgr_init(xe); - if (err) - return err; - for_each_gt(gt, xe, id) 
{ err = xe_gt_init_early(gt); if (err) return err; - - /* - * Only after this point can GT-specific MMIO operations - * (including things like communication with the GuC) - * be performed. - */ - xe_gt_mmio_init(gt); - - if (IS_SRIOV_VF(xe)) { - xe_guc_comm_init_early(>->uc.guc); - err = xe_gt_sriov_vf_bootstrap(gt); - if (err) - return err; - err = xe_gt_sriov_vf_query_config(gt); - if (err) - return err; - } } for_each_tile(tile, xe, id) { err = xe_ggtt_init_early(tile->mem.ggtt); if (err) return err; - err = xe_memirq_init(&tile->memirq); - if (err) - return err; } - for_each_gt(gt, xe, id) { - err = xe_gt_init_hwconfig(gt); - if (err) - return err; - } - - err = xe_devcoredump_init(xe); - if (err) - return err; - /* * From here on, if a step fails, make sure a Driver-FLR is triggereed */ @@ -850,6 +852,14 @@ int xe_device_probe(struct xe_device *xe) return err; } + /* + * Allow allocations only now to ensure xe_display_init_early() + * is the first to allocate, always. + */ + err = xe_ttm_sys_mgr_init(xe); + if (err) + return err; + /* Allocate and map stolen after potential VRAM resize */ err = xe_ttm_stolen_mgr_init(xe); if (err) @@ -881,10 +891,24 @@ int xe_device_probe(struct xe_device *xe) return err; } + if (xe->tiles->media_gt && + XE_GT_WA(xe->tiles->media_gt, 15015404425_disable)) + XE_DEVICE_WA_DISABLE(xe, 15015404425); + + err = xe_devcoredump_init(xe); + if (err) + return err; + + xe_nvm_init(xe); + err = xe_heci_gsc_init(xe); if (err) return err; + err = xe_late_bind_init(&xe->late_bind); + if (err) + return err; + err = xe_oa_init(xe); if (err) return err; @@ -897,6 +921,10 @@ int xe_device_probe(struct xe_device *xe) if (err) return err; + err = xe_psmi_init(xe); + if (err) + return err; + err = drm_dev_register(&xe->drm, 0); if (err) return err; @@ -921,11 +949,19 @@ int xe_device_probe(struct xe_device *xe) if (err) goto err_unregister_display; + err = xe_i2c_probe(xe); + if (err) + goto err_unregister_display; + for_each_gt(gt, xe, id) xe_gt_sanitize_freq(gt); xe_vsec_init(xe); + err = xe_sriov_init_late(xe); + if (err) + goto err_unregister_display; + return devm_add_action_or_reset(xe->drm.dev, xe_device_sanitize, xe); err_unregister_display: @@ -938,6 +974,8 @@ void xe_device_remove(struct xe_device *xe) { xe_display_unregister(xe); + xe_nvm_fini(xe); + drm_dev_unplug(&xe->drm); xe_bo_pci_dev_remove_all(xe); @@ -981,38 +1019,15 @@ void xe_device_wmb(struct xe_device *xe) xe_mmio_write32(xe_root_tile_mmio(xe), VF_CAP_REG, 0); } -/** - * xe_device_td_flush() - Flush transient L3 cache entries - * @xe: The device - * - * Display engine has direct access to memory and is never coherent with L3/L4 - * caches (or CPU caches), however KMD is responsible for specifically flushing - * transient L3 GPU cache entries prior to the flip sequence to ensure scanout - * can happen from such a surface without seeing corruption. - * - * Display surfaces can be tagged as transient by mapping it using one of the - * various L3:XD PAT index modes on Xe2. - * - * Note: On non-discrete xe2 platforms, like LNL, the entire L3 cache is flushed - * at the end of each submission via PIPE_CONTROL for compute/render, since SA - * Media is not coherent with L3 and we want to support render-vs-media - * usescases. For other engines like copy/blt the HW internally forces uncached - * behaviour, hence why we can skip the TDF on such platforms. +/* + * Issue a TRANSIENT_FLUSH_REQUEST and wait for completion on each gt. 
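tdf_request_sync() described just above is the "write the request bit, poll until the hardware clears it" idiom, with xe_mmio_wait32() bounding the wait (the FIXME in the hunk notes the 150 us budget is a worst-case guess). A rough standalone model of that handshake, with the register and the poll budget simulated rather than read over MMIO:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define FLUSH_REQUEST (1u << 0)

static uint32_t tdf_ctrl;            /* stands in for XE2_TDF_CTRL */
static int polls_until_done = 3;

static uint32_t reg_read(void)
{
    if (polls_until_done-- <= 0)     /* pretend HW finished the flush */
        tdf_ctrl &= ~FLUSH_REQUEST;
    return tdf_ctrl;
}

/* Request a transient flush, then poll until HW clears the bit or we give up. */
static bool flush_sync(int max_polls)
{
    tdf_ctrl = FLUSH_REQUEST;        /* reg_write(XE2_TDF_CTRL, request) */
    while (max_polls--)
        if (!(reg_read() & FLUSH_REQUEST))
            return true;
    return false;
}

int main(void)
{
    printf("transient flush %s\n", flush_sync(10) ? "completed" : "timed out");
    return 0;
}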
*/ -void xe_device_td_flush(struct xe_device *xe) +static void tdf_request_sync(struct xe_device *xe) { - struct xe_gt *gt; unsigned int fw_ref; + struct xe_gt *gt; u8 id; - if (!IS_DGFX(xe) || GRAPHICS_VER(xe) < 20) - return; - - if (XE_WA(xe_root_mmio_gt(xe), 16023588340)) { - xe_device_l2_flush(xe); - return; - } - for_each_gt(gt, xe, id) { if (xe_gt_is_media_type(gt)) continue; @@ -1022,6 +1037,7 @@ void xe_device_td_flush(struct xe_device *xe) return; xe_mmio_write32(>->mmio, XE2_TDF_CTRL, TRANSIENT_FLUSH_REQUEST); + /* * FIXME: We can likely do better here with our choice of * timeout. Currently we just assume the worst case, i.e. 150us, @@ -1044,7 +1060,7 @@ void xe_device_l2_flush(struct xe_device *xe) gt = xe_root_mmio_gt(xe); - if (!XE_WA(gt, 16023588340)) + if (!XE_GT_WA(gt, 16023588340)) return; fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); @@ -1052,15 +1068,52 @@ void xe_device_l2_flush(struct xe_device *xe) return; spin_lock(>->global_invl_lock); - xe_mmio_write32(>->mmio, XE2_GLOBAL_INVAL, 0x1); - if (xe_mmio_wait32(>->mmio, XE2_GLOBAL_INVAL, 0x1, 0x0, 500, NULL, true)) + xe_mmio_write32(>->mmio, XE2_GLOBAL_INVAL, 0x1); + if (xe_mmio_wait32(>->mmio, XE2_GLOBAL_INVAL, 0x1, 0x0, 1000, NULL, true)) xe_gt_err_once(gt, "Global invalidation timeout\n"); + spin_unlock(>->global_invl_lock); xe_force_wake_put(gt_to_fw(gt), fw_ref); } +/** + * xe_device_td_flush() - Flush transient L3 cache entries + * @xe: The device + * + * Display engine has direct access to memory and is never coherent with L3/L4 + * caches (or CPU caches), however KMD is responsible for specifically flushing + * transient L3 GPU cache entries prior to the flip sequence to ensure scanout + * can happen from such a surface without seeing corruption. + * + * Display surfaces can be tagged as transient by mapping it using one of the + * various L3:XD PAT index modes on Xe2. + * + * Note: On non-discrete xe2 platforms, like LNL, the entire L3 cache is flushed + * at the end of each submission via PIPE_CONTROL for compute/render, since SA + * Media is not coherent with L3 and we want to support render-vs-media + * usescases. For other engines like copy/blt the HW internally forces uncached + * behaviour, hence why we can skip the TDF on such platforms. + */ +void xe_device_td_flush(struct xe_device *xe) +{ + struct xe_gt *root_gt; + + if (!IS_DGFX(xe) || GRAPHICS_VER(xe) < 20) + return; + + root_gt = xe_root_mmio_gt(xe); + if (XE_GT_WA(root_gt, 16023588340)) { + /* A transient flush is not sufficient: flush the L2 */ + xe_device_l2_flush(xe); + } else { + xe_guc_pc_apply_flush_freq_limit(&root_gt->uc.guc.pc); + tdf_request_sync(xe); + xe_guc_pc_remove_flush_freq_limit(&root_gt->uc.guc.pc); + } +} + u32 xe_device_ccs_bytes(struct xe_device *xe, u64 size) { return xe_device_has_flat_ccs(xe) ? @@ -1122,11 +1175,63 @@ static void xe_device_wedged_fini(struct drm_device *drm, void *arg) } /** + * DOC: Xe Device Wedging + * + * Xe driver uses drm device wedged uevent as documented in Documentation/gpu/drm-uapi.rst. + * When device is in wedged state, every IOCTL will be blocked and GT cannot be + * used. Certain critical errors like gt reset failure, firmware failures can cause + * the device to be wedged. The default recovery method for a wedged state + * is rebind/bus-reset. + * + * Another recovery method is vendor-specific. Below are the cases that send + * ``WEDGED=vendor-specific`` recovery method in drm device wedged uevent. 
+ * + * Case: Firmware Flash + * -------------------- + * + * Identification Hint + * +++++++++++++++++++ + * + * ``WEDGED=vendor-specific`` drm device wedged uevent with + * :ref:`Runtime Survivability mode <xe-survivability-mode>` is used to notify + * admin/userspace consumer about the need for a firmware flash. + * + * Recovery Procedure + * ++++++++++++++++++ + * + * Once ``WEDGED=vendor-specific`` drm device wedged uevent is received, follow + * the below steps + * + * - Check Runtime Survivability mode sysfs. + * If enabled, firmware flash is required to recover the device. + * + * /sys/bus/pci/devices/<device>/survivability_mode + * + * - Admin/userpsace consumer can use firmware flashing tools like fwupd to flash + * firmware and restore device to normal operation. + */ + +/** + * xe_device_set_wedged_method - Set wedged recovery method + * @xe: xe device instance + * @method: recovery method to set + * + * Set wedged recovery method to be sent in drm wedged uevent. + */ +void xe_device_set_wedged_method(struct xe_device *xe, unsigned long method) +{ + xe->wedged.method = method; +} + +/** * xe_device_declare_wedged - Declare device wedged * @xe: xe device instance * - * This is a final state that can only be cleared with a module - * re-probe (unbind + bind). + * This is a final state that can only be cleared with the recovery method + * specified in the drm wedged uevent. The method can be set using + * xe_device_set_wedged_method before declaring the device as wedged. If no method + * is set, reprobe (unbind/re-bind) will be sent by default. + * * In this state every IOCTL will be blocked so the GT cannot be used. * In general it will be called upon any critical error such as gt reset * failure or guc loading failure. Userspace will be notified of this state @@ -1160,13 +1265,18 @@ void xe_device_declare_wedged(struct xe_device *xe) "IOCTLs and executions are blocked. Only a rebind may clear the failure\n" "Please file a _new_ bug report at https://gitlab.freedesktop.org/drm/xe/kernel/issues/new\n", dev_name(xe->drm.dev)); - - /* Notify userspace of wedged device */ - drm_dev_wedged_event(&xe->drm, - DRM_WEDGE_RECOVERY_REBIND | DRM_WEDGE_RECOVERY_BUS_RESET, - NULL); } for_each_gt(gt, xe, id) xe_gt_declare_wedged(gt); + + if (xe_device_wedged(xe)) { + /* If no wedge recovery method is set, use default */ + if (!xe->wedged.method) + xe_device_set_wedged_method(xe, DRM_WEDGE_RECOVERY_REBIND | + DRM_WEDGE_RECOVERY_BUS_RESET); + + /* Notify userspace of wedged device */ + drm_dev_wedged_event(&xe->drm, xe->wedged.method, NULL); + } } diff --git a/drivers/gpu/drm/xe/xe_device.h b/drivers/gpu/drm/xe/xe_device.h index e4da797a984b..32cc6323b7f6 100644 --- a/drivers/gpu/drm/xe/xe_device.h +++ b/drivers/gpu/drm/xe/xe_device.h @@ -60,35 +60,32 @@ static inline struct xe_tile *xe_device_get_root_tile(struct xe_device *xe) return &xe->tiles[0]; } +/* + * Highest GT/tile count for any platform. Used only for memory allocation + * sizing. Any logic looping over GTs or mapping userspace GT IDs into GT + * structures should use the per-platform xe->info.max_gt_per_tile instead. + */ #define XE_MAX_GT_PER_TILE 2 -static inline struct xe_gt *xe_tile_get_gt(struct xe_tile *tile, u8 gt_id) -{ - if (drm_WARN_ON(&tile_to_xe(tile)->drm, gt_id >= XE_MAX_GT_PER_TILE)) - gt_id = 0; - - return gt_id ? 
tile->media_gt : tile->primary_gt; -} - static inline struct xe_gt *xe_device_get_gt(struct xe_device *xe, u8 gt_id) { - struct xe_tile *root_tile = xe_device_get_root_tile(xe); + struct xe_tile *tile; struct xe_gt *gt; - /* - * FIXME: This only works for now because multi-tile and standalone - * media are mutually exclusive on the platforms we have today. - * - * id => GT mapping may change once we settle on how we want to handle - * our UAPI. - */ - if (MEDIA_VER(xe) >= 13) { - gt = xe_tile_get_gt(root_tile, gt_id); - } else { - if (drm_WARN_ON(&xe->drm, gt_id >= XE_MAX_TILES_PER_DEVICE)) - gt_id = 0; - - gt = xe->tiles[gt_id].primary_gt; + if (gt_id >= xe->info.tile_count * xe->info.max_gt_per_tile) + return NULL; + + tile = &xe->tiles[gt_id / xe->info.max_gt_per_tile]; + switch (gt_id % xe->info.max_gt_per_tile) { + default: + xe_assert(xe, false); + fallthrough; + case 0: + gt = tile->primary_gt; + break; + case 1: + gt = tile->media_gt; + break; } if (!gt) @@ -130,14 +127,14 @@ static inline bool xe_device_uc_enabled(struct xe_device *xe) for ((id__) = 1; (id__) < (xe__)->info.tile_count; (id__)++) \ for_each_if((tile__) = &(xe__)->tiles[(id__)]) -/* - * FIXME: This only works for now since multi-tile and standalone media - * happen to be mutually exclusive. Future platforms may change this... - */ #define for_each_gt(gt__, xe__, id__) \ - for ((id__) = 0; (id__) < (xe__)->info.gt_count; (id__)++) \ + for ((id__) = 0; (id__) < (xe__)->info.tile_count * (xe__)->info.max_gt_per_tile; (id__)++) \ for_each_if((gt__) = xe_device_get_gt((xe__), (id__))) +#define for_each_gt_on_tile(gt__, tile__, id__) \ + for_each_gt((gt__), (tile__)->xe, (id__)) \ + for_each_if((gt__)->tile == (tile__)) + static inline struct xe_force_wake *gt_to_fw(struct xe_gt *gt) { return >->pm.fw; @@ -190,6 +187,7 @@ static inline bool xe_device_wedged(struct xe_device *xe) return atomic_read(&xe->wedged.flag); } +void xe_device_set_wedged_method(struct xe_device *xe, unsigned long method); void xe_device_declare_wedged(struct xe_device *xe); struct xe_file *xe_file_get(struct xe_file *xef); diff --git a/drivers/gpu/drm/xe/xe_device_sysfs.c b/drivers/gpu/drm/xe/xe_device_sysfs.c index b9440f8c781e..c5151c86a98a 100644 --- a/drivers/gpu/drm/xe/xe_device_sysfs.c +++ b/drivers/gpu/drm/xe/xe_device_sysfs.c @@ -24,6 +24,12 @@ * * vram_d3cold_threshold - Report/change vram used threshold(in MB) below * which vram save/restore is permissible during runtime D3cold entry/exit. + * + * lb_fan_control_version - Fan control version provisioned by late binding. + * Exposed only if supported by the device. + * + * lb_voltage_regulator_version - Voltage regulator version provisioned by late + * binding. Exposed only if supported by the device. 
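Returning to the xe_device_get_gt()/for_each_gt() rework above: GT IDs are now a fixed function of the tile layout rather than of the media IP version. Each tile owns max_gt_per_tile consecutive IDs, the even slot is the primary GT, the odd slot the media GT, and an absent GT simply yields NULL and is skipped by the iterator. A standalone model of that mapping (structure and field names are illustrative):

#include <stdio.h>

#define MAX_GT_PER_TILE 2

struct tile { const char *primary_gt, *media_gt; };

/* Even GT ID = primary GT, odd GT ID = media GT (which may be absent). */
static const char *lookup_gt(const struct tile *tiles, int tile_count, int gt_id)
{
    if (gt_id >= tile_count * MAX_GT_PER_TILE)
        return NULL;

    const struct tile *t = &tiles[gt_id / MAX_GT_PER_TILE];

    return (gt_id % MAX_GT_PER_TILE) ? t->media_gt : t->primary_gt;
}

int main(void)
{
    struct tile tiles[2] = {
        { "tile0 primary", "tile0 media" },
        { "tile1 primary", NULL },        /* no standalone media GT */
    };

    for (int id = 0; id < 2 * MAX_GT_PER_TILE; id++) {
        const char *gt = lookup_gt(tiles, 2, id);

        printf("gt_id %d -> %s\n", id, gt ? gt : "(none, skipped)");
    }
    return 0;
}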
*/ static ssize_t @@ -65,6 +71,131 @@ vram_d3cold_threshold_store(struct device *dev, struct device_attribute *attr, static DEVICE_ATTR_RW(vram_d3cold_threshold); +static struct attribute *vram_attrs[] = { + &dev_attr_vram_d3cold_threshold.attr, + NULL +}; + +static const struct attribute_group vram_attr_group = { + .attrs = vram_attrs, +}; + +static ssize_t +lb_fan_control_version_show(struct device *dev, struct device_attribute *attr, char *buf) +{ + struct xe_device *xe = pdev_to_xe_device(to_pci_dev(dev)); + struct xe_tile *root = xe_device_get_root_tile(xe); + u32 cap = 0, ver_low = FAN_TABLE, ver_high = FAN_TABLE; + u16 major = 0, minor = 0, hotfix = 0, build = 0; + int ret; + + xe_pm_runtime_get(xe); + + ret = xe_pcode_read(root, PCODE_MBOX(PCODE_LATE_BINDING, GET_CAPABILITY_STATUS, 0), + &cap, NULL); + if (ret) + goto out; + + if (REG_FIELD_GET(V1_FAN_PROVISIONED, cap)) { + ret = xe_pcode_read(root, PCODE_MBOX(PCODE_LATE_BINDING, GET_VERSION_LOW, 0), + &ver_low, NULL); + if (ret) + goto out; + + ret = xe_pcode_read(root, PCODE_MBOX(PCODE_LATE_BINDING, GET_VERSION_HIGH, 0), + &ver_high, NULL); + if (ret) + goto out; + + major = REG_FIELD_GET(MAJOR_VERSION_MASK, ver_low); + minor = REG_FIELD_GET(MINOR_VERSION_MASK, ver_low); + hotfix = REG_FIELD_GET(HOTFIX_VERSION_MASK, ver_high); + build = REG_FIELD_GET(BUILD_VERSION_MASK, ver_high); + } +out: + xe_pm_runtime_put(xe); + + return ret ?: sysfs_emit(buf, "%u.%u.%u.%u\n", major, minor, hotfix, build); +} +static DEVICE_ATTR_ADMIN_RO(lb_fan_control_version); + +static ssize_t +lb_voltage_regulator_version_show(struct device *dev, struct device_attribute *attr, char *buf) +{ + struct xe_device *xe = pdev_to_xe_device(to_pci_dev(dev)); + struct xe_tile *root = xe_device_get_root_tile(xe); + u32 cap = 0, ver_low = VR_CONFIG, ver_high = VR_CONFIG; + u16 major = 0, minor = 0, hotfix = 0, build = 0; + int ret; + + xe_pm_runtime_get(xe); + + ret = xe_pcode_read(root, PCODE_MBOX(PCODE_LATE_BINDING, GET_CAPABILITY_STATUS, 0), + &cap, NULL); + if (ret) + goto out; + + if (REG_FIELD_GET(VR_PARAMS_PROVISIONED, cap)) { + ret = xe_pcode_read(root, PCODE_MBOX(PCODE_LATE_BINDING, GET_VERSION_LOW, 0), + &ver_low, NULL); + if (ret) + goto out; + + ret = xe_pcode_read(root, PCODE_MBOX(PCODE_LATE_BINDING, GET_VERSION_HIGH, 0), + &ver_high, NULL); + if (ret) + goto out; + + major = REG_FIELD_GET(MAJOR_VERSION_MASK, ver_low); + minor = REG_FIELD_GET(MINOR_VERSION_MASK, ver_low); + hotfix = REG_FIELD_GET(HOTFIX_VERSION_MASK, ver_high); + build = REG_FIELD_GET(BUILD_VERSION_MASK, ver_high); + } +out: + xe_pm_runtime_put(xe); + + return ret ?: sysfs_emit(buf, "%u.%u.%u.%u\n", major, minor, hotfix, build); +} +static DEVICE_ATTR_ADMIN_RO(lb_voltage_regulator_version); + +static struct attribute *late_bind_attrs[] = { + &dev_attr_lb_fan_control_version.attr, + &dev_attr_lb_voltage_regulator_version.attr, + NULL +}; + +static umode_t late_bind_attr_is_visible(struct kobject *kobj, + struct attribute *attr, int n) +{ + struct device *dev = kobj_to_dev(kobj); + struct xe_device *xe = pdev_to_xe_device(to_pci_dev(dev)); + struct xe_tile *root = xe_device_get_root_tile(xe); + u32 cap = 0; + int ret; + + xe_pm_runtime_get(xe); + + ret = xe_pcode_read(root, PCODE_MBOX(PCODE_LATE_BINDING, GET_CAPABILITY_STATUS, 0), + &cap, NULL); + xe_pm_runtime_put(xe); + if (ret) + return 0; + + if (attr == &dev_attr_lb_fan_control_version.attr && + REG_FIELD_GET(V1_FAN_SUPPORTED, cap)) + return attr->mode; + if (attr == &dev_attr_lb_voltage_regulator_version.attr && + 
REG_FIELD_GET(VR_PARAMS_SUPPORTED, cap)) + return attr->mode; + + return 0; +} + +static const struct attribute_group late_bind_attr_group = { + .attrs = late_bind_attrs, + .is_visible = late_bind_attr_is_visible, +}; + /** * DOC: PCIe Gen5 Limitations * @@ -138,22 +269,15 @@ auto_link_downgrade_status_show(struct device *dev, struct device_attribute *att } static DEVICE_ATTR_ADMIN_RO(auto_link_downgrade_status); -static const struct attribute *auto_link_downgrade_attrs[] = { +static struct attribute *auto_link_downgrade_attrs[] = { &dev_attr_auto_link_downgrade_capable.attr, &dev_attr_auto_link_downgrade_status.attr, NULL }; -static void xe_device_sysfs_fini(void *arg) -{ - struct xe_device *xe = arg; - - if (xe->d3cold.capable) - sysfs_remove_file(&xe->drm.dev->kobj, &dev_attr_vram_d3cold_threshold.attr); - - if (xe->info.platform == XE_BATTLEMAGE) - sysfs_remove_files(&xe->drm.dev->kobj, auto_link_downgrade_attrs); -} +static const struct attribute_group auto_link_downgrade_attr_group = { + .attrs = auto_link_downgrade_attrs, +}; int xe_device_sysfs_init(struct xe_device *xe) { @@ -161,16 +285,20 @@ int xe_device_sysfs_init(struct xe_device *xe) int ret; if (xe->d3cold.capable) { - ret = sysfs_create_file(&dev->kobj, &dev_attr_vram_d3cold_threshold.attr); + ret = devm_device_add_group(dev, &vram_attr_group); if (ret) return ret; } - if (xe->info.platform == XE_BATTLEMAGE) { - ret = sysfs_create_files(&dev->kobj, auto_link_downgrade_attrs); + if (xe->info.platform == XE_BATTLEMAGE && !IS_SRIOV_VF(xe)) { + ret = devm_device_add_group(dev, &auto_link_downgrade_attr_group); + if (ret) + return ret; + + ret = devm_device_add_group(dev, &late_bind_attr_group); if (ret) return ret; } - return devm_add_action_or_reset(dev, xe_device_sysfs_fini, xe); + return 0; } diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h index 003afb279a5e..74d7af830b85 100644 --- a/drivers/gpu/drm/xe/xe_device_types.h +++ b/drivers/gpu/drm/xe/xe_device_types.h @@ -10,21 +10,24 @@ #include <drm/drm_device.h> #include <drm/drm_file.h> -#include <drm/drm_pagemap.h> #include <drm/ttm/ttm_device.h> #include "xe_devcoredump_types.h" #include "xe_heci_gsc.h" +#include "xe_late_bind_fw_types.h" #include "xe_lmtt_types.h" #include "xe_memirq_types.h" #include "xe_oa_types.h" #include "xe_platform_types.h" #include "xe_pmu_types.h" #include "xe_pt_types.h" +#include "xe_sriov_pf_types.h" #include "xe_sriov_types.h" +#include "xe_sriov_vf_types.h" +#include "xe_sriov_vf_ccs_types.h" #include "xe_step_types.h" #include "xe_survivability_mode_types.h" -#include "xe_ttm_vram_mgr_types.h" +#include "xe_validation.h" #if IS_ENABLED(CONFIG_DRM_XE_DEBUG) #define TEST_VM_OPS_ERROR @@ -32,9 +35,12 @@ struct dram_info; struct intel_display; +struct intel_dg_nvm_dev; struct xe_ggtt; +struct xe_i2c; struct xe_pat_ops; struct xe_pxp; +struct xe_vram_region; #define XE_BO_INVALID_OFFSET LONG_MAX @@ -68,61 +74,6 @@ struct xe_pxp; struct xe_tile * : (tile__)->xe) /** - * struct xe_vram_region - memory region structure - * This is used to describe a memory region in xe - * device, such as HBM memory or CXL extension memory. - */ -struct xe_vram_region { - /** @io_start: IO start address of this VRAM instance */ - resource_size_t io_start; - /** - * @io_size: IO size of this VRAM instance - * - * This represents how much of this VRAM we can access - * via the CPU through the VRAM BAR. This can be smaller - * than @usable_size, in which case only part of VRAM is CPU - * accessible (typically the first 256M). 
This - * configuration is known as small-bar. - */ - resource_size_t io_size; - /** @dpa_base: This memory regions's DPA (device physical address) base */ - resource_size_t dpa_base; - /** - * @usable_size: usable size of VRAM - * - * Usable size of VRAM excluding reserved portions - * (e.g stolen mem) - */ - resource_size_t usable_size; - /** - * @actual_physical_size: Actual VRAM size - * - * Actual VRAM size including reserved portions - * (e.g stolen mem) - */ - resource_size_t actual_physical_size; - /** @mapping: pointer to VRAM mappable space */ - void __iomem *mapping; - /** @ttm: VRAM TTM manager */ - struct xe_ttm_vram_mgr ttm; -#if IS_ENABLED(CONFIG_DRM_XE_DEVMEM_MIRROR) - /** @pagemap: Used to remap device memory as ZONE_DEVICE */ - struct dev_pagemap pagemap; - /** - * @dpagemap: The struct drm_pagemap of the ZONE_DEVICE memory - * pages of this tile. - */ - struct drm_pagemap dpagemap; - /** - * @hpa_base: base host physical address - * - * This is generated when remap device memory as ZONE_DEVICE - */ - resource_size_t hpa_base; -#endif -}; - -/** * struct xe_mmio - register mmio structure * * Represents an MMIO region that the CPU may use to access registers. A @@ -212,7 +163,7 @@ struct xe_tile { * Although VRAM is associated with a specific tile, it can * still be accessed by all tiles' GTs. */ - struct xe_vram_region vram; + struct xe_vram_region *vram; /** @mem.ggtt: Global graphics translation table */ struct xe_ggtt *ggtt; @@ -240,6 +191,9 @@ struct xe_tile { /** @memirq: Memory Based Interrupts. */ struct xe_memirq memirq; + /** @csc_hw_error_work: worker to report CSC HW errors */ + struct work_struct csc_hw_error_work; + /** @pcode: tile's PCODE */ struct { /** @pcode.lock: protecting tile's PCODE mailbox data */ @@ -251,6 +205,9 @@ struct xe_tile { /** @sysfs: sysfs' kobj used by xe_tile_sysfs */ struct kobject *sysfs; + + /** @debugfs: debugfs directory associated with this tile */ + struct dentry *debugfs; }; /** @@ -293,6 +250,8 @@ struct xe_device { u8 vram_flags; /** @info.tile_count: Number of tiles */ u8 tile_count; + /** @info.max_gt_per_tile: Number of GT IDs allocated to each tile */ + u8 max_gt_per_tile; /** @info.gt_count: Total number of GTs for entire device */ u8 gt_count; /** @info.vm_max_level: Max VM level */ @@ -316,10 +275,14 @@ struct xe_device { u8 has_fan_control:1; /** @info.has_flat_ccs: Whether flat CCS metadata is used */ u8 has_flat_ccs:1; + /** @info.has_gsc_nvm: Device has gsc non-volatile memory */ + u8 has_gsc_nvm:1; /** @info.has_heci_cscfi: device has heci cscfi */ u8 has_heci_cscfi:1; /** @info.has_heci_gscfi: device has heci gscfi */ u8 has_heci_gscfi:1; + /** @info.has_late_bind: Device has firmware late binding support */ + u8 has_late_bind:1; /** @info.has_llc: Device has a shared CPU+GPU last level cache */ u8 has_llc:1; /** @info.has_mbx_power_limits: Device has support to manage power limits using @@ -328,8 +291,8 @@ struct xe_device { u8 has_mbx_power_limits:1; /** @info.has_pxp: Device has PXP support */ u8 has_pxp:1; - /** @info.has_range_tlb_invalidation: Has range based TLB invalidations */ - u8 has_range_tlb_invalidation:1; + /** @info.has_range_tlb_inval: Has range based TLB invalidations */ + u8 has_range_tlb_inval:1; /** @info.has_sriov: Supports SR-IOV */ u8 has_sriov:1; /** @info.has_usm: Device has unified shared memory support */ @@ -357,6 +320,19 @@ struct xe_device { u8 skip_pcode:1; } info; + /** @wa_active: keep track of active workarounds */ + struct { + /** @wa_active.oob: bitmap with active OOB 
workarounds */ + unsigned long *oob; + + /** + * @wa_active.oob_initialized: Mark oob as initialized to help detecting misuse + * of XE_DEVICE_WA() - it can only be called on initialization after + * Device OOB WAs have been processed. + */ + bool oob_initialized; + } wa_active; + /** @survivability: survivability information for device */ struct xe_survivability survivability; @@ -391,7 +367,7 @@ struct xe_device { /** @mem: memory info for device */ struct { /** @mem.vram: VRAM info for device */ - struct xe_vram_region vram; + struct xe_vram_region *vram; /** @mem.sys_mgr: system TTM manager */ struct ttm_resource_manager sys_mgr; /** @mem.sys_mgr: system memory shrinker. */ @@ -403,10 +379,12 @@ struct xe_device { /** @sriov.__mode: SR-IOV mode (Don't access directly!) */ enum xe_sriov_mode __mode; - /** @sriov.pf: PF specific data */ - struct xe_device_pf pf; - /** @sriov.vf: VF specific data */ - struct xe_device_vf vf; + union { + /** @sriov.pf: PF specific data */ + struct xe_device_pf pf; + /** @sriov.vf: VF specific data */ + struct xe_device_vf vf; + }; /** @sriov.wq: workqueue used by the virtualization workers */ struct workqueue_struct *wq; @@ -453,7 +431,7 @@ struct xe_device { /** @ordered_wq: used to serialize compute mode resume */ struct workqueue_struct *ordered_wq; - /** @unordered_wq: used to serialize unordered work, mostly display */ + /** @unordered_wq: used to serialize unordered work */ struct workqueue_struct *unordered_wq; /** @destroy_wq: used to serialize user destroy work, like queue */ @@ -530,6 +508,12 @@ struct xe_device { /** @pm_notifier: Our PM notifier to perform actions in response to various PM events. */ struct notifier_block pm_notifier; + /** @pm_block: Completion to block validating tasks on suspend / hibernate prepare */ + struct completion pm_block; + /** @rebind_resume_list: List of wq items to kick on resume. */ + struct list_head rebind_resume_list; + /** @rebind_resume_lock: Lock to protect the rebind_resume_list */ + struct mutex rebind_resume_lock; /** @pmt: Support the PMT driver callback interface */ struct { @@ -549,6 +533,12 @@ struct xe_device { /** @heci_gsc: graphics security controller */ struct xe_heci_gsc heci_gsc; + /** @nvm: discrete graphics non-volatile memory */ + struct intel_dg_nvm_dev *nvm; + + /** @late_bind: xe mei late bind interface */ + struct xe_late_bind late_bind; + /** @oa: oa observation subsystem */ struct xe_oa oa; @@ -564,6 +554,8 @@ struct xe_device { atomic_t flag; /** @wedged.mode: Mode controlled by kernel parameter and debugfs */ int mode; + /** @wedged.method: Recovery method to be sent in the drm device wedged uevent */ + unsigned long method; } wedged; /** @bo_device: Struct to control async free of BOs */ @@ -577,6 +569,9 @@ struct xe_device { /** @pmu: performance monitoring unit */ struct xe_pmu pmu; + /** @i2c: I2C host controller */ + struct xe_i2c *i2c; + /** @atomic_svm_timeslice_ms: Atomic SVM fault timeslice MS */ u32 atomic_svm_timeslice_ms; @@ -588,6 +583,31 @@ struct xe_device { u8 vm_inject_error_position; #endif +#if IS_ENABLED(CONFIG_TRACE_GPU_MEM) + /** + * @global_total_pages: global GPU page usage tracked for gpu_mem + * tracepoints + */ + atomic64_t global_total_pages; +#endif + /** @val: The domain for exhaustive eviction, which is currently per device. 
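The SR-IOV change above relies on the PF and VF data being mutually exclusive: a device runs in exactly one SR-IOV mode, so overlapping the per-mode state in an anonymous union is safe and saves the size of the smaller member. A minimal sketch of the idea (mode and field names invented for illustration):

#include <stdio.h>

enum sriov_mode { SRIOV_NONE, SRIOV_PF, SRIOV_VF };

struct pf_data { int num_vfs; };
struct vf_data { int migration_version; };

struct sriov_state {
    enum sriov_mode mode;    /* says which union member is live */
    union {
        struct pf_data pf;   /* valid only when mode == SRIOV_PF */
        struct vf_data vf;   /* valid only when mode == SRIOV_VF */
    };
};

int main(void)
{
    struct sriov_state s = { .mode = SRIOV_PF, .pf = { .num_vfs = 4 } };

    if (s.mode == SRIOV_PF)
        printf("PF managing %d VFs\n", s.pf.num_vfs);
    return 0;
}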
*/ + struct xe_validation_device val; + + /** @psmi: GPU debugging via additional validation HW */ + struct { + /** @psmi.capture_obj: PSMI buffer for VRAM */ + struct xe_bo *capture_obj[XE_MAX_TILES_PER_DEVICE + 1]; + /** @psmi.region_mask: Mask of valid memory regions */ + u8 region_mask; + } psmi; + +#if IS_ENABLED(CONFIG_DRM_XE_KUNIT_TEST) + /** @g2g_test_array: for testing G2G communications */ + u32 *g2g_test_array; + /** @g2g_test_count: for testing G2G communications */ + atomic_t g2g_test_count; +#endif + /* private: */ #if IS_ENABLED(CONFIG_DRM_XE_DISPLAY) @@ -621,7 +641,6 @@ struct xe_device { struct { unsigned int hpll_freq; unsigned int czclk_freq; - unsigned int fsb_freq, mem_freq, is_ddr3; }; #endif }; diff --git a/drivers/gpu/drm/xe/xe_device_wa_oob.rules b/drivers/gpu/drm/xe/xe_device_wa_oob.rules new file mode 100644 index 000000000000..3a0c4ccc4224 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_device_wa_oob.rules @@ -0,0 +1,2 @@ +15015404425 PLATFORM(LUNARLAKE) + PLATFORM(PANTHERLAKE) diff --git a/drivers/gpu/drm/xe/xe_dma_buf.c b/drivers/gpu/drm/xe/xe_dma_buf.c index 346f857f3837..a7d67725c3ee 100644 --- a/drivers/gpu/drm/xe/xe_dma_buf.c +++ b/drivers/gpu/drm/xe/xe_dma_buf.c @@ -51,6 +51,7 @@ static int xe_dma_buf_pin(struct dma_buf_attachment *attach) struct drm_gem_object *obj = attach->dmabuf->priv; struct xe_bo *bo = gem_to_xe_bo(obj); struct xe_device *xe = xe_bo_device(bo); + struct drm_exec *exec = XE_VALIDATION_UNSUPPORTED; int ret; /* @@ -63,7 +64,7 @@ static int xe_dma_buf_pin(struct dma_buf_attachment *attach) return -EINVAL; } - ret = xe_bo_migrate(bo, XE_PL_TT); + ret = xe_bo_migrate(bo, XE_PL_TT, NULL, exec); if (ret) { if (ret != -EINTR && ret != -ERESTARTSYS) drm_dbg(&xe->drm, @@ -72,7 +73,7 @@ static int xe_dma_buf_pin(struct dma_buf_attachment *attach) return ret; } - ret = xe_bo_pin_external(bo); + ret = xe_bo_pin_external(bo, true, exec); xe_assert(xe, !ret); return 0; @@ -92,6 +93,7 @@ static struct sg_table *xe_dma_buf_map(struct dma_buf_attachment *attach, struct dma_buf *dma_buf = attach->dmabuf; struct drm_gem_object *obj = dma_buf->priv; struct xe_bo *bo = gem_to_xe_bo(obj); + struct drm_exec *exec = XE_VALIDATION_UNSUPPORTED; struct sg_table *sgt; int r = 0; @@ -100,9 +102,9 @@ static struct sg_table *xe_dma_buf_map(struct dma_buf_attachment *attach, if (!xe_bo_is_pinned(bo)) { if (!attach->peer2peer) - r = xe_bo_migrate(bo, XE_PL_TT); + r = xe_bo_migrate(bo, XE_PL_TT, NULL, exec); else - r = xe_bo_validate(bo, NULL, false); + r = xe_bo_validate(bo, NULL, false, exec); if (r) return ERR_PTR(r); } @@ -161,15 +163,26 @@ static int xe_dma_buf_begin_cpu_access(struct dma_buf *dma_buf, struct xe_bo *bo = gem_to_xe_bo(obj); bool reads = (direction == DMA_BIDIRECTIONAL || direction == DMA_FROM_DEVICE); + struct xe_validation_ctx ctx; + struct drm_exec exec; + int ret = 0; if (!reads) return 0; /* Can we do interruptible lock here? */ - xe_bo_lock(bo, false); - (void)xe_bo_migrate(bo, XE_PL_TT); - xe_bo_unlock(bo); + xe_validation_guard(&ctx, &xe_bo_device(bo)->val, &exec, (struct xe_val_flags) {}, ret) { + ret = drm_exec_lock_obj(&exec, &bo->ttm.base); + drm_exec_retry_on_contention(&exec); + if (ret) + break; + + ret = xe_bo_migrate(bo, XE_PL_TT, NULL, &exec); + drm_exec_retry_on_contention(&exec); + xe_validation_retry_on_oom(&ctx, &ret); + } + /* If we failed, cpu-access takes place in current placement. 
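xe_validation_guard() and xe_validation_retry_on_oom() used in the dma-buf hunk above are helpers introduced by this series; underneath them sits the stock drm_exec transaction loop, which rolls back and retries when a ww-mutex lock attempt hits contention. A hedged sketch of that underlying pattern using only the core drm_exec helpers (not the xe wrapper):

#include <drm/drm_exec.h>
#include <drm/drm_gem.h>

/* Lock one GEM object, transparently backing off and retrying on contention. */
static int lock_for_migration(struct drm_gem_object *obj)
{
    struct drm_exec exec;
    int ret;

    drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 0);
    drm_exec_until_all_locked(&exec) {
        ret = drm_exec_lock_obj(&exec, obj);
        drm_exec_retry_on_contention(&exec);
        if (ret)
            break;

        /* object is locked here: migrate/validate it as needed */
    }
    drm_exec_fini(&exec);

    return ret;
}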
*/ return 0; } @@ -191,10 +204,22 @@ struct dma_buf *xe_gem_prime_export(struct drm_gem_object *obj, int flags) { struct xe_bo *bo = gem_to_xe_bo(obj); struct dma_buf *buf; + struct ttm_operation_ctx ctx = { + .interruptible = true, + .no_wait_gpu = true, + /* We opt to avoid OOM on system pages allocations */ + .gfp_retry_mayfail = true, + .allow_res_evict = false, + }; + int ret; if (bo->vm) return ERR_PTR(-EPERM); + ret = ttm_bo_setup_export(&bo->ttm, &ctx); + if (ret) + return ERR_PTR(ret); + buf = drm_gem_prime_export(obj, flags); if (!IS_ERR(buf)) buf->ops = &xe_dmabuf_ops; @@ -208,32 +233,45 @@ xe_dma_buf_init_obj(struct drm_device *dev, struct xe_bo *storage, { struct dma_resv *resv = dma_buf->resv; struct xe_device *xe = to_xe_device(dev); + struct xe_validation_ctx ctx; + struct drm_gem_object *dummy_obj; + struct drm_exec exec; struct xe_bo *bo; - int ret; - - dma_resv_lock(resv, NULL); - bo = ___xe_bo_create_locked(xe, storage, NULL, resv, NULL, dma_buf->size, - 0, /* Will require 1way or 2way for vm_bind */ - ttm_bo_type_sg, XE_BO_FLAG_SYSTEM); - if (IS_ERR(bo)) { - ret = PTR_ERR(bo); - goto error; + int ret = 0; + + dummy_obj = drm_gpuvm_resv_object_alloc(&xe->drm); + if (!dummy_obj) + return ERR_PTR(-ENOMEM); + + dummy_obj->resv = resv; + xe_validation_guard(&ctx, &xe->val, &exec, (struct xe_val_flags) {}, ret) { + ret = drm_exec_lock_obj(&exec, dummy_obj); + drm_exec_retry_on_contention(&exec); + if (ret) + break; + + bo = xe_bo_init_locked(xe, storage, NULL, resv, NULL, dma_buf->size, + 0, /* Will require 1way or 2way for vm_bind */ + ttm_bo_type_sg, XE_BO_FLAG_SYSTEM, &exec); + drm_exec_retry_on_contention(&exec); + if (IS_ERR(bo)) { + ret = PTR_ERR(bo); + xe_validation_retry_on_oom(&ctx, &ret); + break; + } } - dma_resv_unlock(resv); - - return &bo->ttm.base; + drm_gem_object_put(dummy_obj); -error: - dma_resv_unlock(resv); - return ERR_PTR(ret); + return ret ? 
ERR_PTR(ret) : &bo->ttm.base; } static void xe_dma_buf_move_notify(struct dma_buf_attachment *attach) { struct drm_gem_object *obj = attach->importer_priv; struct xe_bo *bo = gem_to_xe_bo(obj); + struct drm_exec *exec = XE_VALIDATION_UNSUPPORTED; - XE_WARN_ON(xe_bo_evict(bo)); + XE_WARN_ON(xe_bo_evict(bo, exec)); } static const struct dma_buf_attach_ops xe_dma_buf_attach_ops = { diff --git a/drivers/gpu/drm/xe/xe_drm_client.c b/drivers/gpu/drm/xe/xe_drm_client.c index 31f688e953d7..f931ff9b1ec0 100644 --- a/drivers/gpu/drm/xe/xe_drm_client.c +++ b/drivers/gpu/drm/xe/xe_drm_client.c @@ -167,7 +167,7 @@ void xe_drm_client_remove_bo(struct xe_bo *bo) static void bo_meminfo(struct xe_bo *bo, struct drm_memory_stats stats[TTM_NUM_MEM_TYPES]) { - u64 sz = bo->size; + u64 sz = xe_bo_size(bo); u32 mem_type = bo->ttm.resource->mem_type; xe_bo_assert_held(bo); diff --git a/drivers/gpu/drm/xe/xe_eu_stall.c b/drivers/gpu/drm/xe/xe_eu_stall.c index 96732613b4b7..f5cfdf29fde3 100644 --- a/drivers/gpu/drm/xe/xe_eu_stall.c +++ b/drivers/gpu/drm/xe/xe_eu_stall.c @@ -258,11 +258,13 @@ static int set_prop_eu_stall_wait_num_reports(struct xe_device *xe, u64 value, static int set_prop_eu_stall_gt_id(struct xe_device *xe, u64 value, struct eu_stall_open_properties *props) { - if (value >= xe->info.gt_count) { + struct xe_gt *gt = xe_device_get_gt(xe, value); + + if (!gt) { drm_dbg(&xe->drm, "Invalid GT ID %llu for EU stall sampling\n", value); return -EINVAL; } - props->gt = xe_device_get_gt(xe, value); + props->gt = gt; return 0; } @@ -615,9 +617,8 @@ static int xe_eu_stall_data_buf_alloc(struct xe_eu_stall_data_stream *stream, size = stream->per_xecore_buf_size * last_xecore; - bo = xe_bo_create_pin_map_at_aligned(tile->xe, tile, NULL, - size, ~0ull, ttm_bo_type_kernel, - XE_BO_FLAG_SYSTEM | XE_BO_FLAG_GGTT, SZ_64); + bo = xe_bo_create_pin_map_at_novm(tile->xe, tile, size, ~0ull, ttm_bo_type_kernel, + XE_BO_FLAG_SYSTEM | XE_BO_FLAG_GGTT, SZ_64, false); if (IS_ERR(bo)) { kfree(stream->xecore_buf); return PTR_ERR(bo); @@ -647,7 +648,7 @@ static int xe_eu_stall_stream_enable(struct xe_eu_stall_data_stream *stream) return -ETIMEDOUT; } - if (XE_WA(gt, 22016596838)) + if (XE_GT_WA(gt, 22016596838)) xe_gt_mcr_multicast_write(gt, ROW_CHICKEN2, _MASKED_BIT_ENABLE(DISABLE_DOP_GATING)); @@ -803,7 +804,7 @@ static int xe_eu_stall_disable_locked(struct xe_eu_stall_data_stream *stream) cancel_delayed_work_sync(&stream->buf_poll_work); - if (XE_WA(gt, 22016596838)) + if (XE_GT_WA(gt, 22016596838)) xe_gt_mcr_multicast_write(gt, ROW_CHICKEN2, _MASKED_BIT_DISABLE(DISABLE_DOP_GATING)); diff --git a/drivers/gpu/drm/xe/xe_exec.c b/drivers/gpu/drm/xe/xe_exec.c index 44364c042ad7..7715e74bb945 100644 --- a/drivers/gpu/drm/xe/xe_exec.c +++ b/drivers/gpu/drm/xe/xe_exec.c @@ -19,6 +19,7 @@ #include "xe_ring_ops_types.h" #include "xe_sched_job.h" #include "xe_sync.h" +#include "xe_svm.h" #include "xe_vm.h" /** @@ -97,9 +98,13 @@ static int xe_exec_fn(struct drm_gpuvm_exec *vm_exec) { struct xe_vm *vm = container_of(vm_exec->vm, struct xe_vm, gpuvm); + int ret; /* The fence slot added here is intended for the exec sched job. 
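The comment this hunk ends on ("the fence slot added here is intended for the exec sched job") refers to the extra slot passed down through xe_vm_validate_rebind(). The underlying dma_resv contract is: reserve slots while the code can still fail gracefully, so that attaching the job fence later on the success path cannot fail. A rough sketch with the plain dma_resv primitives (the driver goes through drm_gpuvm/xe wrappers, and the fence usage depends on the job):

#include <linux/dma-resv.h>
#include <linux/dma-fence.h>

/* Called with the reservation object locked, before the point of no return. */
static int reserve_job_fence_slot(struct dma_resv *resv)
{
    return dma_resv_reserve_fences(resv, 1);
}

/* Success path: cannot fail because the slot was reserved above. */
static void add_job_fence(struct dma_resv *resv, struct dma_fence *fence)
{
    dma_resv_add_fence(resv, fence, DMA_RESV_USAGE_WRITE);
}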
*/ - return xe_vm_validate_rebind(vm, &vm_exec->exec, 1); + xe_vm_set_validation_exec(vm, &vm_exec->exec); + ret = xe_vm_validate_rebind(vm, &vm_exec->exec, 1); + xe_vm_set_validation_exec(vm, NULL); + return ret; } int xe_exec_ioctl(struct drm_device *dev, void *data, struct drm_file *file) @@ -115,10 +120,10 @@ int xe_exec_ioctl(struct drm_device *dev, void *data, struct drm_file *file) struct drm_gpuvm_exec vm_exec = {.extra.fn = xe_exec_fn}; struct drm_exec *exec = &vm_exec.exec; u32 i, num_syncs, num_ufence = 0; + struct xe_validation_ctx ctx; struct xe_sched_job *job; struct xe_vm *vm; bool write_locked, skip_retry = false; - ktime_t end = 0; int err = 0; struct xe_hw_engine_group *group; enum xe_hw_engine_group_execution_mode mode, previous_mode; @@ -237,17 +242,21 @@ retry: goto err_unlock_list; } - vm_exec.vm = &vm->gpuvm; - vm_exec.flags = DRM_EXEC_INTERRUPTIBLE_WAIT; - if (xe_vm_in_lr_mode(vm)) { - drm_exec_init(exec, vm_exec.flags, 0); - } else { - err = drm_gpuvm_exec_lock(&vm_exec); - if (err) { - if (xe_vm_validate_should_retry(exec, err, &end)) - err = -EAGAIN; + /* + * It's OK to block interruptible here with the vm lock held, since + * on task freezing during suspend / hibernate, the call will + * return -ERESTARTSYS and the IOCTL will be rerun. + */ + err = wait_for_completion_interruptible(&xe->pm_block); + if (err) + goto err_unlock_list; + + if (!xe_vm_in_lr_mode(vm)) { + vm_exec.vm = &vm->gpuvm; + vm_exec.flags = DRM_EXEC_INTERRUPTIBLE_WAIT; + err = xe_validation_exec_lock(&ctx, &vm_exec, &xe->val); + if (err) goto err_unlock_list; - } } if (xe_vm_is_closed_or_banned(q->vm)) { @@ -294,7 +303,7 @@ retry: if (err) goto err_put_job; - err = down_read_interruptible(&vm->userptr.notifier_lock); + err = xe_svm_notifier_lock_interruptible(vm); if (err) goto err_put_job; @@ -336,12 +345,13 @@ retry: err_repin: if (!xe_vm_in_lr_mode(vm)) - up_read(&vm->userptr.notifier_lock); + xe_svm_notifier_unlock(vm); err_put_job: if (err) xe_sched_job_put(job); err_exec: - drm_exec_fini(exec); + if (!xe_vm_in_lr_mode(vm)) + xe_validation_ctx_fini(&ctx); err_unlock_list: up_read(&vm->lock); if (err == -EAGAIN && !skip_retry) diff --git a/drivers/gpu/drm/xe/xe_exec_queue.c b/drivers/gpu/drm/xe/xe_exec_queue.c index fee22358cc09..37b2b93b73d6 100644 --- a/drivers/gpu/drm/xe/xe_exec_queue.c +++ b/drivers/gpu/drm/xe/xe_exec_queue.c @@ -12,6 +12,7 @@ #include <drm/drm_file.h> #include <uapi/drm/xe_drm.h> +#include "xe_dep_scheduler.h" #include "xe_device.h" #include "xe_gt.h" #include "xe_hw_engine_class_sysfs.h" @@ -39,6 +40,12 @@ static int exec_queue_user_extensions(struct xe_device *xe, struct xe_exec_queue static void __xe_exec_queue_free(struct xe_exec_queue *q) { + int i; + + for (i = 0; i < XE_EXEC_QUEUE_TLB_INVAL_COUNT; ++i) + if (q->tlb_inval[i].dep_scheduler) + xe_dep_scheduler_fini(q->tlb_inval[i].dep_scheduler); + if (xe_exec_queue_uses_pxp(q)) xe_pxp_exec_queue_remove(gt_to_xe(q->gt)->pxp, q); if (q->vm) @@ -50,6 +57,39 @@ static void __xe_exec_queue_free(struct xe_exec_queue *q) kfree(q); } +static int alloc_dep_schedulers(struct xe_device *xe, struct xe_exec_queue *q) +{ + struct xe_tile *tile = gt_to_tile(q->gt); + int i; + + for (i = 0; i < XE_EXEC_QUEUE_TLB_INVAL_COUNT; ++i) { + struct xe_dep_scheduler *dep_scheduler; + struct xe_gt *gt; + struct workqueue_struct *wq; + + if (i == XE_EXEC_QUEUE_TLB_INVAL_PRIMARY_GT) + gt = tile->primary_gt; + else + gt = tile->media_gt; + + if (!gt) + continue; + + wq = gt->tlb_inval.job_wq; + +#define MAX_TLB_INVAL_JOBS 16 /* Picking a 
reasonable value */ + dep_scheduler = xe_dep_scheduler_create(xe, wq, q->name, + MAX_TLB_INVAL_JOBS); + if (IS_ERR(dep_scheduler)) + return PTR_ERR(dep_scheduler); + + q->tlb_inval[i].dep_scheduler = dep_scheduler; + } +#undef MAX_TLB_INVAL_JOBS + + return 0; +} + static struct xe_exec_queue *__xe_exec_queue_alloc(struct xe_device *xe, struct xe_vm *vm, u32 logical_mask, @@ -94,6 +134,14 @@ static struct xe_exec_queue *__xe_exec_queue_alloc(struct xe_device *xe, else q->sched_props.priority = XE_EXEC_QUEUE_PRIORITY_NORMAL; + if (q->flags & (EXEC_QUEUE_FLAG_MIGRATE | EXEC_QUEUE_FLAG_VM)) { + err = alloc_dep_schedulers(xe, q); + if (err) { + __xe_exec_queue_free(q); + return ERR_PTR(err); + } + } + if (vm) q->vm = xe_vm_get(vm); @@ -151,6 +199,16 @@ err_lrc: return err; } +static void __xe_exec_queue_fini(struct xe_exec_queue *q) +{ + int i; + + q->ops->fini(q); + + for (i = 0; i < q->width; ++i) + xe_lrc_put(q->lrc[i]); +} + struct xe_exec_queue *xe_exec_queue_create(struct xe_device *xe, struct xe_vm *vm, u32 logical_mask, u16 width, struct xe_hw_engine *hwe, u32 flags, @@ -181,11 +239,13 @@ struct xe_exec_queue *xe_exec_queue_create(struct xe_device *xe, struct xe_vm *v if (xe_exec_queue_uses_pxp(q)) { err = xe_pxp_exec_queue_add(xe->pxp, q); if (err) - goto err_post_alloc; + goto err_post_init; } return q; +err_post_init: + __xe_exec_queue_fini(q); err_post_alloc: __xe_exec_queue_free(q); return ERR_PTR(err); @@ -283,13 +343,11 @@ void xe_exec_queue_destroy(struct kref *ref) xe_exec_queue_put(eq); } - q->ops->fini(q); + q->ops->destroy(q); } void xe_exec_queue_fini(struct xe_exec_queue *q) { - int i; - /* * Before releasing our ref to lrc and xef, accumulate our run ticks * and wakeup any waiters. @@ -298,9 +356,7 @@ void xe_exec_queue_fini(struct xe_exec_queue *q) if (q->xef && atomic_dec_and_test(&q->xef->exec_queue.pending_removal)) wake_up_var(&q->xef->exec_queue.pending_removal); - for (i = 0; i < q->width; ++i) - xe_lrc_put(q->lrc[i]); - + __xe_exec_queue_fini(q); __xe_exec_queue_free(q); } @@ -610,7 +666,7 @@ int xe_exec_queue_create_ioctl(struct drm_device *dev, void *data, if (XE_IOCTL_DBG(xe, err)) return -EFAULT; - if (XE_IOCTL_DBG(xe, eci[0].gt_id >= xe->info.gt_count)) + if (XE_IOCTL_DBG(xe, !xe_device_get_gt(xe, eci[0].gt_id))) return -EINVAL; if (args->flags & DRM_XE_EXEC_QUEUE_LOW_LATENCY_HINT) @@ -742,6 +798,21 @@ int xe_exec_queue_get_property_ioctl(struct drm_device *dev, void *data, } /** + * xe_exec_queue_lrc() - Get the LRC from exec queue. + * @q: The exec_queue. + * + * Retrieves the primary LRC for the exec queue. Note that this function + * returns only the first LRC instance, even when multiple parallel LRCs + * are configured. + * + * Return: Pointer to LRC on success, error on failure + */ +struct xe_lrc *xe_exec_queue_lrc(struct xe_exec_queue *q) +{ + return q->lrc[0]; +} + +/** * xe_exec_queue_is_lr() - Whether an exec_queue is long-running * @q: The exec_queue * @@ -1028,3 +1099,51 @@ int xe_exec_queue_last_fence_test_dep(struct xe_exec_queue *q, struct xe_vm *vm) return err; } + +/** + * xe_exec_queue_contexts_hwsp_rebase - Re-compute GGTT references + * within all LRCs of a queue. 
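The new err_post_init label in xe_exec_queue_create() above follows the usual staged-unwind convention: every init step gets a matching cleanup label, and a failure jumps to the label that undoes exactly what has been set up so far, in reverse order. A standalone illustration of the shape (names invented):

#include <errno.h>
#include <stdio.h>
#include <stdlib.h>

struct queue { void *hw_state; int registered; };

static int init_hw_state(struct queue *q)
{
    q->hw_state = malloc(32);
    return q->hw_state ? 0 : -ENOMEM;
}
static void fini_hw_state(struct queue *q) { free(q->hw_state); }
static int register_queue(struct queue *q) { q->registered = 1; return 0; }

/* A failed stage jumps to the label that unwinds the stages before it. */
static struct queue *queue_create(void)
{
    struct queue *q = calloc(1, sizeof(*q));
    int err;

    if (!q)
        return NULL;

    err = init_hw_state(q);
    if (err)
        goto err_free;

    err = register_queue(q);
    if (err)
        goto err_fini;

    return q;

err_fini:
    fini_hw_state(q);
err_free:
    free(q);
    return NULL;
}

int main(void)
{
    struct queue *q = queue_create();

    printf("created: %s\n", q ? "yes" : "no");
    if (q) {
        fini_hw_state(q);
        free(q);
    }
    return 0;
}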
+ * @q: the &xe_exec_queue struct instance containing target LRCs + * @scratch: scratch buffer to be used as temporary storage + * + * Returns: zero on success, negative error code on failure + */ +int xe_exec_queue_contexts_hwsp_rebase(struct xe_exec_queue *q, void *scratch) +{ + int i; + int err = 0; + + for (i = 0; i < q->width; ++i) { + xe_lrc_update_memirq_regs_with_address(q->lrc[i], q->hwe, scratch); + xe_lrc_update_hwctx_regs_with_address(q->lrc[i]); + err = xe_lrc_setup_wa_bb_with_scratch(q->lrc[i], q->hwe, scratch); + if (err) + break; + } + + return err; +} + +/** + * xe_exec_queue_jobs_ring_restore - Re-emit ring commands of requests pending on given queue. + * @q: the &xe_exec_queue struct instance + */ +void xe_exec_queue_jobs_ring_restore(struct xe_exec_queue *q) +{ + struct xe_gpu_scheduler *sched = &q->guc->sched; + struct xe_sched_job *job; + + /* + * This routine is used within VF migration recovery. This means + * using the lock here introduces a restriction: we cannot wait + * for any GFX HW response while the lock is taken. + */ + spin_lock(&sched->base.job_list_lock); + list_for_each_entry(job, &sched->base.pending_list, drm.list) { + if (xe_sched_job_is_error(job)) + continue; + + q->ring_ops->emit_job(job); + } + spin_unlock(&sched->base.job_list_lock); +} diff --git a/drivers/gpu/drm/xe/xe_exec_queue.h b/drivers/gpu/drm/xe/xe_exec_queue.h index 17bc50a7f05a..15ec852e7f7e 100644 --- a/drivers/gpu/drm/xe/xe_exec_queue.h +++ b/drivers/gpu/drm/xe/xe_exec_queue.h @@ -90,4 +90,9 @@ int xe_exec_queue_last_fence_test_dep(struct xe_exec_queue *q, struct xe_vm *vm); void xe_exec_queue_update_run_ticks(struct xe_exec_queue *q); +int xe_exec_queue_contexts_hwsp_rebase(struct xe_exec_queue *q, void *scratch); + +void xe_exec_queue_jobs_ring_restore(struct xe_exec_queue *q); + +struct xe_lrc *xe_exec_queue_lrc(struct xe_exec_queue *q); #endif diff --git a/drivers/gpu/drm/xe/xe_exec_queue_types.h b/drivers/gpu/drm/xe/xe_exec_queue_types.h index cc1cffb5c87f..27b76cf9da89 100644 --- a/drivers/gpu/drm/xe/xe_exec_queue_types.h +++ b/drivers/gpu/drm/xe/xe_exec_queue_types.h @@ -87,6 +87,8 @@ struct xe_exec_queue { #define EXEC_QUEUE_FLAG_HIGH_PRIORITY BIT(4) /* flag to indicate low latency hint to guc */ #define EXEC_QUEUE_FLAG_LOW_LATENCY BIT(5) +/* for migration (kernel copy, clear, bind) jobs */ +#define EXEC_QUEUE_FLAG_MIGRATE BIT(6) /** * @flags: flags for this exec queue, should statically setup aside from ban @@ -132,6 +134,19 @@ struct xe_exec_queue { struct list_head link; } lr; +#define XE_EXEC_QUEUE_TLB_INVAL_PRIMARY_GT 0 +#define XE_EXEC_QUEUE_TLB_INVAL_MEDIA_GT 1 +#define XE_EXEC_QUEUE_TLB_INVAL_COUNT (XE_EXEC_QUEUE_TLB_INVAL_MEDIA_GT + 1) + + /** @tlb_inval: TLB invalidations exec queue state */ + struct { + /** + * @tlb_inval.dep_scheduler: The TLB invalidation + * dependency scheduler + */ + struct xe_dep_scheduler *dep_scheduler; + } tlb_inval[XE_EXEC_QUEUE_TLB_INVAL_COUNT]; + /** @pxp: PXP info tracking */ struct { /** @pxp.type: PXP session type used by this queue */ @@ -166,8 +181,14 @@ struct xe_exec_queue_ops { int (*init)(struct xe_exec_queue *q); /** @kill: Kill inflight submissions for backend */ void (*kill)(struct xe_exec_queue *q); - /** @fini: Fini exec queue for submission backend */ + /** @fini: Undoes the init() for submission backend */ void (*fini)(struct xe_exec_queue *q); + /** + * @destroy: Destroy exec queue for submission backend. 
The backend + * function must call xe_exec_queue_fini() (which will in turn call the + * fini() backend function) to ensure the queue is properly cleaned up. + */ + void (*destroy)(struct xe_exec_queue *q); /** @set_priority: Set priority for exec queue */ int (*set_priority)(struct xe_exec_queue *q, enum xe_exec_queue_priority priority); diff --git a/drivers/gpu/drm/xe/xe_execlist.c b/drivers/gpu/drm/xe/xe_execlist.c index 788f56b066b6..f83d421ac9d3 100644 --- a/drivers/gpu/drm/xe/xe_execlist.c +++ b/drivers/gpu/drm/xe/xe_execlist.c @@ -385,10 +385,20 @@ err_free: return err; } -static void execlist_exec_queue_fini_async(struct work_struct *w) +static void execlist_exec_queue_fini(struct xe_exec_queue *q) +{ + struct xe_execlist_exec_queue *exl = q->execlist; + + drm_sched_entity_fini(&exl->entity); + drm_sched_fini(&exl->sched); + + kfree(exl); +} + +static void execlist_exec_queue_destroy_async(struct work_struct *w) { struct xe_execlist_exec_queue *ee = - container_of(w, struct xe_execlist_exec_queue, fini_async); + container_of(w, struct xe_execlist_exec_queue, destroy_async); struct xe_exec_queue *q = ee->q; struct xe_execlist_exec_queue *exl = q->execlist; struct xe_device *xe = gt_to_xe(q->gt); @@ -401,10 +411,6 @@ static void execlist_exec_queue_fini_async(struct work_struct *w) list_del(&exl->active_link); spin_unlock_irqrestore(&exl->port->lock, flags); - drm_sched_entity_fini(&exl->entity); - drm_sched_fini(&exl->sched); - kfree(exl); - xe_exec_queue_fini(q); } @@ -413,10 +419,10 @@ static void execlist_exec_queue_kill(struct xe_exec_queue *q) /* NIY */ } -static void execlist_exec_queue_fini(struct xe_exec_queue *q) +static void execlist_exec_queue_destroy(struct xe_exec_queue *q) { - INIT_WORK(&q->execlist->fini_async, execlist_exec_queue_fini_async); - queue_work(system_unbound_wq, &q->execlist->fini_async); + INIT_WORK(&q->execlist->destroy_async, execlist_exec_queue_destroy_async); + queue_work(system_unbound_wq, &q->execlist->destroy_async); } static int execlist_exec_queue_set_priority(struct xe_exec_queue *q, @@ -467,6 +473,7 @@ static const struct xe_exec_queue_ops execlist_exec_queue_ops = { .init = execlist_exec_queue_init, .kill = execlist_exec_queue_kill, .fini = execlist_exec_queue_fini, + .destroy = execlist_exec_queue_destroy, .set_priority = execlist_exec_queue_set_priority, .set_timeslice = execlist_exec_queue_set_timeslice, .set_preempt_timeout = execlist_exec_queue_set_preempt_timeout, diff --git a/drivers/gpu/drm/xe/xe_execlist_types.h b/drivers/gpu/drm/xe/xe_execlist_types.h index 415140936f11..92c4ba52db0c 100644 --- a/drivers/gpu/drm/xe/xe_execlist_types.h +++ b/drivers/gpu/drm/xe/xe_execlist_types.h @@ -42,7 +42,7 @@ struct xe_execlist_exec_queue { bool has_run; - struct work_struct fini_async; + struct work_struct destroy_async; enum xe_exec_queue_priority active_priority; struct list_head active_link; diff --git a/drivers/gpu/drm/xe/xe_force_wake.c b/drivers/gpu/drm/xe/xe_force_wake.c index 8a5cba22b586..c59a9b330697 100644 --- a/drivers/gpu/drm/xe/xe_force_wake.c +++ b/drivers/gpu/drm/xe/xe_force_wake.c @@ -64,7 +64,7 @@ void xe_force_wake_init_engines(struct xe_gt *gt, struct xe_force_wake *fw) { int i, j; - if (!xe_gt_is_media_type(gt)) + if (xe_gt_is_main_type(gt)) init_domain(fw, XE_FW_DOMAIN_ID_RENDER, FORCEWAKE_RENDER, FORCEWAKE_ACK_RENDER); diff --git a/drivers/gpu/drm/xe/xe_gen_wa_oob.c b/drivers/gpu/drm/xe/xe_gen_wa_oob.c index ed9183599e31..247e41c1c48d 100644 --- a/drivers/gpu/drm/xe/xe_gen_wa_oob.c +++ b/drivers/gpu/drm/xe/xe_gen_wa_oob.c 
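The execlist backend above now splits teardown in two: destroy() only queues work, and the heavy part (scheduler entity teardown, freeing) runs later from the worker, where it is safe to block. A minimal kernel-style sketch of that deferred-destroy pattern (struct and function names are illustrative, error handling omitted):

#include <linux/slab.h>
#include <linux/workqueue.h>

struct my_queue {
    struct work_struct destroy_async;
    /* ... submission state ... */
};

/* Runs later in process context, where blocking and freeing are safe. */
static void my_queue_destroy_worker(struct work_struct *w)
{
    struct my_queue *q = container_of(w, struct my_queue, destroy_async);

    /* tear down submission state here, then release the object */
    kfree(q);
}

/* May be called from a context that must not block: only schedule the work. */
static void my_queue_destroy(struct my_queue *q)
{
    INIT_WORK(&q->destroy_async, my_queue_destroy_worker);
    queue_work(system_unbound_wq, &q->destroy_async);
}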
@@ -18,8 +18,8 @@ " *\n" \ " * This file was generated from rules: %s\n" \ " */\n" \ - "#ifndef _GENERATED_XE_WA_OOB_\n" \ - "#define _GENERATED_XE_WA_OOB_\n" \ + "#ifndef _GENERATED_%s_\n" \ + "#define _GENERATED_%s_\n" \ "\n" \ "enum {\n" @@ -52,7 +52,7 @@ static char *strip(char *line, size_t linelen) } #define MAX_LINE_LEN 4096 -static int parse(FILE *input, FILE *csource, FILE *cheader) +static int parse(FILE *input, FILE *csource, FILE *cheader, char *prefix) { char line[MAX_LINE_LEN + 1]; char *name, *prev_name = NULL, *rules; @@ -96,7 +96,7 @@ static int parse(FILE *input, FILE *csource, FILE *cheader) } if (name) { - fprintf(cheader, "\tXE_WA_OOB_%s = %u,\n", name, idx); + fprintf(cheader, "\t%s_%s = %u,\n", prefix, name, idx); /* Close previous entry before starting a new one */ if (idx) @@ -118,7 +118,41 @@ static int parse(FILE *input, FILE *csource, FILE *cheader) if (idx) fprintf(csource, ") },\n"); - fprintf(cheader, "\t_XE_WA_OOB_COUNT = %u\n", idx); + fprintf(cheader, "\t_%s_COUNT = %u\n", prefix, idx); + + return 0; +} + +/* Avoid GNU vs POSIX basename() discrepancy, just use our own */ +static const char *xbasename(const char *s) +{ + const char *p = strrchr(s, '/'); + + return p ? p + 1 : s; +} + +static int fn_to_prefix(const char *fn, char *prefix, size_t size) +{ + size_t len; + + fn = xbasename(fn); + len = strlen(fn); + + if (len > size - 1) + return -ENAMETOOLONG; + + memcpy(prefix, fn, len + 1); + + for (char *p = prefix; *p; p++) { + switch (*p) { + case '.': + *p = '\0'; + return 0; + default: + *p = toupper(*p); + break; + } + } return 0; } @@ -141,6 +175,7 @@ int main(int argc, const char *argv[]) [ARGS_CHEADER] = { .fn = argv[3], .mode = "w" }, }; int ret = 1; + char prefix[128]; if (argc < 3) { fprintf(stderr, "ERROR: wrong arguments\n"); @@ -148,6 +183,9 @@ int main(int argc, const char *argv[]) return 1; } + if (fn_to_prefix(args[ARGS_CHEADER].fn, prefix, sizeof(prefix)) < 0) + return 1; + for (int i = 0; i < _ARGS_COUNT; i++) { args[i].f = fopen(args[i].fn, args[i].mode); if (!args[i].f) { @@ -157,9 +195,10 @@ int main(int argc, const char *argv[]) } } - fprintf(args[ARGS_CHEADER].f, HEADER, args[ARGS_INPUT].fn); + fprintf(args[ARGS_CHEADER].f, HEADER, args[ARGS_INPUT].fn, prefix, prefix); + ret = parse(args[ARGS_INPUT].f, args[ARGS_CSOURCE].f, - args[ARGS_CHEADER].f); + args[ARGS_CHEADER].f, prefix); if (!ret) fprintf(args[ARGS_CHEADER].f, FOOTER); diff --git a/drivers/gpu/drm/xe/xe_ggtt.c b/drivers/gpu/drm/xe/xe_ggtt.c index 7b11fa1356f0..5edc0cad47e2 100644 --- a/drivers/gpu/drm/xe/xe_ggtt.c +++ b/drivers/gpu/drm/xe/xe_ggtt.c @@ -23,13 +23,14 @@ #include "xe_device.h" #include "xe_gt.h" #include "xe_gt_printk.h" -#include "xe_gt_tlb_invalidation.h" #include "xe_map.h" #include "xe_mmio.h" #include "xe_pm.h" #include "xe_res_cursor.h" #include "xe_sriov.h" +#include "xe_tile_printk.h" #include "xe_tile_sriov_vf.h" +#include "xe_tlb_inval.h" #include "xe_wa.h" #include "xe_wopcm.h" @@ -106,10 +107,10 @@ static unsigned int probe_gsm_size(struct pci_dev *pdev) static void ggtt_update_access_counter(struct xe_ggtt *ggtt) { struct xe_tile *tile = ggtt->tile; - struct xe_gt *affected_gt = XE_WA(tile->primary_gt, 22019338487) ? + struct xe_gt *affected_gt = XE_GT_WA(tile->primary_gt, 22019338487) ? tile->primary_gt : tile->media_gt; struct xe_mmio *mmio = &affected_gt->mmio; - u32 max_gtt_writes = XE_WA(ggtt->tile->primary_gt, 22019338487) ? 1100 : 63; + u32 max_gtt_writes = XE_GT_WA(ggtt->tile->primary_gt, 22019338487) ? 
1100 : 63; /* * Wa_22019338487: GMD_ID is a RO register, a dummy write forces gunit * to wait for completion of prior GTT writes before letting this through. @@ -238,6 +239,13 @@ int xe_ggtt_init_kunit(struct xe_ggtt *ggtt, u32 reserved, u32 size) } EXPORT_SYMBOL_IF_KUNIT(xe_ggtt_init_kunit); +static void dev_fini_ggtt(void *arg) +{ + struct xe_ggtt *ggtt = arg; + + drain_workqueue(ggtt->wq); +} + /** * xe_ggtt_init_early - Early GGTT initialization * @ggtt: the &xe_ggtt to be initialized @@ -262,7 +270,7 @@ int xe_ggtt_init_early(struct xe_ggtt *ggtt) gsm_size = probe_gsm_size(pdev); if (gsm_size == 0) { - drm_err(&xe->drm, "Hardware reported no preallocated GSM\n"); + xe_tile_err(ggtt->tile, "Hardware reported no preallocated GSM\n"); return -ENOMEM; } @@ -277,19 +285,26 @@ int xe_ggtt_init_early(struct xe_ggtt *ggtt) if (GRAPHICS_VERx100(xe) >= 1270) ggtt->pt_ops = (ggtt->tile->media_gt && - XE_WA(ggtt->tile->media_gt, 22019338487)) || - XE_WA(ggtt->tile->primary_gt, 22019338487) ? + XE_GT_WA(ggtt->tile->media_gt, 22019338487)) || + XE_GT_WA(ggtt->tile->primary_gt, 22019338487) ? &xelpg_pt_wa_ops : &xelpg_pt_ops; else ggtt->pt_ops = &xelp_pt_ops; ggtt->wq = alloc_workqueue("xe-ggtt-wq", 0, WQ_MEM_RECLAIM); + if (!ggtt->wq) + return -ENOMEM; + __xe_ggtt_init_early(ggtt, xe_wopcm_size(xe)); err = drmm_add_action_or_reset(&xe->drm, ggtt_fini_early, ggtt); if (err) return err; + err = devm_add_action_or_reset(xe->drm.dev, dev_fini_ggtt, ggtt); + if (err) + return err; + if (IS_SRIOV_VF(xe)) { err = xe_tile_sriov_vf_prepare_ggtt(ggtt->tile); if (err) @@ -410,7 +425,7 @@ int xe_ggtt_init(struct xe_ggtt *ggtt) goto err; } - xe_map_memset(xe, &ggtt->scratch->vmap, 0, 0, ggtt->scratch->size); + xe_map_memset(xe, &ggtt->scratch->vmap, 0, 0, xe_bo_size(ggtt->scratch)); xe_ggtt_initial_clear(ggtt); @@ -427,9 +442,8 @@ static void ggtt_invalidate_gt_tlb(struct xe_gt *gt) if (!gt) return; - err = xe_gt_tlb_invalidation_ggtt(gt); - if (err) - drm_warn(>_to_xe(gt)->drm, "xe_gt_tlb_invalidation_ggtt error=%d", err); + err = xe_tlb_inval_ggtt(>->tlb_inval); + xe_gt_WARN(gt, err, "Failed to invalidate GGTT (%pe)", ERR_PTR(err)); } static void xe_ggtt_invalidate(struct xe_ggtt *ggtt) @@ -456,8 +470,8 @@ static void xe_ggtt_dump_node(struct xe_ggtt *ggtt, if (IS_ENABLED(CONFIG_DRM_XE_DEBUG)) { string_get_size(node->size, 1, STRING_UNITS_2, buf, sizeof(buf)); - xe_gt_dbg(ggtt->tile->primary_gt, "GGTT %#llx-%#llx (%s) %s\n", - node->start, node->start + node->size, buf, description); + xe_tile_dbg(ggtt->tile, "GGTT %#llx-%#llx (%s) %s\n", + node->start, node->start + node->size, buf, description); } } @@ -489,9 +503,8 @@ int xe_ggtt_node_insert_balloon_locked(struct xe_ggtt_node *node, u64 start, u64 err = drm_mm_reserve_node(&ggtt->mm, &node->base); - if (xe_gt_WARN(ggtt->tile->primary_gt, err, - "Failed to balloon GGTT %#llx-%#llx (%pe)\n", - node->base.start, node->base.start + node->base.size, ERR_PTR(err))) + if (xe_tile_WARN(ggtt->tile, err, "Failed to balloon GGTT %#llx-%#llx (%pe)\n", + node->base.start, node->base.start + node->base.size, ERR_PTR(err))) return err; xe_ggtt_dump_node(ggtt, &node->base, "balloon"); @@ -682,13 +695,13 @@ void xe_ggtt_map_bo(struct xe_ggtt *ggtt, struct xe_ggtt_node *node, return; start = node->base.start; - end = start + bo->size; + end = start + xe_bo_size(bo); pte = ggtt->pt_ops->pte_encode_flags(bo, pat_index); if (!xe_bo_is_vram(bo) && !xe_bo_is_stolen(bo)) { xe_assert(xe_bo_device(bo), bo->ttm.ttm); - for (xe_res_first_sg(xe_bo_sg(bo), 0, bo->size, &cur); + for 
(xe_res_first_sg(xe_bo_sg(bo), 0, xe_bo_size(bo), &cur); cur.remaining; xe_res_next(&cur, XE_PAGE_SIZE)) ggtt->pt_ops->ggtt_set_pte(ggtt, end - cur.remaining, pte | xe_res_dma(&cur)); @@ -696,7 +709,7 @@ void xe_ggtt_map_bo(struct xe_ggtt *ggtt, struct xe_ggtt_node *node, /* Prepend GPU offset */ pte |= vram_region_gpu_offset(bo->ttm.resource); - for (xe_res_first(bo->ttm.resource, 0, bo->size, &cur); + for (xe_res_first(bo->ttm.resource, 0, xe_bo_size(bo), &cur); cur.remaining; xe_res_next(&cur, XE_PAGE_SIZE)) ggtt->pt_ops->ggtt_set_pte(ggtt, end - cur.remaining, pte + cur.start); @@ -721,7 +734,7 @@ void xe_ggtt_map_bo_unlocked(struct xe_ggtt *ggtt, struct xe_bo *bo) } static int __xe_ggtt_insert_bo_at(struct xe_ggtt *ggtt, struct xe_bo *bo, - u64 start, u64 end) + u64 start, u64 end, struct drm_exec *exec) { u64 alignment = bo->min_align > 0 ? bo->min_align : XE_PAGE_SIZE; u8 tile_id = ggtt->tile->id; @@ -732,11 +745,11 @@ static int __xe_ggtt_insert_bo_at(struct xe_ggtt *ggtt, struct xe_bo *bo, if (XE_WARN_ON(bo->ggtt_node[tile_id])) { /* Someone's already inserted this BO in the GGTT */ - xe_tile_assert(ggtt->tile, bo->ggtt_node[tile_id]->base.size == bo->size); + xe_tile_assert(ggtt->tile, bo->ggtt_node[tile_id]->base.size == xe_bo_size(bo)); return 0; } - err = xe_bo_validate(bo, NULL, false); + err = xe_bo_validate(bo, NULL, false, exec); if (err) return err; @@ -751,7 +764,7 @@ static int __xe_ggtt_insert_bo_at(struct xe_ggtt *ggtt, struct xe_bo *bo, mutex_lock(&ggtt->lock); err = drm_mm_insert_node_in_range(&ggtt->mm, &bo->ggtt_node[tile_id]->base, - bo->size, alignment, 0, start, end, 0); + xe_bo_size(bo), alignment, 0, start, end, 0); if (err) { xe_ggtt_node_fini(bo->ggtt_node[tile_id]); bo->ggtt_node[tile_id] = NULL; @@ -778,25 +791,28 @@ out: * @bo: the &xe_bo to be inserted * @start: address where it will be inserted * @end: end of the range where it will be inserted + * @exec: The drm_exec transaction to use for exhaustive eviction. * * Return: 0 on success or a negative error code on failure. */ int xe_ggtt_insert_bo_at(struct xe_ggtt *ggtt, struct xe_bo *bo, - u64 start, u64 end) + u64 start, u64 end, struct drm_exec *exec) { - return __xe_ggtt_insert_bo_at(ggtt, bo, start, end); + return __xe_ggtt_insert_bo_at(ggtt, bo, start, end, exec); } /** * xe_ggtt_insert_bo - Insert BO into GGTT * @ggtt: the &xe_ggtt where bo will be inserted * @bo: the &xe_bo to be inserted + * @exec: The drm_exec transaction to use for exhaustive eviction. * * Return: 0 on success or a negative error code on failure. 
*/ -int xe_ggtt_insert_bo(struct xe_ggtt *ggtt, struct xe_bo *bo) +int xe_ggtt_insert_bo(struct xe_ggtt *ggtt, struct xe_bo *bo, + struct drm_exec *exec) { - return __xe_ggtt_insert_bo_at(ggtt, bo, 0, U64_MAX); + return __xe_ggtt_insert_bo_at(ggtt, bo, 0, U64_MAX, exec); } /** @@ -812,7 +828,7 @@ void xe_ggtt_remove_bo(struct xe_ggtt *ggtt, struct xe_bo *bo) return; /* This BO is not currently in the GGTT */ - xe_tile_assert(ggtt->tile, bo->ggtt_node[tile_id]->base.size == bo->size); + xe_tile_assert(ggtt->tile, bo->ggtt_node[tile_id]->base.size == xe_bo_size(bo)); xe_ggtt_node_remove(bo->ggtt_node[tile_id], bo->flags & XE_BO_FLAG_GGTT_INVALIDATE); diff --git a/drivers/gpu/drm/xe/xe_ggtt.h b/drivers/gpu/drm/xe/xe_ggtt.h index fbe1e397d05d..75fc7a1efea7 100644 --- a/drivers/gpu/drm/xe/xe_ggtt.h +++ b/drivers/gpu/drm/xe/xe_ggtt.h @@ -10,6 +10,7 @@ struct drm_printer; struct xe_tile; +struct drm_exec; struct xe_ggtt *xe_ggtt_alloc(struct xe_tile *tile); int xe_ggtt_init_early(struct xe_ggtt *ggtt); @@ -31,9 +32,9 @@ bool xe_ggtt_node_allocated(const struct xe_ggtt_node *node); void xe_ggtt_map_bo(struct xe_ggtt *ggtt, struct xe_ggtt_node *node, struct xe_bo *bo, u16 pat_index); void xe_ggtt_map_bo_unlocked(struct xe_ggtt *ggtt, struct xe_bo *bo); -int xe_ggtt_insert_bo(struct xe_ggtt *ggtt, struct xe_bo *bo); +int xe_ggtt_insert_bo(struct xe_ggtt *ggtt, struct xe_bo *bo, struct drm_exec *exec); int xe_ggtt_insert_bo_at(struct xe_ggtt *ggtt, struct xe_bo *bo, - u64 start, u64 end); + u64 start, u64 end, struct drm_exec *exec); void xe_ggtt_remove_bo(struct xe_ggtt *ggtt, struct xe_bo *bo); u64 xe_ggtt_largest_hole(struct xe_ggtt *ggtt, u64 alignment, u64 *spare); diff --git a/drivers/gpu/drm/xe/xe_gpu_scheduler.c b/drivers/gpu/drm/xe/xe_gpu_scheduler.c index 869b43a4151d..455ccaf17314 100644 --- a/drivers/gpu/drm/xe/xe_gpu_scheduler.c +++ b/drivers/gpu/drm/xe/xe_gpu_scheduler.c @@ -101,6 +101,19 @@ void xe_sched_submission_stop(struct xe_gpu_scheduler *sched) cancel_work_sync(&sched->work_process_msg); } +/** + * xe_sched_submission_stop_async - Stop further runs of submission tasks on a scheduler. + * @sched: the &xe_gpu_scheduler struct instance + * + * This call disables further runs of scheduling work queue. It does not wait + * for any in-progress runs to finish, only makes sure no further runs happen + * afterwards. 
+ */ +void xe_sched_submission_stop_async(struct xe_gpu_scheduler *sched) +{ + drm_sched_wqueue_stop(&sched->base); +} + void xe_sched_submission_resume_tdr(struct xe_gpu_scheduler *sched) { drm_sched_resume_timeout(&sched->base, sched->base.timeout); diff --git a/drivers/gpu/drm/xe/xe_gpu_scheduler.h b/drivers/gpu/drm/xe/xe_gpu_scheduler.h index 308061f0cf37..e548b2aed95a 100644 --- a/drivers/gpu/drm/xe/xe_gpu_scheduler.h +++ b/drivers/gpu/drm/xe/xe_gpu_scheduler.h @@ -21,6 +21,7 @@ void xe_sched_fini(struct xe_gpu_scheduler *sched); void xe_sched_submission_start(struct xe_gpu_scheduler *sched); void xe_sched_submission_stop(struct xe_gpu_scheduler *sched); +void xe_sched_submission_stop_async(struct xe_gpu_scheduler *sched); void xe_sched_submission_resume_tdr(struct xe_gpu_scheduler *sched); diff --git a/drivers/gpu/drm/xe/xe_gsc.c b/drivers/gpu/drm/xe/xe_gsc.c index 0bcf97063ff6..83d61bf8ec62 100644 --- a/drivers/gpu/drm/xe/xe_gsc.c +++ b/drivers/gpu/drm/xe/xe_gsc.c @@ -59,7 +59,8 @@ static int memcpy_fw(struct xe_gsc *gsc) xe_map_memcpy_from(xe, storage, &gsc->fw.bo->vmap, 0, fw_size); xe_map_memcpy_to(xe, &gsc->private->vmap, 0, storage, fw_size); - xe_map_memset(xe, &gsc->private->vmap, fw_size, 0, gsc->private->size - fw_size); + xe_map_memset(xe, &gsc->private->vmap, fw_size, 0, + xe_bo_size(gsc->private) - fw_size); kfree(storage); @@ -82,7 +83,8 @@ static int emit_gsc_upload(struct xe_gsc *gsc) bb->cs[bb->len++] = GSC_FW_LOAD; bb->cs[bb->len++] = lower_32_bits(offset); bb->cs[bb->len++] = upper_32_bits(offset); - bb->cs[bb->len++] = (gsc->private->size / SZ_4K) | GSC_FW_LOAD_LIMIT_VALID; + bb->cs[bb->len++] = (xe_bo_size(gsc->private) / SZ_4K) | + GSC_FW_LOAD_LIMIT_VALID; job = xe_bb_create_job(gsc->q, bb); if (IS_ERR(job)) { @@ -134,10 +136,10 @@ static int query_compatibility_version(struct xe_gsc *gsc) u64 ggtt_offset; int err; - bo = xe_bo_create_pin_map(xe, tile, NULL, GSC_VER_PKT_SZ * 2, - ttm_bo_type_kernel, - XE_BO_FLAG_SYSTEM | - XE_BO_FLAG_GGTT); + bo = xe_bo_create_pin_map_novm(xe, tile, GSC_VER_PKT_SZ * 2, + ttm_bo_type_kernel, + XE_BO_FLAG_SYSTEM | + XE_BO_FLAG_GGTT, false); if (IS_ERR(bo)) { xe_gt_err(gt, "failed to allocate bo for GSC version query\n"); return PTR_ERR(bo); @@ -264,7 +266,7 @@ static int gsc_upload_and_init(struct xe_gsc *gsc) unsigned int fw_ref; int ret; - if (XE_WA(tile->primary_gt, 14018094691)) { + if (XE_GT_WA(tile->primary_gt, 14018094691)) { fw_ref = xe_force_wake_get(gt_to_fw(tile->primary_gt), XE_FORCEWAKE_ALL); /* @@ -279,7 +281,7 @@ static int gsc_upload_and_init(struct xe_gsc *gsc) ret = gsc_upload(gsc); - if (XE_WA(tile->primary_gt, 14018094691)) + if (XE_GT_WA(tile->primary_gt, 14018094691)) xe_force_wake_put(gt_to_fw(tile->primary_gt), fw_ref); if (ret) @@ -591,7 +593,7 @@ void xe_gsc_wa_14015076503(struct xe_gt *gt, bool prep) u32 gs1_clr = prep ? 
0 : HECI_H_GS1_ER_PREP; /* WA only applies if the GSC is loaded */ - if (!XE_WA(gt, 14015076503) || !gsc_fw_is_loaded(gt)) + if (!XE_GT_WA(gt, 14015076503) || !gsc_fw_is_loaded(gt)) return; xe_mmio_rmw32(>->mmio, HECI_H_GS1(MTL_GSC_HECI2_BASE), gs1_clr, gs1_set); diff --git a/drivers/gpu/drm/xe/xe_gsc_proxy.c b/drivers/gpu/drm/xe/xe_gsc_proxy.c index d0519cd6704a..464282a89eef 100644 --- a/drivers/gpu/drm/xe/xe_gsc_proxy.c +++ b/drivers/gpu/drm/xe/xe_gsc_proxy.c @@ -23,6 +23,7 @@ #include "xe_map.h" #include "xe_mmio.h" #include "xe_pm.h" +#include "xe_tile.h" /* * GSC proxy: @@ -483,7 +484,7 @@ int xe_gsc_proxy_init(struct xe_gsc *gsc) } /* no multi-tile devices with this feature yet */ - if (tile->id > 0) { + if (!xe_tile_is_root(tile)) { xe_gt_err(gt, "unexpected GSC proxy init on tile %u\n", tile->id); return -EINVAL; } diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c index 9752a38c0162..3e0ad7e5b5df 100644 --- a/drivers/gpu/drm/xe/xe_gt.c +++ b/drivers/gpu/drm/xe/xe_gt.c @@ -37,10 +37,10 @@ #include "xe_gt_sriov_pf.h" #include "xe_gt_sriov_vf.h" #include "xe_gt_sysfs.h" -#include "xe_gt_tlb_invalidation.h" #include "xe_gt_topology.h" #include "xe_guc_exec_queue_types.h" #include "xe_guc_pc.h" +#include "xe_guc_submit.h" #include "xe_hw_fence.h" #include "xe_hw_engine_class_sysfs.h" #include "xe_irq.h" @@ -57,6 +57,7 @@ #include "xe_sa.h" #include "xe_sched_job.h" #include "xe_sriov.h" +#include "xe_tlb_inval.h" #include "xe_tuning.h" #include "xe_uc.h" #include "xe_uc_fw.h" @@ -97,7 +98,7 @@ void xe_gt_sanitize(struct xe_gt *gt) * FIXME: if xe_uc_sanitize is called here, on TGL driver will not * reload */ - gt->uc.guc.submission_state.enabled = false; + xe_guc_submit_disable(>->uc.guc); } static void xe_gt_enable_host_l2_vram(struct xe_gt *gt) @@ -105,14 +106,14 @@ static void xe_gt_enable_host_l2_vram(struct xe_gt *gt) unsigned int fw_ref; u32 reg; - if (!XE_WA(gt, 16023588340)) + if (!XE_GT_WA(gt, 16023588340)) return; fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); if (!fw_ref) return; - if (!xe_gt_is_media_type(gt)) { + if (xe_gt_is_main_type(gt)) { reg = xe_gt_mcr_unicast_read_any(gt, XE2_GAMREQSTRM_CTRL); reg |= CG_DIS_CNTLBUS; xe_gt_mcr_multicast_write(gt, XE2_GAMREQSTRM_CTRL, reg); @@ -127,7 +128,7 @@ static void xe_gt_disable_host_l2_vram(struct xe_gt *gt) unsigned int fw_ref; u32 reg; - if (!XE_WA(gt, 16023588340)) + if (!XE_GT_WA(gt, 16023588340)) return; if (xe_gt_is_media_type(gt)) @@ -146,30 +147,23 @@ static void xe_gt_disable_host_l2_vram(struct xe_gt *gt) static void gt_reset_worker(struct work_struct *w); -static int emit_nop_job(struct xe_gt *gt, struct xe_exec_queue *q) +static int emit_job_sync(struct xe_exec_queue *q, struct xe_bb *bb, + long timeout_jiffies) { struct xe_sched_job *job; - struct xe_bb *bb; struct dma_fence *fence; long timeout; - bb = xe_bb_new(gt, 4, false); - if (IS_ERR(bb)) - return PTR_ERR(bb); - job = xe_bb_create_job(q, bb); - if (IS_ERR(job)) { - xe_bb_free(bb, NULL); + if (IS_ERR(job)) return PTR_ERR(job); - } xe_sched_job_arm(job); fence = dma_fence_get(&job->drm.s_fence->finished); xe_sched_job_push(job); - timeout = dma_fence_wait_timeout(fence, false, HZ); + timeout = dma_fence_wait_timeout(fence, false, timeout_jiffies); dma_fence_put(fence); - xe_bb_free(bb, NULL); if (timeout < 0) return timeout; else if (!timeout) @@ -178,27 +172,30 @@ static int emit_nop_job(struct xe_gt *gt, struct xe_exec_queue *q) return 0; } +static int emit_nop_job(struct xe_gt *gt, struct xe_exec_queue *q) +{ + struct xe_bb *bb; + 
int ret; + + bb = xe_bb_new(gt, 4, false); + if (IS_ERR(bb)) + return PTR_ERR(bb); + + ret = emit_job_sync(q, bb, HZ); + xe_bb_free(bb, NULL); + + return ret; +} + static int emit_wa_job(struct xe_gt *gt, struct xe_exec_queue *q) { struct xe_reg_sr *sr = &q->hwe->reg_lrc; struct xe_reg_sr_entry *entry; + int count_rmw = 0, count = 0, ret; unsigned long idx; - struct xe_sched_job *job; struct xe_bb *bb; - struct dma_fence *fence; - long timeout; - int count_rmw = 0; - int count = 0; - - if (q->hwe->class == XE_ENGINE_CLASS_RENDER) - /* Big enough to emit all of the context's 3DSTATE */ - bb = xe_bb_new(gt, xe_gt_lrc_size(gt, q->hwe->class), false); - else - /* Just pick a large BB size */ - bb = xe_bb_new(gt, SZ_4K, false); - - if (IS_ERR(bb)) - return PTR_ERR(bb); + size_t bb_len = 0; + u32 *cs; /* count RMW registers as those will be handled separately */ xa_for_each(&sr->xa, idx, entry) { @@ -208,13 +205,34 @@ static int emit_wa_job(struct xe_gt *gt, struct xe_exec_queue *q) ++count_rmw; } - if (count || count_rmw) - xe_gt_dbg(gt, "LRC WA %s save-restore batch\n", sr->name); + if (count) + bb_len += count * 2 + 1; + + if (count_rmw) + bb_len += count_rmw * 20 + 7; + + if (q->hwe->class == XE_ENGINE_CLASS_RENDER) + /* + * Big enough to emit all of the context's 3DSTATE via + * xe_lrc_emit_hwe_state_instructions() + */ + bb_len += xe_gt_lrc_size(gt, q->hwe->class) / sizeof(u32); + + xe_gt_dbg(gt, "LRC %s WA job: %zu dwords\n", q->hwe->name, bb_len); + + bb = xe_bb_new(gt, bb_len, false); + if (IS_ERR(bb)) + return PTR_ERR(bb); + + cs = bb->cs; if (count) { - /* emit single LRI with all non RMW regs */ + /* + * Emit single LRI with all non RMW regs: 1 leading dw + 2dw per + * reg + 1 + */ - bb->cs[bb->len++] = MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS(count); + *cs++ = MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS(count); xa_for_each(&sr->xa, idx, entry) { struct xe_reg reg = entry->reg; @@ -229,79 +247,68 @@ static int emit_wa_job(struct xe_gt *gt, struct xe_exec_queue *q) val |= entry->set_bits; - bb->cs[bb->len++] = reg.addr; - bb->cs[bb->len++] = val; + *cs++ = reg.addr; + *cs++ = val; xe_gt_dbg(gt, "REG[0x%x] = 0x%08x", reg.addr, val); } } if (count_rmw) { - /* emit MI_MATH for each RMW reg */ + /* Emit MI_MATH for each RMW reg: 20dw per reg + 7 trailing dw */ xa_for_each(&sr->xa, idx, entry) { if (entry->reg.masked || entry->clr_bits == ~0) continue; - bb->cs[bb->len++] = MI_LOAD_REGISTER_REG | MI_LRR_DST_CS_MMIO; - bb->cs[bb->len++] = entry->reg.addr; - bb->cs[bb->len++] = CS_GPR_REG(0, 0).addr; - - bb->cs[bb->len++] = MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS(2) | - MI_LRI_LRM_CS_MMIO; - bb->cs[bb->len++] = CS_GPR_REG(0, 1).addr; - bb->cs[bb->len++] = entry->clr_bits; - bb->cs[bb->len++] = CS_GPR_REG(0, 2).addr; - bb->cs[bb->len++] = entry->set_bits; - - bb->cs[bb->len++] = MI_MATH(8); - bb->cs[bb->len++] = CS_ALU_INSTR_LOAD(SRCA, REG0); - bb->cs[bb->len++] = CS_ALU_INSTR_LOADINV(SRCB, REG1); - bb->cs[bb->len++] = CS_ALU_INSTR_AND; - bb->cs[bb->len++] = CS_ALU_INSTR_STORE(REG0, ACCU); - bb->cs[bb->len++] = CS_ALU_INSTR_LOAD(SRCA, REG0); - bb->cs[bb->len++] = CS_ALU_INSTR_LOAD(SRCB, REG2); - bb->cs[bb->len++] = CS_ALU_INSTR_OR; - bb->cs[bb->len++] = CS_ALU_INSTR_STORE(REG0, ACCU); - - bb->cs[bb->len++] = MI_LOAD_REGISTER_REG | MI_LRR_SRC_CS_MMIO; - bb->cs[bb->len++] = CS_GPR_REG(0, 0).addr; - bb->cs[bb->len++] = entry->reg.addr; + *cs++ = MI_LOAD_REGISTER_REG | MI_LRR_DST_CS_MMIO; + *cs++ = entry->reg.addr; + *cs++ = CS_GPR_REG(0, 0).addr; + + *cs++ = MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS(2) | + 
MI_LRI_LRM_CS_MMIO; + *cs++ = CS_GPR_REG(0, 1).addr; + *cs++ = entry->clr_bits; + *cs++ = CS_GPR_REG(0, 2).addr; + *cs++ = entry->set_bits; + + *cs++ = MI_MATH(8); + *cs++ = CS_ALU_INSTR_LOAD(SRCA, REG0); + *cs++ = CS_ALU_INSTR_LOADINV(SRCB, REG1); + *cs++ = CS_ALU_INSTR_AND; + *cs++ = CS_ALU_INSTR_STORE(REG0, ACCU); + *cs++ = CS_ALU_INSTR_LOAD(SRCA, REG0); + *cs++ = CS_ALU_INSTR_LOAD(SRCB, REG2); + *cs++ = CS_ALU_INSTR_OR; + *cs++ = CS_ALU_INSTR_STORE(REG0, ACCU); + + *cs++ = MI_LOAD_REGISTER_REG | MI_LRR_SRC_CS_MMIO; + *cs++ = CS_GPR_REG(0, 0).addr; + *cs++ = entry->reg.addr; xe_gt_dbg(gt, "REG[%#x] = ~%#x|%#x\n", entry->reg.addr, entry->clr_bits, entry->set_bits); } /* reset used GPR */ - bb->cs[bb->len++] = MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS(3) | MI_LRI_LRM_CS_MMIO; - bb->cs[bb->len++] = CS_GPR_REG(0, 0).addr; - bb->cs[bb->len++] = 0; - bb->cs[bb->len++] = CS_GPR_REG(0, 1).addr; - bb->cs[bb->len++] = 0; - bb->cs[bb->len++] = CS_GPR_REG(0, 2).addr; - bb->cs[bb->len++] = 0; + *cs++ = MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS(3) | + MI_LRI_LRM_CS_MMIO; + *cs++ = CS_GPR_REG(0, 0).addr; + *cs++ = 0; + *cs++ = CS_GPR_REG(0, 1).addr; + *cs++ = 0; + *cs++ = CS_GPR_REG(0, 2).addr; + *cs++ = 0; } - xe_lrc_emit_hwe_state_instructions(q, bb); + cs = xe_lrc_emit_hwe_state_instructions(q, cs); - job = xe_bb_create_job(q, bb); - if (IS_ERR(job)) { - xe_bb_free(bb, NULL); - return PTR_ERR(job); - } + bb->len = cs - bb->cs; - xe_sched_job_arm(job); - fence = dma_fence_get(&job->drm.s_fence->finished); - xe_sched_job_push(job); + ret = emit_job_sync(q, bb, HZ); - timeout = dma_fence_wait_timeout(fence, false, HZ); - dma_fence_put(fence); xe_bb_free(bb, NULL); - if (timeout < 0) - return timeout; - else if (!timeout) - return -ETIME; - return 0; + return ret; } int xe_gt_record_default_lrcs(struct xe_gt *gt) @@ -363,14 +370,6 @@ int xe_gt_record_default_lrcs(struct xe_gt *gt) goto put_nop_q; } - /* Reload golden LRC to record the effect of any indirect W/A */ - err = emit_nop_job(gt, q); - if (err) { - xe_gt_err(gt, "hwe %s: emit_nop_job failed (%pe) guc_id=%u\n", - hwe->name, ERR_PTR(err), q->guc->id); - goto put_nop_q; - } - xe_map_memcpy_from(xe, default_lrc, &q->lrc[0]->bo->vmap, xe_lrc_pphwsp_offset(q->lrc[0]), @@ -390,6 +389,7 @@ put_exec_queue: int xe_gt_init_early(struct xe_gt *gt) { + unsigned int fw_ref; int err; if (IS_SRIOV_PF(gt_to_xe(gt))) { @@ -400,7 +400,7 @@ int xe_gt_init_early(struct xe_gt *gt) xe_reg_sr_init(>->reg_sr, "GT", gt_to_xe(gt)); - err = xe_wa_init(gt); + err = xe_wa_gt_init(gt); if (err) return err; @@ -408,17 +408,36 @@ int xe_gt_init_early(struct xe_gt *gt) if (err) return err; - xe_wa_process_oob(gt); + xe_wa_process_gt_oob(gt); xe_force_wake_init_gt(gt, gt_to_fw(gt)); spin_lock_init(>->global_invl_lock); - err = xe_gt_tlb_invalidation_init_early(gt); + err = xe_gt_tlb_inval_init_early(gt); if (err) return err; xe_mocs_init_early(gt); + /* + * Only after this point can GT-specific MMIO operations + * (including things like communication with the GuC) + * be performed. 
+ */ + xe_gt_mmio_init(gt); + + err = xe_uc_init_noalloc(>->uc); + if (err) + return err; + + fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); + if (!fw_ref) + return -ETIMEDOUT; + + xe_gt_mcr_init_early(gt); + xe_pat_init(gt); + xe_force_wake_put(gt_to_fw(gt), fw_ref); + return 0; } @@ -433,7 +452,7 @@ static void dump_pat_on_error(struct xe_gt *gt) xe_pat_dump(gt, &p); } -static int gt_fw_domain_init(struct xe_gt *gt) +static int gt_init_with_gt_forcewake(struct xe_gt *gt) { unsigned int fw_ref; int err; @@ -442,7 +461,15 @@ static int gt_fw_domain_init(struct xe_gt *gt) if (!fw_ref) return -ETIMEDOUT; - if (!xe_gt_is_media_type(gt)) { + err = xe_uc_init(>->uc); + if (err) + goto err_force_wake; + + xe_gt_topology_init(gt); + xe_gt_mcr_init(gt); + xe_gt_enable_host_l2_vram(gt); + + if (xe_gt_is_main_type(gt)) { err = xe_ggtt_init(gt_to_tile(gt)->mem.ggtt); if (err) goto err_force_wake; @@ -457,8 +484,10 @@ static int gt_fw_domain_init(struct xe_gt *gt) xe_gt_mcr_init(gt); err = xe_hw_engines_init_early(gt); - if (err) + if (err) { + dump_pat_on_error(gt); goto err_force_wake; + } err = xe_hw_engine_class_sysfs_init(gt); if (err) @@ -479,13 +508,12 @@ static int gt_fw_domain_init(struct xe_gt *gt) return 0; err_force_wake: - dump_pat_on_error(gt); xe_force_wake_put(gt_to_fw(gt), fw_ref); return err; } -static int all_fw_domain_init(struct xe_gt *gt) +static int gt_init_with_all_forcewake(struct xe_gt *gt) { unsigned int fw_ref; int err; @@ -518,7 +546,7 @@ static int all_fw_domain_init(struct xe_gt *gt) if (err) goto err_force_wake; - if (!xe_gt_is_media_type(gt)) { + if (xe_gt_is_main_type(gt)) { /* * USM has its only SA pool to non-block behind user operations */ @@ -534,17 +562,15 @@ static int all_fw_domain_init(struct xe_gt *gt) } } - if (!xe_gt_is_media_type(gt)) { + if (xe_gt_is_main_type(gt)) { struct xe_tile *tile = gt_to_tile(gt); - tile->migrate = xe_migrate_init(tile); - if (IS_ERR(tile->migrate)) { - err = PTR_ERR(tile->migrate); + err = xe_migrate_init(tile->migrate); + if (err) goto err_force_wake; - } } - err = xe_uc_init_hw(>->uc); + err = xe_uc_load_hw(>->uc); if (err) goto err_force_wake; @@ -554,7 +580,7 @@ static int all_fw_domain_init(struct xe_gt *gt) xe_gt_apply_ccs_mode(gt); } - if (IS_SRIOV_PF(gt_to_xe(gt)) && !xe_gt_is_media_type(gt)) + if (IS_SRIOV_PF(gt_to_xe(gt)) && xe_gt_is_main_type(gt)) xe_lmtt_init_hw(>_to_tile(gt)->sriov.pf.lmtt); if (IS_SRIOV_PF(gt_to_xe(gt))) { @@ -572,39 +598,6 @@ err_force_wake: return err; } -/* - * Initialize enough GT to be able to load GuC in order to obtain hwconfig and - * enable CTB communication. 
- */ -int xe_gt_init_hwconfig(struct xe_gt *gt) -{ - unsigned int fw_ref; - int err; - - fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); - if (!fw_ref) - return -ETIMEDOUT; - - xe_gt_mcr_init_early(gt); - xe_pat_init(gt); - - err = xe_uc_init(&gt->uc); - if (err) - goto out_fw; - - err = xe_uc_init_hwconfig(&gt->uc); - if (err) - goto out_fw; - - xe_gt_topology_init(gt); - xe_gt_mcr_init(gt); - xe_gt_enable_host_l2_vram(gt); - -out_fw: - xe_force_wake_put(gt_to_fw(gt), fw_ref); - return err; -} - static void xe_gt_fini(void *arg) { struct xe_gt *gt = arg; @@ -632,15 +625,15 @@ int xe_gt_init(struct xe_gt *gt) if (err) return err; - err = xe_gt_pagefault_init(gt); + err = xe_gt_sysfs_init(gt); if (err) return err; - err = xe_gt_sysfs_init(gt); + err = gt_init_with_gt_forcewake(gt); if (err) return err; - err = gt_fw_domain_init(gt); + err = xe_gt_pagefault_init(gt); if (err) return err; @@ -654,7 +647,7 @@ int xe_gt_init(struct xe_gt *gt) xe_force_wake_init_engines(gt, gt_to_fw(gt)); - err = all_fw_domain_init(gt); + err = gt_init_with_all_forcewake(gt); if (err) return err; @@ -742,7 +735,7 @@ static int vf_gt_restart(struct xe_gt *gt) if (err) return err; - err = xe_uc_init_hw(&gt->uc); + err = xe_uc_load_hw(&gt->uc); if (err) return err; @@ -780,11 +773,11 @@ static int do_gt_restart(struct xe_gt *gt) if (err) return err; - err = xe_uc_init_hw(&gt->uc); + err = xe_uc_load_hw(&gt->uc); if (err) return err; - if (IS_SRIOV_PF(gt_to_xe(gt)) && !xe_gt_is_media_type(gt)) + if (IS_SRIOV_PF(gt_to_xe(gt)) && xe_gt_is_main_type(gt)) xe_lmtt_init_hw(&gt_to_tile(gt)->sriov.pf.lmtt); if (IS_SRIOV_PF(gt_to_xe(gt))) @@ -810,6 +803,11 @@ static int do_gt_restart(struct xe_gt *gt) return 0; } +static int gt_wait_reset_unblock(struct xe_gt *gt) +{ + return xe_guc_wait_reset_unblock(&gt->uc.guc); +} + static int gt_reset(struct xe_gt *gt) { unsigned int fw_ref; @@ -824,6 +822,10 @@ static int gt_reset(struct xe_gt *gt) xe_gt_info(gt, "reset started\n"); + err = gt_wait_reset_unblock(gt); + if (!err) + xe_gt_warn(gt, "reset block failed to get lifted"); + xe_pm_runtime_get(gt_to_xe(gt)); if (xe_fault_inject_gt_reset()) { @@ -839,13 +841,16 @@ static int gt_reset(struct xe_gt *gt) goto err_out; } + if (IS_SRIOV_PF(gt_to_xe(gt))) + xe_gt_sriov_pf_stop_prepare(gt); + xe_uc_gucrc_disable(&gt->uc); xe_uc_stop_prepare(&gt->uc); xe_gt_pagefault_reset(gt); xe_uc_stop(&gt->uc); - xe_gt_tlb_invalidation_reset(gt); + xe_tlb_inval_reset(&gt->tlb_inval); err = do_gt_reset(gt); if (err) @@ -961,7 +966,7 @@ int xe_gt_sanitize_freq(struct xe_gt *gt) if ((!xe_uc_fw_is_available(&gt->uc.gsc.fw) || xe_uc_fw_is_loaded(&gt->uc.gsc.fw) || xe_uc_fw_is_in_error_state(&gt->uc.gsc.fw)) && - XE_WA(gt, 22019338487)) + XE_GT_WA(gt, 22019338487)) ret = xe_guc_pc_restore_stashed_freq(&gt->uc.guc.pc); return ret; @@ -1059,5 +1064,5 @@ void xe_gt_declare_wedged(struct xe_gt *gt) xe_gt_assert(gt, gt_to_xe(gt)->wedged.mode); xe_uc_declare_wedged(&gt->uc); - xe_gt_tlb_invalidation_reset(gt); + xe_tlb_inval_reset(&gt->tlb_inval); } diff --git a/drivers/gpu/drm/xe/xe_gt.h b/drivers/gpu/drm/xe/xe_gt.h index 187fa6490eaf..41880979f4de 100644 --- a/drivers/gpu/drm/xe/xe_gt.h +++ b/drivers/gpu/drm/xe/xe_gt.h @@ -24,11 +24,10 @@ extern struct fault_attr gt_reset_failure; static inline bool xe_fault_inject_gt_reset(void) { - return should_fail(&gt_reset_failure, 1); + return IS_ENABLED(CONFIG_DEBUG_FS) && should_fail(&gt_reset_failure, 1); } struct xe_gt *xe_gt_alloc(struct xe_tile *tile); -int xe_gt_init_hwconfig(struct xe_gt *gt); int xe_gt_init_early(struct xe_gt *gt); int 
xe_gt_init(struct xe_gt *gt); void xe_gt_mmio_init(struct xe_gt *gt); @@ -107,6 +106,11 @@ static inline bool xe_gt_has_indirect_ring_state(struct xe_gt *gt) xe_device_uc_enabled(gt_to_xe(gt)); } +static inline bool xe_gt_is_main_type(struct xe_gt *gt) +{ + return gt->info.type == XE_GT_TYPE_MAIN; +} + static inline bool xe_gt_is_media_type(struct xe_gt *gt) { return gt->info.type == XE_GT_TYPE_MEDIA; diff --git a/drivers/gpu/drm/xe/xe_gt_debugfs.c b/drivers/gpu/drm/xe/xe_gt_debugfs.c index 848618acdca8..f253e2df4907 100644 --- a/drivers/gpu/drm/xe/xe_gt_debugfs.c +++ b/drivers/gpu/drm/xe/xe_gt_debugfs.c @@ -29,7 +29,9 @@ #include "xe_pm.h" #include "xe_reg_sr.h" #include "xe_reg_whitelist.h" +#include "xe_sa.h" #include "xe_sriov.h" +#include "xe_sriov_vf_ccs.h" #include "xe_tuning.h" #include "xe_uc_debugfs.h" #include "xe_wa.h" @@ -122,18 +124,6 @@ static int powergate_info(struct xe_gt *gt, struct drm_printer *p) return ret; } -static int sa_info(struct xe_gt *gt, struct drm_printer *p) -{ - struct xe_tile *tile = gt_to_tile(gt); - - xe_pm_runtime_get(gt_to_xe(gt)); - drm_suballoc_dump_debug_info(&tile->mem.kernel_bb_pool->base, p, - tile->mem.kernel_bb_pool->gpu_addr); - xe_pm_runtime_put(gt_to_xe(gt)); - - return 0; -} - static int topology(struct xe_gt *gt, struct drm_printer *p) { xe_pm_runtime_get(gt_to_xe(gt)); @@ -288,7 +278,6 @@ static int hwconfig(struct xe_gt *gt, struct drm_printer *p) * - without access to the PF specific data */ static const struct drm_info_list vf_safe_debugfs_list[] = { - {"sa_info", .show = xe_gt_debugfs_simple_show, .data = sa_info}, {"topology", .show = xe_gt_debugfs_simple_show, .data = topology}, {"ggtt", .show = xe_gt_debugfs_simple_show, .data = ggtt}, {"register-save-restore", .show = xe_gt_debugfs_simple_show, .data = register_save_restore}, @@ -299,7 +288,6 @@ static const struct drm_info_list vf_safe_debugfs_list[] = { {"default_lrc_bcs", .show = xe_gt_debugfs_simple_show, .data = bcs_default_lrc}, {"default_lrc_vcs", .show = xe_gt_debugfs_simple_show, .data = vcs_default_lrc}, {"default_lrc_vecs", .show = xe_gt_debugfs_simple_show, .data = vecs_default_lrc}, - {"stats", .show = xe_gt_debugfs_simple_show, .data = xe_gt_stats_print_info}, {"hwconfig", .show = xe_gt_debugfs_simple_show, .data = hwconfig}, }; @@ -328,6 +316,24 @@ static ssize_t write_to_gt_call(const char __user *userbuf, size_t count, loff_t return count; } +static ssize_t stats_write(struct file *file, const char __user *userbuf, + size_t count, loff_t *ppos) +{ + struct seq_file *s = file->private_data; + struct xe_gt *gt = s->private; + + return write_to_gt_call(userbuf, count, ppos, xe_gt_stats_clear, gt); +} + +static int stats_show(struct seq_file *s, void *unused) +{ + struct drm_printer p = drm_seq_file_printer(s); + struct xe_gt *gt = s->private; + + return xe_gt_stats_print_info(gt, &p); +} +DEFINE_SHOW_STORE_ATTRIBUTE(stats); + static void force_reset(struct xe_gt *gt) { struct xe_device *xe = gt_to_xe(gt); @@ -388,13 +394,18 @@ void xe_gt_debugfs_register(struct xe_gt *gt) { struct xe_device *xe = gt_to_xe(gt); struct drm_minor *minor = gt_to_xe(gt)->drm.primary; + struct dentry *parent = gt->tile->debugfs; struct dentry *root; + char symlink[16]; char name[8]; xe_gt_assert(gt, minor->debugfs_root); + if (IS_ERR(parent)) + return; + snprintf(name, sizeof(name), "gt%d", gt->info.id); - root = debugfs_create_dir(name, minor->debugfs_root); + root = debugfs_create_dir(name, parent); if (IS_ERR(root)) { drm_warn(&xe->drm, "Create GT directory failed"); return; @@ -408,6 
+419,7 @@ void xe_gt_debugfs_register(struct xe_gt *gt) root->d_inode->i_private = gt; /* VF safe */ + debugfs_create_file("stats", 0600, root, gt, &stats_fops); debugfs_create_file("force_reset", 0600, root, gt, &force_reset_fops); debugfs_create_file("force_reset_sync", 0600, root, gt, &force_reset_sync_fops); @@ -426,4 +438,11 @@ void xe_gt_debugfs_register(struct xe_gt *gt) xe_gt_sriov_pf_debugfs_register(gt, root); else if (IS_SRIOV_VF(xe)) xe_gt_sriov_vf_debugfs_register(gt, root); + + /* + * Backwards compatibility only: create a link for the legacy clients + * who may expect gt/ directory at the root level, not the tile level. + */ + snprintf(symlink, sizeof(symlink), "tile%u/%s", gt->tile->id, name); + debugfs_create_symlink(name, minor->debugfs_root, symlink); } diff --git a/drivers/gpu/drm/xe/xe_gt_freq.c b/drivers/gpu/drm/xe/xe_gt_freq.c index 60d9354e7dbf..4ff1b6b58d6b 100644 --- a/drivers/gpu/drm/xe/xe_gt_freq.c +++ b/drivers/gpu/drm/xe/xe_gt_freq.c @@ -227,6 +227,33 @@ static ssize_t max_freq_store(struct kobject *kobj, } static struct kobj_attribute attr_max_freq = __ATTR_RW(max_freq); +static ssize_t power_profile_show(struct kobject *kobj, + struct kobj_attribute *attr, + char *buff) +{ + struct device *dev = kobj_to_dev(kobj); + + xe_guc_pc_get_power_profile(dev_to_pc(dev), buff); + + return strlen(buff); +} + +static ssize_t power_profile_store(struct kobject *kobj, + struct kobj_attribute *attr, + const char *buff, size_t count) +{ + struct device *dev = kobj_to_dev(kobj); + struct xe_guc_pc *pc = dev_to_pc(dev); + int err; + + xe_pm_runtime_get(dev_to_xe(dev)); + err = xe_guc_pc_set_power_profile(pc, buff); + xe_pm_runtime_put(dev_to_xe(dev)); + + return err ?: count; +} +static struct kobj_attribute attr_power_profile = __ATTR_RW(power_profile); + static const struct attribute *freq_attrs[] = { &attr_act_freq.attr, &attr_cur_freq.attr, @@ -236,6 +263,7 @@ static const struct attribute *freq_attrs[] = { &attr_rpn_freq.attr, &attr_min_freq.attr, &attr_max_freq.attr, + &attr_power_profile.attr, NULL }; diff --git a/drivers/gpu/drm/xe/xe_gt_idle.c b/drivers/gpu/drm/xe/xe_gt_idle.c index c11206410a4d..bdc9d9877ec4 100644 --- a/drivers/gpu/drm/xe/xe_gt_idle.c +++ b/drivers/gpu/drm/xe/xe_gt_idle.c @@ -121,9 +121,12 @@ void xe_gt_idle_enable_pg(struct xe_gt *gt) if (vcs_mask || vecs_mask) gtidle->powergate_enable = MEDIA_POWERGATE_ENABLE; - if (!xe_gt_is_media_type(gt)) + if (xe_gt_is_main_type(gt)) gtidle->powergate_enable |= RENDER_POWERGATE_ENABLE; + if (MEDIA_VERx100(xe) >= 1100 && MEDIA_VERx100(xe) < 1255) + gtidle->powergate_enable |= MEDIA_SAMPLERS_POWERGATE_ENABLE; + if (xe->info.platform != XE_DG1) { for (i = XE_HW_ENGINE_VCS0, j = 0; i <= XE_HW_ENGINE_VCS7; ++i, ++j) { if ((gt->info.engine_mask & BIT(i))) @@ -246,6 +249,11 @@ int xe_gt_idle_pg_print(struct xe_gt *gt, struct drm_printer *p) drm_printf(p, "Media Slice%d Power Gate Status: %s\n", n, str_up_down(pg_status & media_slices[n].status_bit)); } + + if (MEDIA_VERx100(xe) >= 1100 && MEDIA_VERx100(xe) < 1255) + drm_printf(p, "Media Samplers Power Gating Enabled: %s\n", + str_yes_no(pg_enabled & MEDIA_SAMPLERS_POWERGATE_ENABLE)); + return 0; } @@ -322,15 +330,11 @@ static void gt_idle_fini(void *arg) { struct kobject *kobj = arg; struct xe_gt *gt = kobj_to_gt(kobj->parent); - unsigned int fw_ref; xe_gt_idle_disable_pg(gt); - if (gt_to_xe(gt)->info.skip_guc_pc) { - fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); + if (gt_to_xe(gt)->info.skip_guc_pc) xe_gt_idle_disable_c6(gt); - xe_force_wake_put(gt_to_fw(gt), 
fw_ref); - } sysfs_remove_files(kobj, gt_idle_attrs); kobject_put(kobj); @@ -390,14 +394,23 @@ void xe_gt_idle_enable_c6(struct xe_gt *gt) RC_CTL_HW_ENABLE | RC_CTL_TO_MODE | RC_CTL_RC6_ENABLE); } -void xe_gt_idle_disable_c6(struct xe_gt *gt) +int xe_gt_idle_disable_c6(struct xe_gt *gt) { + unsigned int fw_ref; + xe_device_assert_mem_access(gt_to_xe(gt)); - xe_force_wake_assert_held(gt_to_fw(gt), XE_FW_GT); if (IS_SRIOV_VF(gt_to_xe(gt))) - return; + return 0; + + fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); + if (!fw_ref) + return -ETIMEDOUT; xe_mmio_write32(>->mmio, RC_CONTROL, 0); xe_mmio_write32(>->mmio, RC_STATE, 0); + + xe_force_wake_put(gt_to_fw(gt), fw_ref); + + return 0; } diff --git a/drivers/gpu/drm/xe/xe_gt_idle.h b/drivers/gpu/drm/xe/xe_gt_idle.h index 591a01e181bc..9c34a155e102 100644 --- a/drivers/gpu/drm/xe/xe_gt_idle.h +++ b/drivers/gpu/drm/xe/xe_gt_idle.h @@ -13,7 +13,7 @@ struct xe_gt; int xe_gt_idle_init(struct xe_gt_idle *gtidle); void xe_gt_idle_enable_c6(struct xe_gt *gt); -void xe_gt_idle_disable_c6(struct xe_gt *gt); +int xe_gt_idle_disable_c6(struct xe_gt *gt); void xe_gt_idle_enable_pg(struct xe_gt *gt); void xe_gt_idle_disable_pg(struct xe_gt *gt); int xe_gt_idle_pg_print(struct xe_gt *gt, struct drm_printer *p); diff --git a/drivers/gpu/drm/xe/xe_gt_mcr.c b/drivers/gpu/drm/xe/xe_gt_mcr.c index d4d9730f0d2c..8fb1cae91724 100644 --- a/drivers/gpu/drm/xe/xe_gt_mcr.c +++ b/drivers/gpu/drm/xe/xe_gt_mcr.c @@ -46,8 +46,6 @@ * MCR registers are not available on Virtual Function (VF). */ -#define STEER_SEMAPHORE XE_REG(0xFD0) - static inline struct xe_reg to_xe_reg(struct xe_reg_mcr reg_mcr) { return reg_mcr.__reg; @@ -364,7 +362,7 @@ fallback: * @group: pointer to storage for steering group ID * @instance: pointer to storage for steering instance ID */ -void xe_gt_mcr_get_dss_steering(struct xe_gt *gt, unsigned int dss, u16 *group, u16 *instance) +void xe_gt_mcr_get_dss_steering(const struct xe_gt *gt, unsigned int dss, u16 *group, u16 *instance) { xe_gt_assert(gt, dss < XE_MAX_DSS_FUSE_BITS); @@ -420,12 +418,6 @@ static void init_steering_sqidi_psmi(struct xe_gt *gt) gt->steering[SQIDI_PSMI].instance_target = select & 0x1; } -static void init_steering_inst0(struct xe_gt *gt) -{ - gt->steering[INSTANCE0].group_target = 0; /* unused */ - gt->steering[INSTANCE0].instance_target = 0; /* unused */ -} - static const struct { const char *name; void (*init)(struct xe_gt *gt); @@ -436,7 +428,7 @@ static const struct { [DSS] = { "DSS", init_steering_dss }, [OADDRM] = { "OADDRM / GPMXMT", init_steering_oaddrm }, [SQIDI_PSMI] = { "SQIDI_PSMI", init_steering_sqidi_psmi }, - [INSTANCE0] = { "INSTANCE 0", init_steering_inst0 }, + [INSTANCE0] = { "INSTANCE 0", NULL }, [IMPLICIT_STEERING] = { "IMPLICIT", NULL }, }; @@ -446,25 +438,17 @@ static const struct { * * Perform early software only initialization of the MCR lock to allow * the synchronization on accessing the STEER_SEMAPHORE register and - * use the xe_gt_mcr_multicast_write() function. + * use the xe_gt_mcr_multicast_write() function, plus the minimum + * safe MCR registers required for VRAM/CCS probing. */ void xe_gt_mcr_init_early(struct xe_gt *gt) { + struct xe_device *xe = gt_to_xe(gt); + BUILD_BUG_ON(IMPLICIT_STEERING + 1 != NUM_STEERING_TYPES); BUILD_BUG_ON(ARRAY_SIZE(xe_steering_types) != NUM_STEERING_TYPES); spin_lock_init(>->mcr_lock); -} - -/** - * xe_gt_mcr_init - Normal initialization of the MCR support - * @gt: GT structure - * - * Perform normal initialization of the MCR for all usages. 
- */ -void xe_gt_mcr_init(struct xe_gt *gt) -{ - struct xe_device *xe = gt_to_xe(gt); if (IS_SRIOV_VF(xe)) return; @@ -505,10 +489,27 @@ void xe_gt_mcr_init(struct xe_gt *gt) } } + /* Mark instance 0 as initialized, we need this early for VRAM and CCS probe. */ + gt->steering[INSTANCE0].initialized = true; +} + +/** + * xe_gt_mcr_init - Normal initialization of the MCR support + * @gt: GT structure + * + * Perform normal initialization of the MCR for all usages. + */ +void xe_gt_mcr_init(struct xe_gt *gt) +{ + if (IS_SRIOV_VF(gt_to_xe(gt))) + return; + /* Select non-terminated steering target for each type */ - for (int i = 0; i < NUM_STEERING_TYPES; i++) + for (int i = 0; i < NUM_STEERING_TYPES; i++) { + gt->steering[i].initialized = true; if (gt->steering[i].ranges && xe_steering_types[i].init) xe_steering_types[i].init(gt); + } } /** @@ -530,7 +531,7 @@ void xe_gt_mcr_set_implicit_defaults(struct xe_gt *gt) u32 steer_val = REG_FIELD_PREP(MCR_SLICE_MASK, 0) | REG_FIELD_PREP(MCR_SUBSLICE_MASK, 2); - xe_mmio_write32(>->mmio, MCFG_MCR_SELECTOR, steer_val); + xe_mmio_write32(>->mmio, STEER_SEMAPHORE, steer_val); xe_mmio_write32(>->mmio, SF_MCR_SELECTOR, steer_val); /* * For GAM registers, all reads should be directed to instance 1 @@ -570,6 +571,10 @@ bool xe_gt_mcr_get_nonterminated_steering(struct xe_gt *gt, for (int i = 0; gt->steering[type].ranges[i].end > 0; i++) { if (xe_mmio_in_range(>->mmio, >->steering[type].ranges[i], reg)) { + drm_WARN(>_to_xe(gt)->drm, !gt->steering[type].initialized, + "Uninitialized usage of MCR register %s/%#x\n", + xe_steering_types[type].name, reg.addr); + *group = gt->steering[type].group_target; *instance = gt->steering[type].instance_target; return true; diff --git a/drivers/gpu/drm/xe/xe_gt_mcr.h b/drivers/gpu/drm/xe/xe_gt_mcr.h index bc06520befab..283a1c9770e2 100644 --- a/drivers/gpu/drm/xe/xe_gt_mcr.h +++ b/drivers/gpu/drm/xe/xe_gt_mcr.h @@ -31,7 +31,8 @@ bool xe_gt_mcr_get_nonterminated_steering(struct xe_gt *gt, u8 *group, u8 *instance); void xe_gt_mcr_steering_dump(struct xe_gt *gt, struct drm_printer *p); -void xe_gt_mcr_get_dss_steering(struct xe_gt *gt, unsigned int dss, u16 *group, u16 *instance); +void xe_gt_mcr_get_dss_steering(const struct xe_gt *gt, + unsigned int dss, u16 *group, u16 *instance); u32 xe_gt_mcr_steering_info_to_dss_id(struct xe_gt *gt, u16 group, u16 instance); /* diff --git a/drivers/gpu/drm/xe/xe_gt_pagefault.c b/drivers/gpu/drm/xe/xe_gt_pagefault.c index 3522865c67c9..a054d6010ae0 100644 --- a/drivers/gpu/drm/xe/xe_gt_pagefault.c +++ b/drivers/gpu/drm/xe/xe_gt_pagefault.c @@ -16,13 +16,13 @@ #include "xe_gt.h" #include "xe_gt_printk.h" #include "xe_gt_stats.h" -#include "xe_gt_tlb_invalidation.h" #include "xe_guc.h" #include "xe_guc_ct.h" #include "xe_migrate.h" #include "xe_svm.h" #include "xe_trace_bo.h" #include "xe_vm.h" +#include "xe_vram_types.h" struct pagefault { u64 page_addr; @@ -74,7 +74,7 @@ static bool vma_is_valid(struct xe_tile *tile, struct xe_vma *vma) } static int xe_pf_begin(struct drm_exec *exec, struct xe_vma *vma, - bool atomic, unsigned int id) + bool need_vram_move, struct xe_vram_region *vram) { struct xe_bo *bo = xe_vma_bo(vma); struct xe_vm *vm = xe_vma_vm(vma); @@ -84,24 +84,11 @@ static int xe_pf_begin(struct drm_exec *exec, struct xe_vma *vma, if (err) return err; - if (atomic && IS_DGFX(vm->xe)) { - if (xe_vma_is_userptr(vma)) { - err = -EACCES; - return err; - } - - /* Migrate to VRAM, move should invalidate the VMA first */ - err = xe_bo_migrate(bo, XE_PL_VRAM0 + id); - if (err) - return 
err; - } else if (bo) { - /* Create backing store if needed */ - err = xe_bo_validate(bo, vm, true); - if (err) - return err; - } + if (!bo) + return 0; - return 0; + return need_vram_move ? xe_bo_migrate(bo, vram->placement, NULL, exec) : + xe_bo_validate(bo, vm, true, exec); } static int handle_vma_pagefault(struct xe_gt *gt, struct xe_vma *vma, @@ -109,13 +96,17 @@ static int handle_vma_pagefault(struct xe_gt *gt, struct xe_vma *vma, { struct xe_vm *vm = xe_vma_vm(vma); struct xe_tile *tile = gt_to_tile(gt); + struct xe_validation_ctx ctx; struct drm_exec exec; struct dma_fence *fence; - ktime_t end = 0; - int err; + int err, needs_vram; lockdep_assert_held_write(&vm->lock); + needs_vram = xe_vma_need_vram_for_atomic(vm->xe, vma, atomic); + if (needs_vram < 0 || (needs_vram && xe_vma_is_userptr(vma))) + return needs_vram < 0 ? needs_vram : -EACCES; + xe_gt_stats_incr(gt, XE_GT_STATS_ID_VMA_PAGEFAULT_COUNT, 1); xe_gt_stats_incr(gt, XE_GT_STATS_ID_VMA_PAGEFAULT_KB, xe_vma_size(vma) / 1024); @@ -136,22 +127,22 @@ retry_userptr: } /* Lock VM and BOs dma-resv */ - drm_exec_init(&exec, 0, 0); + xe_validation_ctx_init(&ctx, &vm->xe->val, &exec, (struct xe_val_flags) {}); drm_exec_until_all_locked(&exec) { - err = xe_pf_begin(&exec, vma, atomic, tile->id); + err = xe_pf_begin(&exec, vma, needs_vram == 1, tile->mem.vram); drm_exec_retry_on_contention(&exec); - if (xe_vm_validate_should_retry(&exec, err, &end)) - err = -EAGAIN; + xe_validation_retry_on_oom(&ctx, &err); if (err) goto unlock_dma_resv; /* Bind VMA only to the GT that has faulted */ trace_xe_vma_pf_bind(vma); + xe_vm_set_validation_exec(vm, &exec); fence = xe_vma_rebind(vm, vma, BIT(tile->id)); + xe_vm_set_validation_exec(vm, NULL); if (IS_ERR(fence)) { err = PTR_ERR(fence); - if (xe_vm_validate_should_retry(&exec, err, &end)) - err = -EAGAIN; + xe_validation_retry_on_oom(&ctx, &err); goto unlock_dma_resv; } } @@ -160,7 +151,7 @@ retry_userptr: dma_fence_put(fence); unlock_dma_resv: - drm_exec_fini(&exec); + xe_validation_ctx_fini(&ctx); if (err == -EAGAIN) goto retry_userptr; @@ -419,6 +410,7 @@ static int xe_alloc_pf_queue(struct xe_gt *gt, struct pf_queue *pf_queue) #define PF_MULTIPLIER 8 pf_queue->num_dw = (num_eus + XE_NUM_HW_ENGINES) * PF_MSG_LEN_DW * PF_MULTIPLIER; + pf_queue->num_dw = roundup_pow_of_two(pf_queue->num_dw); #undef PF_MULTIPLIER pf_queue->gt = gt; @@ -541,6 +533,7 @@ static int handle_acc(struct xe_gt *gt, struct acc *acc) { struct xe_device *xe = gt_to_xe(gt); struct xe_tile *tile = gt_to_tile(gt); + struct xe_validation_ctx ctx; struct drm_exec exec; struct xe_vm *vm; struct xe_vma *vma; @@ -570,15 +563,14 @@ static int handle_acc(struct xe_gt *gt, struct acc *acc) goto unlock_vm; /* Lock VM and BOs dma-resv */ - drm_exec_init(&exec, 0, 0); + xe_validation_ctx_init(&ctx, &vm->xe->val, &exec, (struct xe_val_flags) {}); drm_exec_until_all_locked(&exec) { - ret = xe_pf_begin(&exec, vma, true, tile->id); + ret = xe_pf_begin(&exec, vma, IS_DGFX(vm->xe), tile->mem.vram); drm_exec_retry_on_contention(&exec); - if (ret) - break; + xe_validation_retry_on_oom(&ctx, &ret); } - drm_exec_fini(&exec); + xe_validation_ctx_fini(&ctx); unlock_vm: up_read(&vm->lock); xe_vm_put(vm); diff --git a/drivers/gpu/drm/xe/xe_gt_printk.h b/drivers/gpu/drm/xe/xe_gt_printk.h index 11da0228cea7..1313d32862db 100644 --- a/drivers/gpu/drm/xe/xe_gt_printk.h +++ b/drivers/gpu/drm/xe/xe_gt_printk.h @@ -6,18 +6,22 @@ #ifndef _XE_GT_PRINTK_H_ #define _XE_GT_PRINTK_H_ -#include <drm/drm_print.h> - #include "xe_gt_types.h" +#include 
"xe_tile_printk.h" + +#define __XE_GT_PRINTK_FMT(_gt, _fmt, _args...) "GT%u: " _fmt, (_gt)->info.id, ##_args #define xe_gt_printk(_gt, _level, _fmt, ...) \ - drm_##_level(>_to_xe(_gt)->drm, "GT%u: " _fmt, (_gt)->info.id, ##__VA_ARGS__) + xe_tile_printk((_gt)->tile, _level, __XE_GT_PRINTK_FMT((_gt), _fmt, ##__VA_ARGS__)) + +#define xe_gt_err(_gt, _fmt, ...) \ + xe_gt_printk((_gt), err, _fmt, ##__VA_ARGS__) #define xe_gt_err_once(_gt, _fmt, ...) \ xe_gt_printk((_gt), err_once, _fmt, ##__VA_ARGS__) -#define xe_gt_err(_gt, _fmt, ...) \ - xe_gt_printk((_gt), err, _fmt, ##__VA_ARGS__) +#define xe_gt_err_ratelimited(_gt, _fmt, ...) \ + xe_gt_printk((_gt), err_ratelimited, _fmt, ##__VA_ARGS__) #define xe_gt_warn(_gt, _fmt, ...) \ xe_gt_printk((_gt), warn, _fmt, ##__VA_ARGS__) @@ -31,20 +35,20 @@ #define xe_gt_dbg(_gt, _fmt, ...) \ xe_gt_printk((_gt), dbg, _fmt, ##__VA_ARGS__) -#define xe_gt_err_ratelimited(_gt, _fmt, ...) \ - xe_gt_printk((_gt), err_ratelimited, _fmt, ##__VA_ARGS__) +#define xe_gt_WARN_type(_gt, _type, _condition, _fmt, ...) \ + xe_tile_WARN##_type((_gt)->tile, _condition, _fmt, ## __VA_ARGS__) #define xe_gt_WARN(_gt, _condition, _fmt, ...) \ - drm_WARN(>_to_xe(_gt)->drm, _condition, "GT%u: " _fmt, (_gt)->info.id, ##__VA_ARGS__) + xe_gt_WARN_type((_gt),, _condition, __XE_GT_PRINTK_FMT((_gt), _fmt, ##__VA_ARGS__)) #define xe_gt_WARN_ONCE(_gt, _condition, _fmt, ...) \ - drm_WARN_ONCE(>_to_xe(_gt)->drm, _condition, "GT%u: " _fmt, (_gt)->info.id, ##__VA_ARGS__) + xe_gt_WARN_type((_gt), _ONCE, _condition, __XE_GT_PRINTK_FMT((_gt), _fmt, ##__VA_ARGS__)) #define xe_gt_WARN_ON(_gt, _condition) \ - xe_gt_WARN((_gt), _condition, "%s(%s)", "gt_WARN_ON", __stringify(_condition)) + xe_gt_WARN((_gt), _condition, "%s(%s)", "WARN_ON", __stringify(_condition)) #define xe_gt_WARN_ON_ONCE(_gt, _condition) \ - xe_gt_WARN_ONCE((_gt), _condition, "%s(%s)", "gt_WARN_ON_ONCE", __stringify(_condition)) + xe_gt_WARN_ONCE((_gt), _condition, "%s(%s)", "WARN_ON_ONCE", __stringify(_condition)) static inline void __xe_gt_printfn_err(struct drm_printer *p, struct va_format *vaf) { @@ -67,12 +71,12 @@ static inline void __xe_gt_printfn_dbg(struct drm_printer *p, struct va_format * /* * The original xe_gt_dbg() callsite annotations are useless here, - * redirect to the tweaked drm_dbg_printer() instead. + * redirect to the tweaked xe_tile_dbg_printer() instead. 
*/ - dbg = drm_dbg_printer(&gt_to_xe(gt)->drm, DRM_UT_DRIVER, NULL); + dbg = xe_tile_dbg_printer((gt)->tile); dbg.origin = p->origin; - drm_printf(&dbg, "GT%u: %pV", gt->info.id, vaf); + drm_printf(&dbg, __XE_GT_PRINTK_FMT(gt, "%pV", vaf)); } /** diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf.c index c08efca6420e..c4dda87b47cc 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf.c +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf.c @@ -16,6 +16,7 @@ #include "xe_gt_sriov_pf_migration.h" #include "xe_gt_sriov_pf_service.h" #include "xe_gt_sriov_printk.h" +#include "xe_guc_submit.h" #include "xe_mmio.h" #include "xe_pm.h" @@ -47,9 +48,21 @@ static int pf_alloc_metadata(struct xe_gt *gt) static void pf_init_workers(struct xe_gt *gt) { + xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt))); INIT_WORK(&gt->sriov.pf.workers.restart, pf_worker_restart_func); } +static void pf_fini_workers(struct xe_gt *gt) +{ + xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt))); + + if (disable_work_sync(&gt->sriov.pf.workers.restart)) { + xe_gt_sriov_dbg_verbose(gt, "pending restart disabled!\n"); + /* release an rpm reference taken on the worker's behalf */ + xe_pm_runtime_put(gt_to_xe(gt)); + } +} + /** * xe_gt_sriov_pf_init_early - Prepare SR-IOV PF data structures on PF. * @gt: the &xe_gt to initialize @@ -79,6 +92,21 @@ int xe_gt_sriov_pf_init_early(struct xe_gt *gt) return 0; } +static void pf_fini_action(void *arg) +{ + struct xe_gt *gt = arg; + + pf_fini_workers(gt); +} + +static int pf_init_late(struct xe_gt *gt) +{ + struct xe_device *xe = gt_to_xe(gt); + + xe_gt_assert(gt, IS_SRIOV_PF(xe)); + return devm_add_action_or_reset(xe->drm.dev, pf_fini_action, gt); +} + /** * xe_gt_sriov_pf_init - Prepare SR-IOV PF data structures on PF. * @gt: the &xe_gt to initialize @@ -95,7 +123,15 @@ int xe_gt_sriov_pf_init(struct xe_gt *gt) if (err) return err; - return xe_gt_sriov_pf_migration_init(gt); + err = xe_gt_sriov_pf_migration_init(gt); + if (err) + return err; + + err = pf_init_late(gt); + if (err) + return err; + + return 0; } static bool pf_needs_enable_ggtt_guest_update(struct xe_device *xe) @@ -172,13 +208,38 @@ void xe_gt_sriov_pf_sanitize_hw(struct xe_gt *gt, unsigned int vfid) pf_clear_vf_scratch_regs(gt, vfid); } +static void pf_cancel_restart(struct xe_gt *gt) +{ + xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt))); + + if (cancel_work_sync(&gt->sriov.pf.workers.restart)) { + xe_gt_sriov_dbg_verbose(gt, "pending restart canceled!\n"); + /* release an rpm reference taken on the worker's behalf */ + xe_pm_runtime_put(gt_to_xe(gt)); + } +} + +/** + * xe_gt_sriov_pf_stop_prepare() - Prepare to stop SR-IOV support. + * @gt: the &xe_gt + * + * This function can only be called on the PF. 
+ */ +void xe_gt_sriov_pf_stop_prepare(struct xe_gt *gt) +{ + pf_cancel_restart(gt); +} + static void pf_restart(struct xe_gt *gt) { struct xe_device *xe = gt_to_xe(gt); - xe_pm_runtime_get(xe); + xe_gt_assert(gt, !xe_pm_runtime_suspended(xe)); + xe_gt_sriov_pf_config_restart(gt); xe_gt_sriov_pf_control_restart(gt); + + /* release an rpm reference taken on our behalf */ xe_pm_runtime_put(xe); xe_gt_sriov_dbg(gt, "restart completed\n"); @@ -197,8 +258,13 @@ static void pf_queue_restart(struct xe_gt *gt) xe_gt_assert(gt, IS_SRIOV_PF(xe)); - if (!queue_work(xe->sriov.wq, &gt->sriov.pf.workers.restart)) + /* take an rpm reference on behalf of the worker */ + xe_pm_runtime_get_noresume(xe); + + if (!queue_work(xe->sriov.wq, &gt->sriov.pf.workers.restart)) { xe_gt_sriov_dbg(gt, "restart already in queue!\n"); + xe_pm_runtime_put(xe); + } } /** @@ -211,3 +277,27 @@ void xe_gt_sriov_pf_restart(struct xe_gt *gt) { pf_queue_restart(gt); } + +static void pf_flush_restart(struct xe_gt *gt) +{ + xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt))); + flush_work(&gt->sriov.pf.workers.restart); +} + +/** + * xe_gt_sriov_pf_wait_ready() - Wait until per-GT PF SR-IOV support is ready. + * @gt: the &xe_gt + * + * This function can only be called on PF. + * + * Return: 0 on success or a negative error code on failure. + */ +int xe_gt_sriov_pf_wait_ready(struct xe_gt *gt) +{ + /* don't wait if there is another ongoing reset */ + if (xe_guc_read_stopped(&gt->uc.guc)) + return -EBUSY; + + pf_flush_restart(gt); + return 0; +} diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf.h b/drivers/gpu/drm/xe/xe_gt_sriov_pf.h index f474509411c0..e7fde3f9937a 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf.h +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf.h @@ -11,8 +11,10 @@ struct xe_gt; #ifdef CONFIG_PCI_IOV int xe_gt_sriov_pf_init_early(struct xe_gt *gt); int xe_gt_sriov_pf_init(struct xe_gt *gt); +int xe_gt_sriov_pf_wait_ready(struct xe_gt *gt); void xe_gt_sriov_pf_init_hw(struct xe_gt *gt); void xe_gt_sriov_pf_sanitize_hw(struct xe_gt *gt, unsigned int vfid); +void xe_gt_sriov_pf_stop_prepare(struct xe_gt *gt); void xe_gt_sriov_pf_restart(struct xe_gt *gt); #else static inline int xe_gt_sriov_pf_init_early(struct xe_gt *gt) @@ -29,6 +31,10 @@ static inline void xe_gt_sriov_pf_init_hw(struct xe_gt *gt) { } +static inline void xe_gt_sriov_pf_stop_prepare(struct xe_gt *gt) +{ +} + static inline void xe_gt_sriov_pf_restart(struct xe_gt *gt) { } diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c index 3556c41c041b..6344b5205c08 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c @@ -33,6 +33,7 @@ #include "xe_migrate.h" #include "xe_sriov.h" #include "xe_ttm_vram_mgr.h" +#include "xe_vram_types.h" #include "xe_wopcm.h" #define make_u64_from_u32(hi, lo) ((u64)((u64)(u32)(hi) << 32 | (u32)(lo))) @@ -104,13 +105,13 @@ static int pf_push_vf_buf_klvs(struct xe_gt *gt, unsigned int vfid, u32 num_klvs } if (IS_ENABLED(CONFIG_DRM_XE_DEBUG_SRIOV)) { - struct drm_printer p = xe_gt_info_printer(gt); + struct drm_printer p = xe_gt_dbg_printer(gt); void *klvs = xe_guc_buf_cpu_ptr(buf); char name[8]; - xe_gt_sriov_info(gt, "pushed %s config with %u KLV%s:\n", - xe_sriov_function_name(vfid, name, sizeof(name)), - num_klvs, str_plural(num_klvs)); + xe_gt_sriov_dbg(gt, "pushed %s config with %u KLV%s:\n", + xe_sriov_function_name(vfid, name, sizeof(name)), + num_klvs, str_plural(num_klvs)); xe_guc_klv_print(klvs, num_dwords, &p); } @@ -238,26 +239,35 @@ static struct 
xe_gt_sriov_config *pf_pick_vf_config(struct xe_gt *gt, unsigned i } /* Return: number of configuration dwords written */ -static u32 encode_config_ggtt(u32 *cfg, const struct xe_gt_sriov_config *config, bool details) +static u32 encode_ggtt(u32 *cfg, u64 start, u64 size, bool details) { u32 n = 0; - if (xe_ggtt_node_allocated(config->ggtt_region)) { - if (details) { - cfg[n++] = PREP_GUC_KLV_TAG(VF_CFG_GGTT_START); - cfg[n++] = lower_32_bits(config->ggtt_region->base.start); - cfg[n++] = upper_32_bits(config->ggtt_region->base.start); - } - - cfg[n++] = PREP_GUC_KLV_TAG(VF_CFG_GGTT_SIZE); - cfg[n++] = lower_32_bits(config->ggtt_region->base.size); - cfg[n++] = upper_32_bits(config->ggtt_region->base.size); + if (details) { + cfg[n++] = PREP_GUC_KLV_TAG(VF_CFG_GGTT_START); + cfg[n++] = lower_32_bits(start); + cfg[n++] = upper_32_bits(start); } + cfg[n++] = PREP_GUC_KLV_TAG(VF_CFG_GGTT_SIZE); + cfg[n++] = lower_32_bits(size); + cfg[n++] = upper_32_bits(size); + return n; } /* Return: number of configuration dwords written */ +static u32 encode_config_ggtt(u32 *cfg, const struct xe_gt_sriov_config *config, bool details) +{ + struct xe_ggtt_node *node = config->ggtt_region; + + if (!xe_ggtt_node_allocated(node)) + return 0; + + return encode_ggtt(cfg, node->base.start, node->base.size, details); +} + +/* Return: number of configuration dwords written */ static u32 encode_config(u32 *cfg, const struct xe_gt_sriov_config *config, bool details) { u32 n = 0; @@ -282,8 +292,8 @@ static u32 encode_config(u32 *cfg, const struct xe_gt_sriov_config *config, bool if (config->lmem_obj) { cfg[n++] = PREP_GUC_KLV_TAG(VF_CFG_LMEM_SIZE); - cfg[n++] = lower_32_bits(config->lmem_obj->size); - cfg[n++] = upper_32_bits(config->lmem_obj->size); + cfg[n++] = lower_32_bits(xe_bo_size(config->lmem_obj)); + cfg[n++] = upper_32_bits(xe_bo_size(config->lmem_obj)); } cfg[n++] = PREP_GUC_KLV_TAG(VF_CFG_EXEC_QUANTUM); @@ -332,6 +342,17 @@ static int pf_push_full_vf_config(struct xe_gt *gt, unsigned int vfid) } xe_gt_assert(gt, num_dwords <= max_cfg_dwords); + if (vfid == PFID) { + u64 ggtt_start = xe_wopcm_size(gt_to_xe(gt)); + u64 ggtt_size = gt_to_tile(gt)->mem.ggtt->size - ggtt_start; + + /* plain PF config data will never include a real GGTT region */ + xe_gt_assert(gt, !encode_config_ggtt(cfg + num_dwords, config, true)); + + /* fake PF GGTT config covers full GGTT range except reserved WOPCM */ + num_dwords += encode_ggtt(cfg + num_dwords, ggtt_start, ggtt_size, true); + } + num_klvs = xe_guc_klv_count(cfg, num_dwords); err = pf_push_vf_buf_klvs(gt, vfid, num_klvs, buf, num_dwords); @@ -376,7 +397,7 @@ static u64 pf_get_spare_ggtt(struct xe_gt *gt) { u64 spare; - xe_gt_assert(gt, !xe_gt_is_media_type(gt)); + xe_gt_assert(gt, xe_gt_is_main_type(gt)); xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt))); lockdep_assert_held(xe_gt_sriov_pf_master_mutex(gt)); @@ -388,7 +409,7 @@ static u64 pf_get_spare_ggtt(struct xe_gt *gt) static int pf_set_spare_ggtt(struct xe_gt *gt, u64 size) { - xe_gt_assert(gt, !xe_gt_is_media_type(gt)); + xe_gt_assert(gt, xe_gt_is_main_type(gt)); xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt))); lockdep_assert_held(xe_gt_sriov_pf_master_mutex(gt)); @@ -443,7 +464,7 @@ static int pf_provision_vf_ggtt(struct xe_gt *gt, unsigned int vfid, u64 size) int err; xe_gt_assert(gt, vfid); - xe_gt_assert(gt, !xe_gt_is_media_type(gt)); + xe_gt_assert(gt, xe_gt_is_main_type(gt)); xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt))); size = round_up(size, alignment); @@ -492,7 +513,7 @@ static u64 pf_get_vf_config_ggtt(struct xe_gt 
*gt, unsigned int vfid) struct xe_gt_sriov_config *config = pf_pick_vf_config(gt, vfid); struct xe_ggtt_node *node = config->ggtt_region; - xe_gt_assert(gt, !xe_gt_is_media_type(gt)); + xe_gt_assert(gt, xe_gt_is_main_type(gt)); return xe_ggtt_node_allocated(node) ? node->base.size : 0; } @@ -560,7 +581,7 @@ int xe_gt_sriov_pf_config_set_ggtt(struct xe_gt *gt, unsigned int vfid, u64 size { int err; - xe_gt_assert(gt, !xe_gt_is_media_type(gt)); + xe_gt_assert(gt, xe_gt_is_main_type(gt)); mutex_lock(xe_gt_sriov_pf_master_mutex(gt)); if (vfid) @@ -622,7 +643,7 @@ int xe_gt_sriov_pf_config_bulk_set_ggtt(struct xe_gt *gt, unsigned int vfid, int err = 0; xe_gt_assert(gt, vfid); - xe_gt_assert(gt, !xe_gt_is_media_type(gt)); + xe_gt_assert(gt, xe_gt_is_main_type(gt)); if (!num_vfs) return 0; @@ -693,7 +714,7 @@ int xe_gt_sriov_pf_config_set_fair_ggtt(struct xe_gt *gt, unsigned int vfid, xe_gt_assert(gt, vfid); xe_gt_assert(gt, num_vfs); - xe_gt_assert(gt, !xe_gt_is_media_type(gt)); + xe_gt_assert(gt, xe_gt_is_main_type(gt)); mutex_lock(xe_gt_sriov_pf_master_mutex(gt)); fair = pf_estimate_fair_ggtt(gt, num_vfs); @@ -1299,7 +1320,7 @@ static u64 pf_get_vf_config_lmem(struct xe_gt *gt, unsigned int vfid) struct xe_bo *bo; bo = config->lmem_obj; - return bo ? bo->size : 0; + return bo ? xe_bo_size(bo) : 0; } static int pf_distribute_config_lmem(struct xe_gt *gt, unsigned int vfid, u64 size) @@ -1327,7 +1348,17 @@ static int pf_distribute_config_lmem(struct xe_gt *gt, unsigned int vfid, u64 si static void pf_force_lmtt_invalidate(struct xe_device *xe) { - /* TODO */ + struct xe_lmtt *lmtt; + struct xe_tile *tile; + unsigned int tid; + + xe_assert(xe, xe_device_has_lmtt(xe)); + xe_assert(xe, IS_SRIOV_PF(xe)); + + for_each_tile(tile, xe, tid) { + lmtt = &tile->sriov.pf.lmtt; + xe_lmtt_invalidate_hw(lmtt); + } } static void pf_reset_vf_lmtt(struct xe_device *xe, unsigned int vfid) @@ -1388,7 +1419,7 @@ static int pf_update_vf_lmtt(struct xe_device *xe, unsigned int vfid) err = xe_lmtt_populate_pages(lmtt, vfid, bo, offset); if (err) goto fail; - offset += bo->size; + offset += xe_bo_size(bo); } } @@ -1403,16 +1434,19 @@ fail: return err; } -static void pf_release_vf_config_lmem(struct xe_gt *gt, struct xe_gt_sriov_config *config) +/* Return: %true if there was an LMEM provisioned, %false otherwise */ +static bool pf_release_vf_config_lmem(struct xe_gt *gt, struct xe_gt_sriov_config *config) { xe_gt_assert(gt, IS_DGFX(gt_to_xe(gt))); - xe_gt_assert(gt, !xe_gt_is_media_type(gt)); + xe_gt_assert(gt, xe_gt_is_main_type(gt)); lockdep_assert_held(xe_gt_sriov_pf_master_mutex(gt)); if (config->lmem_obj) { xe_bo_unpin_map_no_vm(config->lmem_obj); config->lmem_obj = NULL; + return true; } + return false; } static int pf_provision_vf_lmem(struct xe_gt *gt, unsigned int vfid, u64 size) @@ -1425,7 +1459,7 @@ static int pf_provision_vf_lmem(struct xe_gt *gt, unsigned int vfid, u64 size) xe_gt_assert(gt, vfid); xe_gt_assert(gt, IS_DGFX(xe)); - xe_gt_assert(gt, !xe_gt_is_media_type(gt)); + xe_gt_assert(gt, xe_gt_is_main_type(gt)); size = round_up(size, pf_get_lmem_alignment(gt)); @@ -1444,23 +1478,16 @@ static int pf_provision_vf_lmem(struct xe_gt *gt, unsigned int vfid, u64 size) return 0; xe_gt_assert(gt, pf_get_lmem_alignment(gt) == SZ_2M); - bo = xe_bo_create_locked(xe, tile, NULL, - ALIGN(size, PAGE_SIZE), - ttm_bo_type_kernel, - XE_BO_FLAG_VRAM_IF_DGFX(tile) | - XE_BO_FLAG_NEEDS_2M | - XE_BO_FLAG_PINNED | - XE_BO_FLAG_PINNED_LATE_RESTORE); + bo = xe_bo_create_pin_range_novm(xe, tile, + ALIGN(size, PAGE_SIZE), 0, 
~0ull, + ttm_bo_type_kernel, + XE_BO_FLAG_VRAM_IF_DGFX(tile) | + XE_BO_FLAG_NEEDS_2M | + XE_BO_FLAG_PINNED | + XE_BO_FLAG_PINNED_LATE_RESTORE); if (IS_ERR(bo)) return PTR_ERR(bo); - err = xe_bo_pin(bo); - xe_bo_unlock(bo); - if (unlikely(err)) { - xe_bo_put(bo); - return err; - } - config->lmem_obj = bo; if (xe_device_has_lmtt(xe)) { @@ -1469,12 +1496,12 @@ static int pf_provision_vf_lmem(struct xe_gt *gt, unsigned int vfid, u64 size) goto release; } - err = pf_push_vf_cfg_lmem(gt, vfid, bo->size); + err = pf_push_vf_cfg_lmem(gt, vfid, xe_bo_size(bo)); if (unlikely(err)) goto reset_lmtt; xe_gt_sriov_dbg_verbose(gt, "VF%u LMEM %zu (%zuM)\n", - vfid, bo->size, bo->size / SZ_1M); + vfid, xe_bo_size(bo), xe_bo_size(bo) / SZ_1M); return 0; reset_lmtt: @@ -1552,7 +1579,7 @@ int xe_gt_sriov_pf_config_bulk_set_lmem(struct xe_gt *gt, unsigned int vfid, int err = 0; xe_gt_assert(gt, vfid); - xe_gt_assert(gt, !xe_gt_is_media_type(gt)); + xe_gt_assert(gt, xe_gt_is_main_type(gt)); if (!num_vfs) return 0; @@ -1574,7 +1601,7 @@ static u64 pf_query_free_lmem(struct xe_gt *gt) { struct xe_tile *tile = gt->tile; - return xe_ttm_vram_get_avail(&tile->mem.vram.ttm.manager); + return xe_ttm_vram_get_avail(&tile->mem.vram->ttm.manager); } static u64 pf_query_max_lmem(struct xe_gt *gt) @@ -1602,7 +1629,6 @@ static u64 pf_estimate_fair_lmem(struct xe_gt *gt, unsigned int num_vfs) u64 fair; fair = div_u64(available, num_vfs); - fair = rounddown_pow_of_two(fair); /* XXX: ttm_vram_mgr & drm_buddy limitation */ fair = ALIGN_DOWN(fair, alignment); #ifdef MAX_FAIR_LMEM fair = min_t(u64, MAX_FAIR_LMEM, fair); @@ -1629,7 +1655,7 @@ int xe_gt_sriov_pf_config_set_fair_lmem(struct xe_gt *gt, unsigned int vfid, xe_gt_assert(gt, vfid); xe_gt_assert(gt, num_vfs); - xe_gt_assert(gt, !xe_gt_is_media_type(gt)); + xe_gt_assert(gt, xe_gt_is_main_type(gt)); if (!xe_device_has_lmtt(gt_to_xe(gt))) return 0; @@ -1663,7 +1689,7 @@ int xe_gt_sriov_pf_config_set_fair(struct xe_gt *gt, unsigned int vfid, xe_gt_assert(gt, vfid); xe_gt_assert(gt, num_vfs); - if (!xe_gt_is_media_type(gt)) { + if (xe_gt_is_main_type(gt)) { err = xe_gt_sriov_pf_config_set_fair_ggtt(gt, vfid, num_vfs); result = result ?: err; err = xe_gt_sriov_pf_config_set_fair_lmem(gt, vfid, num_vfs); @@ -1990,12 +2016,13 @@ static void pf_release_vf_config(struct xe_gt *gt, unsigned int vfid) { struct xe_gt_sriov_config *config = pf_pick_vf_config(gt, vfid); struct xe_device *xe = gt_to_xe(gt); + bool released; - if (!xe_gt_is_media_type(gt)) { + if (xe_gt_is_main_type(gt)) { pf_release_vf_config_ggtt(gt, config); if (IS_DGFX(xe)) { - pf_release_vf_config_lmem(gt, config); - if (xe_device_has_lmtt(xe)) + released = pf_release_vf_config_lmem(gt, config); + if (released && xe_device_has_lmtt(xe)) pf_update_vf_lmtt(xe, vfid); } } @@ -2082,7 +2109,7 @@ static int pf_sanitize_vf_resources(struct xe_gt *gt, u32 vfid, long timeout) * Only GGTT and LMEM requires to be cleared by the PF. * GuC doorbell IDs and context IDs do not need any clearing. 
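/*
 * Illustrative user-space sketch (not driver code) of the fair-share math in
 * pf_estimate_fair_lmem() above: the available VRAM is split evenly between
 * the VFs and the result is aligned down to the 2M LMEM granularity; the
 * power-of-two rounding is what this patch drops.  Constants and helpers
 * below are local stand-ins, not the driver's.
 *
 * compile: cc -o fair_lmem fair_lmem.c && ./fair_lmem
 */
#include <stdint.h>
#include <stdio.h>

#define SZ_2M			(2ull * 1024 * 1024)
#define ALIGN_DOWN(x, a)	((x) & ~((uint64_t)(a) - 1))	/* (a) must be a power of two */

static uint64_t estimate_fair_lmem(uint64_t available, unsigned int num_vfs)
{
	return ALIGN_DOWN(available / num_vfs, SZ_2M);
}

int main(void)
{
	uint64_t available = 17079205888ull;	/* ~15.9 GiB of usable VRAM, made up */
	unsigned int vfs;

	for (vfs = 1; vfs <= 7; vfs++)
		printf("%u VF(s) -> %llu MiB each\n", vfs,
		       (unsigned long long)(estimate_fair_lmem(available, vfs) >> 20));
	return 0;
}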
*/ - if (!xe_gt_is_media_type(gt)) { + if (xe_gt_is_main_type(gt)) { pf_sanitize_ggtt(config->ggtt_region, vfid); if (IS_DGFX(xe)) err = pf_sanitize_lmem(tile, config->lmem_obj, timeout); @@ -2149,7 +2176,7 @@ static int pf_validate_vf_config(struct xe_gt *gt, unsigned int vfid) { struct xe_gt *primary_gt = gt_to_tile(gt)->primary_gt; struct xe_device *xe = gt_to_xe(gt); - bool is_primary = !xe_gt_is_media_type(gt); + bool is_primary = xe_gt_is_main_type(gt); bool valid_ggtt, valid_ctxs, valid_dbs; bool valid_any, valid_all; @@ -2349,7 +2376,7 @@ int xe_gt_sriov_pf_config_restore(struct xe_gt *gt, unsigned int vfid, return -EINVAL; if (IS_ENABLED(CONFIG_DRM_XE_DEBUG_SRIOV)) { - struct drm_printer p = xe_gt_info_printer(gt); + struct drm_printer p = xe_gt_dbg_printer(gt); drm_printf(&p, "restoring VF%u config:\n", vfid); xe_guc_klv_print(buf, size / sizeof(u32), &p); @@ -2366,6 +2393,35 @@ int xe_gt_sriov_pf_config_restore(struct xe_gt *gt, unsigned int vfid, return err; } +static void pf_prepare_self_config(struct xe_gt *gt) +{ + struct xe_gt_sriov_config *config = pf_pick_vf_config(gt, PFID); + + /* + * We want PF to be allowed to use all of context ID, doorbells IDs + * and whole usable GGTT area. While we can store ctxs/dbs numbers + * directly in the config structure, can't do the same with the GGTT + * configuration, so let it be prepared on demand while pushing KLVs. + */ + config->num_ctxs = GUC_ID_MAX; + config->num_dbs = GUC_NUM_DOORBELLS; +} + +static int pf_push_self_config(struct xe_gt *gt) +{ + int err; + + err = pf_push_full_vf_config(gt, PFID); + if (err) { + xe_gt_sriov_err(gt, "Failed to push self configuration (%pe)\n", + ERR_PTR(err)); + return err; + } + + xe_gt_sriov_dbg_verbose(gt, "self configuration completed\n"); + return 0; +} + static void fini_config(void *arg) { struct xe_gt *gt = arg; @@ -2389,9 +2445,18 @@ static void fini_config(void *arg) int xe_gt_sriov_pf_config_init(struct xe_gt *gt) { struct xe_device *xe = gt_to_xe(gt); + int err; xe_gt_assert(gt, IS_SRIOV_PF(xe)); + mutex_lock(xe_gt_sriov_pf_master_mutex(gt)); + pf_prepare_self_config(gt); + err = pf_push_self_config(gt); + mutex_unlock(xe_gt_sriov_pf_master_mutex(gt)); + + if (err) + return err; + return devm_add_action_or_reset(xe->drm.dev, fini_config, gt); } @@ -2409,6 +2474,10 @@ void xe_gt_sriov_pf_config_restart(struct xe_gt *gt) unsigned int n, total_vfs = xe_sriov_pf_get_totalvfs(gt_to_xe(gt)); unsigned int fail = 0, skip = 0; + mutex_lock(xe_gt_sriov_pf_master_mutex(gt)); + pf_push_self_config(gt); + mutex_unlock(xe_gt_sriov_pf_master_mutex(gt)); + for (n = 1; n <= total_vfs; n++) { if (xe_gt_sriov_pf_config_is_empty(gt, n)) skip++; @@ -2552,10 +2621,10 @@ int xe_gt_sriov_pf_config_print_lmem(struct xe_gt *gt, struct drm_printer *p) if (!config->lmem_obj) continue; - string_get_size(config->lmem_obj->size, 1, STRING_UNITS_2, + string_get_size(xe_bo_size(config->lmem_obj), 1, STRING_UNITS_2, buf, sizeof(buf)); drm_printf(p, "VF%u:\t%zu\t(%s)\n", - n, config->lmem_obj->size, buf); + n, xe_bo_size(config->lmem_obj), buf); } mutex_unlock(xe_gt_sriov_pf_master_mutex(gt)); diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_control.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf_control.c index 1f50aec3a059..4f7fff892bc0 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_control.c +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_control.c @@ -15,10 +15,11 @@ #include "xe_gt_sriov_pf_helpers.h" #include "xe_gt_sriov_pf_migration.h" #include "xe_gt_sriov_pf_monitor.h" -#include "xe_gt_sriov_pf_service.h" #include 
"xe_gt_sriov_printk.h" #include "xe_guc_ct.h" #include "xe_sriov.h" +#include "xe_sriov_pf_service.h" +#include "xe_tile.h" static const char *control_cmd_to_string(u32 cmd) { @@ -1064,7 +1065,9 @@ static bool pf_exit_vf_flr_reset_data(struct xe_gt *gt, unsigned int vfid) if (!pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_RESET_DATA)) return false; - xe_gt_sriov_pf_service_reset(gt, vfid); + if (xe_tile_is_root(gt->tile) && xe_gt_is_main_type(gt)) + xe_sriov_pf_service_reset_vf(gt_to_xe(gt), vfid); + xe_gt_sriov_pf_monitor_flr(gt, vfid); pf_enter_vf_flr_reset_mmio(gt, vfid); diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_debugfs.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf_debugfs.c index 13970d5a2867..3ed245e04d0c 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_debugfs.c +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_debugfs.c @@ -22,6 +22,7 @@ #include "xe_gt_sriov_pf_policy.h" #include "xe_gt_sriov_pf_service.h" #include "xe_pm.h" +#include "xe_sriov_pf.h" /* * /sys/kernel/debug/dri/0/ @@ -78,11 +79,6 @@ static const struct drm_info_list pf_info[] = { .data = xe_gt_sriov_pf_service_print_runtime, }, { - "negotiated_versions", - .show = xe_gt_debugfs_simple_show, - .data = xe_gt_sriov_pf_service_print_version, - }, - { "adverse_events", .show = xe_gt_debugfs_simple_show, .data = xe_gt_sriov_pf_monitor_print_events, @@ -210,7 +206,8 @@ static int CONFIG##_set(void *data, u64 val) \ return -EOVERFLOW; \ \ xe_pm_runtime_get(xe); \ - err = xe_gt_sriov_pf_config_set_##CONFIG(gt, vfid, val); \ + err = xe_sriov_pf_wait_ready(xe) ?: \ + xe_gt_sriov_pf_config_set_##CONFIG(gt, vfid, val); \ xe_pm_runtime_put(xe); \ \ return err; \ @@ -305,7 +302,7 @@ static void pf_add_config_attrs(struct xe_gt *gt, struct dentry *parent, unsigne xe_gt_assert(gt, gt == extract_gt(parent)); xe_gt_assert(gt, vfid == extract_vfid(parent)); - if (!xe_gt_is_media_type(gt)) { + if (xe_gt_is_main_type(gt)) { debugfs_create_file_unsafe(vfid ? 
"ggtt_quota" : "ggtt_spare", 0644, parent, parent, &ggtt_fops); if (xe_device_has_lmtt(gt_to_xe(gt))) @@ -554,7 +551,7 @@ void xe_gt_sriov_pf_debugfs_register(struct xe_gt *gt, struct dentry *root) pfdentry->d_inode->i_private = gt; drm_debugfs_create_files(pf_info, ARRAY_SIZE(pf_info), pfdentry, minor); - if (!xe_gt_is_media_type(gt)) { + if (xe_gt_is_main_type(gt)) { drm_debugfs_create_files(pf_ggtt_info, ARRAY_SIZE(pf_ggtt_info), pfdentry, minor); diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_migration.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf_migration.c index c712111aa30d..44cc612b0a75 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_migration.c +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_migration.c @@ -55,12 +55,12 @@ static int pf_send_guc_save_vf_state(struct xe_gt *gt, unsigned int vfid, xe_gt_assert(gt, size % sizeof(u32) == 0); xe_gt_assert(gt, size == ndwords * sizeof(u32)); - bo = xe_bo_create_pin_map(xe, tile, NULL, - ALIGN(size, PAGE_SIZE), - ttm_bo_type_kernel, - XE_BO_FLAG_SYSTEM | - XE_BO_FLAG_GGTT | - XE_BO_FLAG_GGTT_INVALIDATE); + bo = xe_bo_create_pin_map_novm(xe, tile, + ALIGN(size, PAGE_SIZE), + ttm_bo_type_kernel, + XE_BO_FLAG_SYSTEM | + XE_BO_FLAG_GGTT | + XE_BO_FLAG_GGTT_INVALIDATE, false); if (IS_ERR(bo)) return PTR_ERR(bo); @@ -91,12 +91,12 @@ static int pf_send_guc_restore_vf_state(struct xe_gt *gt, unsigned int vfid, xe_gt_assert(gt, size % sizeof(u32) == 0); xe_gt_assert(gt, size == ndwords * sizeof(u32)); - bo = xe_bo_create_pin_map(xe, tile, NULL, - ALIGN(size, PAGE_SIZE), - ttm_bo_type_kernel, - XE_BO_FLAG_SYSTEM | - XE_BO_FLAG_GGTT | - XE_BO_FLAG_GGTT_INVALIDATE); + bo = xe_bo_create_pin_map_novm(xe, tile, + ALIGN(size, PAGE_SIZE), + ttm_bo_type_kernel, + XE_BO_FLAG_SYSTEM | + XE_BO_FLAG_GGTT | + XE_BO_FLAG_GGTT_INVALIDATE, false); if (IS_ERR(bo)) return PTR_ERR(bo); diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_service.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf_service.c index 821cfcc34e6b..76dd9233ef9f 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_service.c +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_service.c @@ -19,91 +19,7 @@ #include "xe_gt_sriov_pf_service_types.h" #include "xe_guc_ct.h" #include "xe_guc_hxg_helpers.h" - -static void pf_init_versions(struct xe_gt *gt) -{ - BUILD_BUG_ON(!GUC_RELAY_VERSION_BASE_MAJOR && !GUC_RELAY_VERSION_BASE_MINOR); - BUILD_BUG_ON(GUC_RELAY_VERSION_BASE_MAJOR > GUC_RELAY_VERSION_LATEST_MAJOR); - - /* base versions may differ between platforms */ - gt->sriov.pf.service.version.base.major = GUC_RELAY_VERSION_BASE_MAJOR; - gt->sriov.pf.service.version.base.minor = GUC_RELAY_VERSION_BASE_MINOR; - - /* latest version is same for all platforms */ - gt->sriov.pf.service.version.latest.major = GUC_RELAY_VERSION_LATEST_MAJOR; - gt->sriov.pf.service.version.latest.minor = GUC_RELAY_VERSION_LATEST_MINOR; -} - -/* Return: 0 on success or a negative error code on failure. 
*/ -static int pf_negotiate_version(struct xe_gt *gt, - u32 wanted_major, u32 wanted_minor, - u32 *major, u32 *minor) -{ - struct xe_gt_sriov_pf_service_version base = gt->sriov.pf.service.version.base; - struct xe_gt_sriov_pf_service_version latest = gt->sriov.pf.service.version.latest; - - xe_gt_assert(gt, base.major); - xe_gt_assert(gt, base.major <= latest.major); - xe_gt_assert(gt, (base.major < latest.major) || (base.minor <= latest.minor)); - - /* VF doesn't care - return our latest */ - if (wanted_major == VF2PF_HANDSHAKE_MAJOR_ANY && - wanted_minor == VF2PF_HANDSHAKE_MINOR_ANY) { - *major = latest.major; - *minor = latest.minor; - return 0; - } - - /* VF wants newer than our - return our latest */ - if (wanted_major > latest.major) { - *major = latest.major; - *minor = latest.minor; - return 0; - } - - /* VF wants older than min required - reject */ - if (wanted_major < base.major || - (wanted_major == base.major && wanted_minor < base.minor)) { - return -EPERM; - } - - /* previous major - return wanted, as we should still support it */ - if (wanted_major < latest.major) { - /* XXX: we are not prepared for multi-versions yet */ - xe_gt_assert(gt, base.major == latest.major); - return -ENOPKG; - } - - /* same major - return common minor */ - *major = wanted_major; - *minor = min_t(u32, latest.minor, wanted_minor); - return 0; -} - -static void pf_connect(struct xe_gt *gt, u32 vfid, u32 major, u32 minor) -{ - xe_gt_sriov_pf_assert_vfid(gt, vfid); - xe_gt_assert(gt, major || minor); - - gt->sriov.pf.vfs[vfid].version.major = major; - gt->sriov.pf.vfs[vfid].version.minor = minor; -} - -static void pf_disconnect(struct xe_gt *gt, u32 vfid) -{ - xe_gt_sriov_pf_assert_vfid(gt, vfid); - - gt->sriov.pf.vfs[vfid].version.major = 0; - gt->sriov.pf.vfs[vfid].version.minor = 0; -} - -static bool pf_is_negotiated(struct xe_gt *gt, u32 vfid, u32 major, u32 minor) -{ - xe_gt_sriov_pf_assert_vfid(gt, vfid); - - return major == gt->sriov.pf.vfs[vfid].version.major && - minor <= gt->sriov.pf.vfs[vfid].version.minor; -} +#include "xe_sriov_pf_service.h" static const struct xe_reg tgl_runtime_regs[] = { RPM_CONFIG0, /* _MMIO(0x0d00) */ @@ -266,7 +182,7 @@ static void pf_prepare_runtime_info(struct xe_gt *gt) read_many(gt, size, regs, values); if (IS_ENABLED(CONFIG_DRM_XE_DEBUG_SRIOV)) { - struct drm_printer p = xe_gt_info_printer(gt); + struct drm_printer p = xe_gt_dbg_printer(gt); xe_gt_sriov_pf_service_print_runtime(gt, &p); } @@ -285,8 +201,6 @@ int xe_gt_sriov_pf_service_init(struct xe_gt *gt) { int err; - pf_init_versions(gt); - err = pf_alloc_runtime_info(gt); if (unlikely(err)) goto failed; @@ -311,47 +225,6 @@ void xe_gt_sriov_pf_service_update(struct xe_gt *gt) pf_prepare_runtime_info(gt); } -/** - * xe_gt_sriov_pf_service_reset - Reset a connection with the VF. - * @gt: the &xe_gt - * @vfid: the VF identifier - * - * Reset a VF driver negotiated VF/PF ABI version. - * After that point, the VF driver will have to perform new version handshake - * to continue use of the PF services again. - * - * This function can only be called on PF. - */ -void xe_gt_sriov_pf_service_reset(struct xe_gt *gt, unsigned int vfid) -{ - pf_disconnect(gt, vfid); -} - -/* Return: 0 on success or a negative error code on failure. 
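/*
 * Illustrative user-space sketch (not driver code) of the negotiation rules in
 * the pf_negotiate_version() removed above (the logic now lives behind
 * xe_sriov_pf_service_handshake_vf()): "any" gets our latest version, a newer
 * major than we speak is capped at our latest, anything older than the base is
 * rejected, an older-but-once-supported major is not handled yet, and a
 * matching major meets at the lowest common minor.  The struct, the WANT_ANY
 * value and the error codes are simplified stand-ins.
 *
 * compile: cc -o negotiate negotiate.c && ./negotiate
 */
#include <errno.h>
#include <stdint.h>
#include <stdio.h>

struct ver { uint32_t major, minor; };

#define WANT_ANY 0u	/* stand-in for VF2PF_HANDSHAKE_MAJOR/MINOR_ANY */

static int negotiate(struct ver base, struct ver latest,
		     uint32_t want_major, uint32_t want_minor, struct ver *out)
{
	if (want_major == WANT_ANY && want_minor == WANT_ANY) {
		*out = latest;			/* VF doesn't care - give it our latest */
		return 0;
	}
	if (want_major > latest.major) {
		*out = latest;			/* newer than we speak - cap at our latest */
		return 0;
	}
	if (want_major < base.major ||
	    (want_major == base.major && want_minor < base.minor))
		return -EPERM;			/* older than the minimum we still support */
	if (want_major < latest.major)
		return -ENOPKG;			/* older major - multi-version not handled yet */

	out->major = want_major;		/* same major - meet at the common minor */
	out->minor = latest.minor < want_minor ? latest.minor : want_minor;
	return 0;
}

int main(void)
{
	struct ver base = { 1, 0 }, latest = { 1, 9 }, got = { 0, 0 };
	int err = negotiate(base, latest, 1, 15, &got);

	printf("err=%d negotiated=%u.%u\n", err, got.major, got.minor);	/* err=0 1.9 */
	return 0;
}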
*/ -static int pf_process_handshake(struct xe_gt *gt, u32 vfid, - u32 wanted_major, u32 wanted_minor, - u32 *major, u32 *minor) -{ - int err; - - xe_gt_sriov_dbg_verbose(gt, "VF%u wants ABI version %u.%u\n", - vfid, wanted_major, wanted_minor); - - err = pf_negotiate_version(gt, wanted_major, wanted_minor, major, minor); - - if (err < 0) { - xe_gt_sriov_notice(gt, "VF%u failed to negotiate ABI %u.%u (%pe)\n", - vfid, wanted_major, wanted_minor, ERR_PTR(err)); - pf_disconnect(gt, vfid); - } else { - xe_gt_sriov_dbg(gt, "VF%u negotiated ABI version %u.%u\n", - vfid, *major, *minor); - pf_connect(gt, vfid, *major, *minor); - } - - return 0; -} - /* Return: length of the response message or a negative error code on failure. */ static int pf_process_handshake_msg(struct xe_gt *gt, u32 origin, const u32 *request, u32 len, u32 *response, u32 size) @@ -371,7 +244,8 @@ static int pf_process_handshake_msg(struct xe_gt *gt, u32 origin, wanted_major = FIELD_GET(VF2PF_HANDSHAKE_REQUEST_MSG_1_MAJOR, request[1]); wanted_minor = FIELD_GET(VF2PF_HANDSHAKE_REQUEST_MSG_1_MINOR, request[1]); - err = pf_process_handshake(gt, origin, wanted_major, wanted_minor, &major, &minor); + err = xe_sriov_pf_service_handshake_vf(gt_to_xe(gt), origin, wanted_major, wanted_minor, + &major, &minor); if (err < 0) return err; @@ -430,8 +304,10 @@ static int pf_process_runtime_query_msg(struct xe_gt *gt, u32 origin, u32 remaining = 0; int ret; - if (!pf_is_negotiated(gt, origin, 1, 0)) + /* this action is available from ABI 1.0 */ + if (!xe_sriov_pf_service_is_negotiated(gt_to_xe(gt), origin, 1, 0)) return -EACCES; + if (unlikely(msg_len > VF2PF_QUERY_RUNTIME_REQUEST_MSG_LEN)) return -EMSGSIZE; if (unlikely(msg_len < VF2PF_QUERY_RUNTIME_REQUEST_MSG_LEN)) @@ -528,33 +404,3 @@ int xe_gt_sriov_pf_service_print_runtime(struct xe_gt *gt, struct drm_printer *p return 0; } - -/** - * xe_gt_sriov_pf_service_print_version - Print ABI versions negotiated with VFs. - * @gt: the &xe_gt - * @p: the &drm_printer - * - * This function is for PF use only. 
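/*
 * Illustrative sketch (not driver code) of the availability check now routed
 * through xe_sriov_pf_service_is_negotiated() above: an action documented as
 * "available from ABI X.Y" needs the negotiated major to match X exactly and
 * the negotiated minor to be at least Y, otherwise the VF is told to
 * handshake first.  The names and the wrapper below are stand-ins.
 *
 * compile: cc -o gate gate.c && ./gate
 */
#include <errno.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct abi_ver { uint16_t major, minor; };

static bool is_negotiated(struct abi_ver got, uint16_t major, uint16_t minor)
{
	return major == got.major && minor <= got.minor;
}

/* e.g. the runtime query handled above is usable from ABI 1.0 */
static int check_query_runtime(struct abi_ver got)
{
	return is_negotiated(got, 1, 0) ? 0 : -EACCES;
}

int main(void)
{
	struct abi_ver none = { 0, 0 }, ok = { 1, 9 };

	printf("%d %d\n", check_query_runtime(none), check_query_runtime(ok));	/* -13 0 */
	return 0;
}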
- */ -int xe_gt_sriov_pf_service_print_version(struct xe_gt *gt, struct drm_printer *p) -{ - struct xe_device *xe = gt_to_xe(gt); - unsigned int n, total_vfs = xe_sriov_pf_get_totalvfs(xe); - struct xe_gt_sriov_pf_service_version *version; - - xe_gt_assert(gt, IS_SRIOV_PF(xe)); - - for (n = 1; n <= total_vfs; n++) { - version = >->sriov.pf.vfs[n].version; - if (!version->major && !version->minor) - continue; - - drm_printf(p, "VF%u:\t%u.%u\n", n, version->major, version->minor); - } - - return 0; -} - -#if IS_BUILTIN(CONFIG_DRM_XE_KUNIT_TEST) -#include "tests/xe_gt_sriov_pf_service_test.c" -#endif diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_service.h b/drivers/gpu/drm/xe/xe_gt_sriov_pf_service.h index 56aaadf0360d..10b02c9b651c 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_service.h +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_service.h @@ -14,9 +14,7 @@ struct xe_gt; int xe_gt_sriov_pf_service_init(struct xe_gt *gt); void xe_gt_sriov_pf_service_update(struct xe_gt *gt); -void xe_gt_sriov_pf_service_reset(struct xe_gt *gt, unsigned int vfid); -int xe_gt_sriov_pf_service_print_version(struct xe_gt *gt, struct drm_printer *p); int xe_gt_sriov_pf_service_print_runtime(struct xe_gt *gt, struct drm_printer *p); #ifdef CONFIG_PCI_IOV diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_vf.c b/drivers/gpu/drm/xe/xe_gt_sriov_vf.c index 9b2fc9db55b8..0461d5513487 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_vf.c +++ b/drivers/gpu/drm/xe/xe_gt_sriov_vf.c @@ -25,6 +25,7 @@ #include "xe_guc.h" #include "xe_guc_hxg_helpers.h" #include "xe_guc_relay.h" +#include "xe_lrc.h" #include "xe_mmio.h" #include "xe_sriov.h" #include "xe_sriov_vf.h" @@ -552,7 +553,7 @@ int xe_gt_sriov_vf_query_config(struct xe_gt *gt) if (unlikely(err)) return err; - if (IS_DGFX(xe) && !xe_gt_is_media_type(gt)) { + if (IS_DGFX(xe) && xe_gt_is_main_type(gt)) { err = vf_get_lmem_info(gt); if (unlikely(err)) return err; @@ -649,7 +650,7 @@ s64 xe_gt_sriov_vf_ggtt_shift(struct xe_gt *gt) struct xe_gt_sriov_vf_selfconfig *config = >->sriov.vf.self_config; xe_gt_assert(gt, IS_SRIOV_VF(gt_to_xe(gt))); - xe_gt_assert(gt, !xe_gt_is_media_type(gt)); + xe_gt_assert(gt, xe_gt_is_main_type(gt)); return config->ggtt_shift; } @@ -686,21 +687,22 @@ static int relay_action_handshake(struct xe_gt *gt, u32 *major, u32 *minor) return 0; } -static void vf_connect_pf(struct xe_gt *gt, u16 major, u16 minor) +static void vf_connect_pf(struct xe_device *xe, u16 major, u16 minor) { - xe_gt_assert(gt, IS_SRIOV_VF(gt_to_xe(gt))); + xe_assert(xe, IS_SRIOV_VF(xe)); - gt->sriov.vf.pf_version.major = major; - gt->sriov.vf.pf_version.minor = minor; + xe->sriov.vf.pf_version.major = major; + xe->sriov.vf.pf_version.minor = minor; } -static void vf_disconnect_pf(struct xe_gt *gt) +static void vf_disconnect_pf(struct xe_device *xe) { - vf_connect_pf(gt, 0, 0); + vf_connect_pf(xe, 0, 0); } static int vf_handshake_with_pf(struct xe_gt *gt) { + struct xe_device *xe = gt_to_xe(gt); u32 major_wanted = GUC_RELAY_VERSION_LATEST_MAJOR; u32 minor_wanted = GUC_RELAY_VERSION_LATEST_MINOR; u32 major = major_wanted, minor = minor_wanted; @@ -716,13 +718,13 @@ static int vf_handshake_with_pf(struct xe_gt *gt) } xe_gt_sriov_dbg(gt, "using VF/PF ABI %u.%u\n", major, minor); - vf_connect_pf(gt, major, minor); + vf_connect_pf(xe, major, minor); return 0; failed: xe_gt_sriov_err(gt, "Unable to confirm VF/PF ABI version %u.%u (%pe)\n", major, minor, ERR_PTR(err)); - vf_disconnect_pf(gt); + vf_disconnect_pf(xe); return err; } @@ -750,6 +752,19 @@ failed: } /** + * 
xe_gt_sriov_vf_default_lrcs_hwsp_rebase - Update GGTT references in HWSP of default LRCs. + * @gt: the &xe_gt struct instance + */ +void xe_gt_sriov_vf_default_lrcs_hwsp_rebase(struct xe_gt *gt) +{ + struct xe_hw_engine *hwe; + enum xe_hw_engine_id id; + + for_each_hw_engine(hwe, gt, id) + xe_default_lrc_update_memirq_regs_with_address(hwe); +} + +/** * xe_gt_sriov_vf_migrated_event_handler - Start a VF migration recovery, * or just mark that a GuC is ready for it. * @gt: the &xe_gt struct instance linked to target GuC @@ -775,10 +790,12 @@ void xe_gt_sriov_vf_migrated_event_handler(struct xe_gt *gt) static bool vf_is_negotiated(struct xe_gt *gt, u16 major, u16 minor) { - xe_gt_assert(gt, IS_SRIOV_VF(gt_to_xe(gt))); + struct xe_device *xe = gt_to_xe(gt); - return major == gt->sriov.vf.pf_version.major && - minor <= gt->sriov.vf.pf_version.minor; + xe_gt_assert(gt, IS_SRIOV_VF(xe)); + + return major == xe->sriov.vf.pf_version.major && + minor <= xe->sriov.vf.pf_version.minor; } static int vf_prepare_runtime_info(struct xe_gt *gt, unsigned int num_regs) @@ -966,7 +983,6 @@ u32 xe_gt_sriov_vf_read32(struct xe_gt *gt, struct xe_reg reg) struct vf_runtime_reg *rr; xe_gt_assert(gt, IS_SRIOV_VF(gt_to_xe(gt))); - xe_gt_assert(gt, gt->sriov.vf.pf_version.major); xe_gt_assert(gt, !reg.vf); if (reg.addr == GMD_ID.addr) { @@ -1037,7 +1053,7 @@ void xe_gt_sriov_vf_print_config(struct xe_gt *gt, struct drm_printer *p) drm_printf(p, "GGTT shift on last restore:\t%lld\n", config->ggtt_shift); - if (IS_DGFX(xe) && !xe_gt_is_media_type(gt)) { + if (IS_DGFX(xe) && xe_gt_is_main_type(gt)) { string_get_size(config->lmem_size, 1, STRING_UNITS_2, buf, sizeof(buf)); drm_printf(p, "LMEM size:\t%llu (%s)\n", config->lmem_size, buf); } @@ -1073,9 +1089,10 @@ void xe_gt_sriov_vf_print_runtime(struct xe_gt *gt, struct drm_printer *p) */ void xe_gt_sriov_vf_print_version(struct xe_gt *gt, struct drm_printer *p) { + struct xe_device *xe = gt_to_xe(gt); struct xe_uc_fw_version *guc_version = >->sriov.vf.guc_version; struct xe_uc_fw_version *wanted = >->sriov.vf.wanted_guc_version; - struct xe_gt_sriov_vf_relay_version *pf_version = >->sriov.vf.pf_version; + struct xe_sriov_vf_relay_version *pf_version = &xe->sriov.vf.pf_version; struct xe_uc_fw_version ver; xe_gt_assert(gt, IS_SRIOV_VF(gt_to_xe(gt))); diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_vf.h b/drivers/gpu/drm/xe/xe_gt_sriov_vf.h index e0357f341a2d..0af1dc769fe0 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_vf.h +++ b/drivers/gpu/drm/xe/xe_gt_sriov_vf.h @@ -21,6 +21,7 @@ void xe_gt_sriov_vf_guc_versions(struct xe_gt *gt, int xe_gt_sriov_vf_query_config(struct xe_gt *gt); int xe_gt_sriov_vf_connect(struct xe_gt *gt); int xe_gt_sriov_vf_query_runtime(struct xe_gt *gt); +void xe_gt_sriov_vf_default_lrcs_hwsp_rebase(struct xe_gt *gt); int xe_gt_sriov_vf_notify_resfix_done(struct xe_gt *gt); void xe_gt_sriov_vf_migrated_event_handler(struct xe_gt *gt); diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_vf_types.h b/drivers/gpu/drm/xe/xe_gt_sriov_vf_types.h index ef041679e9d4..298dedf4b009 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_vf_types.h +++ b/drivers/gpu/drm/xe/xe_gt_sriov_vf_types.h @@ -10,16 +10,6 @@ #include "xe_uc_fw_types.h" /** - * struct xe_gt_sriov_vf_relay_version - PF ABI version details. - */ -struct xe_gt_sriov_vf_relay_version { - /** @major: major version. */ - u16 major; - /** @minor: minor version. */ - u16 minor; -}; - -/** * struct xe_gt_sriov_vf_selfconfig - VF configuration data. 
*/ struct xe_gt_sriov_vf_selfconfig { @@ -66,8 +56,6 @@ struct xe_gt_sriov_vf { struct xe_uc_fw_version guc_version; /** @self_config: resource configurations. */ struct xe_gt_sriov_vf_selfconfig self_config; - /** @pf_version: negotiated VF/PF ABI version. */ - struct xe_gt_sriov_vf_relay_version pf_version; /** @runtime: runtime data retrieved from the PF. */ struct xe_gt_sriov_vf_runtime runtime; }; diff --git a/drivers/gpu/drm/xe/xe_gt_stats.c b/drivers/gpu/drm/xe/xe_gt_stats.c index 30f942671c2b..5f74706bab81 100644 --- a/drivers/gpu/drm/xe/xe_gt_stats.c +++ b/drivers/gpu/drm/xe/xe_gt_stats.c @@ -26,11 +26,46 @@ void xe_gt_stats_incr(struct xe_gt *gt, const enum xe_gt_stats_id id, int incr) atomic64_add(incr, >->stats.counters[id]); } +#define DEF_STAT_STR(ID, name) [XE_GT_STATS_ID_##ID] = name + static const char *const stat_description[__XE_GT_STATS_NUM_IDS] = { - "svm_pagefault_count", - "tlb_inval_count", - "vma_pagefault_count", - "vma_pagefault_kb", + DEF_STAT_STR(SVM_PAGEFAULT_COUNT, "svm_pagefault_count"), + DEF_STAT_STR(TLB_INVAL, "tlb_inval_count"), + DEF_STAT_STR(SVM_TLB_INVAL_COUNT, "svm_tlb_inval_count"), + DEF_STAT_STR(SVM_TLB_INVAL_US, "svm_tlb_inval_us"), + DEF_STAT_STR(VMA_PAGEFAULT_COUNT, "vma_pagefault_count"), + DEF_STAT_STR(VMA_PAGEFAULT_KB, "vma_pagefault_kb"), + DEF_STAT_STR(SVM_4K_PAGEFAULT_COUNT, "svm_4K_pagefault_count"), + DEF_STAT_STR(SVM_64K_PAGEFAULT_COUNT, "svm_64K_pagefault_count"), + DEF_STAT_STR(SVM_2M_PAGEFAULT_COUNT, "svm_2M_pagefault_count"), + DEF_STAT_STR(SVM_4K_VALID_PAGEFAULT_COUNT, "svm_4K_valid_pagefault_count"), + DEF_STAT_STR(SVM_64K_VALID_PAGEFAULT_COUNT, "svm_64K_valid_pagefault_count"), + DEF_STAT_STR(SVM_2M_VALID_PAGEFAULT_COUNT, "svm_2M_valid_pagefault_count"), + DEF_STAT_STR(SVM_4K_PAGEFAULT_US, "svm_4K_pagefault_us"), + DEF_STAT_STR(SVM_64K_PAGEFAULT_US, "svm_64K_pagefault_us"), + DEF_STAT_STR(SVM_2M_PAGEFAULT_US, "svm_2M_pagefault_us"), + DEF_STAT_STR(SVM_4K_MIGRATE_COUNT, "svm_4K_migrate_count"), + DEF_STAT_STR(SVM_64K_MIGRATE_COUNT, "svm_64K_migrate_count"), + DEF_STAT_STR(SVM_2M_MIGRATE_COUNT, "svm_2M_migrate_count"), + DEF_STAT_STR(SVM_4K_MIGRATE_US, "svm_4K_migrate_us"), + DEF_STAT_STR(SVM_64K_MIGRATE_US, "svm_64K_migrate_us"), + DEF_STAT_STR(SVM_2M_MIGRATE_US, "svm_2M_migrate_us"), + DEF_STAT_STR(SVM_DEVICE_COPY_US, "svm_device_copy_us"), + DEF_STAT_STR(SVM_4K_DEVICE_COPY_US, "svm_4K_device_copy_us"), + DEF_STAT_STR(SVM_64K_DEVICE_COPY_US, "svm_64K_device_copy_us"), + DEF_STAT_STR(SVM_2M_DEVICE_COPY_US, "svm_2M_device_copy_us"), + DEF_STAT_STR(SVM_CPU_COPY_US, "svm_cpu_copy_us"), + DEF_STAT_STR(SVM_4K_CPU_COPY_US, "svm_4K_cpu_copy_us"), + DEF_STAT_STR(SVM_64K_CPU_COPY_US, "svm_64K_cpu_copy_us"), + DEF_STAT_STR(SVM_2M_CPU_COPY_US, "svm_2M_cpu_copy_us"), + DEF_STAT_STR(SVM_DEVICE_COPY_KB, "svm_device_copy_kb"), + DEF_STAT_STR(SVM_CPU_COPY_KB, "svm_cpu_copy_kb"), + DEF_STAT_STR(SVM_4K_GET_PAGES_US, "svm_4K_get_pages_us"), + DEF_STAT_STR(SVM_64K_GET_PAGES_US, "svm_64K_get_pages_us"), + DEF_STAT_STR(SVM_2M_GET_PAGES_US, "svm_2M_get_pages_us"), + DEF_STAT_STR(SVM_4K_BIND_US, "svm_4K_bind_us"), + DEF_STAT_STR(SVM_64K_BIND_US, "svm_64K_bind_us"), + DEF_STAT_STR(SVM_2M_BIND_US, "svm_2M_bind_us"), }; /** @@ -50,3 +85,17 @@ int xe_gt_stats_print_info(struct xe_gt *gt, struct drm_printer *p) return 0; } + +/** + * xe_gt_stats_clear - Clear the GT stats + * @gt: GT structure + * + * This clear (zeros) all the available GT stats. 
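/*
 * Illustrative user-space model (not driver code) of the DEF_STAT_STR pattern
 * above: C designated initializers key each name to its enum value, so the
 * table stays correct even if entries are added out of order, and a clear()
 * loop like xe_gt_stats_clear() simply zeroes every counter.  The enum here
 * is a trimmed stand-in for the real ID list.
 *
 * compile: cc -o stats stats.c && ./stats
 */
#include <stdatomic.h>
#include <stdio.h>

enum stat_id {
	STAT_TLB_INVAL,
	STAT_VMA_PAGEFAULT_COUNT,
	STAT_VMA_PAGEFAULT_KB,
	__NUM_STATS,	/* must be the last entry */
};

#define DEF_STAT_STR(ID, name) [STAT_##ID] = name

static const char *const stat_name[__NUM_STATS] = {
	DEF_STAT_STR(TLB_INVAL, "tlb_inval_count"),
	DEF_STAT_STR(VMA_PAGEFAULT_COUNT, "vma_pagefault_count"),
	DEF_STAT_STR(VMA_PAGEFAULT_KB, "vma_pagefault_kb"),
};

static _Atomic long long counters[__NUM_STATS];

static void stats_clear(void)
{
	int id;

	for (id = 0; id < __NUM_STATS; id++)
		atomic_store(&counters[id], 0);
}

int main(void)
{
	int id;

	atomic_fetch_add(&counters[STAT_TLB_INVAL], 3);
	for (id = 0; id < __NUM_STATS; id++)
		printf("%s: %lld\n", stat_name[id], (long long)atomic_load(&counters[id]));
	stats_clear();
	return 0;
}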
+ */ +void xe_gt_stats_clear(struct xe_gt *gt) +{ + int id; + + for (id = 0; id < ARRAY_SIZE(gt->stats.counters); ++id) + atomic64_set(>->stats.counters[id], 0); +} diff --git a/drivers/gpu/drm/xe/xe_gt_stats.h b/drivers/gpu/drm/xe/xe_gt_stats.h index 38325ef53617..e8aea32bc971 100644 --- a/drivers/gpu/drm/xe/xe_gt_stats.h +++ b/drivers/gpu/drm/xe/xe_gt_stats.h @@ -13,6 +13,7 @@ struct drm_printer; #ifdef CONFIG_DEBUG_FS int xe_gt_stats_print_info(struct xe_gt *gt, struct drm_printer *p); +void xe_gt_stats_clear(struct xe_gt *gt); void xe_gt_stats_incr(struct xe_gt *gt, const enum xe_gt_stats_id id, int incr); #else static inline void diff --git a/drivers/gpu/drm/xe/xe_gt_stats_types.h b/drivers/gpu/drm/xe/xe_gt_stats_types.h index be3244d7133c..d8348a8de2e1 100644 --- a/drivers/gpu/drm/xe/xe_gt_stats_types.h +++ b/drivers/gpu/drm/xe/xe_gt_stats_types.h @@ -9,8 +9,41 @@ enum xe_gt_stats_id { XE_GT_STATS_ID_SVM_PAGEFAULT_COUNT, XE_GT_STATS_ID_TLB_INVAL, + XE_GT_STATS_ID_SVM_TLB_INVAL_COUNT, + XE_GT_STATS_ID_SVM_TLB_INVAL_US, XE_GT_STATS_ID_VMA_PAGEFAULT_COUNT, XE_GT_STATS_ID_VMA_PAGEFAULT_KB, + XE_GT_STATS_ID_SVM_4K_PAGEFAULT_COUNT, + XE_GT_STATS_ID_SVM_64K_PAGEFAULT_COUNT, + XE_GT_STATS_ID_SVM_2M_PAGEFAULT_COUNT, + XE_GT_STATS_ID_SVM_4K_VALID_PAGEFAULT_COUNT, + XE_GT_STATS_ID_SVM_64K_VALID_PAGEFAULT_COUNT, + XE_GT_STATS_ID_SVM_2M_VALID_PAGEFAULT_COUNT, + XE_GT_STATS_ID_SVM_4K_PAGEFAULT_US, + XE_GT_STATS_ID_SVM_64K_PAGEFAULT_US, + XE_GT_STATS_ID_SVM_2M_PAGEFAULT_US, + XE_GT_STATS_ID_SVM_4K_MIGRATE_COUNT, + XE_GT_STATS_ID_SVM_64K_MIGRATE_COUNT, + XE_GT_STATS_ID_SVM_2M_MIGRATE_COUNT, + XE_GT_STATS_ID_SVM_4K_MIGRATE_US, + XE_GT_STATS_ID_SVM_64K_MIGRATE_US, + XE_GT_STATS_ID_SVM_2M_MIGRATE_US, + XE_GT_STATS_ID_SVM_DEVICE_COPY_US, + XE_GT_STATS_ID_SVM_4K_DEVICE_COPY_US, + XE_GT_STATS_ID_SVM_64K_DEVICE_COPY_US, + XE_GT_STATS_ID_SVM_2M_DEVICE_COPY_US, + XE_GT_STATS_ID_SVM_CPU_COPY_US, + XE_GT_STATS_ID_SVM_4K_CPU_COPY_US, + XE_GT_STATS_ID_SVM_64K_CPU_COPY_US, + XE_GT_STATS_ID_SVM_2M_CPU_COPY_US, + XE_GT_STATS_ID_SVM_DEVICE_COPY_KB, + XE_GT_STATS_ID_SVM_CPU_COPY_KB, + XE_GT_STATS_ID_SVM_4K_GET_PAGES_US, + XE_GT_STATS_ID_SVM_64K_GET_PAGES_US, + XE_GT_STATS_ID_SVM_2M_GET_PAGES_US, + XE_GT_STATS_ID_SVM_4K_BIND_US, + XE_GT_STATS_ID_SVM_64K_BIND_US, + XE_GT_STATS_ID_SVM_2M_BIND_US, /* must be the last entry */ __XE_GT_STATS_NUM_IDS, }; diff --git a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c deleted file mode 100644 index 6088df8e159c..000000000000 --- a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c +++ /dev/null @@ -1,562 +0,0 @@ -// SPDX-License-Identifier: MIT -/* - * Copyright © 2023 Intel Corporation - */ - -#include "xe_gt_tlb_invalidation.h" - -#include "abi/guc_actions_abi.h" -#include "xe_device.h" -#include "xe_force_wake.h" -#include "xe_gt.h" -#include "xe_gt_printk.h" -#include "xe_guc.h" -#include "xe_guc_ct.h" -#include "xe_gt_stats.h" -#include "xe_mmio.h" -#include "xe_pm.h" -#include "xe_sriov.h" -#include "xe_trace.h" -#include "regs/xe_guc_regs.h" - -#define FENCE_STACK_BIT DMA_FENCE_FLAG_USER_BITS - -/* - * TLB inval depends on pending commands in the CT queue and then the real - * invalidation time. Double up the time to process full CT queue - * just to be on the safe side. 
- */ -static long tlb_timeout_jiffies(struct xe_gt *gt) -{ - /* this reflects what HW/GuC needs to process TLB inv request */ - const long hw_tlb_timeout = HZ / 4; - - /* this estimates actual delay caused by the CTB transport */ - long delay = xe_guc_ct_queue_proc_time_jiffies(>->uc.guc.ct); - - return hw_tlb_timeout + 2 * delay; -} - -static void xe_gt_tlb_invalidation_fence_fini(struct xe_gt_tlb_invalidation_fence *fence) -{ - if (WARN_ON_ONCE(!fence->gt)) - return; - - xe_pm_runtime_put(gt_to_xe(fence->gt)); - fence->gt = NULL; /* fini() should be called once */ -} - -static void -__invalidation_fence_signal(struct xe_device *xe, struct xe_gt_tlb_invalidation_fence *fence) -{ - bool stack = test_bit(FENCE_STACK_BIT, &fence->base.flags); - - trace_xe_gt_tlb_invalidation_fence_signal(xe, fence); - xe_gt_tlb_invalidation_fence_fini(fence); - dma_fence_signal(&fence->base); - if (!stack) - dma_fence_put(&fence->base); -} - -static void -invalidation_fence_signal(struct xe_device *xe, struct xe_gt_tlb_invalidation_fence *fence) -{ - list_del(&fence->link); - __invalidation_fence_signal(xe, fence); -} - -void xe_gt_tlb_invalidation_fence_signal(struct xe_gt_tlb_invalidation_fence *fence) -{ - if (WARN_ON_ONCE(!fence->gt)) - return; - - __invalidation_fence_signal(gt_to_xe(fence->gt), fence); -} - -static void xe_gt_tlb_fence_timeout(struct work_struct *work) -{ - struct xe_gt *gt = container_of(work, struct xe_gt, - tlb_invalidation.fence_tdr.work); - struct xe_device *xe = gt_to_xe(gt); - struct xe_gt_tlb_invalidation_fence *fence, *next; - - LNL_FLUSH_WORK(>->uc.guc.ct.g2h_worker); - - spin_lock_irq(>->tlb_invalidation.pending_lock); - list_for_each_entry_safe(fence, next, - >->tlb_invalidation.pending_fences, link) { - s64 since_inval_ms = ktime_ms_delta(ktime_get(), - fence->invalidation_time); - - if (msecs_to_jiffies(since_inval_ms) < tlb_timeout_jiffies(gt)) - break; - - trace_xe_gt_tlb_invalidation_fence_timeout(xe, fence); - xe_gt_err(gt, "TLB invalidation fence timeout, seqno=%d recv=%d", - fence->seqno, gt->tlb_invalidation.seqno_recv); - - fence->base.error = -ETIME; - invalidation_fence_signal(xe, fence); - } - if (!list_empty(>->tlb_invalidation.pending_fences)) - queue_delayed_work(system_wq, - >->tlb_invalidation.fence_tdr, - tlb_timeout_jiffies(gt)); - spin_unlock_irq(>->tlb_invalidation.pending_lock); -} - -/** - * xe_gt_tlb_invalidation_init_early - Initialize GT TLB invalidation state - * @gt: GT structure - * - * Initialize GT TLB invalidation state, purely software initialization, should - * be called once during driver load. - * - * Return: 0 on success, negative error code on error. - */ -int xe_gt_tlb_invalidation_init_early(struct xe_gt *gt) -{ - gt->tlb_invalidation.seqno = 1; - INIT_LIST_HEAD(>->tlb_invalidation.pending_fences); - spin_lock_init(>->tlb_invalidation.pending_lock); - spin_lock_init(>->tlb_invalidation.lock); - INIT_DELAYED_WORK(>->tlb_invalidation.fence_tdr, - xe_gt_tlb_fence_timeout); - - return 0; -} - -/** - * xe_gt_tlb_invalidation_reset - Initialize GT TLB invalidation reset - * @gt: GT structure - * - * Signal any pending invalidation fences, should be called during a GT reset - */ -void xe_gt_tlb_invalidation_reset(struct xe_gt *gt) -{ - struct xe_gt_tlb_invalidation_fence *fence, *next; - int pending_seqno; - - /* - * we can get here before the CTs are even initialized if we're wedging - * very early, in which case there are not going to be any pending - * fences so we can bail immediately. 
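/*
 * Illustrative sketch (not driver code) of the timed-out-fence sweep done by
 * the xe_gt_tlb_fence_timeout() worker above: the pending list is ordered by
 * send time, so the scan stops at the first entry still inside the timeout
 * window, every older entry is failed with -ETIME, and the worker re-arms
 * itself while anything is still pending.  Kernel lists, fences and jiffies
 * are replaced by a plain singly linked list and millisecond timestamps.
 *
 * compile: cc -o sweep sweep.c && ./sweep
 */
#include <errno.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

struct pending {
	struct pending *next;
	long long sent_ms;	/* when the invalidation was issued */
	int error;		/* 0, or -ETIME once timed out */
};

/* returns true when the caller should re-arm the timer: work is still pending */
static bool sweep_timeouts(struct pending **head, long long now_ms, long long timeout_ms)
{
	while (*head) {
		struct pending *p = *head;

		if (now_ms - p->sent_ms < timeout_ms)
			break;			/* ordered list: nothing later can be older */

		p->error = -ETIME;		/* "signal" the fence with a timeout ... */
		*head = p->next;		/* ... and drop it from the pending list */
	}
	return *head != NULL;
}

int main(void)
{
	struct pending c = { NULL, 950, 0 }, b = { &c, 300, 0 }, a = { &b, 100, 0 };
	struct pending *list = &a;

	printf("re-arm: %d\n", sweep_timeouts(&list, 1000, 250));	/* a and b time out, c stays */
	printf("a=%d b=%d c=%d\n", a.error, b.error, c.error);		/* -62 -62 0 */
	return 0;
}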
- */ - if (!xe_guc_ct_initialized(>->uc.guc.ct)) - return; - - /* - * CT channel is already disabled at this point. No new TLB requests can - * appear. - */ - - mutex_lock(>->uc.guc.ct.lock); - spin_lock_irq(>->tlb_invalidation.pending_lock); - cancel_delayed_work(>->tlb_invalidation.fence_tdr); - /* - * We might have various kworkers waiting for TLB flushes to complete - * which are not tracked with an explicit TLB fence, however at this - * stage that will never happen since the CT is already disabled, so - * make sure we signal them here under the assumption that we have - * completed a full GT reset. - */ - if (gt->tlb_invalidation.seqno == 1) - pending_seqno = TLB_INVALIDATION_SEQNO_MAX - 1; - else - pending_seqno = gt->tlb_invalidation.seqno - 1; - WRITE_ONCE(gt->tlb_invalidation.seqno_recv, pending_seqno); - - list_for_each_entry_safe(fence, next, - >->tlb_invalidation.pending_fences, link) - invalidation_fence_signal(gt_to_xe(gt), fence); - spin_unlock_irq(>->tlb_invalidation.pending_lock); - mutex_unlock(>->uc.guc.ct.lock); -} - -static bool tlb_invalidation_seqno_past(struct xe_gt *gt, int seqno) -{ - int seqno_recv = READ_ONCE(gt->tlb_invalidation.seqno_recv); - - if (seqno - seqno_recv < -(TLB_INVALIDATION_SEQNO_MAX / 2)) - return false; - - if (seqno - seqno_recv > (TLB_INVALIDATION_SEQNO_MAX / 2)) - return true; - - return seqno_recv >= seqno; -} - -static int send_tlb_invalidation(struct xe_guc *guc, - struct xe_gt_tlb_invalidation_fence *fence, - u32 *action, int len) -{ - struct xe_gt *gt = guc_to_gt(guc); - struct xe_device *xe = gt_to_xe(gt); - int seqno; - int ret; - - xe_gt_assert(gt, fence); - - /* - * XXX: The seqno algorithm relies on TLB invalidation being processed - * in order which they currently are, if that changes the algorithm will - * need to be updated. - */ - - mutex_lock(&guc->ct.lock); - seqno = gt->tlb_invalidation.seqno; - fence->seqno = seqno; - trace_xe_gt_tlb_invalidation_fence_send(xe, fence); - action[1] = seqno; - ret = xe_guc_ct_send_locked(&guc->ct, action, len, - G2H_LEN_DW_TLB_INVALIDATE, 1); - if (!ret) { - spin_lock_irq(>->tlb_invalidation.pending_lock); - /* - * We haven't actually published the TLB fence as per - * pending_fences, but in theory our seqno could have already - * been written as we acquired the pending_lock. In such a case - * we can just go ahead and signal the fence here. 
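/*
 * Illustrative sketch (not driver code) of the wrap-safe comparison in
 * tlb_invalidation_seqno_past() above: seqnos live in [1, SEQNO_MAX) and wrap
 * around, so the raw difference is first compared against half the window to
 * decide on which side of the wrap the queried seqno sits, and only then does
 * the plain ordering check apply.
 *
 * compile: cc -o seqno seqno.c && ./seqno
 */
#include <stdbool.h>
#include <stdio.h>

#define SEQNO_MAX 0x100000	/* same window as TLB_INVALIDATION_SEQNO_MAX */

/* has 'seqno' already been acknowledged, given the last received 'recv'? */
static bool seqno_past(int recv, int seqno)
{
	if (seqno - recv < -(SEQNO_MAX / 2))
		return false;	/* numerically far below recv: it is newer, just past the wrap */
	if (seqno - recv > (SEQNO_MAX / 2))
		return true;	/* numerically far above recv: it is older, from before the wrap */
	return recv >= seqno;
}

int main(void)
{
	printf("%d\n", seqno_past(10, 5));		/* 1: plainly in the past */
	printf("%d\n", seqno_past(10, 20));		/* 0: not acknowledged yet */
	printf("%d\n", seqno_past(3, SEQNO_MAX - 2));	/* 1: recv already wrapped past it */
	return 0;
}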
- */ - if (tlb_invalidation_seqno_past(gt, seqno)) { - __invalidation_fence_signal(xe, fence); - } else { - fence->invalidation_time = ktime_get(); - list_add_tail(&fence->link, - >->tlb_invalidation.pending_fences); - - if (list_is_singular(>->tlb_invalidation.pending_fences)) - queue_delayed_work(system_wq, - >->tlb_invalidation.fence_tdr, - tlb_timeout_jiffies(gt)); - } - spin_unlock_irq(>->tlb_invalidation.pending_lock); - } else { - __invalidation_fence_signal(xe, fence); - } - if (!ret) { - gt->tlb_invalidation.seqno = (gt->tlb_invalidation.seqno + 1) % - TLB_INVALIDATION_SEQNO_MAX; - if (!gt->tlb_invalidation.seqno) - gt->tlb_invalidation.seqno = 1; - } - mutex_unlock(&guc->ct.lock); - xe_gt_stats_incr(gt, XE_GT_STATS_ID_TLB_INVAL, 1); - - return ret; -} - -#define MAKE_INVAL_OP(type) ((type << XE_GUC_TLB_INVAL_TYPE_SHIFT) | \ - XE_GUC_TLB_INVAL_MODE_HEAVY << XE_GUC_TLB_INVAL_MODE_SHIFT | \ - XE_GUC_TLB_INVAL_FLUSH_CACHE) - -/** - * xe_gt_tlb_invalidation_guc - Issue a TLB invalidation on this GT for the GuC - * @gt: GT structure - * @fence: invalidation fence which will be signal on TLB invalidation - * completion - * - * Issue a TLB invalidation for the GuC. Completion of TLB is asynchronous and - * caller can use the invalidation fence to wait for completion. - * - * Return: 0 on success, negative error code on error - */ -static int xe_gt_tlb_invalidation_guc(struct xe_gt *gt, - struct xe_gt_tlb_invalidation_fence *fence) -{ - u32 action[] = { - XE_GUC_ACTION_TLB_INVALIDATION, - 0, /* seqno, replaced in send_tlb_invalidation */ - MAKE_INVAL_OP(XE_GUC_TLB_INVAL_GUC), - }; - int ret; - - ret = send_tlb_invalidation(>->uc.guc, fence, action, - ARRAY_SIZE(action)); - /* - * -ECANCELED indicates the CT is stopped for a GT reset. TLB caches - * should be nuked on a GT reset so this error can be ignored. - */ - if (ret == -ECANCELED) - return 0; - - return ret; -} - -/** - * xe_gt_tlb_invalidation_ggtt - Issue a TLB invalidation on this GT for the GGTT - * @gt: GT structure - * - * Issue a TLB invalidation for the GGTT. Completion of TLB invalidation is - * synchronous. - * - * Return: 0 on success, negative error code on error - */ -int xe_gt_tlb_invalidation_ggtt(struct xe_gt *gt) -{ - struct xe_device *xe = gt_to_xe(gt); - unsigned int fw_ref; - - if (xe_guc_ct_enabled(>->uc.guc.ct) && - gt->uc.guc.submission_state.enabled) { - struct xe_gt_tlb_invalidation_fence fence; - int ret; - - xe_gt_tlb_invalidation_fence_init(gt, &fence, true); - ret = xe_gt_tlb_invalidation_guc(gt, &fence); - if (ret) - return ret; - - xe_gt_tlb_invalidation_fence_wait(&fence); - } else if (xe_device_uc_enabled(xe) && !xe_device_wedged(xe)) { - struct xe_mmio *mmio = >->mmio; - - if (IS_SRIOV_VF(xe)) - return 0; - - fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); - if (xe->info.platform == XE_PVC || GRAPHICS_VER(xe) >= 20) { - xe_mmio_write32(mmio, PVC_GUC_TLB_INV_DESC1, - PVC_GUC_TLB_INV_DESC1_INVALIDATE); - xe_mmio_write32(mmio, PVC_GUC_TLB_INV_DESC0, - PVC_GUC_TLB_INV_DESC0_VALID); - } else { - xe_mmio_write32(mmio, GUC_TLB_INV_CR, - GUC_TLB_INV_CR_INVALIDATE); - } - xe_force_wake_put(gt_to_fw(gt), fw_ref); - } - - return 0; -} - -/* - * Ensure that roundup_pow_of_two(length) doesn't overflow. - * Note that roundup_pow_of_two() operates on unsigned long, - * not on u64. 
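/*
 * Illustrative sketch (not driver code) of the seqno advance in
 * send_tlb_invalidation() above: the counter wraps inside
 * [1, TLB_INVALIDATION_SEQNO_MAX) and never lands on 0.
 *
 * compile: cc -o next_seqno next_seqno.c && ./next_seqno
 */
#include <stdio.h>

#define SEQNO_MAX 0x100000

static int next_seqno(int seqno)
{
	seqno = (seqno + 1) % SEQNO_MAX;
	return seqno ? seqno : 1;	/* skip 0 on wrap */
}

int main(void)
{
	printf("%d\n", next_seqno(5));			/* 6 */
	printf("%d\n", next_seqno(SEQNO_MAX - 1));	/* 1 */
	return 0;
}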
- */ -#define MAX_RANGE_TLB_INVALIDATION_LENGTH (rounddown_pow_of_two(ULONG_MAX)) - -/** - * xe_gt_tlb_invalidation_range - Issue a TLB invalidation on this GT for an - * address range - * - * @gt: GT structure - * @fence: invalidation fence which will be signal on TLB invalidation - * completion - * @start: start address - * @end: end address - * @asid: address space id - * - * Issue a range based TLB invalidation if supported, if not fallback to a full - * TLB invalidation. Completion of TLB is asynchronous and caller can use - * the invalidation fence to wait for completion. - * - * Return: Negative error code on error, 0 on success - */ -int xe_gt_tlb_invalidation_range(struct xe_gt *gt, - struct xe_gt_tlb_invalidation_fence *fence, - u64 start, u64 end, u32 asid) -{ - struct xe_device *xe = gt_to_xe(gt); -#define MAX_TLB_INVALIDATION_LEN 7 - u32 action[MAX_TLB_INVALIDATION_LEN]; - u64 length = end - start; - int len = 0; - - xe_gt_assert(gt, fence); - - /* Execlists not supported */ - if (gt_to_xe(gt)->info.force_execlist) { - __invalidation_fence_signal(xe, fence); - return 0; - } - - action[len++] = XE_GUC_ACTION_TLB_INVALIDATION; - action[len++] = 0; /* seqno, replaced in send_tlb_invalidation */ - if (!xe->info.has_range_tlb_invalidation || - length > MAX_RANGE_TLB_INVALIDATION_LENGTH) { - action[len++] = MAKE_INVAL_OP(XE_GUC_TLB_INVAL_FULL); - } else { - u64 orig_start = start; - u64 align; - - if (length < SZ_4K) - length = SZ_4K; - - /* - * We need to invalidate a higher granularity if start address - * is not aligned to length. When start is not aligned with - * length we need to find the length large enough to create an - * address mask covering the required range. - */ - align = roundup_pow_of_two(length); - start = ALIGN_DOWN(start, align); - end = ALIGN(end, align); - length = align; - while (start + length < end) { - length <<= 1; - start = ALIGN_DOWN(orig_start, length); - } - - /* - * Minimum invalidation size for a 2MB page that the hardware - * expects is 16MB - */ - if (length >= SZ_2M) { - length = max_t(u64, SZ_16M, length); - start = ALIGN_DOWN(orig_start, length); - } - - xe_gt_assert(gt, length >= SZ_4K); - xe_gt_assert(gt, is_power_of_2(length)); - xe_gt_assert(gt, !(length & GENMASK(ilog2(SZ_16M) - 1, - ilog2(SZ_2M) + 1))); - xe_gt_assert(gt, IS_ALIGNED(start, length)); - - action[len++] = MAKE_INVAL_OP(XE_GUC_TLB_INVAL_PAGE_SELECTIVE); - action[len++] = asid; - action[len++] = lower_32_bits(start); - action[len++] = upper_32_bits(start); - action[len++] = ilog2(length) - ilog2(SZ_4K); - } - - xe_gt_assert(gt, len <= MAX_TLB_INVALIDATION_LEN); - - return send_tlb_invalidation(>->uc.guc, fence, action, len); -} - -/** - * xe_gt_tlb_invalidation_vm - Issue a TLB invalidation on this GT for a VM - * @gt: graphics tile - * @vm: VM to invalidate - * - * Invalidate entire VM's address space - */ -void xe_gt_tlb_invalidation_vm(struct xe_gt *gt, struct xe_vm *vm) -{ - struct xe_gt_tlb_invalidation_fence fence; - u64 range = 1ull << vm->xe->info.va_bits; - int ret; - - xe_gt_tlb_invalidation_fence_init(gt, &fence, true); - - ret = xe_gt_tlb_invalidation_range(gt, &fence, 0, range, vm->usm.asid); - if (ret < 0) - return; - - xe_gt_tlb_invalidation_fence_wait(&fence); -} - -/** - * xe_guc_tlb_invalidation_done_handler - TLB invalidation done handler - * @guc: guc - * @msg: message indicating TLB invalidation done - * @len: length of message - * - * Parse seqno of TLB invalidation, wake any waiters for seqno, and signal any - * invalidation fences for seqno. 
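/*
 * Illustrative user-space sketch (not driver code) of the address-mask
 * selection in the relocated xe_gt_tlb_invalidation_range() above: the
 * hardware takes one naturally aligned, power-of-two sized block, so the
 * start is aligned down and the length grown until a single block covers the
 * requested range, and anything 2M or larger is bumped to the 16M minimum.
 * The ALIGN helpers and size constants below are local stand-ins.
 *
 * compile: cc -o tlb_range tlb_range.c && ./tlb_range
 */
#include <stdint.h>
#include <stdio.h>

#define SZ_4K  0x1000ull
#define SZ_2M  0x200000ull
#define SZ_16M 0x1000000ull

#define ALIGN(x, a)      (((x) + (a) - 1) & ~((a) - 1))
#define ALIGN_DOWN(x, a) ((x) & ~((a) - 1))

static uint64_t roundup_pow_of_two(uint64_t x)
{
	uint64_t r = 1;

	while (r < x)
		r <<= 1;
	return r;
}

/* pick an aligned power-of-two block [*out_start, *out_start + *out_len) covering [start, end) */
static void pick_inval_range(uint64_t start, uint64_t end,
			     uint64_t *out_start, uint64_t *out_len)
{
	uint64_t orig_start = start;
	uint64_t length = end - start;
	uint64_t align;

	if (length < SZ_4K)
		length = SZ_4K;

	align = roundup_pow_of_two(length);
	start = ALIGN_DOWN(start, align);
	end = ALIGN(end, align);
	length = align;
	while (start + length < end) {		/* grow until one aligned block covers it */
		length <<= 1;
		start = ALIGN_DOWN(orig_start, length);
	}

	if (length >= SZ_2M) {			/* 2M pages need at least a 16M invalidation */
		length = length > SZ_16M ? length : SZ_16M;
		start = ALIGN_DOWN(orig_start, length);
	}

	*out_start = start;
	*out_len = length;
}

int main(void)
{
	uint64_t s, l;

	/* an unaligned 8K range becomes one aligned 16K invalidation */
	pick_inval_range(0x5000, 0x7000, &s, &l);
	printf("start=0x%llx len=0x%llx\n",
	       (unsigned long long)s, (unsigned long long)l);	/* start=0x4000 len=0x4000 */
	return 0;
}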
Algorithm for this depends on seqno being - * received in-order and asserts this assumption. - * - * Return: 0 on success, -EPROTO for malformed messages. - */ -int xe_guc_tlb_invalidation_done_handler(struct xe_guc *guc, u32 *msg, u32 len) -{ - struct xe_gt *gt = guc_to_gt(guc); - struct xe_device *xe = gt_to_xe(gt); - struct xe_gt_tlb_invalidation_fence *fence, *next; - unsigned long flags; - - if (unlikely(len != 1)) - return -EPROTO; - - /* - * This can also be run both directly from the IRQ handler and also in - * process_g2h_msg(). Only one may process any individual CT message, - * however the order they are processed here could result in skipping a - * seqno. To handle that we just process all the seqnos from the last - * seqno_recv up to and including the one in msg[0]. The delta should be - * very small so there shouldn't be much of pending_fences we actually - * need to iterate over here. - * - * From GuC POV we expect the seqnos to always appear in-order, so if we - * see something later in the timeline we can be sure that anything - * appearing earlier has already signalled, just that we have yet to - * officially process the CT message like if racing against - * process_g2h_msg(). - */ - spin_lock_irqsave(>->tlb_invalidation.pending_lock, flags); - if (tlb_invalidation_seqno_past(gt, msg[0])) { - spin_unlock_irqrestore(>->tlb_invalidation.pending_lock, flags); - return 0; - } - - WRITE_ONCE(gt->tlb_invalidation.seqno_recv, msg[0]); - - list_for_each_entry_safe(fence, next, - >->tlb_invalidation.pending_fences, link) { - trace_xe_gt_tlb_invalidation_fence_recv(xe, fence); - - if (!tlb_invalidation_seqno_past(gt, fence->seqno)) - break; - - invalidation_fence_signal(xe, fence); - } - - if (!list_empty(>->tlb_invalidation.pending_fences)) - mod_delayed_work(system_wq, - >->tlb_invalidation.fence_tdr, - tlb_timeout_jiffies(gt)); - else - cancel_delayed_work(>->tlb_invalidation.fence_tdr); - - spin_unlock_irqrestore(>->tlb_invalidation.pending_lock, flags); - - return 0; -} - -static const char * -invalidation_fence_get_driver_name(struct dma_fence *dma_fence) -{ - return "xe"; -} - -static const char * -invalidation_fence_get_timeline_name(struct dma_fence *dma_fence) -{ - return "invalidation_fence"; -} - -static const struct dma_fence_ops invalidation_fence_ops = { - .get_driver_name = invalidation_fence_get_driver_name, - .get_timeline_name = invalidation_fence_get_timeline_name, -}; - -/** - * xe_gt_tlb_invalidation_fence_init - Initialize TLB invalidation fence - * @gt: GT - * @fence: TLB invalidation fence to initialize - * @stack: fence is stack variable - * - * Initialize TLB invalidation fence for use. xe_gt_tlb_invalidation_fence_fini - * will be automatically called when fence is signalled (all fences must signal), - * even on error. 
- */ -void xe_gt_tlb_invalidation_fence_init(struct xe_gt *gt, - struct xe_gt_tlb_invalidation_fence *fence, - bool stack) -{ - xe_pm_runtime_get_noresume(gt_to_xe(gt)); - - spin_lock_irq(>->tlb_invalidation.lock); - dma_fence_init(&fence->base, &invalidation_fence_ops, - >->tlb_invalidation.lock, - dma_fence_context_alloc(1), 1); - spin_unlock_irq(>->tlb_invalidation.lock); - INIT_LIST_HEAD(&fence->link); - if (stack) - set_bit(FENCE_STACK_BIT, &fence->base.flags); - else - dma_fence_get(&fence->base); - fence->gt = gt; -} diff --git a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h deleted file mode 100644 index 31072dbcad8e..000000000000 --- a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h +++ /dev/null @@ -1,39 +0,0 @@ -/* SPDX-License-Identifier: MIT */ -/* - * Copyright © 2023 Intel Corporation - */ - -#ifndef _XE_GT_TLB_INVALIDATION_H_ -#define _XE_GT_TLB_INVALIDATION_H_ - -#include <linux/types.h> - -#include "xe_gt_tlb_invalidation_types.h" - -struct xe_gt; -struct xe_guc; -struct xe_vm; -struct xe_vma; - -int xe_gt_tlb_invalidation_init_early(struct xe_gt *gt); - -void xe_gt_tlb_invalidation_reset(struct xe_gt *gt); -int xe_gt_tlb_invalidation_ggtt(struct xe_gt *gt); -void xe_gt_tlb_invalidation_vm(struct xe_gt *gt, struct xe_vm *vm); -int xe_gt_tlb_invalidation_range(struct xe_gt *gt, - struct xe_gt_tlb_invalidation_fence *fence, - u64 start, u64 end, u32 asid); -int xe_guc_tlb_invalidation_done_handler(struct xe_guc *guc, u32 *msg, u32 len); - -void xe_gt_tlb_invalidation_fence_init(struct xe_gt *gt, - struct xe_gt_tlb_invalidation_fence *fence, - bool stack); -void xe_gt_tlb_invalidation_fence_signal(struct xe_gt_tlb_invalidation_fence *fence); - -static inline void -xe_gt_tlb_invalidation_fence_wait(struct xe_gt_tlb_invalidation_fence *fence) -{ - dma_fence_wait(&fence->base, false); -} - -#endif /* _XE_GT_TLB_INVALIDATION_ */ diff --git a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation_types.h b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation_types.h deleted file mode 100644 index de6e825e0851..000000000000 --- a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation_types.h +++ /dev/null @@ -1,32 +0,0 @@ -/* SPDX-License-Identifier: MIT */ -/* - * Copyright © 2023 Intel Corporation - */ - -#ifndef _XE_GT_TLB_INVALIDATION_TYPES_H_ -#define _XE_GT_TLB_INVALIDATION_TYPES_H_ - -#include <linux/dma-fence.h> - -struct xe_gt; - -/** - * struct xe_gt_tlb_invalidation_fence - XE GT TLB invalidation fence - * - * Optionally passed to xe_gt_tlb_invalidation and will be signaled upon TLB - * invalidation completion. 
- */ -struct xe_gt_tlb_invalidation_fence { - /** @base: dma fence base */ - struct dma_fence base; - /** @gt: GT which fence belong to */ - struct xe_gt *gt; - /** @link: link into list of pending tlb fences */ - struct list_head link; - /** @seqno: seqno of TLB invalidation to signal fence one */ - int seqno; - /** @invalidation_time: time of TLB invalidation */ - ktime_t invalidation_time; -}; - -#endif diff --git a/drivers/gpu/drm/xe/xe_gt_topology.c b/drivers/gpu/drm/xe/xe_gt_topology.c index 305939c69747..4e61c5e39bcb 100644 --- a/drivers/gpu/drm/xe/xe_gt_topology.c +++ b/drivers/gpu/drm/xe/xe_gt_topology.c @@ -12,6 +12,7 @@ #include "regs/xe_gt_regs.h" #include "xe_assert.h" #include "xe_gt.h" +#include "xe_gt_mcr.h" #include "xe_gt_printk.h" #include "xe_mmio.h" #include "xe_wa.h" @@ -122,6 +123,21 @@ gen_l3_mask_from_pattern(struct xe_device *xe, xe_l3_bank_mask_t dst, } } +bool xe_gt_topology_report_l3(struct xe_gt *gt) +{ + /* + * No known userspace needs/uses the L3 bank mask reported by + * the media GT, and the hardware itself is known to report bogus + * values on several platforms. Only report L3 bank mask as part + * of the media GT's topology on pre-Xe3 platforms since that's + * already part of our ABI. + */ + if (xe_gt_is_media_type(gt) && MEDIA_VER(gt_to_xe(gt)) >= 30) + return false; + + return true; +} + static void load_l3_bank_mask(struct xe_gt *gt, xe_l3_bank_mask_t l3_bank_mask) { @@ -129,16 +145,7 @@ load_l3_bank_mask(struct xe_gt *gt, xe_l3_bank_mask_t l3_bank_mask) struct xe_mmio *mmio = >->mmio; u32 fuse3 = xe_mmio_read32(mmio, MIRROR_FUSE3); - /* - * PTL platforms with media version 30.00 do not provide proper values - * for the media GT's L3 bank registers. Skip the readout since we - * don't have any way to obtain real values. - * - * This may get re-described as an official workaround in the future, - * but there's no tracking number assigned yet so we use a custom - * OOB workaround descriptor. 
- */ - if (XE_WA(gt, no_media_l3)) + if (!xe_gt_topology_report_l3(gt)) return; if (GRAPHICS_VER(xe) >= 30) { @@ -275,8 +282,9 @@ xe_gt_topology_dump(struct xe_gt *gt, struct drm_printer *p) drm_printf(p, "EU type: %s\n", eu_type_to_str(gt->fuse_topo.eu_type)); - drm_printf(p, "L3 bank mask: %*pb\n", XE_MAX_L3_BANK_MASK_BITS, - gt->fuse_topo.l3_bank_mask); + if (xe_gt_topology_report_l3(gt)) + drm_printf(p, "L3 bank mask: %*pb\n", XE_MAX_L3_BANK_MASK_BITS, + gt->fuse_topo.l3_bank_mask); } /* @@ -290,11 +298,6 @@ xe_dss_mask_group_ffs(const xe_dss_mask_t mask, int groupsize, int groupnum) return find_next_bit(mask, XE_MAX_DSS_FUSE_BITS, groupnum * groupsize); } -bool xe_dss_mask_empty(const xe_dss_mask_t mask) -{ - return bitmap_empty(mask, XE_MAX_DSS_FUSE_BITS); -} - /** * xe_gt_topology_has_dss_in_quadrant - check fusing of DSS in GT quadrant * @gt: GT to check @@ -333,3 +336,19 @@ bool xe_gt_has_compute_dss(struct xe_gt *gt, unsigned int dss) { return test_bit(dss, gt->fuse_topo.c_dss_mask); } + +bool xe_gt_has_discontiguous_dss_groups(const struct xe_gt *gt) +{ + unsigned int xecore; + int last_group = -1; + u16 group, instance; + + for_each_dss_steering(xecore, gt, group, instance) { + if (last_group != group) { + if (group - last_group > 1) + return true; + last_group = group; + } + } + return false; +} diff --git a/drivers/gpu/drm/xe/xe_gt_topology.h b/drivers/gpu/drm/xe/xe_gt_topology.h index a72d26ba0653..5e62f5949b7b 100644 --- a/drivers/gpu/drm/xe/xe_gt_topology.h +++ b/drivers/gpu/drm/xe/xe_gt_topology.h @@ -41,12 +41,14 @@ xe_gt_topology_mask_last_dss(const xe_dss_mask_t mask) unsigned int xe_dss_mask_group_ffs(const xe_dss_mask_t mask, int groupsize, int groupnum); -bool xe_dss_mask_empty(const xe_dss_mask_t mask); - bool xe_gt_topology_has_dss_in_quadrant(struct xe_gt *gt, int quad); bool xe_gt_has_geometry_dss(struct xe_gt *gt, unsigned int dss); bool xe_gt_has_compute_dss(struct xe_gt *gt, unsigned int dss); +bool xe_gt_has_discontiguous_dss_groups(const struct xe_gt *gt); + +bool xe_gt_topology_report_l3(struct xe_gt *gt); + #endif /* _XE_GT_TOPOLOGY_H_ */ diff --git a/drivers/gpu/drm/xe/xe_gt_types.h b/drivers/gpu/drm/xe/xe_gt_types.h index 7def0959da35..66158105aca5 100644 --- a/drivers/gpu/drm/xe/xe_gt_types.h +++ b/drivers/gpu/drm/xe/xe_gt_types.h @@ -17,6 +17,7 @@ #include "xe_oa_types.h" #include "xe_reg_sr_types.h" #include "xe_sa_types.h" +#include "xe_tlb_inval_types.h" #include "xe_uc_types.h" struct xe_exec_queue_ops; @@ -185,34 +186,8 @@ struct xe_gt { struct work_struct worker; } reset; - /** @tlb_invalidation: TLB invalidation state */ - struct { - /** @tlb_invalidation.seqno: TLB invalidation seqno, protected by CT lock */ -#define TLB_INVALIDATION_SEQNO_MAX 0x100000 - int seqno; - /** - * @tlb_invalidation.seqno_recv: last received TLB invalidation seqno, - * protected by CT lock - */ - int seqno_recv; - /** - * @tlb_invalidation.pending_fences: list of pending fences waiting TLB - * invaliations, protected by CT lock - */ - struct list_head pending_fences; - /** - * @tlb_invalidation.pending_lock: protects @tlb_invalidation.pending_fences - * and updating @tlb_invalidation.seqno_recv. - */ - spinlock_t pending_lock; - /** - * @tlb_invalidation.fence_tdr: schedules a delayed call to - * xe_gt_tlb_fence_timeout after the timeut interval is over. 
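/*
 * Illustrative sketch (not driver code) of the group-gap scan added as
 * xe_gt_has_discontiguous_dss_groups() above: walking the enabled DSS in
 * (group, instance) order, any jump of more than one between successive group
 * indices (including a missing group 0, since the scan starts from -1) marks
 * the layout as discontiguous.  The sample data below is made up.
 *
 * compile: cc -o dss_groups dss_groups.c && ./dss_groups
 */
#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

/* 'groups' lists the group index of each enabled DSS, in steering order */
static bool has_discontiguous_groups(const int *groups, size_t count)
{
	int last_group = -1;
	size_t i;

	for (i = 0; i < count; i++) {
		if (groups[i] != last_group) {
			if (groups[i] - last_group > 1)
				return true;	/* a whole group is missing in between */
			last_group = groups[i];
		}
	}
	return false;
}

int main(void)
{
	const int contiguous[] = { 0, 0, 1, 1, 2 };
	const int gappy[]      = { 0, 0, 2, 2, 3 };	/* group 1 fully fused off */

	printf("%d %d\n", has_discontiguous_groups(contiguous, 5),
	       has_discontiguous_groups(gappy, 5));	/* 0 1 */
	return 0;
}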
- */ - struct delayed_work fence_tdr; - /** @tlb_invalidation.lock: protects TLB invalidation fences */ - spinlock_t lock; - } tlb_invalidation; + /** @tlb_inval: TLB invalidation state */ + struct xe_tlb_inval tlb_inval; /** * @ccs_mode: Number of compute engines enabled. @@ -377,6 +352,8 @@ struct xe_gt { u16 group_target; /** @steering.instance_target: instance to steer accesses to */ u16 instance_target; + /** @steering.initialized: Whether this steering range is initialized */ + bool initialized; } steering[NUM_STEERING_TYPES]; /** @@ -409,7 +386,7 @@ struct xe_gt { unsigned long *oob; /** * @wa_active.oob_initialized: mark oob as initialized to help - * detecting misuse of XE_WA() - it can only be called on + * detecting misuse of XE_GT_WA() - it can only be called on * initialization after OOB WAs have being processed */ bool oob_initialized; diff --git a/drivers/gpu/drm/xe/xe_guc.c b/drivers/gpu/drm/xe/xe_guc.c index 209e5d53c290..00789844ea4d 100644 --- a/drivers/gpu/drm/xe/xe_guc.c +++ b/drivers/gpu/drm/xe/xe_guc.c @@ -16,6 +16,7 @@ #include "regs/xe_guc_regs.h" #include "regs/xe_irq_regs.h" #include "xe_bo.h" +#include "xe_configfs.h" #include "xe_device.h" #include "xe_force_wake.h" #include "xe_gt.h" @@ -29,6 +30,7 @@ #include "xe_guc_db_mgr.h" #include "xe_guc_engine_activity.h" #include "xe_guc_hwconfig.h" +#include "xe_guc_klv_helpers.h" #include "xe_guc_log.h" #include "xe_guc_pc.h" #include "xe_guc_relay.h" @@ -59,7 +61,7 @@ static u32 guc_bo_ggtt_addr(struct xe_guc *guc, /* GuC addresses above GUC_GGTT_TOP don't map through the GTT */ xe_assert(xe, addr >= xe_wopcm_size(guc_to_xe(guc))); xe_assert(xe, addr < GUC_GGTT_TOP); - xe_assert(xe, bo->size <= GUC_GGTT_TOP - addr); + xe_assert(xe, xe_bo_size(bo) <= GUC_GGTT_TOP - addr); return addr; } @@ -72,19 +74,22 @@ static u32 guc_ctl_debug_flags(struct xe_guc *guc) if (!GUC_LOG_LEVEL_IS_VERBOSE(level)) flags |= GUC_LOG_DISABLED; else - flags |= GUC_LOG_LEVEL_TO_VERBOSITY(level) << - GUC_LOG_VERBOSITY_SHIFT; + flags |= FIELD_PREP(GUC_LOG_VERBOSITY, GUC_LOG_LEVEL_TO_VERBOSITY(level)); return flags; } static u32 guc_ctl_feature_flags(struct xe_guc *guc) { + struct xe_device *xe = guc_to_xe(guc); u32 flags = GUC_CTL_ENABLE_LITE_RESTORE; - if (!guc_to_xe(guc)->info.skip_guc_pc) + if (!xe->info.skip_guc_pc) flags |= GUC_CTL_ENABLE_SLPC; + if (xe_configfs_get_psmi_enabled(to_pci_dev(xe->drm.dev))) + flags |= GUC_CTL_ENABLE_PSMI_LOGGING; + return flags; } @@ -116,22 +121,14 @@ static u32 guc_ctl_log_params_flags(struct xe_guc *guc) BUILD_BUG_ON(!CAPTURE_BUFFER_SIZE); BUILD_BUG_ON(!IS_ALIGNED(CAPTURE_BUFFER_SIZE, CAPTURE_UNIT)); - BUILD_BUG_ON((CRASH_BUFFER_SIZE / LOG_UNIT - 1) > - (GUC_LOG_CRASH_MASK >> GUC_LOG_CRASH_SHIFT)); - BUILD_BUG_ON((DEBUG_BUFFER_SIZE / LOG_UNIT - 1) > - (GUC_LOG_DEBUG_MASK >> GUC_LOG_DEBUG_SHIFT)); - BUILD_BUG_ON((CAPTURE_BUFFER_SIZE / CAPTURE_UNIT - 1) > - (GUC_LOG_CAPTURE_MASK >> GUC_LOG_CAPTURE_SHIFT)); - flags = GUC_LOG_VALID | GUC_LOG_NOTIFY_ON_HALF_FULL | CAPTURE_FLAG | LOG_FLAG | - ((CRASH_BUFFER_SIZE / LOG_UNIT - 1) << GUC_LOG_CRASH_SHIFT) | - ((DEBUG_BUFFER_SIZE / LOG_UNIT - 1) << GUC_LOG_DEBUG_SHIFT) | - ((CAPTURE_BUFFER_SIZE / CAPTURE_UNIT - 1) << - GUC_LOG_CAPTURE_SHIFT) | - (offset << GUC_LOG_BUF_ADDR_SHIFT); + FIELD_PREP(GUC_LOG_CRASH, CRASH_BUFFER_SIZE / LOG_UNIT - 1) | + FIELD_PREP(GUC_LOG_DEBUG, DEBUG_BUFFER_SIZE / LOG_UNIT - 1) | + FIELD_PREP(GUC_LOG_CAPTURE, CAPTURE_BUFFER_SIZE / CAPTURE_UNIT - 1) | + FIELD_PREP(GUC_LOG_BUF_ADDR, offset); #undef LOG_UNIT #undef LOG_FLAG @@ -144,7 +141,7 
@@ static u32 guc_ctl_log_params_flags(struct xe_guc *guc) static u32 guc_ctl_ads_flags(struct xe_guc *guc) { u32 ads = guc_bo_ggtt_addr(guc, guc->ads.bo) >> PAGE_SHIFT; - u32 flags = ads << GUC_ADS_ADDR_SHIFT; + u32 flags = FIELD_PREP(GUC_ADS_ADDR, ads); return flags; } @@ -156,7 +153,7 @@ static bool needs_wa_dual_queue(struct xe_gt *gt) * on RCS and CCSes with different address spaces, which on DG2 is * required as a WA for an HW bug. */ - if (XE_WA(gt, 22011391025)) + if (XE_GT_WA(gt, 22011391025)) return true; /* @@ -183,10 +180,10 @@ static u32 guc_ctl_wa_flags(struct xe_guc *guc) struct xe_gt *gt = guc_to_gt(guc); u32 flags = 0; - if (XE_WA(gt, 22012773006)) + if (XE_GT_WA(gt, 22012773006)) flags |= GUC_WA_POLLCS; - if (XE_WA(gt, 14014475959)) + if (XE_GT_WA(gt, 14014475959)) flags |= GUC_WA_HOLD_CCS_SWITCHOUT; if (needs_wa_dual_queue(gt)) @@ -200,19 +197,22 @@ static u32 guc_ctl_wa_flags(struct xe_guc *guc) if (GRAPHICS_VERx100(xe) < 1270) flags |= GUC_WA_PRE_PARSER; - if (XE_WA(gt, 22012727170) || XE_WA(gt, 22012727685)) + if (XE_GT_WA(gt, 22012727170) || XE_GT_WA(gt, 22012727685)) flags |= GUC_WA_CONTEXT_ISOLATION; - if (XE_WA(gt, 18020744125) && + if (XE_GT_WA(gt, 18020744125) && !xe_hw_engine_mask_per_class(gt, XE_ENGINE_CLASS_RENDER)) flags |= GUC_WA_RCS_REGS_IN_CCS_REGS_LIST; - if (XE_WA(gt, 1509372804)) + if (XE_GT_WA(gt, 1509372804)) flags |= GUC_WA_RENDER_RST_RC6_EXIT; - if (XE_WA(gt, 14018913170)) + if (XE_GT_WA(gt, 14018913170)) flags |= GUC_WA_ENABLE_TSC_CHECK_ON_RC6; + if (XE_GT_WA(gt, 16023683509)) + flags |= GUC_WA_SAVE_RESTORE_MCFG_REG_AT_MC6; + return flags; } @@ -420,7 +420,7 @@ static int guc_g2g_register(struct xe_guc *near_guc, struct xe_gt *far_gt, u32 t buf = base + G2G_DESC_AREA_SIZE + slot * G2G_BUFFER_SIZE; xe_assert(xe, (desc - base + G2G_DESC_SIZE) <= G2G_DESC_AREA_SIZE); - xe_assert(xe, (buf - base + G2G_BUFFER_SIZE) <= g2g_bo->size); + xe_assert(xe, (buf - base + G2G_BUFFER_SIZE) <= xe_bo_size(g2g_bo)); return guc_action_register_g2g_buffer(near_guc, type, far_tile, far_dev, desc, buf, G2G_BUFFER_SIZE); @@ -570,6 +570,86 @@ err_deregister: return err; } +static int __guc_opt_in_features_enable(struct xe_guc *guc, u64 addr, u32 num_dwords) +{ + u32 action[] = { + XE_GUC_ACTION_OPT_IN_FEATURE_KLV, + lower_32_bits(addr), + upper_32_bits(addr), + num_dwords + }; + + return xe_guc_ct_send_block(&guc->ct, action, ARRAY_SIZE(action)); +} + +static bool supports_dynamic_ics(struct xe_guc *guc) +{ + struct xe_device *xe = guc_to_xe(guc); + struct xe_gt *gt = guc_to_gt(guc); + + /* Dynamic ICS is available for PVC and Xe2 and newer platforms. */ + if (xe->info.platform != XE_PVC && GRAPHICS_VER(xe) < 20) + return false; + + /* + * The feature is currently not compatible with multi-lrc, so the GuC + * does not support it at all on the media engines (which are the main + * users of mlrc). On the primary GT side, to avoid it being used in + * conjunction with mlrc, we only enable it if we are in single CCS + * mode. + */ + if (xe_gt_is_media_type(gt) || gt->ccs_mode > 1) + return false; + + /* + * Dynamic ICS requires GuC v70.40.1, which maps to compatibility + * version v1.18.4. 
+ */ + return GUC_SUBMIT_VER(guc) >= MAKE_GUC_VER(1, 18, 4); +} + +#define OPT_IN_MAX_DWORDS 16 +int xe_guc_opt_in_features_enable(struct xe_guc *guc) +{ + struct xe_device *xe = guc_to_xe(guc); + CLASS(xe_guc_buf, buf)(&guc->buf, OPT_IN_MAX_DWORDS); + u32 count = 0; + u32 *klvs; + int ret; + + if (!xe_guc_buf_is_valid(buf)) + return -ENOBUFS; + + klvs = xe_guc_buf_cpu_ptr(buf); + + /* + * The extra CAT error type opt-in was added in GuC v70.17.0, which maps + * to compatibility version v1.7.0. + * Note that the GuC allows enabling this KLV even on platforms that do + * not support the extra type; in such case the returned type variable + * will be set to a known invalid value which we can check against. + */ + if (GUC_SUBMIT_VER(guc) >= MAKE_GUC_VER(1, 7, 0)) + klvs[count++] = PREP_GUC_KLV_TAG(OPT_IN_FEATURE_EXT_CAT_ERR_TYPE); + + if (supports_dynamic_ics(guc)) + klvs[count++] = PREP_GUC_KLV_TAG(OPT_IN_FEATURE_DYNAMIC_INHIBIT_CONTEXT_SWITCH); + + if (count) { + xe_assert(xe, count <= OPT_IN_MAX_DWORDS); + + ret = __guc_opt_in_features_enable(guc, xe_guc_buf_flush(buf), count); + if (ret < 0) { + xe_gt_err(guc_to_gt(guc), + "failed to enable GuC opt-in features: %pe\n", + ERR_PTR(ret)); + return ret; + } + } + + return 0; +} + static void guc_fini_hw(void *arg) { struct xe_guc *guc = arg; @@ -577,7 +657,7 @@ static void guc_fini_hw(void *arg) unsigned int fw_ref; fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL); - xe_uc_fini_hw(&guc_to_gt(guc)->uc); + xe_uc_sanitize_reset(&guc_to_gt(guc)->uc); xe_force_wake_put(gt_to_fw(gt), fw_ref); guc_g2g_fini(guc); @@ -620,30 +700,54 @@ static int xe_guc_realloc_post_hwconfig(struct xe_guc *guc) if (ret) return ret; - ret = xe_managed_bo_reinit_in_vram(xe, tile, &guc->ct.bo); - if (ret) - return ret; - return 0; } -static int vf_guc_init(struct xe_guc *guc) +static int vf_guc_init_noalloc(struct xe_guc *guc) { + struct xe_gt *gt = guc_to_gt(guc); int err; - xe_guc_comm_init_early(guc); - - err = xe_guc_ct_init(&guc->ct); + err = xe_gt_sriov_vf_bootstrap(gt); if (err) return err; - err = xe_guc_relay_init(&guc->relay); + err = xe_gt_sriov_vf_query_config(gt); if (err) return err; return 0; } +int xe_guc_init_noalloc(struct xe_guc *guc) +{ + struct xe_device *xe = guc_to_xe(guc); + struct xe_gt *gt = guc_to_gt(guc); + int ret; + + xe_guc_comm_init_early(guc); + + ret = xe_guc_ct_init_noalloc(&guc->ct); + if (ret) + goto out; + + ret = xe_guc_relay_init(&guc->relay); + if (ret) + goto out; + + if (IS_SRIOV_VF(xe)) { + ret = vf_guc_init_noalloc(guc); + if (ret) + goto out; + } + + return 0; + +out: + xe_gt_err(gt, "GuC init failed with %pe\n", ERR_PTR(ret)); + return ret; +} + int xe_guc_init(struct xe_guc *guc) { struct xe_device *xe = guc_to_xe(guc); @@ -653,13 +757,13 @@ int xe_guc_init(struct xe_guc *guc) guc->fw.type = XE_UC_FW_TYPE_GUC; ret = xe_uc_fw_init(&guc->fw); if (ret) - goto out; + return ret; if (!xe_uc_fw_is_enabled(&guc->fw)) return 0; if (IS_SRIOV_VF(xe)) { - ret = vf_guc_init(guc); + ret = xe_guc_ct_init(&guc->ct); if (ret) goto out; return 0; @@ -681,10 +785,6 @@ int xe_guc_init(struct xe_guc *guc) if (ret) goto out; - ret = xe_guc_relay_init(&guc->relay); - if (ret) - goto out; - xe_uc_fw_change_status(&guc->fw, XE_UC_FIRMWARE_LOADABLE); ret = devm_add_action_or_reset(xe->drm.dev, guc_fini_hw, guc); @@ -693,8 +793,6 @@ int xe_guc_init(struct xe_guc *guc) guc_init_params(guc); - xe_guc_comm_init_early(guc); - return 0; out: @@ -736,6 +834,10 @@ int xe_guc_init_post_hwconfig(struct xe_guc *guc) if (ret) return ret; + ret = 
xe_guc_ct_init_post_hwconfig(&guc->ct); + if (ret) + return ret; + guc_init_params_post_hwconfig(guc); ret = xe_guc_submit_init(guc, ~0); @@ -767,15 +869,17 @@ int xe_guc_post_load_init(struct xe_guc *guc) xe_guc_ads_populate_post_load(&guc->ads); + ret = xe_guc_opt_in_features_enable(guc); + if (ret) + return ret; + if (xe_guc_g2g_wanted(guc_to_xe(guc))) { ret = guc_g2g_start(guc); if (ret) return ret; } - guc->submission_state.enabled = true; - - return 0; + return xe_guc_submit_enable(guc); } int xe_guc_reset(struct xe_guc *guc) @@ -885,11 +989,14 @@ static int guc_load_done(u32 status) case XE_GUC_LOAD_STATUS_GUC_PREPROD_BUILD_MISMATCH: case XE_GUC_LOAD_STATUS_ERROR_DEVID_INVALID_GUCTYPE: case XE_GUC_LOAD_STATUS_HWCONFIG_ERROR: + case XE_GUC_LOAD_STATUS_BOOTROM_VERSION_MISMATCH: case XE_GUC_LOAD_STATUS_DPC_ERROR: case XE_GUC_LOAD_STATUS_EXCEPTION: case XE_GUC_LOAD_STATUS_INIT_DATA_INVALID: case XE_GUC_LOAD_STATUS_MPU_DATA_INVALID: case XE_GUC_LOAD_STATUS_INIT_MMIO_SAVE_RESTORE_INVALID: + case XE_GUC_LOAD_STATUS_KLV_WORKAROUND_INIT_ERROR: + case XE_GUC_LOAD_STATUS_INVALID_FTR_FLAG: return -1; } @@ -948,7 +1055,7 @@ static s32 guc_pc_get_cur_freq(struct xe_guc_pc *guc_pc) #endif #define GUC_LOAD_TIME_WARN_MS 200 -static void guc_wait_ucode(struct xe_guc *guc) +static int guc_wait_ucode(struct xe_guc *guc) { struct xe_gt *gt = guc_to_gt(guc); struct xe_mmio *mmio = >->mmio; @@ -1029,21 +1136,33 @@ static void guc_wait_ucode(struct xe_guc *guc) } switch (ukernel) { + case XE_GUC_LOAD_STATUS_HWCONFIG_START: + xe_gt_err(gt, "still extracting hwconfig table.\n"); + break; + case XE_GUC_LOAD_STATUS_EXCEPTION: xe_gt_err(gt, "firmware exception. EIP: %#x\n", xe_mmio_read32(mmio, SOFT_SCRATCH(13))); break; + case XE_GUC_LOAD_STATUS_INIT_DATA_INVALID: + xe_gt_err(gt, "illegal init/ADS data\n"); + break; + case XE_GUC_LOAD_STATUS_INIT_MMIO_SAVE_RESTORE_INVALID: xe_gt_err(gt, "illegal register in save/restore workaround list\n"); break; - case XE_GUC_LOAD_STATUS_HWCONFIG_START: - xe_gt_err(gt, "still extracting hwconfig table.\n"); + case XE_GUC_LOAD_STATUS_KLV_WORKAROUND_INIT_ERROR: + xe_gt_err(gt, "illegal workaround KLV data\n"); + break; + + case XE_GUC_LOAD_STATUS_INVALID_FTR_FLAG: + xe_gt_err(gt, "illegal feature flag specified\n"); break; } - xe_device_declare_wedged(gt_to_xe(gt)); + return -EPROTO; } else if (delta_ms > GUC_LOAD_TIME_WARN_MS) { xe_gt_warn(gt, "excessive init time: %lldms! 
[status = 0x%08X, timeouts = %d]\n", delta_ms, status, count); @@ -1055,7 +1174,10 @@ static void guc_wait_ucode(struct xe_guc *guc) delta_ms, xe_guc_pc_get_act_freq(guc_pc), guc_pc_get_cur_freq(guc_pc), before_freq, status, count); } + + return 0; } +ALLOW_ERROR_INJECTION(guc_wait_ucode, ERRNO); static int __xe_guc_upload(struct xe_guc *guc) { @@ -1087,14 +1209,16 @@ static int __xe_guc_upload(struct xe_guc *guc) goto out; /* Wait for authentication */ - guc_wait_ucode(guc); + ret = guc_wait_ucode(guc); + if (ret) + goto out; xe_uc_fw_change_status(&guc->fw, XE_UC_FIRMWARE_RUNNING); return 0; out: xe_uc_fw_change_status(&guc->fw, XE_UC_FIRMWARE_LOAD_FAIL); - return 0 /* FIXME: ret, don't want to stop load currently */; + return ret; } static int vf_guc_min_load_for_hwconfig(struct xe_guc *guc) @@ -1112,13 +1236,17 @@ static int vf_guc_min_load_for_hwconfig(struct xe_guc *guc) ret = xe_gt_sriov_vf_connect(gt); if (ret) - return ret; + goto err_out; ret = xe_gt_sriov_vf_query_runtime(gt); if (ret) - return ret; + goto err_out; return 0; + +err_out: + xe_guc_sanitize(guc); + return ret; } /** @@ -1468,7 +1596,7 @@ void xe_guc_sanitize(struct xe_guc *guc) { xe_uc_fw_sanitize(&guc->fw); xe_guc_ct_disable(&guc->ct); - guc->submission_state.enabled = false; + xe_guc_submit_disable(guc); } int xe_guc_reset_prepare(struct xe_guc *guc) @@ -1561,3 +1689,7 @@ void xe_guc_declare_wedged(struct xe_guc *guc) xe_guc_ct_stop(&guc->ct); xe_guc_submit_wedge(guc); } + +#if IS_ENABLED(CONFIG_DRM_XE_KUNIT_TEST) +#include "tests/xe_guc_g2g_test.c" +#endif diff --git a/drivers/gpu/drm/xe/xe_guc.h b/drivers/gpu/drm/xe/xe_guc.h index 58338be44558..1cca05967e62 100644 --- a/drivers/gpu/drm/xe/xe_guc.h +++ b/drivers/gpu/drm/xe/xe_guc.h @@ -26,6 +26,7 @@ struct drm_printer; void xe_guc_comm_init_early(struct xe_guc *guc); +int xe_guc_init_noalloc(struct xe_guc *guc); int xe_guc_init(struct xe_guc *guc); int xe_guc_init_post_hwconfig(struct xe_guc *guc); int xe_guc_post_load_init(struct xe_guc *guc); @@ -33,6 +34,7 @@ int xe_guc_reset(struct xe_guc *guc); int xe_guc_upload(struct xe_guc *guc); int xe_guc_min_load_for_hwconfig(struct xe_guc *guc); int xe_guc_enable_communication(struct xe_guc *guc); +int xe_guc_opt_in_features_enable(struct xe_guc *guc); int xe_guc_suspend(struct xe_guc *guc); void xe_guc_notify(struct xe_guc *guc); int xe_guc_auth_huc(struct xe_guc *guc, u32 rsa_addr); @@ -51,6 +53,10 @@ void xe_guc_stop(struct xe_guc *guc); int xe_guc_start(struct xe_guc *guc); void xe_guc_declare_wedged(struct xe_guc *guc); +#if IS_ENABLED(CONFIG_DRM_XE_KUNIT_TEST) +int xe_guc_g2g_test_notification(struct xe_guc *guc, u32 *payload, u32 len); +#endif + static inline u16 xe_engine_class_to_guc_class(enum xe_engine_class class) { switch (class) { diff --git a/drivers/gpu/drm/xe/xe_guc_ads.c b/drivers/gpu/drm/xe/xe_guc_ads.c index 07a027755627..58e0b0294a5b 100644 --- a/drivers/gpu/drm/xe/xe_guc_ads.c +++ b/drivers/gpu/drm/xe/xe_guc_ads.c @@ -247,7 +247,7 @@ static size_t calculate_regset_size(struct xe_gt *gt) count += ADS_REGSET_EXTRA_MAX * XE_NUM_HW_ENGINES; - if (XE_WA(gt, 1607983814)) + if (XE_GT_WA(gt, 1607983814)) count += LNCFCMOCS_REG_COUNT; return count * sizeof(struct guc_mmio_reg); @@ -284,52 +284,26 @@ static size_t calculate_golden_lrc_size(struct xe_guc_ads *ads) return total_size; } -static void guc_waklv_enable_one_word(struct xe_guc_ads *ads, - enum xe_guc_klv_ids klv_id, - u32 value, - u32 *offset, u32 *remain) +static void guc_waklv_enable(struct xe_guc_ads *ads, + u32 data[], u32 data_len_dw, + u32 
*offset, u32 *remain, + enum xe_guc_klv_ids klv_id) { - u32 size; - u32 klv_entry[] = { - /* 16:16 key/length */ - FIELD_PREP(GUC_KLV_0_KEY, klv_id) | - FIELD_PREP(GUC_KLV_0_LEN, 1), - value, - /* 1 dword data */ - }; - - size = sizeof(klv_entry); + size_t size = sizeof(u32) * (1 + data_len_dw); if (*remain < size) { drm_warn(&ads_to_xe(ads)->drm, - "w/a klv buffer too small to add klv id %d\n", klv_id); - } else { - xe_map_memcpy_to(ads_to_xe(ads), ads_to_map(ads), *offset, - klv_entry, size); - *offset += size; - *remain -= size; + "w/a klv buffer too small to add klv id 0x%04X\n", klv_id); + return; } -} -static void guc_waklv_enable_simple(struct xe_guc_ads *ads, - enum xe_guc_klv_ids klv_id, u32 *offset, u32 *remain) -{ - u32 klv_entry[] = { - /* 16:16 key/length */ - FIELD_PREP(GUC_KLV_0_KEY, klv_id) | - FIELD_PREP(GUC_KLV_0_LEN, 0), - /* 0 dwords data */ - }; - u32 size; + /* 16:16 key/length */ + xe_map_wr(ads_to_xe(ads), ads_to_map(ads), *offset, u32, + FIELD_PREP(GUC_KLV_0_KEY, klv_id) | FIELD_PREP(GUC_KLV_0_LEN, data_len_dw)); + /* data_len_dw dwords of data */ + xe_map_memcpy_to(ads_to_xe(ads), ads_to_map(ads), + *offset + sizeof(u32), data, data_len_dw * sizeof(u32)); - size = sizeof(klv_entry); - - if (xe_gt_WARN(ads_to_gt(ads), *remain < size, - "w/a klv buffer too small to add klv id %d\n", klv_id)) - return; - - xe_map_memcpy_to(ads_to_xe(ads), ads_to_map(ads), *offset, - klv_entry, size); *offset += size; *remain -= size; } @@ -343,44 +317,51 @@ static void guc_waklv_init(struct xe_guc_ads *ads) offset = guc_ads_waklv_offset(ads); remain = guc_ads_waklv_size(ads); - if (XE_WA(gt, 14019882105) || XE_WA(gt, 16021333562)) - guc_waklv_enable_simple(ads, - GUC_WORKAROUND_KLV_BLOCK_INTERRUPTS_WHEN_MGSR_BLOCKED, - &offset, &remain); - if (XE_WA(gt, 18024947630)) - guc_waklv_enable_simple(ads, - GUC_WORKAROUND_KLV_ID_GAM_PFQ_SHADOW_TAIL_POLLING, - &offset, &remain); - if (XE_WA(gt, 16022287689)) - guc_waklv_enable_simple(ads, - GUC_WORKAROUND_KLV_ID_DISABLE_MTP_DURING_ASYNC_COMPUTE, - &offset, &remain); - - if (XE_WA(gt, 14022866841)) - guc_waklv_enable_simple(ads, - GUC_WA_KLV_WAKE_POWER_DOMAINS_FOR_OUTBOUND_MMIO, - &offset, &remain); + if (XE_GT_WA(gt, 14019882105) || XE_GT_WA(gt, 16021333562)) + guc_waklv_enable(ads, NULL, 0, &offset, &remain, + GUC_WORKAROUND_KLV_BLOCK_INTERRUPTS_WHEN_MGSR_BLOCKED); + if (XE_GT_WA(gt, 18024947630)) + guc_waklv_enable(ads, NULL, 0, &offset, &remain, + GUC_WORKAROUND_KLV_ID_GAM_PFQ_SHADOW_TAIL_POLLING); + if (XE_GT_WA(gt, 16022287689)) + guc_waklv_enable(ads, NULL, 0, &offset, &remain, + GUC_WORKAROUND_KLV_ID_DISABLE_MTP_DURING_ASYNC_COMPUTE); + + if (XE_GT_WA(gt, 14022866841)) + guc_waklv_enable(ads, NULL, 0, &offset, &remain, + GUC_WA_KLV_WAKE_POWER_DOMAINS_FOR_OUTBOUND_MMIO); /* * On RC6 exit, GuC will write register 0xB04 with the default value provided. As of now, * the default value for this register is determined to be 0xC40. This could change in the * future, so GuC depends on KMD to send it the correct value. 
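To make the w/a KLV layout above concrete, here is a minimal userspace sketch of the 16:16 key/length encoding that guc_waklv_enable() writes into the ADS buffer. The helper name, the key values and the buffer size below are illustrative only, not taken from the driver; the one-dword payload mirrors the 0xC40 register default mentioned in the comment above.

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Append one KLV: a header dword (key in bits 31:16, data length in dwords
 * in bits 15:0) followed by the data dwords. Returns dwords consumed. */
static size_t klv_append(uint32_t *buf, size_t remain_dw,
			 uint16_t key, const uint32_t *data, uint16_t len_dw)
{
	if (remain_dw < (size_t)len_dw + 1)
		return 0;				/* buffer too small: skip, as the driver warns and bails */
	buf[0] = ((uint32_t)key << 16) | len_dw;	/* 16:16 key/length header */
	if (len_dw)
		memcpy(&buf[1], data, len_dw * sizeof(uint32_t));
	return (size_t)len_dw + 1;
}

int main(void)
{
	uint32_t klvs[8];
	uint32_t reg_default = 0xC40;			/* one-dword payload, as in the 0xB04 example */
	size_t used = 0;

	used += klv_append(klvs + used, 8 - used, 0x9001, NULL, 0);		/* flag-only (zero length) KLV */
	used += klv_append(klvs + used, 8 - used, 0x9002, &reg_default, 1);	/* one data dword */

	printf("%zu dwords used, headers 0x%08x 0x%08x\n", used, klvs[0], klvs[1]);
	return 0;
}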
*/ - if (XE_WA(gt, 13011645652)) - guc_waklv_enable_one_word(ads, - GUC_WA_KLV_NP_RD_WRITE_TO_CLEAR_RCSM_AT_CGP_LATE_RESTORE, - 0xC40, - &offset, &remain); - - if (XE_WA(gt, 14022293748) || XE_WA(gt, 22019794406)) - guc_waklv_enable_simple(ads, - GUC_WORKAROUND_KLV_ID_BACK_TO_BACK_RCS_ENGINE_RESET, - &offset, &remain); - - if (GUC_FIRMWARE_VER(>->uc.guc) >= MAKE_GUC_VER(70, 44, 0) && XE_WA(gt, 16026508708)) - guc_waklv_enable_simple(ads, - GUC_WA_KLV_RESET_BB_STACK_PTR_ON_VF_SWITCH, - &offset, &remain); + if (XE_GT_WA(gt, 13011645652)) { + u32 data = 0xC40; + + guc_waklv_enable(ads, &data, 1, &offset, &remain, + GUC_WA_KLV_NP_RD_WRITE_TO_CLEAR_RCSM_AT_CGP_LATE_RESTORE); + } + + if (XE_GT_WA(gt, 14022293748) || XE_GT_WA(gt, 22019794406)) + guc_waklv_enable(ads, NULL, 0, &offset, &remain, + GUC_WORKAROUND_KLV_ID_BACK_TO_BACK_RCS_ENGINE_RESET); + + if (GUC_FIRMWARE_VER(>->uc.guc) >= MAKE_GUC_VER(70, 44, 0) && XE_GT_WA(gt, 16026508708)) + guc_waklv_enable(ads, NULL, 0, &offset, &remain, + GUC_WA_KLV_RESET_BB_STACK_PTR_ON_VF_SWITCH); + if (GUC_FIRMWARE_VER(>->uc.guc) >= MAKE_GUC_VER(70, 47, 0) && XE_GT_WA(gt, 16026007364)) { + u32 data[] = { + 0x0, + 0xF, + }; + guc_waklv_enable(ads, data, ARRAY_SIZE(data), &offset, &remain, + GUC_WA_KLV_RESTORE_UNSAVED_MEDIA_CONTROL_REG); + } + + if (XE_GT_WA(gt, 14020001231)) + guc_waklv_enable(ads, NULL, 0, &offset, &remain, + GUC_WORKAROUND_KLV_DISABLE_PSMI_INTERRUPTS_AT_C6_ENTRY_RESTORE_AT_EXIT); size = guc_ads_waklv_size(ads) - remain; if (!size) @@ -784,7 +765,7 @@ static unsigned int guc_mmio_regset_write(struct xe_guc_ads *ads, guc_mmio_regset_write_one(ads, regset_map, e->reg, count++); } - if (XE_WA(hwe->gt, 1607983814) && hwe->class == XE_ENGINE_CLASS_RENDER) { + if (XE_GT_WA(hwe->gt, 1607983814) && hwe->class == XE_ENGINE_CLASS_RENDER) { for (i = 0; i < LNCFCMOCS_REG_COUNT; i++) { guc_mmio_regset_write_one(ads, regset_map, XELP_LNCFCMOCS(i), count++); @@ -890,7 +871,7 @@ void xe_guc_ads_populate_minimal(struct xe_guc_ads *ads) xe_gt_assert(gt, ads->bo); - xe_map_memset(ads_to_xe(ads), ads_to_map(ads), 0, 0, ads->bo->size); + xe_map_memset(ads_to_xe(ads), ads_to_map(ads), 0, 0, xe_bo_size(ads->bo)); guc_policies_init(ads); guc_golden_lrc_init(ads); guc_mapping_table_init_invalid(gt, &info_map); @@ -914,7 +895,7 @@ void xe_guc_ads_populate(struct xe_guc_ads *ads) xe_gt_assert(gt, ads->bo); - xe_map_memset(ads_to_xe(ads), ads_to_map(ads), 0, 0, ads->bo->size); + xe_map_memset(ads_to_xe(ads), ads_to_map(ads), 0, 0, xe_bo_size(ads->bo)); guc_policies_init(ads); fill_engine_enable_masks(gt, &info_map); guc_mmio_reg_state_init(ads); @@ -995,16 +976,6 @@ static int guc_ads_action_update_policies(struct xe_guc_ads *ads, u32 policy_off return xe_guc_ct_send(ct, action, ARRAY_SIZE(action), 0, 0); } -static int guc_ads_update_policies(struct xe_guc_ads *ads, const struct guc_policies *policies) -{ - CLASS(xe_guc_buf_from_data, buf)(&ads_to_guc(ads)->buf, policies, sizeof(*policies)); - - if (!xe_guc_buf_is_valid(buf)) - return -ENOBUFS; - - return guc_ads_action_update_policies(ads, xe_guc_buf_flush(buf)); -} - /** * xe_guc_ads_scheduler_policy_toggle_reset - Toggle reset policy * @ads: Additional data structures object @@ -1015,13 +986,16 @@ static int guc_ads_update_policies(struct xe_guc_ads *ads, const struct guc_poli */ int xe_guc_ads_scheduler_policy_toggle_reset(struct xe_guc_ads *ads) { - struct xe_device *xe = ads_to_xe(ads); struct guc_policies *policies; - int ret; + struct xe_guc *guc = ads_to_guc(ads); + struct xe_device *xe = ads_to_xe(ads); + 
CLASS(xe_guc_buf, buf)(&guc->buf, sizeof(*policies)); - policies = kmalloc(sizeof(*policies), GFP_KERNEL); - if (!policies) - return -ENOMEM; + if (!xe_guc_buf_is_valid(buf)) + return -ENOBUFS; + + policies = xe_guc_buf_cpu_ptr(buf); + memset(policies, 0, sizeof(*policies)); policies->dpc_promote_time = ads_blob_read(ads, policies.dpc_promote_time); policies->max_num_work_items = ads_blob_read(ads, policies.max_num_work_items); @@ -1031,7 +1005,5 @@ int xe_guc_ads_scheduler_policy_toggle_reset(struct xe_guc_ads *ads) else policies->global_flags &= ~GLOBAL_POLICY_DISABLE_ENGINE_RESET; - ret = guc_ads_update_policies(ads, policies); - kfree(policies); - return ret; + return guc_ads_action_update_policies(ads, xe_guc_buf_flush(buf)); } diff --git a/drivers/gpu/drm/xe/xe_guc_buf.c b/drivers/gpu/drm/xe/xe_guc_buf.c index 14a07dca48e7..502ca3a4ee60 100644 --- a/drivers/gpu/drm/xe/xe_guc_buf.c +++ b/drivers/gpu/drm/xe/xe_guc_buf.c @@ -164,7 +164,7 @@ u64 xe_guc_cache_gpu_addr_from_ptr(struct xe_guc_buf_cache *cache, const void *p if (offset < 0 || offset + size > cache->sam->base.size) return 0; - return cache->sam->gpu_addr + offset; + return xe_sa_manager_gpu_addr(cache->sam) + offset; } #if IS_BUILTIN(CONFIG_DRM_XE_KUNIT_TEST) diff --git a/drivers/gpu/drm/xe/xe_guc_capture.c b/drivers/gpu/drm/xe/xe_guc_capture.c index 859a3ba91be5..243dad3e2418 100644 --- a/drivers/gpu/drm/xe/xe_guc_capture.c +++ b/drivers/gpu/drm/xe/xe_guc_capture.c @@ -1817,6 +1817,12 @@ void xe_engine_snapshot_print(struct xe_hw_engine_snapshot *snapshot, struct drm str_yes_no(snapshot->kernel_reserved)); for (type = GUC_STATE_CAPTURE_TYPE_GLOBAL; type < GUC_STATE_CAPTURE_TYPE_MAX; type++) { + /* + * FIXME: During devcoredump print we should avoid accessing the + * driver pointers for gt or engine. Printing should be done only + * using the snapshot captured. Here we are accessing the gt + * pointer. It should be fixed. 
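The FIXME above describes a capture-then-print discipline: copy everything needed into a snapshot at error time and let the devcoredump print path read only that snapshot. A small illustrative sketch of that shape; the struct and field names are invented and stand in for the driver's live gt/engine objects.

#include <stdint.h>
#include <stdio.h>

struct engine_snapshot {
	char name[16];
	uint64_t head, tail;		/* values copied at capture time */
};

/* Capture: the only place allowed to dereference live driver state. */
static void snapshot_capture(struct engine_snapshot *snap,
			     const char *name, uint64_t head, uint64_t tail)
{
	snprintf(snap->name, sizeof(snap->name), "%s", name);
	snap->head = head;
	snap->tail = tail;
}

/* Print: consumes only the snapshot, so it stays usable even if the live
 * objects are gone or unsafe to touch by the time the dump is read out. */
static void snapshot_print(const struct engine_snapshot *snap)
{
	printf("%s: head=%llu tail=%llu\n", snap->name,
	       (unsigned long long)snap->head, (unsigned long long)snap->tail);
}

int main(void)
{
	struct engine_snapshot snap;

	snapshot_capture(&snap, "rcs0", 64, 128);
	snapshot_print(&snap);
	return 0;
}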
+ */ list = xe_guc_capture_get_reg_desc_list(gt, GUC_CAPTURE_LIST_INDEX_PF, type, capture_class, false); snapshot_print_by_list_order(snapshot, p, type, list); diff --git a/drivers/gpu/drm/xe/xe_guc_ct.c b/drivers/gpu/drm/xe/xe_guc_ct.c index 37509f619503..18f6327bf552 100644 --- a/drivers/gpu/drm/xe/xe_guc_ct.c +++ b/drivers/gpu/drm/xe/xe_guc_ct.c @@ -26,15 +26,22 @@ #include "xe_gt_sriov_pf_control.h" #include "xe_gt_sriov_pf_monitor.h" #include "xe_gt_sriov_printk.h" -#include "xe_gt_tlb_invalidation.h" #include "xe_guc.h" #include "xe_guc_log.h" #include "xe_guc_relay.h" #include "xe_guc_submit.h" +#include "xe_guc_tlb_inval.h" #include "xe_map.h" #include "xe_pm.h" #include "xe_trace_guc.h" +static void receive_g2h(struct xe_guc_ct *ct); +static void g2h_worker_func(struct work_struct *w); +static void safe_mode_worker_func(struct work_struct *w); +static void ct_exit_safe_mode(struct xe_guc_ct *ct); +static void guc_ct_change_state(struct xe_guc_ct *ct, + enum xe_guc_ct_state state); + #if IS_ENABLED(CONFIG_DRM_XE_DEBUG) enum { /* Internal states, not error conditions */ @@ -80,6 +87,7 @@ struct g2h_fence { u16 error; u16 hint; u16 reason; + bool cancel; bool retry; bool fail; bool done; @@ -89,15 +97,18 @@ struct g2h_fence { static void g2h_fence_init(struct g2h_fence *g2h_fence, u32 *response_buffer) { + memset(g2h_fence, 0, sizeof(*g2h_fence)); g2h_fence->response_buffer = response_buffer; - g2h_fence->response_data = 0; - g2h_fence->response_len = 0; - g2h_fence->fail = false; - g2h_fence->retry = false; - g2h_fence->done = false; g2h_fence->seqno = ~0x0; } +static void g2h_fence_cancel(struct g2h_fence *g2h_fence) +{ + g2h_fence->cancel = true; + g2h_fence->fail = true; + g2h_fence->done = true; +} + static bool g2h_fence_needs_alloc(struct g2h_fence *g2h_fence) { return g2h_fence->seqno == ~0x0; @@ -189,14 +200,11 @@ static void guc_ct_fini(struct drm_device *drm, void *arg) { struct xe_guc_ct *ct = arg; + ct_exit_safe_mode(ct); destroy_workqueue(ct->g2h_wq); xa_destroy(&ct->fence_lookup); } -static void receive_g2h(struct xe_guc_ct *ct); -static void g2h_worker_func(struct work_struct *w); -static void safe_mode_worker_func(struct work_struct *w); - static void primelockdep(struct xe_guc_ct *ct) { if (!IS_ENABLED(CONFIG_LOCKDEP)) @@ -207,12 +215,10 @@ static void primelockdep(struct xe_guc_ct *ct) fs_reclaim_release(GFP_KERNEL); } -int xe_guc_ct_init(struct xe_guc_ct *ct) +int xe_guc_ct_init_noalloc(struct xe_guc_ct *ct) { struct xe_device *xe = ct_to_xe(ct); struct xe_gt *gt = ct_to_gt(ct); - struct xe_tile *tile = gt_to_tile(gt); - struct xe_bo *bo; int err; xe_gt_assert(gt, !(guc_ct_size() % PAGE_SIZE)); @@ -238,6 +244,30 @@ int xe_guc_ct_init(struct xe_guc_ct *ct) primelockdep(ct); + err = drmm_add_action_or_reset(&xe->drm, guc_ct_fini, ct); + if (err) + return err; + + xe_gt_assert(gt, ct->state == XE_GUC_CT_STATE_NOT_INITIALIZED); + ct->state = XE_GUC_CT_STATE_DISABLED; + return 0; +} +ALLOW_ERROR_INJECTION(xe_guc_ct_init_noalloc, ERRNO); /* See xe_pci_probe() */ + +static void guc_action_disable_ct(void *arg) +{ + struct xe_guc_ct *ct = arg; + + guc_ct_change_state(ct, XE_GUC_CT_STATE_DISABLED); +} + +int xe_guc_ct_init(struct xe_guc_ct *ct) +{ + struct xe_device *xe = ct_to_xe(ct); + struct xe_gt *gt = ct_to_gt(ct); + struct xe_tile *tile = gt_to_tile(gt); + struct xe_bo *bo; + bo = xe_managed_bo_create_pin_map(xe, tile, guc_ct_size(), XE_BO_FLAG_SYSTEM | XE_BO_FLAG_GGTT | @@ -248,16 +278,38 @@ int xe_guc_ct_init(struct xe_guc_ct *ct) ct->bo = bo; - err = 
drmm_add_action_or_reset(&xe->drm, guc_ct_fini, ct); - if (err) - return err; - - xe_gt_assert(gt, ct->state == XE_GUC_CT_STATE_NOT_INITIALIZED); - ct->state = XE_GUC_CT_STATE_DISABLED; - return 0; + return devm_add_action_or_reset(xe->drm.dev, guc_action_disable_ct, ct); } ALLOW_ERROR_INJECTION(xe_guc_ct_init, ERRNO); /* See xe_pci_probe() */ +/** + * xe_guc_ct_init_post_hwconfig - Reinitialize the GuC CTB in VRAM + * @ct: the &xe_guc_ct + * + * Allocate a new BO in VRAM and free the previous BO that was allocated + * in system memory (SMEM). Applicable only for DGFX products. + * + * Return: 0 on success, or a negative errno on failure. + */ +int xe_guc_ct_init_post_hwconfig(struct xe_guc_ct *ct) +{ + struct xe_device *xe = ct_to_xe(ct); + struct xe_gt *gt = ct_to_gt(ct); + struct xe_tile *tile = gt_to_tile(gt); + int ret; + + xe_assert(xe, !xe_guc_ct_enabled(ct)); + + if (IS_DGFX(xe)) { + ret = xe_managed_bo_reinit_in_vram(xe, tile, &ct->bo); + if (ret) + return ret; + } + + devm_remove_action(xe->drm.dev, guc_action_disable_ct, ct); + return devm_add_action_or_reset(xe->drm.dev, guc_action_disable_ct, ct); +} + #define desc_read(xe_, guc_ctb__, field_) \ xe_map_rd_field(xe_, &guc_ctb__->desc, 0, \ struct guc_ct_buffer_desc, field_) @@ -374,9 +426,13 @@ static int guc_ct_control_toggle(struct xe_guc_ct *ct, bool enable) return ret > 0 ? -EPROTO : ret; } -static void xe_guc_ct_set_state(struct xe_guc_ct *ct, +static void guc_ct_change_state(struct xe_guc_ct *ct, enum xe_guc_ct_state state) { + struct xe_gt *gt = ct_to_gt(ct); + struct g2h_fence *g2h_fence; + unsigned long idx; + mutex_lock(&ct->lock); /* Serialise dequeue_one_g2h() */ spin_lock_irq(&ct->fast_lock); /* Serialise CT fast-path */ @@ -388,8 +444,20 @@ static void xe_guc_ct_set_state(struct xe_guc_ct *ct, ct->g2h_outstanding = 0; ct->state = state; + xe_gt_dbg(gt, "GuC CT communication channel %s\n", + state == XE_GUC_CT_STATE_STOPPED ? "stopped" : + str_enabled_disabled(state == XE_GUC_CT_STATE_ENABLED)); + spin_unlock_irq(&ct->fast_lock); + /* cancel all in-flight send-recv requests */ + xa_for_each(&ct->fence_lookup, idx, g2h_fence) + g2h_fence_cancel(g2h_fence); + + /* make sure guc_ct_send_recv() will see g2h_fence changes */ + smp_mb(); + wake_up_all(&ct->g2h_fence_wq); + /* * Lockdep doesn't like this under the fast lock and he destroy only * needs to be serialized with the send path which ct lock provides. 
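A compact sketch of the cancel/fail/done bookkeeping introduced above, showing how a woken waiter can tell "the CT channel was torn down underneath me" (-ECANCELED) apart from "the GuC reported a failure" (-EIO). The struct and helper names here are illustrative, not the driver's.

#include <stdbool.h>
#include <stdio.h>
#include <errno.h>

struct fence {
	bool done;	/* waiter may proceed */
	bool fail;	/* something went wrong */
	bool cancel;	/* the CT channel changed state while we waited */
};

/* What the state change does per in-flight fence before waking all waiters. */
static void fence_cancel(struct fence *f)
{
	f->cancel = true;
	f->fail = true;
	f->done = true;
}

/* What the waiter does once woken. */
static int fence_result(const struct fence *f)
{
	if (!f->done)
		return -EAGAIN;
	if (f->cancel)
		return -ECANCELED;	/* canceled request, no GuC response expected */
	if (f->fail)
		return -EIO;		/* genuine failure reported by the GuC */
	return 0;
}

int main(void)
{
	struct fence f = { 0 };

	fence_cancel(&f);
	printf("result after cancel: %d\n", fence_result(&f));
	return 0;
}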
@@ -443,7 +511,7 @@ int xe_guc_ct_enable(struct xe_guc_ct *ct) xe_gt_assert(gt, !xe_guc_ct_enabled(ct)); - xe_map_memset(xe, &ct->bo->vmap, 0, 0, ct->bo->size); + xe_map_memset(xe, &ct->bo->vmap, 0, 0, xe_bo_size(ct->bo)); guc_ct_ctb_h2g_init(xe, &ct->ctbs.h2g, &ct->bo->vmap); guc_ct_ctb_g2h_init(xe, &ct->ctbs.g2h, &ct->bo->vmap); @@ -459,11 +527,10 @@ int xe_guc_ct_enable(struct xe_guc_ct *ct) if (err) goto err_out; - xe_guc_ct_set_state(ct, XE_GUC_CT_STATE_ENABLED); + guc_ct_change_state(ct, XE_GUC_CT_STATE_ENABLED); smp_mb(); wake_up_all(&ct->wq); - xe_gt_dbg(gt, "GuC CT communication channel enabled\n"); if (ct_needs_safe_mode(ct)) ct_enter_safe_mode(ct); @@ -504,7 +571,7 @@ static void stop_g2h_handler(struct xe_guc_ct *ct) */ void xe_guc_ct_disable(struct xe_guc_ct *ct) { - xe_guc_ct_set_state(ct, XE_GUC_CT_STATE_DISABLED); + guc_ct_change_state(ct, XE_GUC_CT_STATE_DISABLED); ct_exit_safe_mode(ct); stop_g2h_handler(ct); } @@ -520,7 +587,7 @@ void xe_guc_ct_stop(struct xe_guc_ct *ct) if (!xe_guc_ct_initialized(ct)) return; - xe_guc_ct_set_state(ct, XE_GUC_CT_STATE_STOPPED); + guc_ct_change_state(ct, XE_GUC_CT_STATE_STOPPED); stop_g2h_handler(ct); } @@ -1011,11 +1078,15 @@ static bool retry_failure(struct xe_guc_ct *ct, int ret) return true; } +#define GUC_SEND_RETRY_LIMIT 50 +#define GUC_SEND_RETRY_MSLEEP 5 + static int guc_ct_send_recv(struct xe_guc_ct *ct, const u32 *action, u32 len, u32 *response_buffer, bool no_fail) { struct xe_gt *gt = ct_to_gt(ct); struct g2h_fence g2h_fence; + unsigned int retries = 0; int ret = 0; /* @@ -1080,9 +1151,20 @@ retry_same_fence: xe_gt_dbg(gt, "H2G action %#x retrying: reason %#x\n", action[0], g2h_fence.reason); mutex_unlock(&ct->lock); + if (++retries > GUC_SEND_RETRY_LIMIT) { + xe_gt_err(gt, "H2G action %#x reached retry limit=%u, aborting\n", + action[0], GUC_SEND_RETRY_LIMIT); + return -ELOOP; + } + msleep(GUC_SEND_RETRY_MSLEEP * retries); goto retry; } if (g2h_fence.fail) { + if (g2h_fence.cancel) { + xe_gt_dbg(gt, "H2G request %#x canceled!\n", action[0]); + ret = -ECANCELED; + goto unlock; + } xe_gt_err(gt, "H2G request %#x failed: error %#x hint %#x\n", action[0], g2h_fence.error, g2h_fence.hint); ret = -EIO; @@ -1091,6 +1173,7 @@ retry_same_fence: if (ret > 0) ret = response_buffer ? 
g2h_fence.response_len : g2h_fence.response_data; +unlock: mutex_unlock(&ct->lock); return ret; @@ -1381,8 +1464,7 @@ static int process_g2h_msg(struct xe_guc_ct *ct, u32 *msg, u32 len) ret = xe_guc_pagefault_handler(guc, payload, adj_len); break; case XE_GUC_ACTION_TLB_INVALIDATION_DONE: - ret = xe_guc_tlb_invalidation_done_handler(guc, payload, - adj_len); + ret = xe_guc_tlb_inval_done_handler(guc, payload, adj_len); break; case XE_GUC_ACTION_ACCESS_COUNTER_NOTIFY: ret = xe_guc_access_counter_notify_handler(guc, payload, @@ -1404,6 +1486,11 @@ static int process_g2h_msg(struct xe_guc_ct *ct, u32 *msg, u32 len) case XE_GUC_ACTION_NOTIFY_EXCEPTION: ret = guc_crash_process_msg(ct, action); break; +#if IS_ENABLED(CONFIG_DRM_XE_KUNIT_TEST) + case XE_GUC_ACTION_TEST_G2G_RECV: + ret = xe_guc_g2g_test_notification(guc, payload, adj_len); + break; +#endif default: xe_gt_err(gt, "unexpected G2H action 0x%04x\n", action); } @@ -1583,8 +1670,7 @@ static void g2h_fast_path(struct xe_guc_ct *ct, u32 *msg, u32 len) break; case XE_GUC_ACTION_TLB_INVALIDATION_DONE: __g2h_release_space(ct, len); - ret = xe_guc_tlb_invalidation_done_handler(guc, payload, - adj_len); + ret = xe_guc_tlb_inval_done_handler(guc, payload, adj_len); break; default: xe_gt_warn(gt, "NOT_POSSIBLE"); @@ -1897,7 +1983,7 @@ static struct xe_guc_ct_snapshot *guc_ct_snapshot_alloc(struct xe_guc_ct *ct, bo return NULL; if (ct->bo && want_ctb) { - snapshot->ctb_size = ct->bo->size; + snapshot->ctb_size = xe_bo_size(ct->bo); snapshot->ctb = kmalloc(snapshot->ctb_size, atomic ? GFP_ATOMIC : GFP_KERNEL); } diff --git a/drivers/gpu/drm/xe/xe_guc_ct.h b/drivers/gpu/drm/xe/xe_guc_ct.h index 99c5dec446f2..cf41210ab30a 100644 --- a/drivers/gpu/drm/xe/xe_guc_ct.h +++ b/drivers/gpu/drm/xe/xe_guc_ct.h @@ -11,7 +11,9 @@ struct drm_printer; struct xe_device; +int xe_guc_ct_init_noalloc(struct xe_guc_ct *ct); int xe_guc_ct_init(struct xe_guc_ct *ct); +int xe_guc_ct_init_post_hwconfig(struct xe_guc_ct *ct); int xe_guc_ct_enable(struct xe_guc_ct *ct); void xe_guc_ct_disable(struct xe_guc_ct *ct); void xe_guc_ct_stop(struct xe_guc_ct *ct); diff --git a/drivers/gpu/drm/xe/xe_guc_engine_activity.c b/drivers/gpu/drm/xe/xe_guc_engine_activity.c index 92e1f9f41b8c..2b99c1ebdd58 100644 --- a/drivers/gpu/drm/xe/xe_guc_engine_activity.c +++ b/drivers/gpu/drm/xe/xe_guc_engine_activity.c @@ -94,16 +94,17 @@ static int allocate_engine_activity_buffers(struct xe_guc *guc, struct xe_tile *tile = gt_to_tile(gt); struct xe_bo *bo, *metadata_bo; - metadata_bo = xe_bo_create_pin_map(gt_to_xe(gt), tile, NULL, PAGE_ALIGN(metadata_size), - ttm_bo_type_kernel, XE_BO_FLAG_SYSTEM | - XE_BO_FLAG_GGTT | XE_BO_FLAG_GGTT_INVALIDATE); + metadata_bo = xe_bo_create_pin_map_novm(gt_to_xe(gt), tile, PAGE_ALIGN(metadata_size), + ttm_bo_type_kernel, XE_BO_FLAG_SYSTEM | + XE_BO_FLAG_GGTT | XE_BO_FLAG_GGTT_INVALIDATE, + false); if (IS_ERR(metadata_bo)) return PTR_ERR(metadata_bo); - bo = xe_bo_create_pin_map(gt_to_xe(gt), tile, NULL, PAGE_ALIGN(size), - ttm_bo_type_kernel, XE_BO_FLAG_VRAM_IF_DGFX(tile) | - XE_BO_FLAG_GGTT | XE_BO_FLAG_GGTT_INVALIDATE); + bo = xe_bo_create_pin_map_novm(gt_to_xe(gt), tile, PAGE_ALIGN(size), + ttm_bo_type_kernel, XE_BO_FLAG_VRAM_IF_DGFX(tile) | + XE_BO_FLAG_GGTT | XE_BO_FLAG_GGTT_INVALIDATE, false); if (IS_ERR(bo)) { xe_bo_unpin_map_no_vm(metadata_bo); diff --git a/drivers/gpu/drm/xe/xe_guc_exec_queue_types.h b/drivers/gpu/drm/xe/xe_guc_exec_queue_types.h index a3f421e2adc0..c30c0e3ccbbb 100644 --- a/drivers/gpu/drm/xe/xe_guc_exec_queue_types.h +++ 
b/drivers/gpu/drm/xe/xe_guc_exec_queue_types.h @@ -35,8 +35,8 @@ struct xe_guc_exec_queue { struct xe_sched_msg static_msgs[MAX_STATIC_MSG_TYPE]; /** @lr_tdr: long running TDR worker */ struct work_struct lr_tdr; - /** @fini_async: do final fini async from this worker */ - struct work_struct fini_async; + /** @destroy_async: do final destroy async from this worker */ + struct work_struct destroy_async; /** @resume_time: time of last resume */ u64 resume_time; /** @state: GuC specific state for this xe_exec_queue */ diff --git a/drivers/gpu/drm/xe/xe_guc_fwif.h b/drivers/gpu/drm/xe/xe_guc_fwif.h index 6f57578b07cb..50c4c2406132 100644 --- a/drivers/gpu/drm/xe/xe_guc_fwif.h +++ b/drivers/gpu/drm/xe/xe_guc_fwif.h @@ -15,6 +15,7 @@ #define G2H_LEN_DW_SCHED_CONTEXT_MODE_SET 4 #define G2H_LEN_DW_DEREGISTER_CONTEXT 3 #define G2H_LEN_DW_TLB_INVALIDATE 3 +#define G2H_LEN_DW_G2G_NOTIFY_MIN 3 #define GUC_ID_MAX 65535 #define GUC_ID_UNKNOWN 0xffffffff @@ -45,6 +46,11 @@ #define GUC_MAX_ENGINE_CLASSES 16 #define GUC_MAX_INSTANCES_PER_CLASS 32 +#define GUC_CONTEXT_NORMAL 0 +#define GUC_CONTEXT_COMPRESSION_SAVE 1 +#define GUC_CONTEXT_COMPRESSION_RESTORE 2 +#define GUC_CONTEXT_COUNT (GUC_CONTEXT_COMPRESSION_RESTORE + 1) + /* Helper for context registration H2G */ struct guc_ctxt_registration_info { u32 flags; @@ -60,6 +66,7 @@ struct guc_ctxt_registration_info { u32 hwlrca_hi; }; #define CONTEXT_REGISTRATION_FLAG_KMD BIT(0) +#define CONTEXT_REGISTRATION_FLAG_TYPE GENMASK(2, 1) /* 32-bit KLV structure as used by policy updates and others */ struct guc_klv_generic_dw_t { @@ -84,13 +91,10 @@ struct guc_update_exec_queue_policy { #define GUC_LOG_NOTIFY_ON_HALF_FULL BIT(1) #define GUC_LOG_CAPTURE_ALLOC_UNITS BIT(2) #define GUC_LOG_LOG_ALLOC_UNITS BIT(3) -#define GUC_LOG_CRASH_SHIFT 4 -#define GUC_LOG_CRASH_MASK (0x3 << GUC_LOG_CRASH_SHIFT) -#define GUC_LOG_DEBUG_SHIFT 6 -#define GUC_LOG_DEBUG_MASK (0xF << GUC_LOG_DEBUG_SHIFT) -#define GUC_LOG_CAPTURE_SHIFT 10 -#define GUC_LOG_CAPTURE_MASK (0x3 << GUC_LOG_CAPTURE_SHIFT) -#define GUC_LOG_BUF_ADDR_SHIFT 12 +#define GUC_LOG_CRASH REG_GENMASK(5, 4) +#define GUC_LOG_DEBUG REG_GENMASK(9, 6) +#define GUC_LOG_CAPTURE REG_GENMASK(11, 10) +#define GUC_LOG_BUF_ADDR REG_GENMASK(31, 12) #define GUC_CTL_WA 1 #define GUC_WA_GAM_CREDITS BIT(10) @@ -103,28 +107,23 @@ struct guc_update_exec_queue_policy { #define GUC_WA_RENDER_RST_RC6_EXIT BIT(19) #define GUC_WA_RCS_REGS_IN_CCS_REGS_LIST BIT(21) #define GUC_WA_ENABLE_TSC_CHECK_ON_RC6 BIT(22) +#define GUC_WA_SAVE_RESTORE_MCFG_REG_AT_MC6 BIT(25) #define GUC_CTL_FEATURE 2 #define GUC_CTL_ENABLE_SLPC BIT(2) #define GUC_CTL_ENABLE_LITE_RESTORE BIT(4) +#define GUC_CTL_ENABLE_PSMI_LOGGING BIT(7) #define GUC_CTL_DISABLE_SCHEDULER BIT(14) #define GUC_CTL_DEBUG 3 -#define GUC_LOG_VERBOSITY_SHIFT 0 -#define GUC_LOG_VERBOSITY_LOW (0 << GUC_LOG_VERBOSITY_SHIFT) -#define GUC_LOG_VERBOSITY_MED (1 << GUC_LOG_VERBOSITY_SHIFT) -#define GUC_LOG_VERBOSITY_HIGH (2 << GUC_LOG_VERBOSITY_SHIFT) -#define GUC_LOG_VERBOSITY_ULTRA (3 << GUC_LOG_VERBOSITY_SHIFT) -#define GUC_LOG_VERBOSITY_MIN 0 +#define GUC_LOG_VERBOSITY REG_GENMASK(1, 0) #define GUC_LOG_VERBOSITY_MAX 3 -#define GUC_LOG_VERBOSITY_MASK 0x0000000f -#define GUC_LOG_DESTINATION_MASK (3 << 4) -#define GUC_LOG_DISABLED (1 << 6) -#define GUC_PROFILE_ENABLED (1 << 7) +#define GUC_LOG_DESTINATION REG_GENMASK(5, 4) +#define GUC_LOG_DISABLED BIT(6) +#define GUC_PROFILE_ENABLED BIT(7) #define GUC_CTL_ADS 4 -#define GUC_ADS_ADDR_SHIFT 1 -#define GUC_ADS_ADDR_MASK (0xFFFFF << GUC_ADS_ADDR_SHIFT) +#define 
GUC_ADS_ADDR REG_GENMASK(21, 1) #define GUC_CTL_DEVID 5 diff --git a/drivers/gpu/drm/xe/xe_guc_log.c b/drivers/gpu/drm/xe/xe_guc_log.c index 38039c411387..c01ccb35dc75 100644 --- a/drivers/gpu/drm/xe/xe_guc_log.c +++ b/drivers/gpu/drm/xe/xe_guc_log.c @@ -79,7 +79,7 @@ static struct xe_guc_log_snapshot *xe_guc_log_snapshot_alloc(struct xe_guc_log * * Also, can't use vmalloc as might be called from atomic context. So need * to break the buffer up into smaller chunks that can be allocated. */ - snapshot->size = log->bo->size; + snapshot->size = xe_bo_size(log->bo); snapshot->num_chunks = DIV_ROUND_UP(snapshot->size, GUC_LOG_CHUNK_SIZE); snapshot->copy = kcalloc(snapshot->num_chunks, sizeof(*snapshot->copy), diff --git a/drivers/gpu/drm/xe/xe_guc_log.h b/drivers/gpu/drm/xe/xe_guc_log.h index f1e2b0be90a9..98a47ac42b08 100644 --- a/drivers/gpu/drm/xe/xe_guc_log.h +++ b/drivers/gpu/drm/xe/xe_guc_log.h @@ -17,7 +17,7 @@ struct xe_device; #define DEBUG_BUFFER_SIZE SZ_8M #define CAPTURE_BUFFER_SIZE SZ_2M #else -#define CRASH_BUFFER_SIZE SZ_8K +#define CRASH_BUFFER_SIZE SZ_16K #define DEBUG_BUFFER_SIZE SZ_64K #define CAPTURE_BUFFER_SIZE SZ_1M #endif diff --git a/drivers/gpu/drm/xe/xe_guc_pc.c b/drivers/gpu/drm/xe/xe_guc_pc.c index 9fab5f5b10fa..53fdf59524c4 100644 --- a/drivers/gpu/drm/xe/xe_guc_pc.c +++ b/drivers/gpu/drm/xe/xe_guc_pc.c @@ -5,8 +5,11 @@ #include "xe_guc_pc.h" +#include <linux/cleanup.h> #include <linux/delay.h> +#include <linux/jiffies.h> #include <linux/ktime.h> +#include <linux/wait_bit.h> #include <drm/drm_managed.h> #include <drm/drm_print.h> @@ -52,9 +55,11 @@ #define LNL_MERT_FREQ_CAP 800 #define BMG_MERT_FREQ_CAP 2133 #define BMG_MIN_FREQ 1200 +#define BMG_MERT_FLUSH_FREQ_CAP 2600 #define SLPC_RESET_TIMEOUT_MS 5 /* roughly 5ms, but no need for precision */ #define SLPC_RESET_EXTENDED_TIMEOUT_MS 1000 /* To be used only at pc_start */ +#define SLPC_ACT_FREQ_TIMEOUT_MS 100 /** * DOC: GuC Power Conservation (PC) @@ -74,6 +79,11 @@ * Xe driver enables SLPC with all of its defaults features and frequency * selection, which varies per platform. * + * Power profiles add another level of control to SLPC. When power saving + * profile is chosen, SLPC will use conservative thresholds to ramp frequency, + * thus saving power. Base profile is default and ensures balanced performance + * for any workload. 
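Several hunks above replace hand-rolled *_SHIFT/*_MASK pairs (GUC_LOG_*, GUC_ADS_ADDR, GUC_LOG_VERBOSITY) with REG_GENMASK field definitions filled via FIELD_PREP. A small userspace equivalent of that packing style, assuming GCC/Clang builtins; the field names and values are illustrative, not the real register layout.

#include <stdint.h>
#include <stdio.h>

#define GENMASK32(h, l)		(((~0u) >> (31 - (h))) & ((~0u) << (l)))
#define FIELD_SHIFT(mask)	(__builtin_ctz(mask))
#define FIELD_PREP32(mask, v)	(((uint32_t)(v) << FIELD_SHIFT(mask)) & (mask))

#define LOG_CRASH	GENMASK32(5, 4)		/* mirrors the GUC_LOG_CRASH idea */
#define LOG_DEBUG	GENMASK32(9, 6)
#define LOG_BUF_ADDR	GENMASK32(31, 12)

int main(void)
{
	uint32_t flags = FIELD_PREP32(LOG_CRASH, 3) |
			 FIELD_PREP32(LOG_DEBUG, 7) |
			 FIELD_PREP32(LOG_BUF_ADDR, 0x12345);

	/* The mask encodes both position and width, so callers never repeat
	 * a shift constant and out-of-range values are clipped by the mask. */
	printf("packed log params: 0x%08x\n", flags);
	return 0;
}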
+ * * Render-C States: * ================ * @@ -142,6 +152,36 @@ static int wait_for_pc_state(struct xe_guc_pc *pc, return -ETIMEDOUT; } +static int wait_for_flush_complete(struct xe_guc_pc *pc) +{ + const unsigned long timeout = msecs_to_jiffies(30); + + if (!wait_var_event_timeout(&pc->flush_freq_limit, + !atomic_read(&pc->flush_freq_limit), + timeout)) + return -ETIMEDOUT; + + return 0; +} + +static int wait_for_act_freq_limit(struct xe_guc_pc *pc, u32 freq) +{ + int timeout_us = SLPC_ACT_FREQ_TIMEOUT_MS * USEC_PER_MSEC; + int slept, wait = 10; + + for (slept = 0; slept < timeout_us;) { + if (xe_guc_pc_get_act_freq(pc) <= freq) + return 0; + + usleep_range(wait, wait << 1); + slept += wait; + wait <<= 1; + if (slept + wait > timeout_us) + wait = timeout_us - slept; + } + + return -ETIMEDOUT; +} static int pc_action_reset(struct xe_guc_pc *pc) { struct xe_guc_ct *ct = pc_to_ct(pc); @@ -554,6 +594,25 @@ u32 xe_guc_pc_get_rpn_freq(struct xe_guc_pc *pc) return pc->rpn_freq; } +static int xe_guc_pc_get_min_freq_locked(struct xe_guc_pc *pc, u32 *freq) +{ + int ret; + + lockdep_assert_held(&pc->freq_lock); + + /* Might be in the middle of a gt reset */ + if (!pc->freq_ready) + return -EAGAIN; + + ret = pc_action_query_task_state(pc); + if (ret) + return ret; + + *freq = pc_get_min_freq(pc); + + return 0; +} + /** * xe_guc_pc_get_min_freq - Get the min operational frequency * @pc: The GuC PC @@ -564,26 +623,28 @@ u32 xe_guc_pc_get_rpn_freq(struct xe_guc_pc *pc) */ int xe_guc_pc_get_min_freq(struct xe_guc_pc *pc, u32 *freq) { + guard(mutex)(&pc->freq_lock); + + return xe_guc_pc_get_min_freq_locked(pc, freq); +} + +static int xe_guc_pc_set_min_freq_locked(struct xe_guc_pc *pc, u32 freq) +{ int ret; - xe_device_assert_mem_access(pc_to_xe(pc)); + lockdep_assert_held(&pc->freq_lock); - mutex_lock(&pc->freq_lock); - if (!pc->freq_ready) { - /* Might be in the middle of a gt reset */ - ret = -EAGAIN; - goto out; - } + /* Might be in the middle of a gt reset */ + if (!pc->freq_ready) + return -EAGAIN; - ret = pc_action_query_task_state(pc); + ret = pc_set_min_freq(pc, freq); if (ret) - goto out; + return ret; - *freq = pc_get_min_freq(pc); + pc->user_requested_min = freq; -out: - mutex_unlock(&pc->freq_lock); - return ret; + return 0; } /** @@ -597,24 +658,28 @@ out: */ int xe_guc_pc_set_min_freq(struct xe_guc_pc *pc, u32 freq) { + guard(mutex)(&pc->freq_lock); + + return xe_guc_pc_set_min_freq_locked(pc, freq); +} + +static int xe_guc_pc_get_max_freq_locked(struct xe_guc_pc *pc, u32 *freq) +{ int ret; - mutex_lock(&pc->freq_lock); - if (!pc->freq_ready) { - /* Might be in the middle of a gt reset */ - ret = -EAGAIN; - goto out; - } + lockdep_assert_held(&pc->freq_lock); - ret = pc_set_min_freq(pc, freq); + /* Might be in the middle of a gt reset */ + if (!pc->freq_ready) + return -EAGAIN; + + ret = pc_action_query_task_state(pc); if (ret) - goto out; + return ret; - pc->user_requested_min = freq; + *freq = pc_get_max_freq(pc); -out: - mutex_unlock(&pc->freq_lock); - return ret; + return 0; } /** @@ -627,24 +692,28 @@ out: */ int xe_guc_pc_get_max_freq(struct xe_guc_pc *pc, u32 *freq) { + guard(mutex)(&pc->freq_lock); + + return xe_guc_pc_get_max_freq_locked(pc, freq); +} + +static int xe_guc_pc_set_max_freq_locked(struct xe_guc_pc *pc, u32 freq) +{ int ret; - mutex_lock(&pc->freq_lock); - if (!pc->freq_ready) { - /* Might be in the middle of a gt reset */ - ret = -EAGAIN; - goto out; - } + lockdep_assert_held(&pc->freq_lock); - ret = pc_action_query_task_state(pc); + /* Might be in the middle of a gt 
reset */ + if (!pc->freq_ready) + return -EAGAIN; + + ret = pc_set_max_freq(pc, freq); if (ret) - goto out; + return ret; - *freq = pc_get_max_freq(pc); + pc->user_requested_max = freq; -out: - mutex_unlock(&pc->freq_lock); - return ret; + return 0; } /** @@ -658,24 +727,14 @@ out: */ int xe_guc_pc_set_max_freq(struct xe_guc_pc *pc, u32 freq) { - int ret; - - mutex_lock(&pc->freq_lock); - if (!pc->freq_ready) { - /* Might be in the middle of a gt reset */ - ret = -EAGAIN; - goto out; + if (XE_GT_WA(pc_to_gt(pc), 22019338487)) { + if (wait_for_flush_complete(pc) != 0) + return -EAGAIN; } - ret = pc_set_max_freq(pc, freq); - if (ret) - goto out; + guard(mutex)(&pc->freq_lock); - pc->user_requested_max = freq; - -out: - mutex_unlock(&pc->freq_lock); - return ret; + return xe_guc_pc_set_max_freq_locked(pc, freq); } /** @@ -781,7 +840,7 @@ static u32 pc_max_freq_cap(struct xe_guc_pc *pc) { struct xe_gt *gt = pc_to_gt(pc); - if (XE_WA(gt, 22019338487)) { + if (XE_GT_WA(gt, 22019338487)) { if (xe_gt_is_media_type(gt)) return min(LNL_MERT_FREQ_CAP, pc->rp0_freq); else @@ -845,7 +904,7 @@ static int pc_adjust_freq_bounds(struct xe_guc_pc *pc) if (pc_get_min_freq(pc) > pc->rp0_freq) ret = pc_set_min_freq(pc, pc->rp0_freq); - if (XE_WA(tile->primary_gt, 14022085890)) + if (XE_GT_WA(tile->primary_gt, 14022085890)) ret = pc_set_min_freq(pc, max(BMG_MIN_FREQ, pc_get_min_freq(pc))); out: @@ -873,30 +932,117 @@ static int pc_adjust_requested_freq(struct xe_guc_pc *pc) return ret; } -static int pc_set_mert_freq_cap(struct xe_guc_pc *pc) +static bool needs_flush_freq_limit(struct xe_guc_pc *pc) { - int ret = 0; + struct xe_gt *gt = pc_to_gt(pc); - if (XE_WA(pc_to_gt(pc), 22019338487)) { - /* - * Get updated min/max and stash them. - */ - ret = xe_guc_pc_get_min_freq(pc, &pc->stashed_min_freq); - if (!ret) - ret = xe_guc_pc_get_max_freq(pc, &pc->stashed_max_freq); - if (ret) - return ret; + return XE_GT_WA(gt, 22019338487) && + pc->rp0_freq > BMG_MERT_FLUSH_FREQ_CAP; +} + +/** + * xe_guc_pc_apply_flush_freq_limit() - Limit max GT freq during L2 flush + * @pc: the xe_guc_pc object + * + * As per the WA, reduce max GT frequency during L2 cache flush + */ +void xe_guc_pc_apply_flush_freq_limit(struct xe_guc_pc *pc) +{ + struct xe_gt *gt = pc_to_gt(pc); + u32 max_freq; + int ret; + + if (!needs_flush_freq_limit(pc)) + return; + + guard(mutex)(&pc->freq_lock); + + ret = xe_guc_pc_get_max_freq_locked(pc, &max_freq); + if (!ret && max_freq > BMG_MERT_FLUSH_FREQ_CAP) { + ret = pc_set_max_freq(pc, BMG_MERT_FLUSH_FREQ_CAP); + if (ret) { + xe_gt_err_once(gt, "Failed to cap max freq on flush to %u, %pe\n", + BMG_MERT_FLUSH_FREQ_CAP, ERR_PTR(ret)); + return; + } + + atomic_set(&pc->flush_freq_limit, 1); /* - * Ensure min and max are bound by MERT_FREQ_CAP until driver loads. + * If user has previously changed max freq, stash that value to + * restore later, otherwise use the current max. New user + * requests wait on flush. 
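The flush-limit path above ends by polling until the actual frequency drops under the cap, using the bounded exponential backoff of wait_for_act_freq_limit() defined a few hunks earlier. A userspace sketch of that loop; read_act_freq() and the numbers are placeholders.

#include <stdio.h>
#include <unistd.h>

static unsigned int read_act_freq(void)
{
	return 2400;				/* pretend MHz readback */
}

/* Poll until the frequency is at or under 'limit', doubling the sleep each
 * round and clamping the final sleep so we never overshoot the deadline. */
static int poll_freq_below(unsigned int limit, int timeout_us)
{
	int slept = 0, wait = 10;

	while (slept < timeout_us) {
		if (read_act_freq() <= limit)
			return 0;
		usleep(wait);			/* the driver uses usleep_range(wait, wait << 1) */
		slept += wait;
		wait <<= 1;			/* back off: 10, 20, 40, ... microseconds */
		if (slept + wait > timeout_us)
			wait = timeout_us - slept;
	}
	return -1;				/* -ETIMEDOUT in the driver */
}

int main(void)
{
	printf("poll result: %d\n", poll_freq_below(2600, 100 * 1000));
	return 0;
}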
*/ - mutex_lock(&pc->freq_lock); - ret = pc_set_min_freq(pc, min(pc->rpe_freq, pc_max_freq_cap(pc))); - if (!ret) - ret = pc_set_max_freq(pc, min(pc->rp0_freq, pc_max_freq_cap(pc))); - mutex_unlock(&pc->freq_lock); + if (pc->user_requested_max != 0) + pc->stashed_max_freq = pc->user_requested_max; + else + pc->stashed_max_freq = max_freq; } + /* + * Wait for actual freq to go below the flush cap: even if the previous + * max was below cap, the current one might still be above it + */ + ret = wait_for_act_freq_limit(pc, BMG_MERT_FLUSH_FREQ_CAP); + if (ret) + xe_gt_err_once(gt, "Actual freq did not reduce to %u, %pe\n", + BMG_MERT_FLUSH_FREQ_CAP, ERR_PTR(ret)); +} + +/** + * xe_guc_pc_remove_flush_freq_limit() - Remove max GT freq limit after L2 flush completes. + * @pc: the xe_guc_pc object + * + * Retrieve the previous GT max frequency value. + */ +void xe_guc_pc_remove_flush_freq_limit(struct xe_guc_pc *pc) +{ + struct xe_gt *gt = pc_to_gt(pc); + int ret = 0; + + if (!needs_flush_freq_limit(pc)) + return; + + if (!atomic_read(&pc->flush_freq_limit)) + return; + + mutex_lock(&pc->freq_lock); + + ret = pc_set_max_freq(>->uc.guc.pc, pc->stashed_max_freq); + if (ret) + xe_gt_err_once(gt, "Failed to restore max freq %u:%d", + pc->stashed_max_freq, ret); + + atomic_set(&pc->flush_freq_limit, 0); + mutex_unlock(&pc->freq_lock); + wake_up_var(&pc->flush_freq_limit); +} + +static int pc_set_mert_freq_cap(struct xe_guc_pc *pc) +{ + int ret; + + if (!XE_GT_WA(pc_to_gt(pc), 22019338487)) + return 0; + + guard(mutex)(&pc->freq_lock); + + /* + * Get updated min/max and stash them. + */ + ret = xe_guc_pc_get_min_freq_locked(pc, &pc->stashed_min_freq); + if (!ret) + ret = xe_guc_pc_get_max_freq_locked(pc, &pc->stashed_max_freq); + if (ret) + return ret; + + /* + * Ensure min and max are bound by MERT_FREQ_CAP until driver loads. 
+ */ + ret = pc_set_min_freq(pc, min(pc->rpe_freq, pc_max_freq_cap(pc))); + if (!ret) + ret = pc_set_max_freq(pc, min(pc->rp0_freq, pc_max_freq_cap(pc))); + return ret; } @@ -935,7 +1081,6 @@ int xe_guc_pc_gucrc_disable(struct xe_guc_pc *pc) { struct xe_device *xe = pc_to_xe(pc); struct xe_gt *gt = pc_to_gt(pc); - unsigned int fw_ref; int ret = 0; if (xe->info.skip_guc_pc) @@ -945,17 +1090,7 @@ int xe_guc_pc_gucrc_disable(struct xe_guc_pc *pc) if (ret) return ret; - fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL); - if (!xe_force_wake_ref_has_domain(fw_ref, XE_FORCEWAKE_ALL)) { - xe_force_wake_put(gt_to_fw(gt), fw_ref); - return -ETIMEDOUT; - } - - xe_gt_idle_disable_c6(gt); - - xe_force_wake_put(gt_to_fw(gt), fw_ref); - - return 0; + return xe_gt_idle_disable_c6(gt); } /** @@ -1041,6 +1176,61 @@ static int pc_action_set_strategy(struct xe_guc_pc *pc, u32 val) return ret; } +static const char *power_profile_to_string(struct xe_guc_pc *pc) +{ + switch (pc->power_profile) { + case SLPC_POWER_PROFILE_BASE: + return "base"; + case SLPC_POWER_PROFILE_POWER_SAVING: + return "power_saving"; + default: + return "invalid"; + } +} + +void xe_guc_pc_get_power_profile(struct xe_guc_pc *pc, char *profile) +{ + switch (pc->power_profile) { + case SLPC_POWER_PROFILE_BASE: + sprintf(profile, "[%s] %s\n", "base", "power_saving"); + break; + case SLPC_POWER_PROFILE_POWER_SAVING: + sprintf(profile, "%s [%s]\n", "base", "power_saving"); + break; + default: + sprintf(profile, "invalid"); + } +} + +int xe_guc_pc_set_power_profile(struct xe_guc_pc *pc, const char *buf) +{ + int ret = 0; + u32 val; + + if (strncmp("base", buf, strlen("base")) == 0) + val = SLPC_POWER_PROFILE_BASE; + else if (strncmp("power_saving", buf, strlen("power_saving")) == 0) + val = SLPC_POWER_PROFILE_POWER_SAVING; + else + return -EINVAL; + + guard(mutex)(&pc->freq_lock); + xe_pm_runtime_get_noresume(pc_to_xe(pc)); + + ret = pc_action_set_param(pc, + SLPC_PARAM_POWER_PROFILE, + val); + if (ret) + xe_gt_err_once(pc_to_gt(pc), "Failed to set power profile to %d: %pe\n", + val, ERR_PTR(ret)); + else + pc->power_profile = val; + + xe_pm_runtime_put(pc_to_xe(pc)); + + return ret; +} + /** * xe_guc_pc_start - Start GuC's Power Conservation component * @pc: Xe_GuC_PC instance @@ -1119,6 +1309,11 @@ int xe_guc_pc_start(struct xe_guc_pc *pc) /* Enable SLPC Optimized Strategy for compute */ ret = pc_action_set_strategy(pc, SLPC_OPTIMIZED_STRATEGY_COMPUTE); + /* Set cached value of power_profile */ + ret = xe_guc_pc_set_power_profile(pc, power_profile_to_string(pc)); + if (unlikely(ret)) + xe_gt_err(gt, "Failed to set SLPC power profile: %pe\n", ERR_PTR(ret)); + out: xe_force_wake_put(gt_to_fw(gt), fw_ref); return ret; @@ -1197,6 +1392,8 @@ int xe_guc_pc_init(struct xe_guc_pc *pc) pc->bo = bo; + pc->power_profile = SLPC_POWER_PROFILE_BASE; + return devm_add_action_or_reset(xe->drm.dev, xe_guc_pc_fini_hw, pc); } diff --git a/drivers/gpu/drm/xe/xe_guc_pc.h b/drivers/gpu/drm/xe/xe_guc_pc.h index 0a2664d5c811..0e31396f103c 100644 --- a/drivers/gpu/drm/xe/xe_guc_pc.h +++ b/drivers/gpu/drm/xe/xe_guc_pc.h @@ -31,6 +31,8 @@ int xe_guc_pc_get_min_freq(struct xe_guc_pc *pc, u32 *freq); int xe_guc_pc_set_min_freq(struct xe_guc_pc *pc, u32 freq); int xe_guc_pc_get_max_freq(struct xe_guc_pc *pc, u32 *freq); int xe_guc_pc_set_max_freq(struct xe_guc_pc *pc, u32 freq); +int xe_guc_pc_set_power_profile(struct xe_guc_pc *pc, const char *buf); +void xe_guc_pc_get_power_profile(struct xe_guc_pc *pc, char *profile); enum xe_gt_idle_state 
xe_guc_pc_c_status(struct xe_guc_pc *pc); u64 xe_guc_pc_rc6_residency(struct xe_guc_pc *pc); @@ -38,5 +40,7 @@ u64 xe_guc_pc_mc6_residency(struct xe_guc_pc *pc); void xe_guc_pc_init_early(struct xe_guc_pc *pc); int xe_guc_pc_restore_stashed_freq(struct xe_guc_pc *pc); void xe_guc_pc_raise_unslice(struct xe_guc_pc *pc); +void xe_guc_pc_apply_flush_freq_limit(struct xe_guc_pc *pc); +void xe_guc_pc_remove_flush_freq_limit(struct xe_guc_pc *pc); #endif /* _XE_GUC_PC_H_ */ diff --git a/drivers/gpu/drm/xe/xe_guc_pc_types.h b/drivers/gpu/drm/xe/xe_guc_pc_types.h index 2978ac9a249b..5e4ea53fbee6 100644 --- a/drivers/gpu/drm/xe/xe_guc_pc_types.h +++ b/drivers/gpu/drm/xe/xe_guc_pc_types.h @@ -15,6 +15,8 @@ struct xe_guc_pc { /** @bo: GGTT buffer object that is shared with GuC PC */ struct xe_bo *bo; + /** @flush_freq_limit: 1 when max freq changes are limited by driver */ + atomic_t flush_freq_limit; /** @rp0_freq: HW RP0 frequency - The Maximum one */ u32 rp0_freq; /** @rpa_freq: HW RPa frequency - The Achievable one */ @@ -35,6 +37,8 @@ struct xe_guc_pc { struct mutex freq_lock; /** @freq_ready: Only handle freq changes, if they are really ready */ bool freq_ready; + /** @power_profile: Base or power_saving profile */ + u32 power_profile; }; #endif /* _XE_GUC_PC_TYPES_H_ */ diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c index df7a5a4eec74..94ed8159496f 100644 --- a/drivers/gpu/drm/xe/xe_guc_submit.c +++ b/drivers/gpu/drm/xe/xe_guc_submit.c @@ -32,6 +32,7 @@ #include "xe_guc_ct.h" #include "xe_guc_exec_queue_types.h" #include "xe_guc_id_mgr.h" +#include "xe_guc_klv_helpers.h" #include "xe_guc_submit_types.h" #include "xe_hw_engine.h" #include "xe_hw_fence.h" @@ -43,6 +44,7 @@ #include "xe_ring_ops_types.h" #include "xe_sched_job.h" #include "xe_trace.h" +#include "xe_uc_fw.h" #include "xe_vm.h" static struct xe_guc * @@ -316,6 +318,71 @@ int xe_guc_submit_init(struct xe_guc *guc, unsigned int num_ids) return drmm_add_action_or_reset(&xe->drm, guc_submit_fini, guc); } +/* + * Given that we want to guarantee enough RCS throughput to avoid missing + * frames, we set the yield policy to 20% of each 80ms interval. 
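The yield policy just described (20% of every 80 ms window) is handed to the GuC as a KLV inside an UPDATE_SCHEDULING_POLICIES H2G message, and the message is only sent when more than the bare action dword was emitted. A minimal sketch of assembling such a message; the action id and KLV key below are placeholders, not the real ABI numbers.

#include <stdint.h>
#include <stdio.h>

#define FAKE_ACTION_UPDATE_SCHED_POLICIES	0x1001	/* placeholder, not the real H2G id */
#define FAKE_KLV_RC_YIELD_KEY			0x2001	/* placeholder KLV key */
#define RC_YIELD_DURATION_MS			80
#define RC_YIELD_RATIO_PCT			20

static uint32_t *emit_rc_yield_klv(uint32_t *emit)
{
	*emit++ = ((uint32_t)FAKE_KLV_RC_YIELD_KEY << 16) | 2;	/* key | data length (2 dwords) */
	*emit++ = RC_YIELD_DURATION_MS;
	*emit++ = RC_YIELD_RATIO_PCT;
	return emit;
}

int main(void)
{
	uint32_t msg[16];
	uint32_t *emit = msg;
	size_t count;

	*emit++ = FAKE_ACTION_UPDATE_SCHED_POLICIES;
	emit = emit_rc_yield_klv(emit);		/* only emitted when compute engines exist */

	count = (size_t)(emit - msg);
	if (count > 1)				/* nothing to send if only the action dword was written */
		printf("would send %zu dwords, first KLV header 0x%08x\n", count, msg[1]);
	return 0;
}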
+ */ +#define RC_YIELD_DURATION 80 /* in ms */ +#define RC_YIELD_RATIO 20 /* in percent */ +static u32 *emit_render_compute_yield_klv(u32 *emit) +{ + *emit++ = PREP_GUC_KLV_TAG(SCHEDULING_POLICIES_RENDER_COMPUTE_YIELD); + *emit++ = RC_YIELD_DURATION; + *emit++ = RC_YIELD_RATIO; + + return emit; +} + +#define SCHEDULING_POLICY_MAX_DWORDS 16 +static int guc_init_global_schedule_policy(struct xe_guc *guc) +{ + u32 data[SCHEDULING_POLICY_MAX_DWORDS]; + u32 *emit = data; + u32 count = 0; + int ret; + + if (GUC_SUBMIT_VER(guc) < MAKE_GUC_VER(1, 1, 0)) + return 0; + + *emit++ = XE_GUC_ACTION_UPDATE_SCHEDULING_POLICIES_KLV; + + if (CCS_MASK(guc_to_gt(guc))) + emit = emit_render_compute_yield_klv(emit); + + count = emit - data; + if (count > 1) { + xe_assert(guc_to_xe(guc), count <= SCHEDULING_POLICY_MAX_DWORDS); + + ret = xe_guc_ct_send_block(&guc->ct, data, count); + if (ret < 0) { + xe_gt_err(guc_to_gt(guc), + "failed to enable GuC scheduling policies: %pe\n", + ERR_PTR(ret)); + return ret; + } + } + + return 0; +} + +int xe_guc_submit_enable(struct xe_guc *guc) +{ + int ret; + + ret = guc_init_global_schedule_policy(guc); + if (ret) + return ret; + + guc->submission_state.enabled = true; + + return 0; +} + +void xe_guc_submit_disable(struct xe_guc *guc) +{ + guc->submission_state.enabled = false; +} + static void __release_guc_id(struct xe_guc *guc, struct xe_exec_queue *q, u32 xa_count) { int i; @@ -542,7 +609,7 @@ static void __register_exec_queue(struct xe_guc *guc, xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action), 0, 0); } -static void register_exec_queue(struct xe_exec_queue *q) +static void register_exec_queue(struct xe_exec_queue *q, int ctx_type) { struct xe_guc *guc = exec_queue_to_guc(q); struct xe_device *xe = guc_to_xe(guc); @@ -550,6 +617,7 @@ static void register_exec_queue(struct xe_exec_queue *q) struct guc_ctxt_registration_info info; xe_gt_assert(guc_to_gt(guc), !exec_queue_registered(q)); + xe_gt_assert(guc_to_gt(guc), ctx_type < GUC_CONTEXT_COUNT); memset(&info, 0, sizeof(info)); info.context_idx = q->guc->id; @@ -557,7 +625,8 @@ static void register_exec_queue(struct xe_exec_queue *q) info.engine_submit_mask = q->logical_mask; info.hwlrca_lo = lower_32_bits(xe_lrc_descriptor(lrc)); info.hwlrca_hi = upper_32_bits(xe_lrc_descriptor(lrc)); - info.flags = CONTEXT_REGISTRATION_FLAG_KMD; + info.flags = CONTEXT_REGISTRATION_FLAG_KMD | + FIELD_PREP(CONTEXT_REGISTRATION_FLAG_TYPE, ctx_type); if (xe_exec_queue_is_parallel(q)) { u64 ggtt_addr = xe_lrc_parallel_ggtt_addr(lrc); @@ -667,12 +736,18 @@ static void wq_item_append(struct xe_exec_queue *q) if (wq_wait_for_space(q, wqi_size)) return; + xe_gt_assert(guc_to_gt(guc), i == XE_GUC_CONTEXT_WQ_HEADER_DATA_0_TYPE_LEN); wqi[i++] = FIELD_PREP(WQ_TYPE_MASK, WQ_TYPE_MULTI_LRC) | FIELD_PREP(WQ_LEN_MASK, len_dw); + xe_gt_assert(guc_to_gt(guc), i == XE_GUC_CONTEXT_WQ_EL_INFO_DATA_1_CTX_DESC_LOW); wqi[i++] = xe_lrc_descriptor(q->lrc[0]); + xe_gt_assert(guc_to_gt(guc), i == + XE_GUC_CONTEXT_WQ_EL_INFO_DATA_2_GUCCTX_RINGTAIL_FREEZEPOCS); wqi[i++] = FIELD_PREP(WQ_GUC_ID_MASK, q->guc->id) | FIELD_PREP(WQ_RING_TAIL_MASK, q->lrc[0]->ring.tail / sizeof(u64)); + xe_gt_assert(guc_to_gt(guc), i == XE_GUC_CONTEXT_WQ_EL_INFO_DATA_3_WI_FENCE_ID); wqi[i++] = 0; + xe_gt_assert(guc_to_gt(guc), i == XE_GUC_CONTEXT_WQ_EL_CHILD_LIST_DATA_4_RINGTAIL); for (j = 1; j < q->width; ++j) { struct xe_lrc *lrc = q->lrc[j]; @@ -693,6 +768,50 @@ static void wq_item_append(struct xe_exec_queue *q) parallel_write(xe, map, wq_desc.tail, q->guc->wqi_tail); } +static int 
wq_items_rebase(struct xe_exec_queue *q) +{ + struct xe_guc *guc = exec_queue_to_guc(q); + struct xe_device *xe = guc_to_xe(guc); + struct iosys_map map = xe_lrc_parallel_map(q->lrc[0]); + int i = q->guc->wqi_head; + + /* the ring starts after a header struct */ + iosys_map_incr(&map, offsetof(struct guc_submit_parallel_scratch, wq[0])); + + while ((i % WQ_SIZE) != (q->guc->wqi_tail % WQ_SIZE)) { + u32 len_dw, type, val; + + if (drm_WARN_ON_ONCE(&xe->drm, i < 0 || i > 2 * WQ_SIZE)) + break; + + val = xe_map_rd_ring_u32(xe, &map, i / sizeof(u32) + + XE_GUC_CONTEXT_WQ_HEADER_DATA_0_TYPE_LEN, + WQ_SIZE / sizeof(u32)); + len_dw = FIELD_GET(WQ_LEN_MASK, val); + type = FIELD_GET(WQ_TYPE_MASK, val); + + if (drm_WARN_ON_ONCE(&xe->drm, len_dw >= WQ_SIZE / sizeof(u32))) + break; + + if (type == WQ_TYPE_MULTI_LRC) { + val = xe_lrc_descriptor(q->lrc[0]); + xe_map_wr_ring_u32(xe, &map, i / sizeof(u32) + + XE_GUC_CONTEXT_WQ_EL_INFO_DATA_1_CTX_DESC_LOW, + WQ_SIZE / sizeof(u32), val); + } else if (drm_WARN_ON_ONCE(&xe->drm, type != WQ_TYPE_NOOP)) { + break; + } + + i += (len_dw + 1) * sizeof(u32); + } + + if ((i % WQ_SIZE) != (q->guc->wqi_tail % WQ_SIZE)) { + xe_gt_err(q->gt, "Exec queue fixups incomplete - wqi parse failed\n"); + return -EBADMSG; + } + return 0; +} + #define RESUME_PENDING ~0x0ull static void submit_exec_queue(struct xe_exec_queue *q) { @@ -761,7 +880,7 @@ guc_exec_queue_run_job(struct drm_sched_job *drm_job) if (!exec_queue_killed_or_banned_or_wedged(q) && !xe_sched_job_is_error(job)) { if (!exec_queue_registered(q)) - register_exec_queue(q); + register_exec_queue(q, GUC_CONTEXT_NORMAL); if (!lr) /* LR jobs are emitted in the exec IOCTL */ q->ring_ops->emit_job(job); submit_exec_queue(q); @@ -777,6 +896,30 @@ guc_exec_queue_run_job(struct drm_sched_job *drm_job) return fence; } +/** + * xe_guc_jobs_ring_rebase - Re-emit ring commands of requests pending + * on all queues under a guc. + * @guc: the &xe_guc struct instance + */ +void xe_guc_jobs_ring_rebase(struct xe_guc *guc) +{ + struct xe_exec_queue *q; + unsigned long index; + + /* + * This routine is used within VF migration recovery. This means + * using the lock here introduces a restriction: we cannot wait + * for any GFX HW response while the lock is taken. + */ + mutex_lock(&guc->submission_state.lock); + xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) { + if (exec_queue_killed_or_banned_or_wedged(q)) + continue; + xe_exec_queue_jobs_ring_restore(q); + } + mutex_unlock(&guc->submission_state.lock); +} + static void guc_exec_queue_free_job(struct drm_sched_job *drm_job) { struct xe_sched_job *job = to_xe_sched_job(drm_job); @@ -908,12 +1051,13 @@ static void xe_guc_exec_queue_lr_cleanup(struct work_struct *w) struct xe_exec_queue *q = ge->q; struct xe_guc *guc = exec_queue_to_guc(q); struct xe_gpu_scheduler *sched = &ge->sched; - bool wedged; + bool wedged = false; xe_gt_assert(guc_to_gt(guc), xe_exec_queue_is_lr(q)); trace_xe_exec_queue_lr_cleanup(q); - wedged = guc_submit_hint_wedged(exec_queue_to_guc(q)); + if (!exec_queue_killed(q)) + wedged = guc_submit_hint_wedged(exec_queue_to_guc(q)); /* Kill the run_job / process_msg entry points */ xe_sched_submission_stop(sched); @@ -1084,7 +1228,7 @@ guc_exec_queue_timedout_job(struct drm_sched_job *drm_job) int err = -ETIME; pid_t pid = -1; int i = 0; - bool wedged, skip_timeout_check; + bool wedged = false, skip_timeout_check; /* * TDR has fired before free job worker. 
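wq_items_rebase() above walks the pending work-queue items in a circular buffer: read each item header, learn its length, patch one dword per multi-LRC item, and advance with wrap-around until head meets tail. A self-contained sketch of that walk; the ring size, bit layout and type values are illustrative, not the real WQ_TYPE/WQ_LEN encoding.

#include <stdint.h>
#include <stdio.h>

#define RING_DW		16			/* illustrative ring size in dwords */
#define TYPE_SHIFT	24
#define TYPE_MASK	(0xffu << TYPE_SHIFT)
#define LEN_MASK	0x00ffffffu		/* payload length in dwords, excluding the header */
#define TYPE_NOOP	0
#define TYPE_MULTI_LRC	1

/* Walk [head, tail) with wrap-around, patching the first payload dword of
 * every multi-LRC item (in the driver: the re-registered context descriptor). */
static int ring_rebase(uint32_t *ring, unsigned int head, unsigned int tail,
		       uint32_t new_desc)
{
	unsigned int i = head;

	while ((i % RING_DW) != (tail % RING_DW)) {
		uint32_t hdr = ring[i % RING_DW];
		uint32_t len = hdr & LEN_MASK;
		uint32_t type = (hdr & TYPE_MASK) >> TYPE_SHIFT;

		if (i > 2 * RING_DW || len >= RING_DW)
			return -1;		/* corrupt item, bail like the driver does */
		if (type == TYPE_MULTI_LRC)
			ring[(i + 1) % RING_DW] = new_desc;
		else if (type != TYPE_NOOP)
			return -1;

		i += len + 1;			/* header + payload dwords */
	}
	return 0;
}

int main(void)
{
	uint32_t ring[RING_DW] = { 0 };

	/* one multi-LRC item with 3 payload dwords starting at dword 0 */
	ring[0] = ((uint32_t)TYPE_MULTI_LRC << TYPE_SHIFT) | 3;

	printf("rebase: %d, patched desc 0x%08x\n",
	       ring_rebase(ring, 0, 4, 0xdeadbeef), ring[1]);
	return 0;
}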
Common if exec queue @@ -1092,12 +1236,8 @@ guc_exec_queue_timedout_job(struct drm_sched_job *drm_job) * list so job can be freed and kick scheduler ensuring free job is not * lost. */ - if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &job->fence->flags)) { - xe_sched_add_pending_job(sched, job); - xe_sched_submission_start(sched); - - return DRM_GPU_SCHED_STAT_NOMINAL; - } + if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &job->fence->flags)) + return DRM_GPU_SCHED_STAT_NO_HANG; /* Kill the run_job entry point */ xe_sched_submission_stop(sched); @@ -1130,7 +1270,8 @@ guc_exec_queue_timedout_job(struct drm_sched_job *drm_job) * doesn't work for SRIOV. For now assuming timeouts in wedged mode are * genuine timeouts. */ - wedged = guc_submit_hint_wedged(exec_queue_to_guc(q)); + if (!exec_queue_killed(q)) + wedged = guc_submit_hint_wedged(exec_queue_to_guc(q)); /* Engine state now stable, disable scheduling to check timestamp */ if (!wedged && exec_queue_registered(q)) { @@ -1265,7 +1406,7 @@ trigger_reset: /* Start fence signaling */ xe_hw_fence_irq_start(q->fence_irq); - return DRM_GPU_SCHED_STAT_NOMINAL; + return DRM_GPU_SCHED_STAT_RESET; sched_enable: enable_scheduling(q); @@ -1275,54 +1416,61 @@ rearm: * but there is not currently an easy way to do in DRM scheduler. With * some thought, do this in a follow up. */ - xe_sched_add_pending_job(sched, job); xe_sched_submission_start(sched); + return DRM_GPU_SCHED_STAT_NO_HANG; +} + +static void guc_exec_queue_fini(struct xe_exec_queue *q) +{ + struct xe_guc_exec_queue *ge = q->guc; + struct xe_guc *guc = exec_queue_to_guc(q); + + release_guc_id(guc, q); + xe_sched_entity_fini(&ge->entity); + xe_sched_fini(&ge->sched); - return DRM_GPU_SCHED_STAT_NOMINAL; + /* + * RCU free due sched being exported via DRM scheduler fences + * (timeline name). + */ + kfree_rcu(ge, rcu); } -static void __guc_exec_queue_fini_async(struct work_struct *w) +static void __guc_exec_queue_destroy_async(struct work_struct *w) { struct xe_guc_exec_queue *ge = - container_of(w, struct xe_guc_exec_queue, fini_async); + container_of(w, struct xe_guc_exec_queue, destroy_async); struct xe_exec_queue *q = ge->q; struct xe_guc *guc = exec_queue_to_guc(q); xe_pm_runtime_get(guc_to_xe(guc)); trace_xe_exec_queue_destroy(q); - release_guc_id(guc, q); if (xe_exec_queue_is_lr(q)) cancel_work_sync(&ge->lr_tdr); /* Confirm no work left behind accessing device structures */ cancel_delayed_work_sync(&ge->sched.base.work_tdr); - xe_sched_entity_fini(&ge->entity); - xe_sched_fini(&ge->sched); - /* - * RCU free due sched being exported via DRM scheduler fences - * (timeline name). 
- */ - kfree_rcu(ge, rcu); xe_exec_queue_fini(q); + xe_pm_runtime_put(guc_to_xe(guc)); } -static void guc_exec_queue_fini_async(struct xe_exec_queue *q) +static void guc_exec_queue_destroy_async(struct xe_exec_queue *q) { struct xe_guc *guc = exec_queue_to_guc(q); struct xe_device *xe = guc_to_xe(guc); - INIT_WORK(&q->guc->fini_async, __guc_exec_queue_fini_async); + INIT_WORK(&q->guc->destroy_async, __guc_exec_queue_destroy_async); /* We must block on kernel engines so slabs are empty on driver unload */ if (q->flags & EXEC_QUEUE_FLAG_PERMANENT || exec_queue_wedged(q)) - __guc_exec_queue_fini_async(&q->guc->fini_async); + __guc_exec_queue_destroy_async(&q->guc->destroy_async); else - queue_work(xe->destroy_wq, &q->guc->fini_async); + queue_work(xe->destroy_wq, &q->guc->destroy_async); } -static void __guc_exec_queue_fini(struct xe_guc *guc, struct xe_exec_queue *q) +static void __guc_exec_queue_destroy(struct xe_guc *guc, struct xe_exec_queue *q) { /* * Might be done from within the GPU scheduler, need to do async as we @@ -1331,7 +1479,7 @@ static void __guc_exec_queue_fini(struct xe_guc *guc, struct xe_exec_queue *q) * this we and don't really care when everything is fini'd, just that it * is. */ - guc_exec_queue_fini_async(q); + guc_exec_queue_destroy_async(q); } static void __guc_exec_queue_process_msg_cleanup(struct xe_sched_msg *msg) @@ -1342,10 +1490,20 @@ static void __guc_exec_queue_process_msg_cleanup(struct xe_sched_msg *msg) xe_gt_assert(guc_to_gt(guc), !(q->flags & EXEC_QUEUE_FLAG_PERMANENT)); trace_xe_exec_queue_cleanup_entity(q); - if (exec_queue_registered(q)) + /* + * Expected state transitions for cleanup: + * - If the exec queue is registered and GuC firmware is running, we must first + * disable scheduling and deregister the queue to ensure proper teardown and + * resource release in the GuC, then destroy the exec queue on driver side. + * - If the GuC is already stopped (e.g., during driver unload or GPU reset), + * we cannot expect a response for the deregister request. In this case, + * it is safe to directly destroy the exec queue on driver side, as the GuC + * will not process further requests and all resources must be cleaned up locally. 
+ */ + if (exec_queue_registered(q) && xe_uc_fw_is_running(&guc->fw)) disable_scheduling_deregister(guc, q); else - __guc_exec_queue_fini(guc, q); + __guc_exec_queue_destroy(guc, q); } static bool guc_exec_queue_allowed_to_change_state(struct xe_exec_queue *q) @@ -1578,14 +1736,14 @@ static bool guc_exec_queue_try_add_msg(struct xe_exec_queue *q, #define STATIC_MSG_CLEANUP 0 #define STATIC_MSG_SUSPEND 1 #define STATIC_MSG_RESUME 2 -static void guc_exec_queue_fini(struct xe_exec_queue *q) +static void guc_exec_queue_destroy(struct xe_exec_queue *q) { struct xe_sched_msg *msg = q->guc->static_msgs + STATIC_MSG_CLEANUP; if (!(q->flags & EXEC_QUEUE_FLAG_PERMANENT) && !exec_queue_wedged(q)) guc_exec_queue_add_msg(q, msg, CLEANUP); else - __guc_exec_queue_fini(exec_queue_to_guc(q), q); + __guc_exec_queue_destroy(exec_queue_to_guc(q), q); } static int guc_exec_queue_set_priority(struct xe_exec_queue *q, @@ -1715,6 +1873,7 @@ static const struct xe_exec_queue_ops guc_exec_queue_ops = { .init = guc_exec_queue_init, .kill = guc_exec_queue_kill, .fini = guc_exec_queue_fini, + .destroy = guc_exec_queue_destroy, .set_priority = guc_exec_queue_set_priority, .set_timeslice = guc_exec_queue_set_timeslice, .set_preempt_timeout = guc_exec_queue_set_preempt_timeout, @@ -1736,7 +1895,7 @@ static void guc_exec_queue_stop(struct xe_guc *guc, struct xe_exec_queue *q) if (exec_queue_extra_ref(q) || xe_exec_queue_is_lr(q)) xe_exec_queue_put(q); else if (exec_queue_destroyed(q)) - __guc_exec_queue_fini(guc, q); + __guc_exec_queue_destroy(guc, q); } if (q->guc->suspend_pending) { set_exec_queue_suspended(q); @@ -1777,6 +1936,43 @@ static void guc_exec_queue_stop(struct xe_guc *guc, struct xe_exec_queue *q) } } +/** + * xe_guc_submit_reset_block - Disallow reset calls on given GuC. + * @guc: the &xe_guc struct instance + */ +int xe_guc_submit_reset_block(struct xe_guc *guc) +{ + return atomic_fetch_or(1, &guc->submission_state.reset_blocked); +} + +/** + * xe_guc_submit_reset_unblock - Allow back reset calls on given GuC. + * @guc: the &xe_guc struct instance + */ +void xe_guc_submit_reset_unblock(struct xe_guc *guc) +{ + atomic_set_release(&guc->submission_state.reset_blocked, 0); + wake_up_all(&guc->ct.wq); +} + +static int guc_submit_reset_is_blocked(struct xe_guc *guc) +{ + return atomic_read_acquire(&guc->submission_state.reset_blocked); +} + +/* Maximum time of blocking reset */ +#define RESET_BLOCK_PERIOD_MAX (HZ * 5) + +/** + * xe_guc_wait_reset_unblock - Wait until reset blocking flag is lifted, or timeout. + * @guc: the &xe_guc struct instance + */ +int xe_guc_wait_reset_unblock(struct xe_guc *guc) +{ + return wait_event_timeout(guc->ct.wq, + !guc_submit_reset_is_blocked(guc), RESET_BLOCK_PERIOD_MAX); +} + int xe_guc_submit_reset_prepare(struct xe_guc *guc) { int ret; @@ -1830,6 +2026,19 @@ void xe_guc_submit_stop(struct xe_guc *guc) } +/** + * xe_guc_submit_pause - Stop further runs of submission tasks on given GuC. 
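The reset-block helpers introduced above pair atomic_fetch_or() on the blocking flag with atomic_set_release()/atomic_read_acquire() and a wait on guc->ct.wq, so a recovery path can hold off GT resets while it fixes up submission state, and a pending reset can wait (bounded by RESET_BLOCK_PERIOD_MAX) for the block to be lifted. A minimal caller sketch, assuming only the declarations from xe_guc_submit.h; the surrounding function names are hypothetical, not taken from the patch:

/* Illustrative sketch only: example_recovery()/example_reset() are not part
 * of the driver; they just show how the blocking API is intended to pair up. */
static int example_recovery(struct xe_guc *guc)
{
	/* Non-zero return means somebody already holds the block. */
	if (xe_guc_submit_reset_block(guc))
		return -EBUSY;

	/* ... fix up state that must not race with a GT reset ... */

	/* Lifts the block and wakes waiters on guc->ct.wq. */
	xe_guc_submit_reset_unblock(guc);
	return 0;
}

static int example_reset(struct xe_guc *guc)
{
	/* wait_event_timeout() semantics: zero means the block was never lifted. */
	if (!xe_guc_wait_reset_unblock(guc))
		return -ETIMEDOUT;

	/* ... proceed with the actual reset ... */
	return 0;
}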
+ * @guc: the &xe_guc struct instance whose scheduler is to be disabled + */ +void xe_guc_submit_pause(struct xe_guc *guc) +{ + struct xe_exec_queue *q; + unsigned long index; + + xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) + xe_sched_submission_stop_async(&q->guc->sched); +} + static void guc_exec_queue_start(struct xe_exec_queue *q) { struct xe_gpu_scheduler *sched = &q->guc->sched; @@ -1870,6 +2079,28 @@ int xe_guc_submit_start(struct xe_guc *guc) return 0; } +static void guc_exec_queue_unpause(struct xe_exec_queue *q) +{ + struct xe_gpu_scheduler *sched = &q->guc->sched; + + xe_sched_submission_start(sched); +} + +/** + * xe_guc_submit_unpause - Allow further runs of submission tasks on given GuC. + * @guc: the &xe_guc struct instance whose scheduler is to be enabled + */ +void xe_guc_submit_unpause(struct xe_guc *guc) +{ + struct xe_exec_queue *q; + unsigned long index; + + xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) + guc_exec_queue_unpause(q); + + wake_up_all(&guc->ct.wq); +} + static struct xe_exec_queue * g2h_exec_queue_lookup(struct xe_guc *guc, u32 guc_id) { @@ -1883,7 +2114,7 @@ g2h_exec_queue_lookup(struct xe_guc *guc, u32 guc_id) q = xa_load(&guc->submission_state.exec_queue_lookup, guc_id); if (unlikely(!q)) { - xe_gt_err(gt, "Not engine present for guc_id %u\n", guc_id); + xe_gt_err(gt, "No exec queue found for guc_id %u\n", guc_id); return NULL; } @@ -1993,7 +2224,7 @@ static void handle_deregister_done(struct xe_guc *guc, struct xe_exec_queue *q) if (exec_queue_extra_ref(q) || xe_exec_queue_is_lr(q)) xe_exec_queue_put(q); else - __guc_exec_queue_fini(guc, q); + __guc_exec_queue_destroy(guc, q); } int xe_guc_deregister_done_handler(struct xe_guc *guc, u32 *msg, u32 len) @@ -2090,12 +2321,16 @@ int xe_guc_exec_queue_memory_cat_error_handler(struct xe_guc *guc, u32 *msg, struct xe_gt *gt = guc_to_gt(guc); struct xe_exec_queue *q; u32 guc_id; + u32 type = XE_GUC_CAT_ERR_TYPE_INVALID; - if (unlikely(len < 1)) + if (unlikely(!len || len > 2)) return -EPROTO; guc_id = msg[0]; + if (len == 2) + type = msg[1]; + if (guc_id == GUC_ID_UNKNOWN) { /* * GuC uses GUC_ID_UNKNOWN if it can not map the CAT fault to any PF/VF @@ -2109,8 +2344,19 @@ int xe_guc_exec_queue_memory_cat_error_handler(struct xe_guc *guc, u32 *msg, if (unlikely(!q)) return -EPROTO; - xe_gt_dbg(gt, "Engine memory cat error: engine_class=%s, logical_mask: 0x%x, guc_id=%d", - xe_hw_engine_class_to_str(q->class), q->logical_mask, guc_id); + /* + * The type is HW-defined and changes based on platform, so we don't + * decode it in the kernel and only check if it is valid. + * See bspec 54047 and 72187 for details. + */ + if (type != XE_GUC_CAT_ERR_TYPE_INVALID) + xe_gt_dbg(gt, + "Engine memory CAT error [%u]: class=%s, logical_mask: 0x%x, guc_id=%d", + type, xe_hw_engine_class_to_str(q->class), q->logical_mask, guc_id); + else + xe_gt_dbg(gt, + "Engine memory CAT error: class=%s, logical_mask: 0x%x, guc_id=%d", + xe_hw_engine_class_to_str(q->class), q->logical_mask, guc_id); trace_xe_exec_queue_memory_cat_error(q); @@ -2367,6 +2613,34 @@ static void guc_exec_queue_print(struct xe_exec_queue *q, struct drm_printer *p) } /** + * xe_guc_register_vf_exec_queue - Register exec queue for a given context type. + * @q: Execution queue + * @ctx_type: Type of the context + * + * This function registers the execution queue with the guc. Special context + * types like GUC_CONTEXT_COMPRESSION_SAVE and GUC_CONTEXT_COMPRESSION_RESTORE + * are only applicable for IGPU and in the VF. 
+ * Submits the execution queue to GUC after registering it. + * + * Returns - None. + */ +void xe_guc_register_vf_exec_queue(struct xe_exec_queue *q, int ctx_type) +{ + struct xe_guc *guc = exec_queue_to_guc(q); + struct xe_device *xe = guc_to_xe(guc); + struct xe_gt *gt = guc_to_gt(guc); + + xe_gt_assert(gt, IS_SRIOV_VF(xe)); + xe_gt_assert(gt, !IS_DGFX(xe)); + xe_gt_assert(gt, ctx_type == GUC_CONTEXT_COMPRESSION_SAVE || + ctx_type == GUC_CONTEXT_COMPRESSION_RESTORE); + xe_gt_assert(gt, GUC_SUBMIT_VER(guc) >= MAKE_GUC_VER(1, 23, 0)); + + register_exec_queue(q, ctx_type); + enable_scheduling(q); +} + +/** * xe_guc_submit_print - GuC Submit Print. * @guc: GuC. * @p: drm_printer where it will be printed out. @@ -2386,3 +2660,32 @@ void xe_guc_submit_print(struct xe_guc *guc, struct drm_printer *p) guc_exec_queue_print(q, p); mutex_unlock(&guc->submission_state.lock); } + +/** + * xe_guc_contexts_hwsp_rebase - Re-compute GGTT references within all + * exec queues registered to given GuC. + * @guc: the &xe_guc struct instance + * @scratch: scratch buffer to be used as temporary storage + * + * Returns: zero on success, negative error code on failure. + */ +int xe_guc_contexts_hwsp_rebase(struct xe_guc *guc, void *scratch) +{ + struct xe_exec_queue *q; + unsigned long index; + int err = 0; + + mutex_lock(&guc->submission_state.lock); + xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) { + err = xe_exec_queue_contexts_hwsp_rebase(q, scratch); + if (err) + break; + if (xe_exec_queue_is_parallel(q)) + err = wq_items_rebase(q); + if (err) + break; + } + mutex_unlock(&guc->submission_state.lock); + + return err; +} diff --git a/drivers/gpu/drm/xe/xe_guc_submit.h b/drivers/gpu/drm/xe/xe_guc_submit.h index 9b71a986c6ca..78c3f07e31a0 100644 --- a/drivers/gpu/drm/xe/xe_guc_submit.h +++ b/drivers/gpu/drm/xe/xe_guc_submit.h @@ -13,11 +13,18 @@ struct xe_exec_queue; struct xe_guc; int xe_guc_submit_init(struct xe_guc *guc, unsigned int num_ids); +int xe_guc_submit_enable(struct xe_guc *guc); +void xe_guc_submit_disable(struct xe_guc *guc); int xe_guc_submit_reset_prepare(struct xe_guc *guc); void xe_guc_submit_reset_wait(struct xe_guc *guc); void xe_guc_submit_stop(struct xe_guc *guc); int xe_guc_submit_start(struct xe_guc *guc); +void xe_guc_submit_pause(struct xe_guc *guc); +void xe_guc_submit_unpause(struct xe_guc *guc); +int xe_guc_submit_reset_block(struct xe_guc *guc); +void xe_guc_submit_reset_unblock(struct xe_guc *guc); +int xe_guc_wait_reset_unblock(struct xe_guc *guc); void xe_guc_submit_wedge(struct xe_guc *guc); int xe_guc_read_stopped(struct xe_guc *guc); @@ -29,6 +36,8 @@ int xe_guc_exec_queue_memory_cat_error_handler(struct xe_guc *guc, u32 *msg, int xe_guc_exec_queue_reset_failure_handler(struct xe_guc *guc, u32 *msg, u32 len); int xe_guc_error_capture_handler(struct xe_guc *guc, u32 *msg, u32 len); +void xe_guc_jobs_ring_rebase(struct xe_guc *guc); + struct xe_guc_submit_exec_queue_snapshot * xe_guc_exec_queue_snapshot_capture(struct xe_exec_queue *q); void @@ -39,5 +48,8 @@ xe_guc_exec_queue_snapshot_print(struct xe_guc_submit_exec_queue_snapshot *snaps void xe_guc_exec_queue_snapshot_free(struct xe_guc_submit_exec_queue_snapshot *snapshot); void xe_guc_submit_print(struct xe_guc *guc, struct drm_printer *p); +void xe_guc_register_vf_exec_queue(struct xe_exec_queue *q, int ctx_type); + +int xe_guc_contexts_hwsp_rebase(struct xe_guc *guc, void *scratch); #endif diff --git a/drivers/gpu/drm/xe/xe_guc_tlb_inval.c b/drivers/gpu/drm/xe/xe_guc_tlb_inval.c new file mode 100644 
index 000000000000..6bf2103602f8 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_guc_tlb_inval.c @@ -0,0 +1,242 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2025 Intel Corporation + */ + +#include "abi/guc_actions_abi.h" + +#include "xe_device.h" +#include "xe_gt_stats.h" +#include "xe_gt_types.h" +#include "xe_guc.h" +#include "xe_guc_ct.h" +#include "xe_guc_tlb_inval.h" +#include "xe_force_wake.h" +#include "xe_mmio.h" +#include "xe_tlb_inval.h" + +#include "regs/xe_guc_regs.h" + +/* + * XXX: The seqno algorithm relies on TLB invalidation being processed in order + * which they currently are by the GuC, if that changes the algorithm will need + * to be updated. + */ + +static int send_tlb_inval(struct xe_guc *guc, const u32 *action, int len) +{ + struct xe_gt *gt = guc_to_gt(guc); + + xe_gt_assert(gt, action[1]); /* Seqno */ + + xe_gt_stats_incr(gt, XE_GT_STATS_ID_TLB_INVAL, 1); + return xe_guc_ct_send(&guc->ct, action, len, + G2H_LEN_DW_TLB_INVALIDATE, 1); +} + +#define MAKE_INVAL_OP(type) ((type << XE_GUC_TLB_INVAL_TYPE_SHIFT) | \ + XE_GUC_TLB_INVAL_MODE_HEAVY << XE_GUC_TLB_INVAL_MODE_SHIFT | \ + XE_GUC_TLB_INVAL_FLUSH_CACHE) + +static int send_tlb_inval_all(struct xe_tlb_inval *tlb_inval, u32 seqno) +{ + struct xe_guc *guc = tlb_inval->private; + u32 action[] = { + XE_GUC_ACTION_TLB_INVALIDATION_ALL, + seqno, + MAKE_INVAL_OP(XE_GUC_TLB_INVAL_FULL), + }; + + return send_tlb_inval(guc, action, ARRAY_SIZE(action)); +} + +static int send_tlb_inval_ggtt(struct xe_tlb_inval *tlb_inval, u32 seqno) +{ + struct xe_guc *guc = tlb_inval->private; + struct xe_gt *gt = guc_to_gt(guc); + struct xe_device *xe = guc_to_xe(guc); + + /* + * Returning -ECANCELED in this function is squashed at the caller and + * signals waiters. + */ + + if (xe_guc_ct_enabled(&guc->ct) && guc->submission_state.enabled) { + u32 action[] = { + XE_GUC_ACTION_TLB_INVALIDATION, + seqno, + MAKE_INVAL_OP(XE_GUC_TLB_INVAL_GUC), + }; + + return send_tlb_inval(guc, action, ARRAY_SIZE(action)); + } else if (xe_device_uc_enabled(xe) && !xe_device_wedged(xe)) { + struct xe_mmio *mmio = >->mmio; + unsigned int fw_ref; + + if (IS_SRIOV_VF(xe)) + return -ECANCELED; + + fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); + if (xe->info.platform == XE_PVC || GRAPHICS_VER(xe) >= 20) { + xe_mmio_write32(mmio, PVC_GUC_TLB_INV_DESC1, + PVC_GUC_TLB_INV_DESC1_INVALIDATE); + xe_mmio_write32(mmio, PVC_GUC_TLB_INV_DESC0, + PVC_GUC_TLB_INV_DESC0_VALID); + } else { + xe_mmio_write32(mmio, GUC_TLB_INV_CR, + GUC_TLB_INV_CR_INVALIDATE); + } + xe_force_wake_put(gt_to_fw(gt), fw_ref); + } + + return -ECANCELED; +} + +/* + * Ensure that roundup_pow_of_two(length) doesn't overflow. + * Note that roundup_pow_of_two() operates on unsigned long, + * not on u64. 
+ */ +#define MAX_RANGE_TLB_INVALIDATION_LENGTH (rounddown_pow_of_two(ULONG_MAX)) + +static int send_tlb_inval_ppgtt(struct xe_tlb_inval *tlb_inval, u32 seqno, + u64 start, u64 end, u32 asid) +{ +#define MAX_TLB_INVALIDATION_LEN 7 + struct xe_guc *guc = tlb_inval->private; + struct xe_gt *gt = guc_to_gt(guc); + u32 action[MAX_TLB_INVALIDATION_LEN]; + u64 length = end - start; + int len = 0; + + if (guc_to_xe(guc)->info.force_execlist) + return -ECANCELED; + + action[len++] = XE_GUC_ACTION_TLB_INVALIDATION; + action[len++] = seqno; + if (!gt_to_xe(gt)->info.has_range_tlb_inval || + length > MAX_RANGE_TLB_INVALIDATION_LENGTH) { + action[len++] = MAKE_INVAL_OP(XE_GUC_TLB_INVAL_FULL); + } else { + u64 orig_start = start; + u64 align; + + if (length < SZ_4K) + length = SZ_4K; + + /* + * We need to invalidate a higher granularity if start address + * is not aligned to length. When start is not aligned with + * length we need to find the length large enough to create an + * address mask covering the required range. + */ + align = roundup_pow_of_two(length); + start = ALIGN_DOWN(start, align); + end = ALIGN(end, align); + length = align; + while (start + length < end) { + length <<= 1; + start = ALIGN_DOWN(orig_start, length); + } + + /* + * Minimum invalidation size for a 2MB page that the hardware + * expects is 16MB + */ + if (length >= SZ_2M) { + length = max_t(u64, SZ_16M, length); + start = ALIGN_DOWN(orig_start, length); + } + + xe_gt_assert(gt, length >= SZ_4K); + xe_gt_assert(gt, is_power_of_2(length)); + xe_gt_assert(gt, !(length & GENMASK(ilog2(SZ_16M) - 1, + ilog2(SZ_2M) + 1))); + xe_gt_assert(gt, IS_ALIGNED(start, length)); + + action[len++] = MAKE_INVAL_OP(XE_GUC_TLB_INVAL_PAGE_SELECTIVE); + action[len++] = asid; + action[len++] = lower_32_bits(start); + action[len++] = upper_32_bits(start); + action[len++] = ilog2(length) - ilog2(SZ_4K); + } + + xe_gt_assert(gt, len <= MAX_TLB_INVALIDATION_LEN); + + return send_tlb_inval(guc, action, len); +} + +static bool tlb_inval_initialized(struct xe_tlb_inval *tlb_inval) +{ + struct xe_guc *guc = tlb_inval->private; + + return xe_guc_ct_initialized(&guc->ct); +} + +static void tlb_inval_flush(struct xe_tlb_inval *tlb_inval) +{ + struct xe_guc *guc = tlb_inval->private; + + LNL_FLUSH_WORK(&guc->ct.g2h_worker); +} + +static long tlb_inval_timeout_delay(struct xe_tlb_inval *tlb_inval) +{ + struct xe_guc *guc = tlb_inval->private; + + /* this reflects what HW/GuC needs to process TLB inv request */ + const long hw_tlb_timeout = HZ / 4; + + /* this estimates actual delay caused by the CTB transport */ + long delay = xe_guc_ct_queue_proc_time_jiffies(&guc->ct); + + return hw_tlb_timeout + 2 * delay; +} + +static const struct xe_tlb_inval_ops guc_tlb_inval_ops = { + .all = send_tlb_inval_all, + .ggtt = send_tlb_inval_ggtt, + .ppgtt = send_tlb_inval_ppgtt, + .initialized = tlb_inval_initialized, + .flush = tlb_inval_flush, + .timeout_delay = tlb_inval_timeout_delay, +}; + +/** + * xe_guc_tlb_inval_init_early() - Init GuC TLB invalidation early + * @guc: GuC object + * @tlb_inval: TLB invalidation client + * + * Inititialize GuC TLB invalidation by setting back pointer in TLB invalidation + * client to the GuC and setting GuC backend ops. 
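The selective invalidation above keeps widening the window until a single power-of-two length, aligned to itself, covers [start, end), then bumps it to the 16M minimum once 2M pages are in play. A standalone restatement of just that computation can help when reasoning about which window a given range produces; it is an illustrative helper mirroring the logic in send_tlb_inval_ppgtt(), not driver code:

/* Illustrative restatement of the window computation in
 * send_tlb_inval_ppgtt(); not part of the driver. Returns the
 * power-of-two length and writes back the aligned start. */
static u64 example_inval_window(u64 start, u64 end, u64 *aligned_start)
{
	u64 orig_start = start;
	u64 length = max_t(u64, end - start, SZ_4K);
	u64 align = roundup_pow_of_two(length);

	start = ALIGN_DOWN(start, align);
	end = ALIGN(end, align);
	length = align;
	while (start + length < end) {
		length <<= 1;
		start = ALIGN_DOWN(orig_start, length);
	}

	/* 2M pages need at least a 16M invalidation window. */
	if (length >= SZ_2M) {
		length = max_t(u64, SZ_16M, length);
		start = ALIGN_DOWN(orig_start, length);
	}

	*aligned_start = start;
	return length;
}

/* e.g. [0x5000, 0x9000) is 16K long but not aligned to 16K, so the
 * window grows until it covers the range: start = 0x0, length = 64K. */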
+ */ +void xe_guc_tlb_inval_init_early(struct xe_guc *guc, + struct xe_tlb_inval *tlb_inval) +{ + tlb_inval->private = guc; + tlb_inval->ops = &guc_tlb_inval_ops; +} + +/** + * xe_guc_tlb_inval_done_handler() - TLB invalidation done handler + * @guc: guc + * @msg: message indicating TLB invalidation done + * @len: length of message + * + * Parse seqno of TLB invalidation, wake any waiters for seqno, and signal any + * invalidation fences for seqno. Algorithm for this depends on seqno being + * received in-order and asserts this assumption. + * + * Return: 0 on success, -EPROTO for malformed messages. + */ +int xe_guc_tlb_inval_done_handler(struct xe_guc *guc, u32 *msg, u32 len) +{ + struct xe_gt *gt = guc_to_gt(guc); + + if (unlikely(len != 1)) + return -EPROTO; + + xe_tlb_inval_done_handler(>->tlb_inval, msg[0]); + + return 0; +} diff --git a/drivers/gpu/drm/xe/xe_guc_tlb_inval.h b/drivers/gpu/drm/xe/xe_guc_tlb_inval.h new file mode 100644 index 000000000000..07d668b02e3d --- /dev/null +++ b/drivers/gpu/drm/xe/xe_guc_tlb_inval.h @@ -0,0 +1,19 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2025 Intel Corporation + */ + +#ifndef _XE_GUC_TLB_INVAL_H_ +#define _XE_GUC_TLB_INVAL_H_ + +#include <linux/types.h> + +struct xe_guc; +struct xe_tlb_inval; + +void xe_guc_tlb_inval_init_early(struct xe_guc *guc, + struct xe_tlb_inval *tlb_inval); + +int xe_guc_tlb_inval_done_handler(struct xe_guc *guc, u32 *msg, u32 len); + +#endif diff --git a/drivers/gpu/drm/xe/xe_guc_types.h b/drivers/gpu/drm/xe/xe_guc_types.h index 1fde7614fcc5..c7b9642b41ba 100644 --- a/drivers/gpu/drm/xe/xe_guc_types.h +++ b/drivers/gpu/drm/xe/xe_guc_types.h @@ -85,6 +85,12 @@ struct xe_guc { struct xarray exec_queue_lookup; /** @submission_state.stopped: submissions are stopped */ atomic_t stopped; + /** + * @submission_state.reset_blocked: reset attempts are blocked; + * blocking reset in order to delay it may be required if running + * an operation which is sensitive to resets. 
+ */ + atomic_t reset_blocked; /** @submission_state.lock: protects submission state */ struct mutex lock; /** @submission_state.enabled: submission is enabled */ diff --git a/drivers/gpu/drm/xe/xe_heci_gsc.c b/drivers/gpu/drm/xe/xe_heci_gsc.c index 27d11e06a82b..a415ca488791 100644 --- a/drivers/gpu/drm/xe/xe_heci_gsc.c +++ b/drivers/gpu/drm/xe/xe_heci_gsc.c @@ -11,15 +11,12 @@ #include "xe_device_types.h" #include "xe_drv.h" #include "xe_heci_gsc.h" +#include "regs/xe_gsc_regs.h" #include "xe_platform_types.h" #include "xe_survivability_mode.h" #define GSC_BAR_LENGTH 0x00000FFC -#define DG1_GSC_HECI2_BASE 0x259000 -#define PVC_GSC_HECI2_BASE 0x285000 -#define DG2_GSC_HECI2_BASE 0x374000 - static void heci_gsc_irq_mask(struct irq_data *d) { /* generic irq handling */ @@ -200,7 +197,7 @@ int xe_heci_gsc_init(struct xe_device *xe) if (ret) return ret; - if (!def->use_polling && !xe_survivability_mode_is_enabled(xe)) { + if (!def->use_polling && !xe_survivability_mode_is_boot_enabled(xe)) { ret = heci_gsc_irq_setup(xe); if (ret) return ret; diff --git a/drivers/gpu/drm/xe/xe_hmm.c b/drivers/gpu/drm/xe/xe_hmm.c deleted file mode 100644 index 57b71956ddf4..000000000000 --- a/drivers/gpu/drm/xe/xe_hmm.c +++ /dev/null @@ -1,325 +0,0 @@ -// SPDX-License-Identifier: MIT -/* - * Copyright © 2024 Intel Corporation - */ - -#include <linux/scatterlist.h> -#include <linux/mmu_notifier.h> -#include <linux/dma-mapping.h> -#include <linux/memremap.h> -#include <linux/swap.h> -#include <linux/hmm.h> -#include <linux/mm.h> -#include "xe_hmm.h" -#include "xe_vm.h" -#include "xe_bo.h" - -static u64 xe_npages_in_range(unsigned long start, unsigned long end) -{ - return (end - start) >> PAGE_SHIFT; -} - -static int xe_alloc_sg(struct xe_device *xe, struct sg_table *st, - struct hmm_range *range, struct rw_semaphore *notifier_sem) -{ - unsigned long i, npages, hmm_pfn; - unsigned long num_chunks = 0; - int ret; - - /* HMM docs says this is needed. */ - ret = down_read_interruptible(notifier_sem); - if (ret) - return ret; - - if (mmu_interval_read_retry(range->notifier, range->notifier_seq)) { - up_read(notifier_sem); - return -EAGAIN; - } - - npages = xe_npages_in_range(range->start, range->end); - for (i = 0; i < npages;) { - unsigned long len; - - hmm_pfn = range->hmm_pfns[i]; - xe_assert(xe, hmm_pfn & HMM_PFN_VALID); - - len = 1UL << hmm_pfn_to_map_order(hmm_pfn); - - /* If order > 0 the page may extend beyond range->start */ - len -= (hmm_pfn & ~HMM_PFN_FLAGS) & (len - 1); - i += len; - num_chunks++; - } - up_read(notifier_sem); - - return sg_alloc_table(st, num_chunks, GFP_KERNEL); -} - -/** - * xe_build_sg() - build a scatter gather table for all the physical pages/pfn - * in a hmm_range. dma-map pages if necessary. dma-address is save in sg table - * and will be used to program GPU page table later. - * @xe: the xe device who will access the dma-address in sg table - * @range: the hmm range that we build the sg table from. range->hmm_pfns[] - * has the pfn numbers of pages that back up this hmm address range. - * @st: pointer to the sg table. - * @notifier_sem: The xe notifier lock. - * @write: whether we write to this range. This decides dma map direction - * for system pages. If write we map it bi-diretional; otherwise - * DMA_TO_DEVICE - * - * All the contiguous pfns will be collapsed into one entry in - * the scatter gather table. This is for the purpose of efficiently - * programming GPU page table. - * - * The dma_address in the sg table will later be used by GPU to - * access memory. 
So if the memory is system memory, we need to - * do a dma-mapping so it can be accessed by GPU/DMA. - * - * FIXME: This function currently only support pages in system - * memory. If the memory is GPU local memory (of the GPU who - * is going to access memory), we need gpu dpa (device physical - * address), and there is no need of dma-mapping. This is TBD. - * - * FIXME: dma-mapping for peer gpu device to access remote gpu's - * memory. Add this when you support p2p - * - * This function allocates the storage of the sg table. It is - * caller's responsibility to free it calling sg_free_table. - * - * Returns 0 if successful; -ENOMEM if fails to allocate memory - */ -static int xe_build_sg(struct xe_device *xe, struct hmm_range *range, - struct sg_table *st, - struct rw_semaphore *notifier_sem, - bool write) -{ - unsigned long npages = xe_npages_in_range(range->start, range->end); - struct device *dev = xe->drm.dev; - struct scatterlist *sgl; - struct page *page; - unsigned long i, j; - - lockdep_assert_held(notifier_sem); - - i = 0; - for_each_sg(st->sgl, sgl, st->nents, j) { - unsigned long hmm_pfn, size; - - hmm_pfn = range->hmm_pfns[i]; - page = hmm_pfn_to_page(hmm_pfn); - xe_assert(xe, !is_device_private_page(page)); - - size = 1UL << hmm_pfn_to_map_order(hmm_pfn); - size -= page_to_pfn(page) & (size - 1); - i += size; - - if (unlikely(j == st->nents - 1)) { - xe_assert(xe, i >= npages); - if (i > npages) - size -= (i - npages); - - sg_mark_end(sgl); - } else { - xe_assert(xe, i < npages); - } - - sg_set_page(sgl, page, size << PAGE_SHIFT, 0); - } - - return dma_map_sgtable(dev, st, write ? DMA_BIDIRECTIONAL : DMA_TO_DEVICE, - DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_NO_KERNEL_MAPPING); -} - -static void xe_hmm_userptr_set_mapped(struct xe_userptr_vma *uvma) -{ - struct xe_userptr *userptr = &uvma->userptr; - struct xe_vm *vm = xe_vma_vm(&uvma->vma); - - lockdep_assert_held_write(&vm->lock); - lockdep_assert_held(&vm->userptr.notifier_lock); - - mutex_lock(&userptr->unmap_mutex); - xe_assert(vm->xe, !userptr->mapped); - userptr->mapped = true; - mutex_unlock(&userptr->unmap_mutex); -} - -void xe_hmm_userptr_unmap(struct xe_userptr_vma *uvma) -{ - struct xe_userptr *userptr = &uvma->userptr; - struct xe_vma *vma = &uvma->vma; - bool write = !xe_vma_read_only(vma); - struct xe_vm *vm = xe_vma_vm(vma); - struct xe_device *xe = vm->xe; - - if (!lockdep_is_held_type(&vm->userptr.notifier_lock, 0) && - !lockdep_is_held_type(&vm->lock, 0) && - !(vma->gpuva.flags & XE_VMA_DESTROYED)) { - /* Don't unmap in exec critical section. */ - xe_vm_assert_held(vm); - /* Don't unmap while mapping the sg. */ - lockdep_assert_held(&vm->lock); - } - - mutex_lock(&userptr->unmap_mutex); - if (userptr->sg && userptr->mapped) - dma_unmap_sgtable(xe->drm.dev, userptr->sg, - write ? DMA_BIDIRECTIONAL : DMA_TO_DEVICE, 0); - userptr->mapped = false; - mutex_unlock(&userptr->unmap_mutex); -} - -/** - * xe_hmm_userptr_free_sg() - Free the scatter gather table of userptr - * @uvma: the userptr vma which hold the scatter gather table - * - * With function xe_userptr_populate_range, we allocate storage of - * the userptr sg table. This is a helper function to free this - * sg table, and dma unmap the address in the table. 
- */ -void xe_hmm_userptr_free_sg(struct xe_userptr_vma *uvma) -{ - struct xe_userptr *userptr = &uvma->userptr; - - xe_assert(xe_vma_vm(&uvma->vma)->xe, userptr->sg); - xe_hmm_userptr_unmap(uvma); - sg_free_table(userptr->sg); - userptr->sg = NULL; -} - -/** - * xe_hmm_userptr_populate_range() - Populate physical pages of a virtual - * address range - * - * @uvma: userptr vma which has information of the range to populate. - * @is_mm_mmap_locked: True if mmap_read_lock is already acquired by caller. - * - * This function populate the physical pages of a virtual - * address range. The populated physical pages is saved in - * userptr's sg table. It is similar to get_user_pages but call - * hmm_range_fault. - * - * This function also read mmu notifier sequence # ( - * mmu_interval_read_begin), for the purpose of later - * comparison (through mmu_interval_read_retry). - * - * This must be called with mmap read or write lock held. - * - * This function allocates the storage of the userptr sg table. - * It is caller's responsibility to free it calling sg_free_table. - * - * returns: 0 for success; negative error no on failure - */ -int xe_hmm_userptr_populate_range(struct xe_userptr_vma *uvma, - bool is_mm_mmap_locked) -{ - unsigned long timeout = - jiffies + msecs_to_jiffies(HMM_RANGE_DEFAULT_TIMEOUT); - unsigned long *pfns; - struct xe_userptr *userptr; - struct xe_vma *vma = &uvma->vma; - u64 userptr_start = xe_vma_userptr(vma); - u64 userptr_end = userptr_start + xe_vma_size(vma); - struct xe_vm *vm = xe_vma_vm(vma); - struct hmm_range hmm_range = { - .pfn_flags_mask = 0, /* ignore pfns */ - .default_flags = HMM_PFN_REQ_FAULT, - .start = userptr_start, - .end = userptr_end, - .notifier = &uvma->userptr.notifier, - .dev_private_owner = vm->xe, - }; - bool write = !xe_vma_read_only(vma); - unsigned long notifier_seq; - u64 npages; - int ret; - - userptr = &uvma->userptr; - - if (is_mm_mmap_locked) - mmap_assert_locked(userptr->notifier.mm); - - if (vma->gpuva.flags & XE_VMA_DESTROYED) - return 0; - - notifier_seq = mmu_interval_read_begin(&userptr->notifier); - if (notifier_seq == userptr->notifier_seq) - return 0; - - if (userptr->sg) - xe_hmm_userptr_free_sg(uvma); - - npages = xe_npages_in_range(userptr_start, userptr_end); - pfns = kvmalloc_array(npages, sizeof(*pfns), GFP_KERNEL); - if (unlikely(!pfns)) - return -ENOMEM; - - if (write) - hmm_range.default_flags |= HMM_PFN_REQ_WRITE; - - if (!mmget_not_zero(userptr->notifier.mm)) { - ret = -EFAULT; - goto free_pfns; - } - - hmm_range.hmm_pfns = pfns; - - while (true) { - hmm_range.notifier_seq = mmu_interval_read_begin(&userptr->notifier); - - if (!is_mm_mmap_locked) - mmap_read_lock(userptr->notifier.mm); - - ret = hmm_range_fault(&hmm_range); - - if (!is_mm_mmap_locked) - mmap_read_unlock(userptr->notifier.mm); - - if (ret == -EBUSY) { - if (time_after(jiffies, timeout)) - break; - - continue; - } - break; - } - - mmput(userptr->notifier.mm); - - if (ret) - goto free_pfns; - - ret = xe_alloc_sg(vm->xe, &userptr->sgt, &hmm_range, &vm->userptr.notifier_lock); - if (ret) - goto free_pfns; - - ret = down_read_interruptible(&vm->userptr.notifier_lock); - if (ret) - goto free_st; - - if (mmu_interval_read_retry(hmm_range.notifier, hmm_range.notifier_seq)) { - ret = -EAGAIN; - goto out_unlock; - } - - ret = xe_build_sg(vm->xe, &hmm_range, &userptr->sgt, - &vm->userptr.notifier_lock, write); - if (ret) - goto out_unlock; - - userptr->sg = &userptr->sgt; - xe_hmm_userptr_set_mapped(uvma); - userptr->notifier_seq = hmm_range.notifier_seq; - 
up_read(&vm->userptr.notifier_lock); - kvfree(pfns); - return 0; - -out_unlock: - up_read(&vm->userptr.notifier_lock); -free_st: - sg_free_table(&userptr->sgt); -free_pfns: - kvfree(pfns); - return ret; -} diff --git a/drivers/gpu/drm/xe/xe_hmm.h b/drivers/gpu/drm/xe/xe_hmm.h deleted file mode 100644 index 0ea98d8e7bbc..000000000000 --- a/drivers/gpu/drm/xe/xe_hmm.h +++ /dev/null @@ -1,18 +0,0 @@ -/* SPDX-License-Identifier: MIT - * - * Copyright © 2024 Intel Corporation - */ - -#ifndef _XE_HMM_H_ -#define _XE_HMM_H_ - -#include <linux/types.h> - -struct xe_userptr_vma; - -int xe_hmm_userptr_populate_range(struct xe_userptr_vma *uvma, bool is_mm_mmap_locked); - -void xe_hmm_userptr_free_sg(struct xe_userptr_vma *uvma); - -void xe_hmm_userptr_unmap(struct xe_userptr_vma *uvma); -#endif diff --git a/drivers/gpu/drm/xe/xe_huc.c b/drivers/gpu/drm/xe/xe_huc.c index 6a846e4cb221..7e43b2dd6a32 100644 --- a/drivers/gpu/drm/xe/xe_huc.c +++ b/drivers/gpu/drm/xe/xe_huc.c @@ -171,7 +171,7 @@ static int huc_auth_via_gsccs(struct xe_huc *huc) sizeof(struct pxp43_new_huc_auth_in)); wr_offset = huc_emit_pxp_auth_msg(xe, &pkt->vmap, wr_offset, xe_bo_ggtt_addr(huc->fw.bo), - huc->fw.bo->size); + xe_bo_size(huc->fw.bo)); do { err = xe_gsc_pkt_submit_kernel(>->uc.gsc, ggtt_offset, wr_offset, ggtt_offset + PXP43_HUC_AUTH_INOUT_SIZE, diff --git a/drivers/gpu/drm/xe/xe_hw_engine.c b/drivers/gpu/drm/xe/xe_hw_engine.c index 3439c8522d01..1cf623b4a5bc 100644 --- a/drivers/gpu/drm/xe/xe_hw_engine.c +++ b/drivers/gpu/drm/xe/xe_hw_engine.c @@ -576,7 +576,7 @@ static void adjust_idledly(struct xe_hw_engine *hwe) u32 maxcnt_units_ns = 640; bool inhibit_switch = 0; - if (!IS_SRIOV_VF(gt_to_xe(hwe->gt)) && XE_WA(gt, 16023105232)) { + if (!IS_SRIOV_VF(gt_to_xe(hwe->gt)) && XE_GT_WA(gt, 16023105232)) { idledly = xe_mmio_read32(>->mmio, RING_IDLEDLY(hwe->mmio_base)); maxcnt = xe_mmio_read32(>->mmio, RING_PWRCTX_MAXCNT(hwe->mmio_base)); @@ -1059,12 +1059,13 @@ struct xe_hw_engine * xe_hw_engine_lookup(struct xe_device *xe, struct drm_xe_engine_class_instance eci) { + struct xe_gt *gt = xe_device_get_gt(xe, eci.gt_id); unsigned int idx; if (eci.engine_class >= ARRAY_SIZE(user_to_xe_engine_class)) return NULL; - if (eci.gt_id >= xe->info.gt_count) + if (!gt) return NULL; idx = array_index_nospec(eci.engine_class, diff --git a/drivers/gpu/drm/xe/xe_hw_engine_group.c b/drivers/gpu/drm/xe/xe_hw_engine_group.c index 2d68c5b5262a..fa4db5f23342 100644 --- a/drivers/gpu/drm/xe/xe_hw_engine_group.c +++ b/drivers/gpu/drm/xe/xe_hw_engine_group.c @@ -13,15 +13,6 @@ #include "xe_vm.h" static void -hw_engine_group_free(struct drm_device *drm, void *arg) -{ - struct xe_hw_engine_group *group = arg; - - destroy_workqueue(group->resume_wq); - kfree(group); -} - -static void hw_engine_group_resume_lr_jobs_func(struct work_struct *w) { struct xe_exec_queue *q; @@ -53,7 +44,7 @@ hw_engine_group_alloc(struct xe_device *xe) struct xe_hw_engine_group *group; int err; - group = kzalloc(sizeof(*group), GFP_KERNEL); + group = drmm_kzalloc(&xe->drm, sizeof(*group), GFP_KERNEL); if (!group) return ERR_PTR(-ENOMEM); @@ -61,14 +52,14 @@ hw_engine_group_alloc(struct xe_device *xe) if (!group->resume_wq) return ERR_PTR(-ENOMEM); + err = drmm_add_action_or_reset(&xe->drm, __drmm_workqueue_release, group->resume_wq); + if (err) + return ERR_PTR(err); + init_rwsem(&group->mode_sem); INIT_WORK(&group->resume_work, hw_engine_group_resume_lr_jobs_func); INIT_LIST_HEAD(&group->exec_queue_list); - err = drmm_add_action_or_reset(&xe->drm, hw_engine_group_free, 
group); - if (err) - return ERR_PTR(err); - return group; } @@ -84,25 +75,18 @@ int xe_hw_engine_setup_groups(struct xe_gt *gt) enum xe_hw_engine_id id; struct xe_hw_engine_group *group_rcs_ccs, *group_bcs, *group_vcs_vecs; struct xe_device *xe = gt_to_xe(gt); - int err; group_rcs_ccs = hw_engine_group_alloc(xe); - if (IS_ERR(group_rcs_ccs)) { - err = PTR_ERR(group_rcs_ccs); - goto err_group_rcs_ccs; - } + if (IS_ERR(group_rcs_ccs)) + return PTR_ERR(group_rcs_ccs); group_bcs = hw_engine_group_alloc(xe); - if (IS_ERR(group_bcs)) { - err = PTR_ERR(group_bcs); - goto err_group_bcs; - } + if (IS_ERR(group_bcs)) + return PTR_ERR(group_bcs); group_vcs_vecs = hw_engine_group_alloc(xe); - if (IS_ERR(group_vcs_vecs)) { - err = PTR_ERR(group_vcs_vecs); - goto err_group_vcs_vecs; - } + if (IS_ERR(group_vcs_vecs)) + return PTR_ERR(group_vcs_vecs); for_each_hw_engine(hwe, gt, id) { switch (hwe->class) { @@ -119,21 +103,12 @@ int xe_hw_engine_setup_groups(struct xe_gt *gt) break; case XE_ENGINE_CLASS_OTHER: break; - default: - drm_warn(&xe->drm, "NOT POSSIBLE"); + case XE_ENGINE_CLASS_MAX: + xe_gt_assert(gt, false); } } return 0; - -err_group_vcs_vecs: - kfree(group_vcs_vecs); -err_group_bcs: - kfree(group_bcs); -err_group_rcs_ccs: - kfree(group_rcs_ccs); - - return err; } /** @@ -238,17 +213,13 @@ static int xe_hw_engine_group_suspend_faulting_lr_jobs(struct xe_hw_engine_group err = q->ops->suspend_wait(q); if (err) - goto err_suspend; + return err; } if (need_resume) xe_hw_engine_group_resume_faulting_lr_jobs(group); return 0; - -err_suspend: - up_write(&group->mode_sem); - return err; } /** diff --git a/drivers/gpu/drm/xe/xe_hw_error.c b/drivers/gpu/drm/xe/xe_hw_error.c new file mode 100644 index 000000000000..8c65291f36fc --- /dev/null +++ b/drivers/gpu/drm/xe/xe_hw_error.c @@ -0,0 +1,182 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2025 Intel Corporation + */ + +#include <linux/fault-inject.h> + +#include "regs/xe_gsc_regs.h" +#include "regs/xe_hw_error_regs.h" +#include "regs/xe_irq_regs.h" + +#include "xe_device.h" +#include "xe_hw_error.h" +#include "xe_mmio.h" +#include "xe_survivability_mode.h" + +#define HEC_UNCORR_FW_ERR_BITS 4 +extern struct fault_attr inject_csc_hw_error; + +/* Error categories reported by hardware */ +enum hardware_error { + HARDWARE_ERROR_CORRECTABLE = 0, + HARDWARE_ERROR_NONFATAL = 1, + HARDWARE_ERROR_FATAL = 2, + HARDWARE_ERROR_MAX, +}; + +static const char * const hec_uncorrected_fw_errors[] = { + "Fatal", + "CSE Disabled", + "FD Corruption", + "Data Corruption" +}; + +static const char *hw_error_to_str(const enum hardware_error hw_err) +{ + switch (hw_err) { + case HARDWARE_ERROR_CORRECTABLE: + return "CORRECTABLE"; + case HARDWARE_ERROR_NONFATAL: + return "NONFATAL"; + case HARDWARE_ERROR_FATAL: + return "FATAL"; + default: + return "UNKNOWN"; + } +} + +static bool fault_inject_csc_hw_error(void) +{ + return IS_ENABLED(CONFIG_DEBUG_FS) && should_fail(&inject_csc_hw_error, 1); +} + +static void csc_hw_error_work(struct work_struct *work) +{ + struct xe_tile *tile = container_of(work, typeof(*tile), csc_hw_error_work); + struct xe_device *xe = tile_to_xe(tile); + int ret; + + ret = xe_survivability_mode_runtime_enable(xe); + if (ret) + drm_err(&xe->drm, "Failed to enable runtime survivability mode\n"); +} + +static void csc_hw_error_handler(struct xe_tile *tile, const enum hardware_error hw_err) +{ + const char *hw_err_str = hw_error_to_str(hw_err); + struct xe_device *xe = tile_to_xe(tile); + struct xe_mmio *mmio = &tile->mmio; + u32 base, err_bit, 
err_src; + unsigned long fw_err; + + if (xe->info.platform != XE_BATTLEMAGE) + return; + + base = BMG_GSC_HECI1_BASE; + lockdep_assert_held(&xe->irq.lock); + err_src = xe_mmio_read32(mmio, HEC_UNCORR_ERR_STATUS(base)); + if (!err_src) { + drm_err_ratelimited(&xe->drm, HW_ERR "Tile%d reported HEC_ERR_STATUS_%s blank\n", + tile->id, hw_err_str); + return; + } + + if (err_src & UNCORR_FW_REPORTED_ERR) { + fw_err = xe_mmio_read32(mmio, HEC_UNCORR_FW_ERR_DW0(base)); + for_each_set_bit(err_bit, &fw_err, HEC_UNCORR_FW_ERR_BITS) { + drm_err_ratelimited(&xe->drm, HW_ERR + "%s: HEC Uncorrected FW %s error reported, bit[%d] is set\n", + hw_err_str, hec_uncorrected_fw_errors[err_bit], + err_bit); + + schedule_work(&tile->csc_hw_error_work); + } + } + + xe_mmio_write32(mmio, HEC_UNCORR_ERR_STATUS(base), err_src); +} + +static void hw_error_source_handler(struct xe_tile *tile, const enum hardware_error hw_err) +{ + const char *hw_err_str = hw_error_to_str(hw_err); + struct xe_device *xe = tile_to_xe(tile); + unsigned long flags; + u32 err_src; + + if (xe->info.platform != XE_BATTLEMAGE) + return; + + spin_lock_irqsave(&xe->irq.lock, flags); + err_src = xe_mmio_read32(&tile->mmio, DEV_ERR_STAT_REG(hw_err)); + if (!err_src) { + drm_err_ratelimited(&xe->drm, HW_ERR "Tile%d reported DEV_ERR_STAT_%s blank!\n", + tile->id, hw_err_str); + goto unlock; + } + + if (err_src & XE_CSC_ERROR) + csc_hw_error_handler(tile, hw_err); + + xe_mmio_write32(&tile->mmio, DEV_ERR_STAT_REG(hw_err), err_src); + +unlock: + spin_unlock_irqrestore(&xe->irq.lock, flags); +} + +/** + * xe_hw_error_irq_handler - irq handling for hw errors + * @tile: tile instance + * @master_ctl: value read from master interrupt register + * + * Xe platforms add three error bits to the master interrupt register to support error handling. + * These three bits are used to convey the class of error FATAL, NONFATAL, or CORRECTABLE. + * To process the interrupt, determine the source of error by reading the Device Error Source + * Register that corresponds to the class of error being serviced. 
+ */ +void xe_hw_error_irq_handler(struct xe_tile *tile, const u32 master_ctl) +{ + enum hardware_error hw_err; + + if (fault_inject_csc_hw_error()) + schedule_work(&tile->csc_hw_error_work); + + for (hw_err = 0; hw_err < HARDWARE_ERROR_MAX; hw_err++) + if (master_ctl & ERROR_IRQ(hw_err)) + hw_error_source_handler(tile, hw_err); +} + +/* + * Process hardware errors during boot + */ +static void process_hw_errors(struct xe_device *xe) +{ + struct xe_tile *tile; + u32 master_ctl; + u8 id; + + for_each_tile(tile, xe, id) { + master_ctl = xe_mmio_read32(&tile->mmio, GFX_MSTR_IRQ); + xe_hw_error_irq_handler(tile, master_ctl); + xe_mmio_write32(&tile->mmio, GFX_MSTR_IRQ, master_ctl); + } +} + +/** + * xe_hw_error_init - Initialize hw errors + * @xe: xe device instance + * + * Initialize and check for errors that occurred during boot + * prior to driver load + */ +void xe_hw_error_init(struct xe_device *xe) +{ + struct xe_tile *tile = xe_device_get_root_tile(xe); + + if (!IS_DGFX(xe) || IS_SRIOV_VF(xe)) + return; + + INIT_WORK(&tile->csc_hw_error_work, csc_hw_error_work); + + process_hw_errors(xe); +} diff --git a/drivers/gpu/drm/xe/xe_hw_error.h b/drivers/gpu/drm/xe/xe_hw_error.h new file mode 100644 index 000000000000..d86e28c5180c --- /dev/null +++ b/drivers/gpu/drm/xe/xe_hw_error.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2025 Intel Corporation + */ +#ifndef XE_HW_ERROR_H_ +#define XE_HW_ERROR_H_ + +#include <linux/types.h> + +struct xe_tile; +struct xe_device; + +void xe_hw_error_irq_handler(struct xe_tile *tile, const u32 master_ctl); +void xe_hw_error_init(struct xe_device *xe); +#endif diff --git a/drivers/gpu/drm/xe/xe_hwmon.c b/drivers/gpu/drm/xe/xe_hwmon.c index f08fc4377d25..b6790589e623 100644 --- a/drivers/gpu/drm/xe/xe_hwmon.c +++ b/drivers/gpu/drm/xe/xe_hwmon.c @@ -179,7 +179,7 @@ static int xe_hwmon_pcode_rmw_power_limit(const struct xe_hwmon *hwmon, u32 attr u32 clr, u32 set) { struct xe_tile *root_tile = xe_device_get_root_tile(hwmon->xe); - u32 val0, val1; + u32 val0 = 0, val1 = 0; int ret = 0; ret = xe_pcode_read(root_tile, PCODE_MBOX(PCODE_POWER_SETUP, @@ -286,7 +286,7 @@ static struct xe_reg xe_hwmon_get_reg(struct xe_hwmon *hwmon, enum xe_hwmon_reg */ static void xe_hwmon_power_max_read(struct xe_hwmon *hwmon, u32 attr, int channel, long *value) { - u64 reg_val = 0, min, max; + u32 reg_val = 0; struct xe_device *xe = hwmon->xe; struct xe_reg rapl_limit, pkg_power_sku; struct xe_mmio *mmio = xe_root_tile_mmio(xe); @@ -294,7 +294,7 @@ static void xe_hwmon_power_max_read(struct xe_hwmon *hwmon, u32 attr, int channe mutex_lock(&hwmon->hwmon_lock); if (hwmon->xe->info.has_mbx_power_limits) { - xe_hwmon_pcode_read_power_limit(hwmon, attr, channel, (u32 *)®_val); + xe_hwmon_pcode_read_power_limit(hwmon, attr, channel, ®_val); } else { rapl_limit = xe_hwmon_get_reg(hwmon, REG_PKG_RAPL_LIMIT, channel); pkg_power_sku = xe_hwmon_get_reg(hwmon, REG_PKG_POWER_SKU, channel); @@ -304,19 +304,21 @@ static void xe_hwmon_power_max_read(struct xe_hwmon *hwmon, u32 attr, int channe /* Check if PL limits are disabled. 
*/ if (!(reg_val & PWR_LIM_EN)) { *value = PL_DISABLE; - drm_info(&hwmon->xe->drm, "%s disabled for channel %d, val 0x%016llx\n", + drm_info(&hwmon->xe->drm, "%s disabled for channel %d, val 0x%08x\n", PWR_ATTR_TO_STR(attr), channel, reg_val); goto unlock; } reg_val = REG_FIELD_GET(PWR_LIM_VAL, reg_val); - *value = mul_u64_u32_shr(reg_val, SF_POWER, hwmon->scl_shift_power); + *value = mul_u32_u32(reg_val, SF_POWER) >> hwmon->scl_shift_power; /* For platforms with mailbox power limit support clamping would be done by pcode. */ if (!hwmon->xe->info.has_mbx_power_limits) { - reg_val = xe_mmio_read64_2x32(mmio, pkg_power_sku); - min = REG_FIELD_GET(PKG_MIN_PWR, reg_val); - max = REG_FIELD_GET(PKG_MAX_PWR, reg_val); + u64 pkg_pwr, min, max; + + pkg_pwr = xe_mmio_read64_2x32(mmio, pkg_power_sku); + min = REG_FIELD_GET(PKG_MIN_PWR, pkg_pwr); + max = REG_FIELD_GET(PKG_MAX_PWR, pkg_pwr); min = mul_u64_u32_shr(min, SF_POWER, hwmon->scl_shift_power); max = mul_u64_u32_shr(max, SF_POWER, hwmon->scl_shift_power); if (min && max) @@ -332,6 +334,7 @@ static int xe_hwmon_power_max_write(struct xe_hwmon *hwmon, u32 attr, int channe int ret = 0; u32 reg_val, max; struct xe_reg rapl_limit; + u64 max_supp_power_limit = 0; mutex_lock(&hwmon->hwmon_lock); @@ -356,6 +359,20 @@ static int xe_hwmon_power_max_write(struct xe_hwmon *hwmon, u32 attr, int channe goto unlock; } + /* + * If the sysfs value exceeds the maximum pcode supported power limit value, clamp it to + * the supported maximum (U12.3 format). + * This is to avoid truncation during reg_val calculation below and ensure the valid + * power limit is sent for pcode which would clamp it to card-supported value. + */ + max_supp_power_limit = ((PWR_LIM_VAL) >> hwmon->scl_shift_power) * SF_POWER; + if (value > max_supp_power_limit) { + value = max_supp_power_limit; + drm_info(&hwmon->xe->drm, + "Power limit clamped as selected %s exceeds channel %d limit\n", + PWR_ATTR_TO_STR(attr), channel); + } + /* Computation in 64-bits to avoid overflow. Round to nearest. */ reg_val = DIV_ROUND_CLOSEST_ULL((u64)value << hwmon->scl_shift_power, SF_POWER); @@ -478,8 +495,8 @@ xe_hwmon_power_max_interval_show(struct device *dev, struct device_attribute *at { struct xe_hwmon *hwmon = dev_get_drvdata(dev); struct xe_mmio *mmio = xe_root_tile_mmio(hwmon->xe); - u32 x, y, x_w = 2; /* 2 bits */ - u64 r, tau4, out; + u32 reg_val, x, y, x_w = 2; /* 2 bits */ + u64 tau4, out; int channel = (to_sensor_dev_attr(attr)->index % 2) ? CHANNEL_PKG : CHANNEL_CARD; u32 power_attr = (to_sensor_dev_attr(attr)->index > 1) ? 
PL2_HWMON_ATTR : PL1_HWMON_ATTR; @@ -490,23 +507,24 @@ xe_hwmon_power_max_interval_show(struct device *dev, struct device_attribute *at mutex_lock(&hwmon->hwmon_lock); if (hwmon->xe->info.has_mbx_power_limits) { - ret = xe_hwmon_pcode_read_power_limit(hwmon, power_attr, channel, (u32 *)&r); + ret = xe_hwmon_pcode_read_power_limit(hwmon, power_attr, channel, ®_val); if (ret) { drm_err(&hwmon->xe->drm, - "power interval read fail, ch %d, attr %d, r 0%llx, ret %d\n", - channel, power_attr, r, ret); - r = 0; + "power interval read fail, ch %d, attr %d, val 0x%08x, ret %d\n", + channel, power_attr, reg_val, ret); + reg_val = 0; } } else { - r = xe_mmio_read32(mmio, xe_hwmon_get_reg(hwmon, REG_PKG_RAPL_LIMIT, channel)); + reg_val = xe_mmio_read32(mmio, xe_hwmon_get_reg(hwmon, REG_PKG_RAPL_LIMIT, + channel)); } mutex_unlock(&hwmon->hwmon_lock); xe_pm_runtime_put(hwmon->xe); - x = REG_FIELD_GET(PWR_LIM_TIME_X, r); - y = REG_FIELD_GET(PWR_LIM_TIME_Y, r); + x = REG_FIELD_GET(PWR_LIM_TIME_X, reg_val); + y = REG_FIELD_GET(PWR_LIM_TIME_Y, reg_val); /* * tau = (1 + (x / 4)) * power(2,y), x = bits(23:22), y = bits(21:17) @@ -719,7 +737,7 @@ static int xe_hwmon_power_curr_crit_read(struct xe_hwmon *hwmon, int channel, long *value, u32 scale_factor) { int ret; - u32 uval; + u32 uval = 0; mutex_lock(&hwmon->hwmon_lock); @@ -739,9 +757,23 @@ static int xe_hwmon_power_curr_crit_write(struct xe_hwmon *hwmon, int channel, { int ret; u32 uval; + u64 max_crit_power_curr = 0; mutex_lock(&hwmon->hwmon_lock); + /* + * If the sysfs value exceeds the pcode mailbox cmd POWER_SETUP_SUBCOMMAND_WRITE_I1 + * max supported value, clamp it to the command's max (U10.6 format). + * This is to avoid truncation during uval calculation below and ensure the valid power + * limit is sent for pcode which would clamp it to card-supported value. 
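Both clamps above follow the same fixed-point pattern: the register field holds a U-format value (U12.3 for the power limit, U10.6 for I1), so the largest value a sysfs write may encode is (field_mask >> fractional_shift) * scale. A minimal sketch of the clamp-then-encode step, with hypothetical constants standing in for the PWR_LIM_VAL / scl_shift_power / SF_POWER style values used here:

/* Illustrative only: the constants below are placeholders, not values
 * taken from the driver. */
#define EXAMPLE_FIELD_MASK	GENMASK(14, 0)	/* hypothetical 15-bit U12.3 field */
#define EXAMPLE_FRAC_SHIFT	3
#define EXAMPLE_SCALE		1000000		/* real-unit scale, e.g. microwatts */

static u32 example_encode_limit(u64 value)
{
	u64 max = (EXAMPLE_FIELD_MASK >> EXAMPLE_FRAC_SHIFT) * EXAMPLE_SCALE;

	/* Clamp first so the shifted value still fits in the field. */
	if (value > max)
		value = max;

	return DIV_ROUND_CLOSEST_ULL(value << EXAMPLE_FRAC_SHIFT, EXAMPLE_SCALE);
}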
+ */ + max_crit_power_curr = (POWER_SETUP_I1_DATA_MASK >> POWER_SETUP_I1_SHIFT) * scale_factor; + if (value > max_crit_power_curr) { + value = max_crit_power_curr; + drm_info(&hwmon->xe->drm, + "Power limit clamped as selected exceeds channel %d limit\n", + channel); + } uval = DIV_ROUND_CLOSEST_ULL(value << POWER_SETUP_I1_SHIFT, scale_factor); ret = xe_hwmon_pcode_write_i1(hwmon, uval); @@ -889,7 +921,7 @@ xe_hwmon_power_write(struct xe_hwmon *hwmon, u32 attr, int channel, long val) static umode_t xe_hwmon_curr_is_visible(const struct xe_hwmon *hwmon, u32 attr, int channel) { - u32 uval; + u32 uval = 0; /* hwmon sysfs attribute of current available only for package */ if (channel != CHANNEL_PKG) @@ -991,7 +1023,7 @@ xe_hwmon_energy_read(struct xe_hwmon *hwmon, u32 attr, int channel, long *val) static umode_t xe_hwmon_fan_is_visible(struct xe_hwmon *hwmon, u32 attr, int channel) { - u32 uval; + u32 uval = 0; if (!hwmon->xe->info.has_fan_control) return 0; @@ -1265,13 +1297,6 @@ xe_hwmon_get_preregistration_info(struct xe_hwmon *hwmon) xe_hwmon_fan_input_read(hwmon, channel, &fan_speed); } -static void xe_hwmon_mutex_destroy(void *arg) -{ - struct xe_hwmon *hwmon = arg; - - mutex_destroy(&hwmon->hwmon_lock); -} - int xe_hwmon_register(struct xe_device *xe) { struct device *dev = xe->drm.dev; @@ -1290,8 +1315,7 @@ int xe_hwmon_register(struct xe_device *xe) if (!hwmon) return -ENOMEM; - mutex_init(&hwmon->hwmon_lock); - ret = devm_add_action_or_reset(dev, xe_hwmon_mutex_destroy, hwmon); + ret = devm_mutex_init(dev, &hwmon->hwmon_lock); if (ret) return ret; diff --git a/drivers/gpu/drm/xe/xe_i2c.c b/drivers/gpu/drm/xe/xe_i2c.c new file mode 100644 index 000000000000..48dfcb41fa08 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_i2c.c @@ -0,0 +1,346 @@ +// SPDX-License-Identifier: GPL-2.0 OR MIT +/* + * Intel Xe I2C attached Microcontroller Units (MCU) + * + * Copyright (C) 2025 Intel Corporation. + */ + +#include <linux/array_size.h> +#include <linux/container_of.h> +#include <linux/device.h> +#include <linux/err.h> +#include <linux/i2c.h> +#include <linux/ioport.h> +#include <linux/irq.h> +#include <linux/irqdomain.h> +#include <linux/notifier.h> +#include <linux/pci.h> +#include <linux/platform_device.h> +#include <linux/property.h> +#include <linux/regmap.h> +#include <linux/sprintf.h> +#include <linux/string.h> +#include <linux/types.h> +#include <linux/workqueue.h> + +#include "regs/xe_i2c_regs.h" +#include "regs/xe_irq_regs.h" + +#include "xe_device.h" +#include "xe_device_types.h" +#include "xe_i2c.h" +#include "xe_mmio.h" +#include "xe_platform_types.h" + +/** + * DOC: Xe I2C devices + * + * Register a platform device for the I2C host controller (Synpsys DesignWare + * I2C) if the registers of that controller are mapped to the MMIO, and also the + * I2C client device for the Add-In Management Controller (the MCU) attached to + * the host controller. + * + * See drivers/i2c/busses/i2c-designware-* for more information on the I2C host + * controller. 
+ */ + +static const char adapter_name[] = "i2c_designware"; + +static const struct property_entry xe_i2c_adapter_properties[] = { + PROPERTY_ENTRY_STRING("compatible", "intel,xe-i2c"), + PROPERTY_ENTRY_U32("clock-frequency", I2C_MAX_FAST_MODE_PLUS_FREQ), + { } +}; + +static inline void xe_i2c_read_endpoint(struct xe_mmio *mmio, void *ep) +{ + u32 *val = ep; + + val[0] = xe_mmio_read32(mmio, REG_SG_REMAP_ADDR_PREFIX); + val[1] = xe_mmio_read32(mmio, REG_SG_REMAP_ADDR_POSTFIX); +} + +static void xe_i2c_client_work(struct work_struct *work) +{ + struct xe_i2c *i2c = container_of(work, struct xe_i2c, work); + struct i2c_board_info info = { + .type = "amc", + .flags = I2C_CLIENT_HOST_NOTIFY, + .addr = i2c->ep.addr[1], + }; + + i2c->client[0] = i2c_new_client_device(i2c->adapter, &info); +} + +static int xe_i2c_notifier(struct notifier_block *nb, unsigned long action, void *data) +{ + struct xe_i2c *i2c = container_of(nb, struct xe_i2c, bus_notifier); + struct i2c_adapter *adapter = i2c_verify_adapter(data); + struct device *dev = data; + + if (action == BUS_NOTIFY_ADD_DEVICE && + adapter && dev->parent == &i2c->pdev->dev) { + i2c->adapter = adapter; + schedule_work(&i2c->work); + return NOTIFY_OK; + } + + return NOTIFY_DONE; +} + +static int xe_i2c_register_adapter(struct xe_i2c *i2c) +{ + struct pci_dev *pci = to_pci_dev(i2c->drm_dev); + struct platform_device *pdev; + struct fwnode_handle *fwnode; + int ret; + + fwnode = fwnode_create_software_node(xe_i2c_adapter_properties, NULL); + if (IS_ERR(fwnode)) + return PTR_ERR(fwnode); + + /* + * Not using platform_device_register_full() here because we don't have + * a handle to the platform_device before it returns. xe_i2c_notifier() + * uses that handle, but it may be called before + * platform_device_register_full() is done. + */ + pdev = platform_device_alloc(adapter_name, pci_dev_id(pci)); + if (!pdev) { + ret = -ENOMEM; + goto err_fwnode_remove; + } + + if (i2c->adapter_irq) { + struct resource res; + + res = DEFINE_RES_IRQ_NAMED(i2c->adapter_irq, "xe_i2c"); + + ret = platform_device_add_resources(pdev, &res, 1); + if (ret) + goto err_pdev_put; + } + + pdev->dev.parent = i2c->drm_dev; + pdev->dev.fwnode = fwnode; + i2c->adapter_node = fwnode; + i2c->pdev = pdev; + + ret = platform_device_add(pdev); + if (ret) + goto err_pdev_put; + + return 0; + +err_pdev_put: + platform_device_put(pdev); +err_fwnode_remove: + fwnode_remove_software_node(fwnode); + + return ret; +} + +static void xe_i2c_unregister_adapter(struct xe_i2c *i2c) +{ + platform_device_unregister(i2c->pdev); + fwnode_remove_software_node(i2c->adapter_node); +} + +/** + * xe_i2c_present - I2C controller is present and functional + * @xe: xe device instance + * + * Check whether the I2C controller is present and functioning with valid + * endpoint cookie. + * + * Return: %true if present, %false otherwise. + */ +bool xe_i2c_present(struct xe_device *xe) +{ + return xe->i2c && xe->i2c->ep.cookie == XE_I2C_EP_COOKIE_DEVICE; +} + +/** + * xe_i2c_irq_handler: Handler for I2C interrupts + * @xe: xe device instance + * @master_ctl: interrupt register + * + * Forward interrupts generated by the I2C host adapter to the I2C host adapter + * driver. 
+ */ +void xe_i2c_irq_handler(struct xe_device *xe, u32 master_ctl) +{ + if (!xe->i2c || !xe->i2c->adapter_irq) + return; + + if (master_ctl & I2C_IRQ) + generic_handle_irq_safe(xe->i2c->adapter_irq); +} + +static int xe_i2c_irq_map(struct irq_domain *h, unsigned int virq, + irq_hw_number_t hw_irq_num) +{ + irq_set_chip_and_handler(virq, &dummy_irq_chip, handle_simple_irq); + return 0; +} + +static const struct irq_domain_ops xe_i2c_irq_ops = { + .map = xe_i2c_irq_map, +}; + +static int xe_i2c_create_irq(struct xe_i2c *i2c) +{ + struct irq_domain *domain; + + if (!(i2c->ep.capabilities & XE_I2C_EP_CAP_IRQ)) + return 0; + + domain = irq_domain_create_linear(dev_fwnode(i2c->drm_dev), 1, &xe_i2c_irq_ops, NULL); + if (!domain) + return -ENOMEM; + + i2c->adapter_irq = irq_create_mapping(domain, 0); + i2c->irqdomain = domain; + + return 0; +} + +static void xe_i2c_remove_irq(struct xe_i2c *i2c) +{ + if (!i2c->irqdomain) + return; + + irq_dispose_mapping(i2c->adapter_irq); + irq_domain_remove(i2c->irqdomain); +} + +static int xe_i2c_read(void *context, unsigned int reg, unsigned int *val) +{ + struct xe_i2c *i2c = context; + + *val = xe_mmio_read32(i2c->mmio, XE_REG(reg + I2C_MEM_SPACE_OFFSET)); + + return 0; +} + +static int xe_i2c_write(void *context, unsigned int reg, unsigned int val) +{ + struct xe_i2c *i2c = context; + + xe_mmio_write32(i2c->mmio, XE_REG(reg + I2C_MEM_SPACE_OFFSET), val); + + return 0; +} + +static const struct regmap_config i2c_regmap_config = { + .reg_bits = 32, + .val_bits = 32, + .reg_read = xe_i2c_read, + .reg_write = xe_i2c_write, + .fast_io = true, +}; + +void xe_i2c_pm_suspend(struct xe_device *xe) +{ + struct xe_mmio *mmio = xe_root_tile_mmio(xe); + + if (!xe_i2c_present(xe)) + return; + + xe_mmio_rmw32(mmio, I2C_CONFIG_PMCSR, PCI_PM_CTRL_STATE_MASK, (__force u32)PCI_D3hot); + drm_dbg(&xe->drm, "pmcsr: 0x%08x\n", xe_mmio_read32(mmio, I2C_CONFIG_PMCSR)); +} + +void xe_i2c_pm_resume(struct xe_device *xe, bool d3cold) +{ + struct xe_mmio *mmio = xe_root_tile_mmio(xe); + + if (!xe_i2c_present(xe)) + return; + + if (d3cold) + xe_mmio_rmw32(mmio, I2C_CONFIG_CMD, 0, PCI_COMMAND_MEMORY | PCI_COMMAND_MASTER); + + xe_mmio_rmw32(mmio, I2C_CONFIG_PMCSR, PCI_PM_CTRL_STATE_MASK, (__force u32)PCI_D0); + drm_dbg(&xe->drm, "pmcsr: 0x%08x\n", xe_mmio_read32(mmio, I2C_CONFIG_PMCSR)); +} + +static void xe_i2c_remove(void *data) +{ + struct xe_i2c *i2c = data; + unsigned int i; + + for (i = 0; i < XE_I2C_MAX_CLIENTS; i++) + i2c_unregister_device(i2c->client[i]); + + bus_unregister_notifier(&i2c_bus_type, &i2c->bus_notifier); + xe_i2c_unregister_adapter(i2c); + xe_i2c_remove_irq(i2c); +} + +/** + * xe_i2c_probe: Probe the I2C host adapter and the I2C clients attached to it + * @xe: xe device instance + * + * Register all the I2C devices described in the I2C Endpoint data structure. 
+ * + * Return: 0 on success, error code on failure + */ +int xe_i2c_probe(struct xe_device *xe) +{ + struct device *drm_dev = xe->drm.dev; + struct xe_i2c_endpoint ep; + struct regmap *regmap; + struct xe_i2c *i2c; + int ret; + + if (xe->info.platform != XE_BATTLEMAGE) + return 0; + + if (IS_SRIOV_VF(xe)) + return 0; + + xe_i2c_read_endpoint(xe_root_tile_mmio(xe), &ep); + if (ep.cookie != XE_I2C_EP_COOKIE_DEVICE) + return 0; + + i2c = devm_kzalloc(drm_dev, sizeof(*i2c), GFP_KERNEL); + if (!i2c) + return -ENOMEM; + + INIT_WORK(&i2c->work, xe_i2c_client_work); + i2c->mmio = xe_root_tile_mmio(xe); + i2c->drm_dev = drm_dev; + i2c->ep = ep; + xe->i2c = i2c; + + /* PCI PM isn't aware of this device, bring it up and match it with SGUnit state. */ + xe_i2c_pm_resume(xe, true); + + regmap = devm_regmap_init(drm_dev, NULL, i2c, &i2c_regmap_config); + if (IS_ERR(regmap)) + return PTR_ERR(regmap); + + i2c->bus_notifier.notifier_call = xe_i2c_notifier; + ret = bus_register_notifier(&i2c_bus_type, &i2c->bus_notifier); + if (ret) + return ret; + + ret = xe_i2c_create_irq(i2c); + if (ret) + goto err_unregister_notifier; + + ret = xe_i2c_register_adapter(i2c); + if (ret) + goto err_remove_irq; + + return devm_add_action_or_reset(drm_dev, xe_i2c_remove, i2c); + +err_remove_irq: + xe_i2c_remove_irq(i2c); + +err_unregister_notifier: + bus_unregister_notifier(&i2c_bus_type, &i2c->bus_notifier); + + return ret; +} diff --git a/drivers/gpu/drm/xe/xe_i2c.h b/drivers/gpu/drm/xe/xe_i2c.h new file mode 100644 index 000000000000..ecd5f10358e2 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_i2c.h @@ -0,0 +1,64 @@ +/* SPDX-License-Identifier: MIT */ +#ifndef _XE_I2C_H_ +#define _XE_I2C_H_ + +#include <linux/bits.h> +#include <linux/notifier.h> +#include <linux/types.h> +#include <linux/workqueue.h> + +struct device; +struct fwnode_handle; +struct i2c_adapter; +struct i2c_client; +struct irq_domain; +struct platform_device; +struct xe_device; +struct xe_mmio; + +#define XE_I2C_MAX_CLIENTS 3 + +#define XE_I2C_EP_COOKIE_DEVICE 0xde + +/* Endpoint Capabilities */ +#define XE_I2C_EP_CAP_IRQ BIT(0) + +struct xe_i2c_endpoint { + u8 cookie; + u8 capabilities; + u16 addr[XE_I2C_MAX_CLIENTS]; +}; + +struct xe_i2c { + struct fwnode_handle *adapter_node; + struct platform_device *pdev; + struct i2c_adapter *adapter; + struct i2c_client *client[XE_I2C_MAX_CLIENTS]; + + struct notifier_block bus_notifier; + struct work_struct work; + + struct irq_domain *irqdomain; + int adapter_irq; + + struct xe_i2c_endpoint ep; + struct device *drm_dev; + + struct xe_mmio *mmio; +}; + +#if IS_ENABLED(CONFIG_I2C) +int xe_i2c_probe(struct xe_device *xe); +bool xe_i2c_present(struct xe_device *xe); +void xe_i2c_irq_handler(struct xe_device *xe, u32 master_ctl); +void xe_i2c_pm_suspend(struct xe_device *xe); +void xe_i2c_pm_resume(struct xe_device *xe, bool d3cold); +#else +static inline int xe_i2c_probe(struct xe_device *xe) { return 0; } +static inline bool xe_i2c_present(struct xe_device *xe) { return false; } +static inline void xe_i2c_irq_handler(struct xe_device *xe, u32 master_ctl) { } +static inline void xe_i2c_pm_suspend(struct xe_device *xe) { } +static inline void xe_i2c_pm_resume(struct xe_device *xe, bool d3cold) { } +#endif + +#endif diff --git a/drivers/gpu/drm/xe/xe_irq.c b/drivers/gpu/drm/xe/xe_irq.c index 5362d3174b06..870edaf69388 100644 --- a/drivers/gpu/drm/xe/xe_irq.c +++ b/drivers/gpu/drm/xe/xe_irq.c @@ -18,10 +18,13 @@ #include "xe_gt.h" #include "xe_guc.h" #include "xe_hw_engine.h" +#include "xe_hw_error.h" +#include "xe_i2c.h" 
#include "xe_memirq.h" #include "xe_mmio.h" #include "xe_pxp.h" #include "xe_sriov.h" +#include "xe_tile.h" /* * Interrupt registers for a unit are always consecutive and ordered @@ -160,7 +163,7 @@ void xe_irq_enable_hwe(struct xe_gt *gt) dmask = irqs << 16 | irqs; smask = irqs << 16; - if (!xe_gt_is_media_type(gt)) { + if (xe_gt_is_main_type(gt)) { /* Enable interrupts for each engine class */ xe_mmio_write32(mmio, RENDER_COPY_INTR_ENABLE, dmask); if (ccs_mask) @@ -260,7 +263,7 @@ gt_engine_identity(struct xe_device *xe, static void gt_other_irq_handler(struct xe_gt *gt, const u8 instance, const u16 iir) { - if (instance == OTHER_GUC_INSTANCE && !xe_gt_is_media_type(gt)) + if (instance == OTHER_GUC_INSTANCE && xe_gt_is_main_type(gt)) return xe_guc_irq_handler(>->uc.guc, iir); if (instance == OTHER_MEDIA_GUC_INSTANCE && xe_gt_is_media_type(gt)) return xe_guc_irq_handler(>->uc.guc, iir); @@ -466,6 +469,7 @@ static irqreturn_t dg1_irq_handler(int irq, void *arg) xe_mmio_write32(mmio, GFX_MSTR_IRQ, master_ctl); gt_irq_handler(tile, master_ctl, intr_dw, identity); + xe_hw_error_irq_handler(tile, master_ctl); /* * Display interrupts (including display backlight operations @@ -476,6 +480,7 @@ static irqreturn_t dg1_irq_handler(int irq, void *arg) if (xe->info.has_heci_cscfi) xe_heci_csc_irq_handler(xe, master_ctl); xe_display_irq_handler(xe, master_ctl); + xe_i2c_irq_handler(xe, master_ctl); gu_misc_iir = gu_misc_irq_ack(xe, master_ctl); } } @@ -550,7 +555,7 @@ static void xelp_irq_reset(struct xe_tile *tile) static void dg1_irq_reset(struct xe_tile *tile) { - if (tile->id == 0) + if (xe_tile_is_root(tile)) dg1_intr_disable(tile_to_xe(tile)); gt_irq_reset(tile); @@ -753,6 +758,8 @@ int xe_irq_install(struct xe_device *xe) int nvec = 1; int err; + xe_hw_error_init(xe); + xe_irq_reset(xe); if (xe_device_has_msix(xe)) { diff --git a/drivers/gpu/drm/xe/xe_late_bind_fw.c b/drivers/gpu/drm/xe/xe_late_bind_fw.c new file mode 100644 index 000000000000..768442ca7da6 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_late_bind_fw.c @@ -0,0 +1,464 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2025 Intel Corporation + */ + +#include <linux/component.h> +#include <linux/delay.h> +#include <linux/firmware.h> + +#include <drm/drm_managed.h> +#include <drm/intel/i915_component.h> +#include <drm/intel/intel_lb_mei_interface.h> +#include <drm/drm_print.h> + +#include "xe_device.h" +#include "xe_late_bind_fw.h" +#include "xe_pcode.h" +#include "xe_pcode_api.h" +#include "xe_pm.h" + +/* + * The component should load quite quickly in most cases, but it could take + * a bit. 
Using a very big timeout just to cover the worst case scenario + */ +#define LB_INIT_TIMEOUT_MS 20000 + +/* + * Retry interval set to 6 seconds, in steps of 200 ms, to allow time for + * other OS components to release the MEI CL handle + */ +#define LB_FW_LOAD_RETRY_MAXCOUNT 30 +#define LB_FW_LOAD_RETRY_PAUSE_MS 200 + +static const u32 fw_id_to_type[] = { + [XE_LB_FW_FAN_CONTROL] = INTEL_LB_TYPE_FAN_CONTROL, + }; + +static const char * const fw_id_to_name[] = { + [XE_LB_FW_FAN_CONTROL] = "fan_control", + }; + +static struct xe_device * +late_bind_to_xe(struct xe_late_bind *late_bind) +{ + return container_of(late_bind, struct xe_device, late_bind); +} + +static struct xe_device * +late_bind_fw_to_xe(struct xe_late_bind_fw *lb_fw) +{ + return container_of(lb_fw, struct xe_device, late_bind.late_bind_fw[lb_fw->id]); +} + +/* Refer to the "Late Bind based Firmware Layout" documentation entry for details */ +static int parse_cpd_header(struct xe_late_bind_fw *lb_fw, + const void *data, size_t size, const char *manifest_entry) +{ + struct xe_device *xe = late_bind_fw_to_xe(lb_fw); + const struct gsc_cpd_header_v2 *header = data; + const struct gsc_manifest_header *manifest; + const struct gsc_cpd_entry *entry; + size_t min_size = sizeof(*header); + u32 offset = 0; + int i; + + /* manifest_entry is mandatory */ + xe_assert(xe, manifest_entry); + + if (size < min_size || header->header_marker != GSC_CPD_HEADER_MARKER) + return -ENOENT; + + if (header->header_length < sizeof(struct gsc_cpd_header_v2)) { + drm_err(&xe->drm, "%s late binding fw: Invalid CPD header length %u!\n", + fw_id_to_name[lb_fw->id], header->header_length); + return -EINVAL; + } + + min_size = header->header_length + sizeof(struct gsc_cpd_entry) * header->num_of_entries; + if (size < min_size) { + drm_err(&xe->drm, "%s late binding fw: too small! %zu < %zu\n", + fw_id_to_name[lb_fw->id], size, min_size); + return -ENODATA; + } + + /* Look for the manifest first */ + entry = (void *)header + header->header_length; + for (i = 0; i < header->num_of_entries; i++, entry++) + if (strcmp(entry->name, manifest_entry) == 0) + offset = entry->offset & GSC_CPD_ENTRY_OFFSET_MASK; + + if (!offset) { + drm_err(&xe->drm, "%s late binding fw: Failed to find manifest_entry\n", + fw_id_to_name[lb_fw->id]); + return -ENODATA; + } + + min_size = offset + sizeof(struct gsc_manifest_header); + if (size < min_size) { + drm_err(&xe->drm, "%s late binding fw: too small! 
%zu < %zu\n", + fw_id_to_name[lb_fw->id], size, min_size); + return -ENODATA; + } + + manifest = data + offset; + + lb_fw->version = manifest->fw_version; + + return 0; +} + +/* Refer to the "Late Bind based Firmware Layout" documentation entry for details */ +static int parse_lb_layout(struct xe_late_bind_fw *lb_fw, + const void *data, size_t size, const char *fpt_entry) +{ + struct xe_device *xe = late_bind_fw_to_xe(lb_fw); + const struct csc_fpt_header *header = data; + const struct csc_fpt_entry *entry; + size_t min_size = sizeof(*header); + u32 offset = 0; + int i; + + /* fpt_entry is mandatory */ + xe_assert(xe, fpt_entry); + + if (size < min_size || header->header_marker != CSC_FPT_HEADER_MARKER) + return -ENOENT; + + if (header->header_length < sizeof(struct csc_fpt_header)) { + drm_err(&xe->drm, "%s late binding fw: Invalid FPT header length %u!\n", + fw_id_to_name[lb_fw->id], header->header_length); + return -EINVAL; + } + + min_size = header->header_length + sizeof(struct csc_fpt_entry) * header->num_of_entries; + if (size < min_size) { + drm_err(&xe->drm, "%s late binding fw: too small! %zu < %zu\n", + fw_id_to_name[lb_fw->id], size, min_size); + return -ENODATA; + } + + /* Look for the cpd header first */ + entry = (void *)header + header->header_length; + for (i = 0; i < header->num_of_entries; i++, entry++) + if (strcmp(entry->name, fpt_entry) == 0) + offset = entry->offset; + + if (!offset) { + drm_err(&xe->drm, "%s late binding fw: Failed to find fpt_entry\n", + fw_id_to_name[lb_fw->id]); + return -ENODATA; + } + + min_size = offset + sizeof(struct gsc_cpd_header_v2); + if (size < min_size) { + drm_err(&xe->drm, "%s late binding fw: too small! %zu < %zu\n", + fw_id_to_name[lb_fw->id], size, min_size); + return -ENODATA; + } + + return parse_cpd_header(lb_fw, data + offset, size - offset, "LTES.man"); +} + +static const char *xe_late_bind_parse_status(uint32_t status) +{ + switch (status) { + case INTEL_LB_STATUS_SUCCESS: + return "success"; + case INTEL_LB_STATUS_4ID_MISMATCH: + return "4Id Mismatch"; + case INTEL_LB_STATUS_ARB_FAILURE: + return "ARB Failure"; + case INTEL_LB_STATUS_GENERAL_ERROR: + return "General Error"; + case INTEL_LB_STATUS_INVALID_PARAMS: + return "Invalid Params"; + case INTEL_LB_STATUS_INVALID_SIGNATURE: + return "Invalid Signature"; + case INTEL_LB_STATUS_INVALID_PAYLOAD: + return "Invalid Payload"; + case INTEL_LB_STATUS_TIMEOUT: + return "Timeout"; + default: + return "Unknown error"; + } +} + +static int xe_late_bind_fw_num_fans(struct xe_late_bind *late_bind, u32 *num_fans) +{ + struct xe_device *xe = late_bind_to_xe(late_bind); + struct xe_tile *root_tile = xe_device_get_root_tile(xe); + + return xe_pcode_read(root_tile, + PCODE_MBOX(FAN_SPEED_CONTROL, FSC_READ_NUM_FANS, 0), num_fans, NULL); +} + +void xe_late_bind_wait_for_worker_completion(struct xe_late_bind *late_bind) +{ + struct xe_device *xe = late_bind_to_xe(late_bind); + struct xe_late_bind_fw *lbfw; + int fw_id; + + for (fw_id = 0; fw_id < XE_LB_FW_MAX_ID; fw_id++) { + lbfw = &late_bind->late_bind_fw[fw_id]; + if (lbfw->payload && late_bind->wq) { + drm_dbg(&xe->drm, "Flush work: load %s firmware\n", + fw_id_to_name[lbfw->id]); + flush_work(&lbfw->work); + } + } +} + +static void xe_late_bind_work(struct work_struct *work) +{ + struct xe_late_bind_fw *lbfw = container_of(work, struct xe_late_bind_fw, work); + struct xe_late_bind *late_bind = container_of(lbfw, struct xe_late_bind, + late_bind_fw[lbfw->id]); + struct xe_device *xe = late_bind_to_xe(late_bind); + int retry = 
LB_FW_LOAD_RETRY_MAXCOUNT; + int ret; + int slept; + + xe_device_assert_mem_access(xe); + + /* we can queue this before the component is bound */ + for (slept = 0; slept < LB_INIT_TIMEOUT_MS; slept += 100) { + if (late_bind->component.ops) + break; + msleep(100); + } + + if (!late_bind->component.ops) { + drm_err(&xe->drm, "Late bind component not bound\n"); + /* Do not re-attempt fw load */ + drmm_kfree(&xe->drm, (void *)lbfw->payload); + lbfw->payload = NULL; + goto out; + } + + drm_dbg(&xe->drm, "Load %s firmware\n", fw_id_to_name[lbfw->id]); + + do { + ret = late_bind->component.ops->push_payload(late_bind->component.mei_dev, + lbfw->type, + lbfw->flags, + lbfw->payload, + lbfw->payload_size); + if (!ret) + break; + msleep(LB_FW_LOAD_RETRY_PAUSE_MS); + } while (--retry && ret == -EBUSY); + + if (!ret) { + drm_dbg(&xe->drm, "Load %s firmware successful\n", + fw_id_to_name[lbfw->id]); + goto out; + } + + if (ret > 0) + drm_err(&xe->drm, "Load %s firmware failed with err %d, %s\n", + fw_id_to_name[lbfw->id], ret, xe_late_bind_parse_status(ret)); + else + drm_err(&xe->drm, "Load %s firmware failed with err %d", + fw_id_to_name[lbfw->id], ret); + /* Do not re-attempt fw load */ + drmm_kfree(&xe->drm, (void *)lbfw->payload); + lbfw->payload = NULL; + +out: + xe_pm_runtime_put(xe); +} + +int xe_late_bind_fw_load(struct xe_late_bind *late_bind) +{ + struct xe_device *xe = late_bind_to_xe(late_bind); + struct xe_late_bind_fw *lbfw; + int fw_id; + + if (!late_bind->component_added) + return -ENODEV; + + if (late_bind->disable) + return 0; + + for (fw_id = 0; fw_id < XE_LB_FW_MAX_ID; fw_id++) { + lbfw = &late_bind->late_bind_fw[fw_id]; + if (lbfw->payload) { + xe_pm_runtime_get_noresume(xe); + queue_work(late_bind->wq, &lbfw->work); + } + } + return 0; +} + +static int __xe_late_bind_fw_init(struct xe_late_bind *late_bind, u32 fw_id) +{ + struct xe_device *xe = late_bind_to_xe(late_bind); + struct pci_dev *pdev = to_pci_dev(xe->drm.dev); + struct xe_late_bind_fw *lb_fw; + const struct firmware *fw; + u32 num_fans; + int ret; + + if (fw_id >= XE_LB_FW_MAX_ID) + return -EINVAL; + + lb_fw = &late_bind->late_bind_fw[fw_id]; + + lb_fw->id = fw_id; + lb_fw->type = fw_id_to_type[lb_fw->id]; + lb_fw->flags &= ~INTEL_LB_FLAG_IS_PERSISTENT; + + if (lb_fw->type == INTEL_LB_TYPE_FAN_CONTROL) { + ret = xe_late_bind_fw_num_fans(late_bind, &num_fans); + if (ret) { + drm_dbg(&xe->drm, "Failed to read number of fans: %d\n", ret); + return 0; /* Not a fatal error, continue without fan control */ + } + drm_dbg(&xe->drm, "Number of Fans: %d\n", num_fans); + if (!num_fans) + return 0; + } + + snprintf(lb_fw->blob_path, sizeof(lb_fw->blob_path), "xe/%s_8086_%04x_%04x_%04x.bin", + fw_id_to_name[lb_fw->id], pdev->device, + pdev->subsystem_vendor, pdev->subsystem_device); + + drm_dbg(&xe->drm, "Request late binding firmware %s\n", lb_fw->blob_path); + ret = firmware_request_nowarn(&fw, lb_fw->blob_path, xe->drm.dev); + if (ret) { + drm_dbg(&xe->drm, "%s late binding fw not available for current device", + fw_id_to_name[lb_fw->id]); + return 0; + } + + if (fw->size > XE_LB_MAX_PAYLOAD_SIZE) { + drm_err(&xe->drm, "Firmware %s size %zu is larger than max pay load size %u\n", + lb_fw->blob_path, fw->size, XE_LB_MAX_PAYLOAD_SIZE); + release_firmware(fw); + return -ENODATA; + } + + ret = parse_lb_layout(lb_fw, fw->data, fw->size, "LTES"); + if (ret) + return ret; + + lb_fw->payload_size = fw->size; + lb_fw->payload = drmm_kzalloc(&xe->drm, lb_fw->payload_size, GFP_KERNEL); + if (!lb_fw->payload) { + release_firmware(fw); + 
return -ENOMEM; + } + + drm_info(&xe->drm, "Using %s firmware from %s version %u.%u.%u.%u\n", + fw_id_to_name[lb_fw->id], lb_fw->blob_path, + lb_fw->version.major, lb_fw->version.minor, + lb_fw->version.hotfix, lb_fw->version.build); + + memcpy((void *)lb_fw->payload, fw->data, lb_fw->payload_size); + release_firmware(fw); + INIT_WORK(&lb_fw->work, xe_late_bind_work); + + return 0; +} + +static int xe_late_bind_fw_init(struct xe_late_bind *late_bind) +{ + int ret; + int fw_id; + + late_bind->wq = alloc_ordered_workqueue("late-bind-ordered-wq", 0); + if (!late_bind->wq) + return -ENOMEM; + + for (fw_id = 0; fw_id < XE_LB_FW_MAX_ID; fw_id++) { + ret = __xe_late_bind_fw_init(late_bind, fw_id); + if (ret) + return ret; + } + + return 0; +} + +static int xe_late_bind_component_bind(struct device *xe_kdev, + struct device *mei_kdev, void *data) +{ + struct xe_device *xe = kdev_to_xe_device(xe_kdev); + struct xe_late_bind *late_bind = &xe->late_bind; + + late_bind->component.ops = data; + late_bind->component.mei_dev = mei_kdev; + + return 0; +} + +static void xe_late_bind_component_unbind(struct device *xe_kdev, + struct device *mei_kdev, void *data) +{ + struct xe_device *xe = kdev_to_xe_device(xe_kdev); + struct xe_late_bind *late_bind = &xe->late_bind; + + xe_late_bind_wait_for_worker_completion(late_bind); + + late_bind->component.ops = NULL; +} + +static const struct component_ops xe_late_bind_component_ops = { + .bind = xe_late_bind_component_bind, + .unbind = xe_late_bind_component_unbind, +}; + +static void xe_late_bind_remove(void *arg) +{ + struct xe_late_bind *late_bind = arg; + struct xe_device *xe = late_bind_to_xe(late_bind); + + xe_late_bind_wait_for_worker_completion(late_bind); + + late_bind->component_added = false; + + component_del(xe->drm.dev, &xe_late_bind_component_ops); + if (late_bind->wq) { + destroy_workqueue(late_bind->wq); + late_bind->wq = NULL; + } +} + +/** + * xe_late_bind_init() - add xe mei late binding component + * @late_bind: pointer to late bind structure. + * + * Return: 0 if the initialization was successful, a negative errno otherwise. 
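 *
 * &struct xe_late_bind is embedded in &struct xe_device, so the expected
 * call site (an assumption; the caller is not part of this hunk) is the
 * device probe path, roughly:
 *
 *	err = xe_late_bind_init(&xe->late_bind);
 *	if (err)
 *		return err;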
+ */ +int xe_late_bind_init(struct xe_late_bind *late_bind) +{ + struct xe_device *xe = late_bind_to_xe(late_bind); + int err; + + if (!xe->info.has_late_bind) + return 0; + + if (!IS_ENABLED(CONFIG_INTEL_MEI_LB) || !IS_ENABLED(CONFIG_INTEL_MEI_GSC)) { + drm_info(&xe->drm, "Can't init xe mei late bind missing mei component\n"); + return 0; + } + + err = component_add_typed(xe->drm.dev, &xe_late_bind_component_ops, + INTEL_COMPONENT_LB); + if (err < 0) { + drm_err(&xe->drm, "Failed to add mei late bind component (%pe)\n", ERR_PTR(err)); + return err; + } + + late_bind->component_added = true; + + err = devm_add_action_or_reset(xe->drm.dev, xe_late_bind_remove, late_bind); + if (err) + return err; + + err = xe_late_bind_fw_init(late_bind); + if (err) + return err; + + return xe_late_bind_fw_load(late_bind); +} diff --git a/drivers/gpu/drm/xe/xe_late_bind_fw.h b/drivers/gpu/drm/xe/xe_late_bind_fw.h new file mode 100644 index 000000000000..07e437390539 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_late_bind_fw.h @@ -0,0 +1,17 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2025 Intel Corporation + */ + +#ifndef _XE_LATE_BIND_FW_H_ +#define _XE_LATE_BIND_FW_H_ + +#include <linux/types.h> + +struct xe_late_bind; + +int xe_late_bind_init(struct xe_late_bind *late_bind); +int xe_late_bind_fw_load(struct xe_late_bind *late_bind); +void xe_late_bind_wait_for_worker_completion(struct xe_late_bind *late_bind); + +#endif diff --git a/drivers/gpu/drm/xe/xe_late_bind_fw_types.h b/drivers/gpu/drm/xe/xe_late_bind_fw_types.h new file mode 100644 index 000000000000..0f5da89ce98b --- /dev/null +++ b/drivers/gpu/drm/xe/xe_late_bind_fw_types.h @@ -0,0 +1,75 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2025 Intel Corporation + */ + +#ifndef _XE_LATE_BIND_TYPES_H_ +#define _XE_LATE_BIND_TYPES_H_ + +#include <linux/iosys-map.h> +#include <linux/mutex.h> +#include <linux/types.h> +#include <linux/workqueue.h> +#include "xe_uc_fw_abi.h" + +#define XE_LB_MAX_PAYLOAD_SIZE SZ_4K + +/** + * xe_late_bind_fw_id - enum to determine late binding fw index + */ +enum xe_late_bind_fw_id { + XE_LB_FW_FAN_CONTROL = 0, + XE_LB_FW_MAX_ID +}; + +/** + * struct xe_late_bind_fw + */ +struct xe_late_bind_fw { + /** @id: firmware index */ + u32 id; + /** @blob_path: firmware binary path */ + char blob_path[PATH_MAX]; + /** @type: firmware type */ + u32 type; + /** @flags: firmware flags */ + u32 flags; + /** @payload: to store the late binding blob */ + const u8 *payload; + /** @payload_size: late binding blob payload_size */ + size_t payload_size; + /** @work: worker to upload latebind blob */ + struct work_struct work; + /** @version: late binding blob manifest version */ + struct gsc_version version; +}; + +/** + * struct xe_late_bind_component - Late Binding services component + * @mei_dev: device that provide Late Binding service. + * @ops: Ops implemented by Late Binding driver, used by Xe driver. 
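 *	The only op exercised by this patch is push_payload(); see
 *	xe_late_bind_work(), which retries it while it returns -EBUSY:
 *
 *	ret = late_bind->component.ops->push_payload(late_bind->component.mei_dev,
 *						     lbfw->type, lbfw->flags,
 *						     lbfw->payload,
 *						     lbfw->payload_size);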
+ * + * Communication between Xe and MEI drivers for Late Binding services + */ +struct xe_late_bind_component { + struct device *mei_dev; + const struct intel_lb_component_ops *ops; +}; + +/** + * struct xe_late_bind + */ +struct xe_late_bind { + /** @component: struct for communication with mei component */ + struct xe_late_bind_component component; + /** @late_bind_fw: late binding firmware array */ + struct xe_late_bind_fw late_bind_fw[XE_LB_FW_MAX_ID]; + /** @wq: workqueue to submit request to download late bind blob */ + struct workqueue_struct *wq; + /** @component_added: whether the component has been added */ + bool component_added; + /** @disable: to block late binding reload during pm resume flow*/ + bool disable; +}; + +#endif diff --git a/drivers/gpu/drm/xe/xe_lmtt.c b/drivers/gpu/drm/xe/xe_lmtt.c index 63db66df064b..62fc5a1a332d 100644 --- a/drivers/gpu/drm/xe/xe_lmtt.c +++ b/drivers/gpu/drm/xe/xe_lmtt.c @@ -11,6 +11,7 @@ #include "xe_assert.h" #include "xe_bo.h" +#include "xe_tlb_inval.h" #include "xe_lmtt.h" #include "xe_map.h" #include "xe_mmio.h" @@ -66,18 +67,21 @@ static struct xe_lmtt_pt *lmtt_pt_alloc(struct xe_lmtt *lmtt, unsigned int level goto out; } - bo = xe_bo_create_pin_map(lmtt_to_xe(lmtt), lmtt_to_tile(lmtt), NULL, - PAGE_ALIGN(lmtt->ops->lmtt_pte_size(level) * - lmtt->ops->lmtt_pte_num(level)), - ttm_bo_type_kernel, - XE_BO_FLAG_VRAM_IF_DGFX(lmtt_to_tile(lmtt)) | - XE_BO_FLAG_NEEDS_64K); + bo = xe_bo_create_pin_map_novm(lmtt_to_xe(lmtt), lmtt_to_tile(lmtt), + PAGE_ALIGN(lmtt->ops->lmtt_pte_size(level) * + lmtt->ops->lmtt_pte_num(level)), + ttm_bo_type_kernel, + XE_BO_FLAG_VRAM_IF_DGFX(lmtt_to_tile(lmtt)) | + XE_BO_FLAG_NEEDS_64K, false); if (IS_ERR(bo)) { err = PTR_ERR(bo); goto out_free_pt; } lmtt_assert(lmtt, xe_bo_is_vram(bo)); + lmtt_debug(lmtt, "level=%u addr=%#llx\n", level, (u64)xe_bo_main_addr(bo, XE_PAGE_SIZE)); + + xe_map_memset(lmtt_to_xe(lmtt), &bo->vmap, 0, 0, xe_bo_size(bo)); pt->level = level; pt->bo = bo; @@ -91,6 +95,9 @@ out: static void lmtt_pt_free(struct xe_lmtt_pt *pt) { + lmtt_debug(&pt->bo->tile->sriov.pf.lmtt, "level=%u addr=%llx\n", + pt->level, (u64)xe_bo_main_addr(pt->bo, XE_PAGE_SIZE)); + xe_bo_unpin_map_no_vm(pt->bo); kfree(pt); } @@ -188,14 +195,17 @@ static void lmtt_setup_dir_ptr(struct xe_lmtt *lmtt) struct xe_tile *tile = lmtt_to_tile(lmtt); struct xe_device *xe = tile_to_xe(tile); dma_addr_t offset = xe_bo_main_addr(lmtt->pd->bo, XE_PAGE_SIZE); + struct xe_gt *gt; + u8 id; lmtt_debug(lmtt, "DIR offset %pad\n", &offset); lmtt_assert(lmtt, xe_bo_is_vram(lmtt->pd->bo)); lmtt_assert(lmtt, IS_ALIGNED(offset, SZ_64K)); - xe_mmio_write32(&tile->mmio, - GRAPHICS_VER(xe) >= 20 ? XE2_LMEM_CFG : LMEM_CFG, - LMEM_EN | REG_FIELD_PREP(LMTT_DIR_PTR, offset / SZ_64K)); + for_each_gt_on_tile(gt, tile, id) + xe_mmio_write32(>->mmio, + GRAPHICS_VER(xe) >= 20 ? 
XE2_LMEM_CFG : LMEM_CFG, + LMEM_EN | REG_FIELD_PREP(LMTT_DIR_PTR, offset / SZ_64K)); } /** @@ -216,6 +226,58 @@ void xe_lmtt_init_hw(struct xe_lmtt *lmtt) lmtt_setup_dir_ptr(lmtt); } +static int lmtt_invalidate_hw(struct xe_lmtt *lmtt) +{ + struct xe_tlb_inval_fence fences[XE_MAX_GT_PER_TILE]; + struct xe_tlb_inval_fence *fence = fences; + struct xe_tile *tile = lmtt_to_tile(lmtt); + struct xe_gt *gt; + int result = 0; + int err; + u8 id; + + for_each_gt_on_tile(gt, tile, id) { + xe_tlb_inval_fence_init(>->tlb_inval, fence, true); + err = xe_tlb_inval_all(>->tlb_inval, fence); + result = result ?: err; + fence++; + } + + lmtt_debug(lmtt, "num_fences=%d err=%d\n", (int)(fence - fences), result); + + /* + * It is fine to wait for all fences, even for those which covers the + * invalidation request that failed, as such fence should be already + * marked as signaled. + */ + fence = fences; + for_each_gt_on_tile(gt, tile, id) + xe_tlb_inval_fence_wait(fence++); + + return result; +} + +/** + * xe_lmtt_invalidate_hw - Invalidate LMTT hardware. + * @lmtt: the &xe_lmtt to invalidate + * + * Send requests to all GuCs on this tile to invalidate all TLBs. + * + * This function should be called only when running as a PF driver. + */ +void xe_lmtt_invalidate_hw(struct xe_lmtt *lmtt) +{ + struct xe_device *xe = lmtt_to_xe(lmtt); + int err; + + lmtt_assert(lmtt, IS_SRIOV_PF(xe)); + + err = lmtt_invalidate_hw(lmtt); + if (err) + xe_sriov_warn(xe, "LMTT%u invalidation failed (%pe)", + lmtt_to_tile(lmtt)->id, ERR_PTR(err)); +} + static void lmtt_write_pte(struct xe_lmtt *lmtt, struct xe_lmtt_pt *pt, u64 pte, unsigned int idx) { @@ -226,9 +288,14 @@ static void lmtt_write_pte(struct xe_lmtt *lmtt, struct xe_lmtt_pt *pt, switch (lmtt->ops->lmtt_pte_size(level)) { case sizeof(u32): + lmtt_assert(lmtt, !overflows_type(pte, u32)); + lmtt_assert(lmtt, !pte || !iosys_map_rd(&pt->bo->vmap, idx * sizeof(u32), u32)); + xe_map_wr(lmtt_to_xe(lmtt), &pt->bo->vmap, idx * sizeof(u32), u32, pte); break; case sizeof(u64): + lmtt_assert(lmtt, !pte || !iosys_map_rd(&pt->bo->vmap, idx * sizeof(u64), u64)); + xe_map_wr(lmtt_to_xe(lmtt), &pt->bo->vmap, idx * sizeof(u64), u64, pte); break; default: @@ -265,6 +332,7 @@ static void lmtt_drop_pages(struct xe_lmtt *lmtt, unsigned int vfid) return; lmtt_write_pte(lmtt, pd, LMTT_PTE_INVALID, vfid); + lmtt_invalidate_hw(lmtt); lmtt_assert(lmtt, pd->level > 0); lmtt_assert(lmtt, pt->level == pd->level - 1); @@ -386,11 +454,11 @@ static void lmtt_insert_bo(struct xe_lmtt *lmtt, unsigned int vfid, struct xe_bo u64 addr, vram_offset; lmtt_assert(lmtt, IS_ALIGNED(start, page_size)); - lmtt_assert(lmtt, IS_ALIGNED(bo->size, page_size)); + lmtt_assert(lmtt, IS_ALIGNED(xe_bo_size(bo), page_size)); lmtt_assert(lmtt, xe_bo_is_vram(bo)); vram_offset = vram_region_gpu_offset(bo->ttm.resource); - xe_res_first(bo->ttm.resource, 0, bo->size, &cur); + xe_res_first(bo->ttm.resource, 0, xe_bo_size(bo), &cur); while (cur.remaining) { addr = xe_res_dma(&cur); addr += vram_offset; /* XXX */ diff --git a/drivers/gpu/drm/xe/xe_lmtt.h b/drivers/gpu/drm/xe/xe_lmtt.h index cb10ef994db6..75a234fbf367 100644 --- a/drivers/gpu/drm/xe/xe_lmtt.h +++ b/drivers/gpu/drm/xe/xe_lmtt.h @@ -15,6 +15,7 @@ struct xe_lmtt_ops; #ifdef CONFIG_PCI_IOV int xe_lmtt_init(struct xe_lmtt *lmtt); void xe_lmtt_init_hw(struct xe_lmtt *lmtt); +void xe_lmtt_invalidate_hw(struct xe_lmtt *lmtt); int xe_lmtt_prepare_pages(struct xe_lmtt *lmtt, unsigned int vfid, u64 range); int xe_lmtt_populate_pages(struct xe_lmtt *lmtt, unsigned int vfid, 
struct xe_bo *bo, u64 offset); void xe_lmtt_drop_pages(struct xe_lmtt *lmtt, unsigned int vfid); diff --git a/drivers/gpu/drm/xe/xe_lrc.c b/drivers/gpu/drm/xe/xe_lrc.c index 37598588a54f..47e9df775072 100644 --- a/drivers/gpu/drm/xe/xe_lrc.c +++ b/drivers/gpu/drm/xe/xe_lrc.c @@ -8,6 +8,7 @@ #include <generated/xe_wa_oob.h> #include <linux/ascii85.h> +#include <linux/panic.h> #include "instructions/xe_mi_commands.h" #include "instructions/xe_gfxpipe_commands.h" @@ -16,6 +17,7 @@ #include "regs/xe_lrc_layout.h" #include "xe_bb.h" #include "xe_bo.h" +#include "xe_configfs.h" #include "xe_device.h" #include "xe_drm_client.h" #include "xe_exec_queue_types.h" @@ -39,8 +41,32 @@ #define LRC_ENGINE_INSTANCE GENMASK_ULL(53, 48) #define LRC_PPHWSP_SIZE SZ_4K +#define LRC_INDIRECT_CTX_BO_SIZE SZ_4K #define LRC_INDIRECT_RING_STATE_SIZE SZ_4K -#define LRC_WA_BB_SIZE SZ_4K + +/* + * Layout of the LRC and associated data allocated as + * lrc->bo: + * + * Region Size + * +============================+=================================+ <- __xe_lrc_ring_offset() + * | Ring | ring_size, see | + * | | xe_lrc_init() | + * +============================+=================================+ <- __xe_lrc_pphwsp_offset() + * | PPHWSP (includes SW state) | 4K | + * +----------------------------+---------------------------------+ <- __xe_lrc_regs_offset() + * | Engine Context Image | n * 4K, see | + * | | xe_gt_lrc_size() | + * +----------------------------+---------------------------------+ <- __xe_lrc_indirect_ring_offset() + * | Indirect Ring State Page | 0 or 4k, see | + * | | XE_LRC_FLAG_INDIRECT_RING_STATE | + * +============================+=================================+ <- __xe_lrc_indirect_ctx_offset() + * | Indirect Context Page | 0 or 4k, see | + * | | XE_LRC_FLAG_INDIRECT_CTX | + * +============================+=================================+ <- __xe_lrc_wa_bb_offset() + * | WA BB Per Ctx | 4k | + * +============================+=================================+ <- xe_bo_size(lrc->bo) + */ static struct xe_device * lrc_to_xe(struct xe_lrc *lrc) @@ -48,6 +74,23 @@ lrc_to_xe(struct xe_lrc *lrc) return gt_to_xe(lrc->fence_ctx.gt); } +static bool +gt_engine_needs_indirect_ctx(struct xe_gt *gt, enum xe_engine_class class) +{ + struct xe_device *xe = gt_to_xe(gt); + + if (XE_GT_WA(gt, 16010904313) && + (class == XE_ENGINE_CLASS_RENDER || + class == XE_ENGINE_CLASS_COMPUTE)) + return true; + + if (xe_configfs_get_ctx_restore_mid_bb(to_pci_dev(xe->drm.dev), + class, NULL)) + return true; + + return false; +} + size_t xe_gt_lrc_size(struct xe_gt *gt, enum xe_engine_class class) { struct xe_device *xe = gt_to_xe(gt); @@ -582,8 +625,6 @@ static void set_context_control(u32 *regs, struct xe_hw_engine *hwe) if (xe_gt_has_indirect_ring_state(hwe->gt)) regs[CTX_CONTEXT_CONTROL] |= _MASKED_BIT_ENABLE(CTX_CTRL_INDIRECT_RING_STATE_ENABLE); - - /* TODO: Timestamp */ } static void set_memory_based_intr(u32 *regs, struct xe_hw_engine *hwe) @@ -663,7 +704,13 @@ u32 xe_lrc_regs_offset(struct xe_lrc *lrc) return xe_lrc_pphwsp_offset(lrc) + LRC_PPHWSP_SIZE; } -static size_t lrc_reg_size(struct xe_device *xe) +/** + * xe_lrc_reg_size() - Get size of the LRC registers area within queues + * @xe: the &xe_device struct instance + * + * Returns: Size of the LRC registers area for current platform + */ +size_t xe_lrc_reg_size(struct xe_device *xe) { if (GRAPHICS_VERx100(xe) >= 1250) return 96 * sizeof(u32); @@ -673,7 +720,7 @@ static size_t lrc_reg_size(struct xe_device *xe) size_t xe_lrc_skip_size(struct xe_device *xe) { - return 
LRC_PPHWSP_SIZE + lrc_reg_size(xe); + return LRC_PPHWSP_SIZE + xe_lrc_reg_size(xe); } static inline u32 __xe_lrc_seqno_offset(struct xe_lrc *lrc) @@ -717,8 +764,23 @@ static u32 __xe_lrc_ctx_timestamp_udw_offset(struct xe_lrc *lrc) static inline u32 __xe_lrc_indirect_ring_offset(struct xe_lrc *lrc) { - /* Indirect ring state page is at the very end of LRC */ - return lrc->size - LRC_INDIRECT_RING_STATE_SIZE; + u32 offset = xe_bo_size(lrc->bo) - LRC_WA_BB_SIZE - + LRC_INDIRECT_RING_STATE_SIZE; + + if (lrc->flags & XE_LRC_FLAG_INDIRECT_CTX) + offset -= LRC_INDIRECT_CTX_BO_SIZE; + + return offset; +} + +static inline u32 __xe_lrc_indirect_ctx_offset(struct xe_lrc *lrc) +{ + return xe_bo_size(lrc->bo) - LRC_WA_BB_SIZE - LRC_INDIRECT_CTX_BO_SIZE; +} + +static inline u32 __xe_lrc_wa_bb_offset(struct xe_lrc *lrc) +{ + return xe_bo_size(lrc->bo) - LRC_WA_BB_SIZE; } #define DECL_MAP_ADDR_HELPERS(elem) \ @@ -899,6 +961,47 @@ static void *empty_lrc_data(struct xe_hw_engine *hwe) return data; } +/** + * xe_default_lrc_update_memirq_regs_with_address - Re-compute GGTT references in default LRC + * of given engine. + * @hwe: the &xe_hw_engine struct instance + */ +void xe_default_lrc_update_memirq_regs_with_address(struct xe_hw_engine *hwe) +{ + struct xe_gt *gt = hwe->gt; + u32 *regs; + + if (!gt->default_lrc[hwe->class]) + return; + + regs = gt->default_lrc[hwe->class] + LRC_PPHWSP_SIZE; + set_memory_based_intr(regs, hwe); +} + +/** + * xe_lrc_update_memirq_regs_with_address - Re-compute GGTT references in mem interrupt data + * for given LRC. + * @lrc: the &xe_lrc struct instance + * @hwe: the &xe_hw_engine struct instance + * @regs: scratch buffer to be used as temporary storage + */ +void xe_lrc_update_memirq_regs_with_address(struct xe_lrc *lrc, struct xe_hw_engine *hwe, + u32 *regs) +{ + struct xe_gt *gt = hwe->gt; + struct iosys_map map; + size_t regs_len; + + if (!xe_device_uses_memirq(gt_to_xe(gt))) + return; + + map = __xe_lrc_regs_map(lrc); + regs_len = xe_lrc_reg_size(gt_to_xe(gt)); + xe_map_memcpy_from(gt_to_xe(gt), regs, &map, 0, regs_len); + set_memory_based_intr(regs, hwe); + xe_map_memcpy_to(gt_to_xe(gt), &map, 0, regs, regs_len); +} + static void xe_lrc_set_ppgtt(struct xe_lrc *lrc, struct xe_vm *vm) { u64 desc = xe_vm_pdp4_descriptor(vm, gt_to_tile(lrc->gt)); @@ -940,8 +1043,10 @@ static void xe_lrc_finish(struct xe_lrc *lrc) * store it in the PPHSWP. 
*/ #define CONTEXT_ACTIVE 1ULL -static ssize_t wa_bb_setup_utilization(struct xe_lrc *lrc, struct xe_hw_engine *hwe, - u32 *batch, size_t max_len) +static ssize_t setup_utilization_wa(struct xe_lrc *lrc, + struct xe_hw_engine *hwe, + u32 *batch, + size_t max_len) { u32 *cmd = batch; @@ -968,91 +1073,340 @@ static ssize_t wa_bb_setup_utilization(struct xe_lrc *lrc, struct xe_hw_engine * return cmd - batch; } -struct wa_bb_setup { +static ssize_t setup_timestamp_wa(struct xe_lrc *lrc, struct xe_hw_engine *hwe, + u32 *batch, size_t max_len) +{ + const u32 ts_addr = __xe_lrc_ctx_timestamp_ggtt_addr(lrc); + u32 *cmd = batch; + + if (!XE_GT_WA(lrc->gt, 16010904313) || + !(hwe->class == XE_ENGINE_CLASS_RENDER || + hwe->class == XE_ENGINE_CLASS_COMPUTE || + hwe->class == XE_ENGINE_CLASS_COPY || + hwe->class == XE_ENGINE_CLASS_VIDEO_DECODE || + hwe->class == XE_ENGINE_CLASS_VIDEO_ENHANCE)) + return 0; + + if (xe_gt_WARN_ON(lrc->gt, max_len < 12)) + return -ENOSPC; + + *cmd++ = MI_LOAD_REGISTER_MEM | MI_LRM_USE_GGTT | MI_LRI_LRM_CS_MMIO | + MI_LRM_ASYNC; + *cmd++ = RING_CTX_TIMESTAMP(0).addr; + *cmd++ = ts_addr; + *cmd++ = 0; + + *cmd++ = MI_LOAD_REGISTER_MEM | MI_LRM_USE_GGTT | MI_LRI_LRM_CS_MMIO | + MI_LRM_ASYNC; + *cmd++ = RING_CTX_TIMESTAMP(0).addr; + *cmd++ = ts_addr; + *cmd++ = 0; + + *cmd++ = MI_LOAD_REGISTER_MEM | MI_LRM_USE_GGTT | MI_LRI_LRM_CS_MMIO; + *cmd++ = RING_CTX_TIMESTAMP(0).addr; + *cmd++ = ts_addr; + *cmd++ = 0; + + return cmd - batch; +} + +static ssize_t setup_configfs_post_ctx_restore_bb(struct xe_lrc *lrc, + struct xe_hw_engine *hwe, + u32 *batch, size_t max_len) +{ + struct xe_device *xe = gt_to_xe(lrc->gt); + const u32 *user_batch; + u32 *cmd = batch; + u32 count; + + count = xe_configfs_get_ctx_restore_post_bb(to_pci_dev(xe->drm.dev), + hwe->class, &user_batch); + if (!count) + return 0; + + if (count > max_len) + return -ENOSPC; + + /* + * This should be used only for tests and validation. Taint the kernel + * as anything could be submitted directly in context switches + */ + add_taint(TAINT_TEST, LOCKDEP_STILL_OK); + + memcpy(cmd, user_batch, count * sizeof(u32)); + cmd += count; + + return cmd - batch; +} + +static ssize_t setup_configfs_mid_ctx_restore_bb(struct xe_lrc *lrc, + struct xe_hw_engine *hwe, + u32 *batch, size_t max_len) +{ + struct xe_device *xe = gt_to_xe(lrc->gt); + const u32 *user_batch; + u32 *cmd = batch; + u32 count; + + count = xe_configfs_get_ctx_restore_mid_bb(to_pci_dev(xe->drm.dev), + hwe->class, &user_batch); + if (!count) + return 0; + + if (count > max_len) + return -ENOSPC; + + /* + * This should be used only for tests and validation. 
Taint the kernel + * as anything could be submitted directly in context switches + */ + add_taint(TAINT_TEST, LOCKDEP_STILL_OK); + + memcpy(cmd, user_batch, count * sizeof(u32)); + cmd += count; + + return cmd - batch; +} + +static ssize_t setup_invalidate_state_cache_wa(struct xe_lrc *lrc, + struct xe_hw_engine *hwe, + u32 *batch, size_t max_len) +{ + u32 *cmd = batch; + + if (!XE_GT_WA(lrc->gt, 18022495364) || + hwe->class != XE_ENGINE_CLASS_RENDER) + return 0; + + if (xe_gt_WARN_ON(lrc->gt, max_len < 3)) + return -ENOSPC; + + *cmd++ = MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS(1); + *cmd++ = CS_DEBUG_MODE1(0).addr; + *cmd++ = _MASKED_BIT_ENABLE(INSTRUCTION_STATE_CACHE_INVALIDATE); + + return cmd - batch; +} + +struct bo_setup { ssize_t (*setup)(struct xe_lrc *lrc, struct xe_hw_engine *hwe, u32 *batch, size_t max_size); }; -static size_t wa_bb_offset(struct xe_lrc *lrc) -{ - return lrc->bo->size - LRC_WA_BB_SIZE; -} +struct bo_setup_state { + /* Input: */ + struct xe_lrc *lrc; + struct xe_hw_engine *hwe; + size_t max_size; + size_t reserve_dw; + unsigned int offset; + const struct bo_setup *funcs; + unsigned int num_funcs; + + /* State: */ + u32 *buffer; + u32 *ptr; + unsigned int written; +}; -static int setup_wa_bb(struct xe_lrc *lrc, struct xe_hw_engine *hwe) +static int setup_bo(struct bo_setup_state *state) { - const size_t max_size = LRC_WA_BB_SIZE; - static const struct wa_bb_setup funcs[] = { - { .setup = wa_bb_setup_utilization }, - }; ssize_t remain; - u32 *cmd, *buf = NULL; - if (lrc->bo->vmap.is_iomem) { - buf = kmalloc(max_size, GFP_KERNEL); - if (!buf) + if (state->lrc->bo->vmap.is_iomem) { + if (!state->buffer) return -ENOMEM; - cmd = buf; + state->ptr = state->buffer; } else { - cmd = lrc->bo->vmap.vaddr + wa_bb_offset(lrc); + state->ptr = state->lrc->bo->vmap.vaddr + state->offset; } - remain = max_size / sizeof(*cmd); + remain = state->max_size / sizeof(u32); - for (size_t i = 0; i < ARRAY_SIZE(funcs); i++) { - ssize_t len = funcs[i].setup(lrc, hwe, cmd, remain); + for (size_t i = 0; i < state->num_funcs; i++) { + ssize_t len = state->funcs[i].setup(state->lrc, state->hwe, + state->ptr, remain); remain -= len; /* - * There should always be at least 1 additional dword for - * the end marker + * Caller has asked for at least reserve_dw to remain unused. */ - if (len < 0 || xe_gt_WARN_ON(lrc->gt, remain < 1)) + if (len < 0 || + xe_gt_WARN_ON(state->lrc->gt, remain < state->reserve_dw)) goto fail; - cmd += len; + state->ptr += len; + state->written += len; } - *cmd++ = MI_BATCH_BUFFER_END; + return 0; - if (buf) { - xe_map_memcpy_to(gt_to_xe(lrc->gt), &lrc->bo->vmap, - wa_bb_offset(lrc), buf, - (cmd - buf) * sizeof(*cmd)); - kfree(buf); - } +fail: + return -ENOSPC; +} + +static void finish_bo(struct bo_setup_state *state) +{ + if (!state->buffer) + return; - xe_lrc_write_ctx_reg(lrc, CTX_BB_PER_CTX_PTR, xe_bo_ggtt_addr(lrc->bo) + - wa_bb_offset(lrc) + 1); + xe_map_memcpy_to(gt_to_xe(state->lrc->gt), &state->lrc->bo->vmap, + state->offset, state->buffer, + state->written * sizeof(u32)); +} + +/** + * xe_lrc_setup_wa_bb_with_scratch - Execute all wa bb setup callbacks. 
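 *
 * When the LRC BO is io-mapped the batch is staged in @scratch and copied
 * into place at the end; the plain setup_wa_bb() wrapper in this patch
 * allocates that buffer itself, roughly:
 *
 *	if (lrc->bo->vmap.is_iomem)
 *		buf = kmalloc(LRC_WA_BB_SIZE, GFP_KERNEL);
 *	ret = xe_lrc_setup_wa_bb_with_scratch(lrc, hwe, buf);
 *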
+ * @lrc: the &xe_lrc struct instance + * @hwe: the &xe_hw_engine struct instance + * @scratch: preallocated scratch buffer for temporary storage + * Return: 0 on success, negative error code on failure + */ +int xe_lrc_setup_wa_bb_with_scratch(struct xe_lrc *lrc, struct xe_hw_engine *hwe, u32 *scratch) +{ + static const struct bo_setup funcs[] = { + { .setup = setup_timestamp_wa }, + { .setup = setup_invalidate_state_cache_wa }, + { .setup = setup_utilization_wa }, + { .setup = setup_configfs_post_ctx_restore_bb }, + }; + struct bo_setup_state state = { + .lrc = lrc, + .hwe = hwe, + .max_size = LRC_WA_BB_SIZE, + .buffer = scratch, + .reserve_dw = 1, + .offset = __xe_lrc_wa_bb_offset(lrc), + .funcs = funcs, + .num_funcs = ARRAY_SIZE(funcs), + }; + int ret; + + ret = setup_bo(&state); + if (ret) + return ret; + + *state.ptr++ = MI_BATCH_BUFFER_END; + state.written++; + + finish_bo(&state); + + xe_lrc_write_ctx_reg(lrc, CTX_BB_PER_CTX_PTR, + xe_bo_ggtt_addr(lrc->bo) + state.offset + 1); return 0; +} + +static int setup_wa_bb(struct xe_lrc *lrc, struct xe_hw_engine *hwe) +{ + u32 *buf = NULL; + int ret; + + if (lrc->bo->vmap.is_iomem) + buf = kmalloc(LRC_WA_BB_SIZE, GFP_KERNEL); + + ret = xe_lrc_setup_wa_bb_with_scratch(lrc, hwe, buf); -fail: kfree(buf); - return -ENOSPC; + + return ret; } -#define PVC_CTX_ASID (0x2e + 1) -#define PVC_CTX_ACC_CTR_THOLD (0x2a + 1) +static int +setup_indirect_ctx(struct xe_lrc *lrc, struct xe_hw_engine *hwe) +{ + static const struct bo_setup rcs_funcs[] = { + { .setup = setup_timestamp_wa }, + { .setup = setup_configfs_mid_ctx_restore_bb }, + }; + static const struct bo_setup xcs_funcs[] = { + { .setup = setup_configfs_mid_ctx_restore_bb }, + }; + struct bo_setup_state state = { + .lrc = lrc, + .hwe = hwe, + .max_size = (63 * 64) /* max 63 cachelines */, + .buffer = NULL, + .offset = __xe_lrc_indirect_ctx_offset(lrc), + }; + int ret; + + if (!(lrc->flags & XE_LRC_FLAG_INDIRECT_CTX)) + return 0; + + if (hwe->class == XE_ENGINE_CLASS_RENDER || + hwe->class == XE_ENGINE_CLASS_COMPUTE) { + state.funcs = rcs_funcs; + state.num_funcs = ARRAY_SIZE(rcs_funcs); + } else { + state.funcs = xcs_funcs; + state.num_funcs = ARRAY_SIZE(xcs_funcs); + } + + if (xe_gt_WARN_ON(lrc->gt, !state.funcs)) + return 0; + + if (lrc->bo->vmap.is_iomem) + state.buffer = kmalloc(state.max_size, GFP_KERNEL); + + ret = setup_bo(&state); + if (ret) { + kfree(state.buffer); + return ret; + } + + /* + * Align to 64B cacheline so there's no garbage at the end for CS to + * execute: size for indirect ctx must be a multiple of 64. + */ + while (state.written & 0xf) { + *state.ptr++ = MI_NOOP; + state.written++; + } + + finish_bo(&state); + kfree(state.buffer); + + /* + * Enable INDIRECT_CTX leaving INDIRECT_CTX_OFFSET at its default: it + * varies per engine class, but the default is good enough + */ + xe_lrc_write_ctx_reg(lrc, + CTX_CS_INDIRECT_CTX, + (xe_bo_ggtt_addr(lrc->bo) + state.offset) | + /* Size in CLs. 
*/ + (state.written * sizeof(u32) / 64)); + + return 0; +} static int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe, struct xe_vm *vm, u32 ring_size, u16 msix_vec, u32 init_flags) { struct xe_gt *gt = hwe->gt; + const u32 lrc_size = xe_gt_lrc_size(gt, hwe->class); + u32 bo_size = ring_size + lrc_size + LRC_WA_BB_SIZE; struct xe_tile *tile = gt_to_tile(gt); struct xe_device *xe = gt_to_xe(gt); struct iosys_map map; - void *init_data = NULL; u32 arb_enable; - u32 lrc_size; u32 bo_flags; int err; kref_init(&lrc->refcount); lrc->gt = gt; + lrc->size = lrc_size; lrc->flags = 0; - lrc_size = ring_size + xe_gt_lrc_size(gt, hwe->class); + lrc->ring.size = ring_size; + lrc->ring.tail = 0; + + if (gt_engine_needs_indirect_ctx(gt, hwe->class)) { + lrc->flags |= XE_LRC_FLAG_INDIRECT_CTX; + bo_size += LRC_INDIRECT_CTX_BO_SIZE; + } + if (xe_gt_has_indirect_ring_state(gt)) lrc->flags |= XE_LRC_FLAG_INDIRECT_RING_STATE; @@ -1061,45 +1415,37 @@ static int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe, if (vm && vm->xef) /* userspace */ bo_flags |= XE_BO_FLAG_PINNED_LATE_RESTORE; - /* - * FIXME: Perma-pinning LRC as we don't yet support moving GGTT address - * via VM bind calls. - */ - lrc->bo = xe_bo_create_pin_map(xe, tile, NULL, - lrc_size + LRC_WA_BB_SIZE, - ttm_bo_type_kernel, - bo_flags); + lrc->bo = xe_bo_create_pin_map_novm(xe, tile, + bo_size, + ttm_bo_type_kernel, + bo_flags, false); if (IS_ERR(lrc->bo)) return PTR_ERR(lrc->bo); - lrc->size = lrc_size; - lrc->ring.size = ring_size; - lrc->ring.tail = 0; - xe_hw_fence_ctx_init(&lrc->fence_ctx, hwe->gt, hwe->fence_irq, hwe->name); - if (!gt->default_lrc[hwe->class]) { - init_data = empty_lrc_data(hwe); - if (!init_data) { - err = -ENOMEM; - goto err_lrc_finish; - } - } - /* * Init Per-Process of HW status Page, LRC / context state to known - * values + * values. If there's already a primed default_lrc, just copy it, otherwise + * it's the early submission to record the lrc: build a new empty one from + * scratch. */ map = __xe_lrc_pphwsp_map(lrc); - if (!init_data) { + if (gt->default_lrc[hwe->class]) { xe_map_memset(xe, &map, 0, 0, LRC_PPHWSP_SIZE); /* PPHWSP */ xe_map_memcpy_to(xe, &map, LRC_PPHWSP_SIZE, gt->default_lrc[hwe->class] + LRC_PPHWSP_SIZE, - xe_gt_lrc_size(gt, hwe->class) - LRC_PPHWSP_SIZE); + lrc_size - LRC_PPHWSP_SIZE); } else { - xe_map_memcpy_to(xe, &map, 0, init_data, - xe_gt_lrc_size(gt, hwe->class)); + void *init_data = empty_lrc_data(hwe); + + if (!init_data) { + err = -ENOMEM; + goto err_lrc_finish; + } + + xe_map_memcpy_to(xe, &map, 0, init_data, lrc_size); kfree(init_data); } @@ -1153,7 +1499,7 @@ static int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe, xe_lrc_write_ctx_reg(lrc, CTX_TIMESTAMP_UDW, 0); if (xe->info.has_asid && vm) - xe_lrc_write_ctx_reg(lrc, PVC_CTX_ASID, vm->usm.asid); + xe_lrc_write_ctx_reg(lrc, CTX_ASID, vm->usm.asid); lrc->desc = LRC_VALID; lrc->desc |= FIELD_PREP(LRC_ADDRESSING_MODE, LRC_LEGACY_64B_CONTEXT); @@ -1183,6 +1529,10 @@ static int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe, if (err) goto err_lrc_finish; + err = setup_indirect_ctx(lrc, hwe); + if (err) + goto err_lrc_finish; + return 0; err_lrc_finish: @@ -1237,6 +1587,23 @@ void xe_lrc_destroy(struct kref *ref) kfree(lrc); } +/** + * xe_lrc_update_hwctx_regs_with_address - Re-compute GGTT references within given LRC. 
+ * @lrc: the &xe_lrc struct instance + */ +void xe_lrc_update_hwctx_regs_with_address(struct xe_lrc *lrc) +{ + if (xe_lrc_has_indirect_ring_state(lrc)) { + xe_lrc_write_ctx_reg(lrc, CTX_INDIRECT_RING_STATE, + __xe_lrc_indirect_ring_ggtt_addr(lrc)); + + xe_lrc_write_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_START, + __xe_lrc_ring_ggtt_addr(lrc)); + } else { + xe_lrc_write_ctx_reg(lrc, CTX_RING_START, __xe_lrc_ring_ggtt_addr(lrc)); + } +} + void xe_lrc_set_ring_tail(struct xe_lrc *lrc, u32 tail) { if (xe_lrc_has_indirect_ring_state(lrc)) @@ -1775,7 +2142,7 @@ static const struct instr_state xe_hpg_svg_state[] = { { .instr = CMD_3DSTATE_DRAWING_RECTANGLE, .num_dw = 4 }, }; -void xe_lrc_emit_hwe_state_instructions(struct xe_exec_queue *q, struct xe_bb *bb) +u32 *xe_lrc_emit_hwe_state_instructions(struct xe_exec_queue *q, u32 *cs) { struct xe_gt *gt = q->hwe->gt; struct xe_device *xe = gt_to_xe(gt); @@ -1802,7 +2169,7 @@ void xe_lrc_emit_hwe_state_instructions(struct xe_exec_queue *q, struct xe_bb *b * continue to emit all of the SVG state since it's best not to leak * any of the state between contexts, even if that leakage is harmless. */ - if (XE_WA(gt, 14019789679) && q->hwe->class == XE_ENGINE_CLASS_RENDER) { + if (XE_GT_WA(gt, 14019789679) && q->hwe->class == XE_ENGINE_CLASS_RENDER) { state_table = xe_hpg_svg_state; state_table_size = ARRAY_SIZE(xe_hpg_svg_state); } @@ -1810,7 +2177,7 @@ void xe_lrc_emit_hwe_state_instructions(struct xe_exec_queue *q, struct xe_bb *b if (!state_table) { xe_gt_dbg(gt, "No non-register state to emit on graphics ver %d.%02d\n", GRAPHICS_VER(xe), GRAPHICS_VERx100(xe) % 100); - return; + return cs; } for (int i = 0; i < state_table_size; i++) { @@ -1833,12 +2200,14 @@ void xe_lrc_emit_hwe_state_instructions(struct xe_exec_queue *q, struct xe_bb *b instr == CMD_3DSTATE_DRAWING_RECTANGLE) instr = CMD_3DSTATE_DRAWING_RECTANGLE_FAST; - bb->cs[bb->len] = instr; + *cs = instr; if (!is_single_dw) - bb->cs[bb->len] |= (num_dw - 2); + *cs |= (num_dw - 2); - bb->len += num_dw; + cs += num_dw; } + + return cs; } struct xe_lrc_snapshot *xe_lrc_snapshot_capture(struct xe_lrc *lrc) @@ -1859,8 +2228,7 @@ struct xe_lrc_snapshot *xe_lrc_snapshot_capture(struct xe_lrc *lrc) snapshot->seqno = xe_lrc_seqno(lrc); snapshot->lrc_bo = xe_bo_get(lrc->bo); snapshot->lrc_offset = xe_lrc_pphwsp_offset(lrc); - snapshot->lrc_size = lrc->bo->size - snapshot->lrc_offset - - LRC_WA_BB_SIZE; + snapshot->lrc_size = lrc->size; snapshot->lrc_snapshot = NULL; snapshot->ctx_timestamp = lower_32_bits(xe_lrc_ctx_timestamp(lrc)); snapshot->ctx_job_timestamp = xe_lrc_ctx_job_timestamp(lrc); diff --git a/drivers/gpu/drm/xe/xe_lrc.h b/drivers/gpu/drm/xe/xe_lrc.h index eb6e8de8c939..188565465779 100644 --- a/drivers/gpu/drm/xe/xe_lrc.h +++ b/drivers/gpu/drm/xe/xe_lrc.h @@ -42,6 +42,8 @@ struct xe_lrc_snapshot { #define LRC_PPHWSP_FLUSH_INVAL_SCRATCH_ADDR (0x34 * 4) #define LRC_PPHWSP_PXP_INVAL_SCRATCH_ADDR (0x40 * 4) +#define LRC_WA_BB_SIZE SZ_4K + #define XE_LRC_CREATE_RUNALONE 0x1 #define XE_LRC_CREATE_PXP 0x2 struct xe_lrc *xe_lrc_create(struct xe_hw_engine *hwe, struct xe_vm *vm, @@ -88,6 +90,10 @@ bool xe_lrc_ring_is_idle(struct xe_lrc *lrc); u32 xe_lrc_indirect_ring_ggtt_addr(struct xe_lrc *lrc); u32 xe_lrc_ggtt_addr(struct xe_lrc *lrc); u32 *xe_lrc_regs(struct xe_lrc *lrc); +void xe_lrc_update_hwctx_regs_with_address(struct xe_lrc *lrc); +void xe_default_lrc_update_memirq_regs_with_address(struct xe_hw_engine *hwe); +void xe_lrc_update_memirq_regs_with_address(struct xe_lrc *lrc, struct xe_hw_engine 
*hwe, + u32 *regs); u32 xe_lrc_read_ctx_reg(struct xe_lrc *lrc, int reg_nr); void xe_lrc_write_ctx_reg(struct xe_lrc *lrc, int reg_nr, u32 val); @@ -106,13 +112,14 @@ s32 xe_lrc_start_seqno(struct xe_lrc *lrc); u32 xe_lrc_parallel_ggtt_addr(struct xe_lrc *lrc); struct iosys_map xe_lrc_parallel_map(struct xe_lrc *lrc); +size_t xe_lrc_reg_size(struct xe_device *xe); size_t xe_lrc_skip_size(struct xe_device *xe); void xe_lrc_dump_default(struct drm_printer *p, struct xe_gt *gt, enum xe_engine_class); -void xe_lrc_emit_hwe_state_instructions(struct xe_exec_queue *q, struct xe_bb *bb); +u32 *xe_lrc_emit_hwe_state_instructions(struct xe_exec_queue *q, u32 *cs); struct xe_lrc_snapshot *xe_lrc_snapshot_capture(struct xe_lrc *lrc); void xe_lrc_snapshot_capture_delayed(struct xe_lrc_snapshot *snapshot); @@ -124,6 +131,8 @@ u32 xe_lrc_ctx_timestamp_udw_ggtt_addr(struct xe_lrc *lrc); u64 xe_lrc_ctx_timestamp(struct xe_lrc *lrc); u32 xe_lrc_ctx_job_timestamp_ggtt_addr(struct xe_lrc *lrc); u32 xe_lrc_ctx_job_timestamp(struct xe_lrc *lrc); +int xe_lrc_setup_wa_bb_with_scratch(struct xe_lrc *lrc, struct xe_hw_engine *hwe, + u32 *scratch); /** * xe_lrc_update_timestamp - readout LRC timestamp and update cached value diff --git a/drivers/gpu/drm/xe/xe_lrc_types.h b/drivers/gpu/drm/xe/xe_lrc_types.h index 883e550a9423..e9883706e004 100644 --- a/drivers/gpu/drm/xe/xe_lrc_types.h +++ b/drivers/gpu/drm/xe/xe_lrc_types.h @@ -22,14 +22,15 @@ struct xe_lrc { */ struct xe_bo *bo; - /** @size: size of lrc including any indirect ring state page */ + /** @size: size of the lrc and optional indirect ring state */ u32 size; /** @gt: gt which this LRC belongs to */ struct xe_gt *gt; /** @flags: LRC flags */ -#define XE_LRC_FLAG_INDIRECT_RING_STATE 0x1 +#define XE_LRC_FLAG_INDIRECT_CTX 0x1 +#define XE_LRC_FLAG_INDIRECT_RING_STATE 0x2 u32 flags; /** @refcount: ref count of this lrc */ diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c index 8f8e9fdfb2a8..a36ce7dce8cc 100644 --- a/drivers/gpu/drm/xe/xe_migrate.c +++ b/drivers/gpu/drm/xe/xe_migrate.c @@ -9,6 +9,7 @@ #include <linux/sizes.h> #include <drm/drm_managed.h> +#include <drm/drm_pagemap.h> #include <drm/ttm/ttm_tt.h> #include <uapi/drm/xe_drm.h> @@ -30,10 +31,13 @@ #include "xe_mocs.h" #include "xe_pt.h" #include "xe_res_cursor.h" +#include "xe_sa.h" #include "xe_sched_job.h" #include "xe_sync.h" #include "xe_trace_bo.h" +#include "xe_validation.h" #include "xe_vm.h" +#include "xe_vram.h" /** * struct xe_migrate - migrate context. @@ -82,20 +86,7 @@ struct xe_migrate { * of the instruction. Subtracting the instruction header (1 dword) and * address (2 dwords), that leaves 0x3FD dwords (0x1FE qwords) for PTE values. */ -#define MAX_PTE_PER_SDI 0x1FE - -/** - * xe_tile_migrate_exec_queue() - Get this tile's migrate exec queue. - * @tile: The tile. - * - * Returns the default migrate exec queue of this tile. 
- * - * Return: The default migrate exec queue - */ -struct xe_exec_queue *xe_tile_migrate_exec_queue(struct xe_tile *tile) -{ - return tile->migrate->q; -} +#define MAX_PTE_PER_SDI 0x1FEU static void xe_migrate_fini(void *arg) { @@ -130,38 +121,39 @@ static u64 xe_migrate_vram_ofs(struct xe_device *xe, u64 addr, bool is_comp_pte) u64 identity_offset = IDENTITY_OFFSET; if (GRAPHICS_VER(xe) >= 20 && is_comp_pte) - identity_offset += DIV_ROUND_UP_ULL(xe->mem.vram.actual_physical_size, SZ_1G); + identity_offset += DIV_ROUND_UP_ULL(xe_vram_region_actual_physical_size + (xe->mem.vram), SZ_1G); - addr -= xe->mem.vram.dpa_base; + addr -= xe_vram_region_dpa_base(xe->mem.vram); return addr + (identity_offset << xe_pt_shift(2)); } static void xe_migrate_program_identity(struct xe_device *xe, struct xe_vm *vm, struct xe_bo *bo, u64 map_ofs, u64 vram_offset, u16 pat_index, u64 pt_2m_ofs) { + struct xe_vram_region *vram = xe->mem.vram; + resource_size_t dpa_base = xe_vram_region_dpa_base(vram); u64 pos, ofs, flags; u64 entry; /* XXX: Unclear if this should be usable_size? */ - u64 vram_limit = xe->mem.vram.actual_physical_size + - xe->mem.vram.dpa_base; + u64 vram_limit = xe_vram_region_actual_physical_size(vram) + dpa_base; u32 level = 2; ofs = map_ofs + XE_PAGE_SIZE * level + vram_offset * 8; flags = vm->pt_ops->pte_encode_addr(xe, 0, pat_index, level, true, 0); - xe_assert(xe, IS_ALIGNED(xe->mem.vram.usable_size, SZ_2M)); + xe_assert(xe, IS_ALIGNED(xe_vram_region_usable_size(vram), SZ_2M)); /* * Use 1GB pages when possible, last chunk always use 2M * pages as mixing reserved memory (stolen, WOCPM) with a single * mapping is not allowed on certain platforms. */ - for (pos = xe->mem.vram.dpa_base; pos < vram_limit; + for (pos = dpa_base; pos < vram_limit; pos += SZ_1G, ofs += 8) { if (pos + SZ_1G >= vram_limit) { - entry = vm->pt_ops->pde_encode_bo(bo, pt_2m_ofs, - pat_index); + entry = vm->pt_ops->pde_encode_bo(bo, pt_2m_ofs); xe_map_wr(xe, &bo->vmap, ofs, u64, entry); flags = vm->pt_ops->pte_encode_addr(xe, 0, @@ -182,7 +174,7 @@ static void xe_migrate_program_identity(struct xe_device *xe, struct xe_vm *vm, } static int xe_migrate_prepare_vm(struct xe_tile *tile, struct xe_migrate *m, - struct xe_vm *vm) + struct xe_vm *vm, struct drm_exec *exec) { struct xe_device *xe = tile_to_xe(tile); u16 pat_index = xe->pat.idx[XE_CACHE_WB]; @@ -203,19 +195,19 @@ static int xe_migrate_prepare_vm(struct xe_tile *tile, struct xe_migrate *m, BUILD_BUG_ON(!(NUM_KERNEL_PDE & 1)); /* Need to be sure everything fits in the first PT, or create more */ - xe_tile_assert(tile, m->batch_base_ofs + batch->size < SZ_2M); + xe_tile_assert(tile, m->batch_base_ofs + xe_bo_size(batch) < SZ_2M); bo = xe_bo_create_pin_map(vm->xe, tile, vm, num_entries * XE_PAGE_SIZE, ttm_bo_type_kernel, XE_BO_FLAG_VRAM_IF_DGFX(tile) | - XE_BO_FLAG_PAGETABLE); + XE_BO_FLAG_PAGETABLE, exec); if (IS_ERR(bo)) return PTR_ERR(bo); /* PT30 & PT31 reserved for 2M identity map */ - pt29_ofs = bo->size - 3 * XE_PAGE_SIZE; - entry = vm->pt_ops->pde_encode_bo(bo, pt29_ofs, pat_index); + pt29_ofs = xe_bo_size(bo) - 3 * XE_PAGE_SIZE; + entry = vm->pt_ops->pde_encode_bo(bo, pt29_ofs); xe_pt_write(xe, &vm->pt_root[id]->bo->vmap, 0, entry); map_ofs = (num_entries - num_setup) * XE_PAGE_SIZE; @@ -236,7 +228,7 @@ static int xe_migrate_prepare_vm(struct xe_tile *tile, struct xe_migrate *m, if (!IS_DGFX(xe)) { /* Write out batch too */ m->batch_base_ofs = NUM_PT_SLOTS * XE_PAGE_SIZE; - for (i = 0; i < batch->size; + for (i = 0; i < xe_bo_size(batch); i += vm->flags & 
XE_VM_FLAG_64K ? XE_64K_PAGE_SIZE : XE_PAGE_SIZE) { entry = vm->pt_ops->pte_encode_bo(batch, i, @@ -247,13 +239,13 @@ static int xe_migrate_prepare_vm(struct xe_tile *tile, struct xe_migrate *m, level++; } if (xe->info.has_usm) { - xe_tile_assert(tile, batch->size == SZ_1M); + xe_tile_assert(tile, xe_bo_size(batch) == SZ_1M); batch = tile->primary_gt->usm.bb_pool->bo; m->usm_batch_base_ofs = m->batch_base_ofs + SZ_1M; - xe_tile_assert(tile, batch->size == SZ_512K); + xe_tile_assert(tile, xe_bo_size(batch) == SZ_512K); - for (i = 0; i < batch->size; + for (i = 0; i < xe_bo_size(batch); i += vm->flags & XE_VM_FLAG_64K ? XE_64K_PAGE_SIZE : XE_PAGE_SIZE) { entry = vm->pt_ops->pte_encode_bo(batch, i, @@ -283,15 +275,14 @@ static int xe_migrate_prepare_vm(struct xe_tile *tile, struct xe_migrate *m, flags = XE_PDE_64K; entry = vm->pt_ops->pde_encode_bo(bo, map_ofs + (u64)(level - 1) * - XE_PAGE_SIZE, pat_index); + XE_PAGE_SIZE); xe_map_wr(xe, &bo->vmap, map_ofs + XE_PAGE_SIZE * level, u64, entry | flags); } /* Write PDE's that point to our BO. */ - for (i = 0; i < map_ofs / PAGE_SIZE; i++) { - entry = vm->pt_ops->pde_encode_bo(bo, (u64)i * XE_PAGE_SIZE, - pat_index); + for (i = 0; i < map_ofs / XE_PAGE_SIZE; i++) { + entry = vm->pt_ops->pde_encode_bo(bo, (u64)i * XE_PAGE_SIZE); xe_map_wr(xe, &bo->vmap, map_ofs + XE_PAGE_SIZE + (i + 1) * 8, u64, entry); @@ -306,12 +297,12 @@ static int xe_migrate_prepare_vm(struct xe_tile *tile, struct xe_migrate *m, /* Identity map the entire vram at 256GiB offset */ if (IS_DGFX(xe)) { - u64 pt30_ofs = bo->size - 2 * XE_PAGE_SIZE; + u64 pt30_ofs = xe_bo_size(bo) - 2 * XE_PAGE_SIZE; + resource_size_t actual_phy_size = xe_vram_region_actual_physical_size(xe->mem.vram); xe_migrate_program_identity(xe, vm, bo, map_ofs, IDENTITY_OFFSET, pat_index, pt30_ofs); - xe_assert(xe, xe->mem.vram.actual_physical_size <= - (MAX_NUM_PTE - IDENTITY_OFFSET) * SZ_1G); + xe_assert(xe, actual_phy_size <= (MAX_NUM_PTE - IDENTITY_OFFSET) * SZ_1G); /* * Identity map the entire vram for compressed pat_index for xe2+ @@ -320,11 +311,11 @@ static int xe_migrate_prepare_vm(struct xe_tile *tile, struct xe_migrate *m, if (GRAPHICS_VER(xe) >= 20 && xe_device_has_flat_ccs(xe)) { u16 comp_pat_index = xe->pat.idx[XE_CACHE_NONE_COMPRESSION]; u64 vram_offset = IDENTITY_OFFSET + - DIV_ROUND_UP_ULL(xe->mem.vram.actual_physical_size, SZ_1G); - u64 pt31_ofs = bo->size - XE_PAGE_SIZE; + DIV_ROUND_UP_ULL(actual_phy_size, SZ_1G); + u64 pt31_ofs = xe_bo_size(bo) - XE_PAGE_SIZE; - xe_assert(xe, xe->mem.vram.actual_physical_size <= (MAX_NUM_PTE - - IDENTITY_OFFSET - IDENTITY_OFFSET / 2) * SZ_1G); + xe_assert(xe, actual_phy_size <= (MAX_NUM_PTE - IDENTITY_OFFSET - + IDENTITY_OFFSET / 2) * SZ_1G); xe_migrate_program_identity(xe, vm, bo, map_ofs, vram_offset, comp_pat_index, pt31_ofs); } @@ -387,38 +378,63 @@ static bool xe_migrate_needs_ccs_emit(struct xe_device *xe) } /** - * xe_migrate_init() - Initialize a migrate context - * @tile: Back-pointer to the tile we're initializing for. + * xe_migrate_alloc - Allocate a migrate struct for a given &xe_tile + * @tile: &xe_tile + * + * Allocates a &xe_migrate for a given tile. * - * Return: Pointer to a migrate context on success. Error pointer on error. + * Return: &xe_migrate on success, or NULL when out of memory. 
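 *
 * Allocation and initialization are split in this patch; the expected usage
 * from the tile init code (an assumption, the caller is outside this hunk)
 * is roughly:
 *
 *	tile->migrate = xe_migrate_alloc(tile);
 *	if (!tile->migrate)
 *		return -ENOMEM;
 *	...
 *	err = xe_migrate_init(tile->migrate);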
*/ -struct xe_migrate *xe_migrate_init(struct xe_tile *tile) +struct xe_migrate *xe_migrate_alloc(struct xe_tile *tile) +{ + struct xe_migrate *m = drmm_kzalloc(&tile_to_xe(tile)->drm, sizeof(*m), GFP_KERNEL); + + if (m) + m->tile = tile; + return m; +} + +static int xe_migrate_lock_prepare_vm(struct xe_tile *tile, struct xe_migrate *m, struct xe_vm *vm) { struct xe_device *xe = tile_to_xe(tile); + struct xe_validation_ctx ctx; + struct drm_exec exec; + int err = 0; + + xe_validation_guard(&ctx, &xe->val, &exec, (struct xe_val_flags) {}, err) { + err = xe_vm_drm_exec_lock(vm, &exec); + drm_exec_retry_on_contention(&exec); + err = xe_migrate_prepare_vm(tile, m, vm, &exec); + drm_exec_retry_on_contention(&exec); + xe_validation_retry_on_oom(&ctx, &err); + } + + return err; +} + +/** + * xe_migrate_init() - Initialize a migrate context + * @m: The migration context + * + * Return: 0 if successful, negative error code on failure + */ +int xe_migrate_init(struct xe_migrate *m) +{ + struct xe_tile *tile = m->tile; struct xe_gt *primary_gt = tile->primary_gt; - struct xe_migrate *m; + struct xe_device *xe = tile_to_xe(tile); struct xe_vm *vm; int err; - m = devm_kzalloc(xe->drm.dev, sizeof(*m), GFP_KERNEL); - if (!m) - return ERR_PTR(-ENOMEM); - - m->tile = tile; - /* Special layout, prepared below.. */ vm = xe_vm_create(xe, XE_VM_FLAG_MIGRATION | - XE_VM_FLAG_SET_TILE_ID(tile)); + XE_VM_FLAG_SET_TILE_ID(tile), NULL); if (IS_ERR(vm)) - return ERR_CAST(vm); + return PTR_ERR(vm); - xe_vm_lock(vm, false); - err = xe_migrate_prepare_vm(tile, m, vm); - xe_vm_unlock(vm); - if (err) { - xe_vm_close_and_put(vm); - return ERR_PTR(err); - } + err = xe_migrate_lock_prepare_vm(tile, m, vm); + if (err) + goto err_out; if (xe->info.has_usm) { struct xe_hw_engine *hwe = xe_gt_hw_engine(primary_gt, @@ -427,8 +443,10 @@ struct xe_migrate *xe_migrate_init(struct xe_tile *tile) false); u32 logical_mask = xe_migrate_usm_logical_mask(primary_gt); - if (!hwe || !logical_mask) - return ERR_PTR(-EINVAL); + if (!hwe || !logical_mask) { + err = -EINVAL; + goto err_out; + } /* * XXX: Currently only reserving 1 (likely slow) BCS instance on @@ -437,16 +455,18 @@ struct xe_migrate *xe_migrate_init(struct xe_tile *tile) m->q = xe_exec_queue_create(xe, vm, logical_mask, 1, hwe, EXEC_QUEUE_FLAG_KERNEL | EXEC_QUEUE_FLAG_PERMANENT | - EXEC_QUEUE_FLAG_HIGH_PRIORITY, 0); + EXEC_QUEUE_FLAG_HIGH_PRIORITY | + EXEC_QUEUE_FLAG_MIGRATE, 0); } else { m->q = xe_exec_queue_create_class(xe, primary_gt, vm, XE_ENGINE_CLASS_COPY, EXEC_QUEUE_FLAG_KERNEL | - EXEC_QUEUE_FLAG_PERMANENT, 0); + EXEC_QUEUE_FLAG_PERMANENT | + EXEC_QUEUE_FLAG_MIGRATE, 0); } if (IS_ERR(m->q)) { - xe_vm_close_and_put(vm); - return ERR_CAST(m->q); + err = PTR_ERR(m->q); + goto err_out; } mutex_init(&m->job_mutex); @@ -456,7 +476,7 @@ struct xe_migrate *xe_migrate_init(struct xe_tile *tile) err = devm_add_action_or_reset(xe->drm.dev, xe_migrate_fini, m); if (err) - return ERR_PTR(err); + return err; if (IS_DGFX(xe)) { if (xe_migrate_needs_ccs_emit(xe)) @@ -471,7 +491,12 @@ struct xe_migrate *xe_migrate_init(struct xe_tile *tile) (unsigned long long)m->min_chunk_size); } - return m; + return err; + +err_out: + xe_vm_close_and_put(vm); + return err; + } static u64 max_mem_transfer_per_pass(struct xe_device *xe) @@ -768,7 +793,7 @@ struct dma_fence *xe_migrate_copy(struct xe_migrate *m, struct xe_gt *gt = m->tile->primary_gt; struct xe_device *xe = gt_to_xe(gt); struct dma_fence *fence = NULL; - u64 size = src_bo->size; + u64 size = xe_bo_size(src_bo); struct xe_res_cursor 
src_it, dst_it, ccs_it; u64 src_L0_ofs, dst_L0_ofs; u32 src_L0_pt, dst_L0_pt; @@ -791,7 +816,7 @@ struct dma_fence *xe_migrate_copy(struct xe_migrate *m, if (XE_WARN_ON(copy_ccs && src_bo != dst_bo)) return ERR_PTR(-EINVAL); - if (src_bo != dst_bo && XE_WARN_ON(src_bo->size != dst_bo->size)) + if (src_bo != dst_bo && XE_WARN_ON(xe_bo_size(src_bo) != xe_bo_size(dst_bo))) return ERR_PTR(-EINVAL); if (!src_is_vram) @@ -834,11 +859,15 @@ struct dma_fence *xe_migrate_copy(struct xe_migrate *m, batch_size += pte_update_size(m, pte_flags, src, &src_it, &src_L0, &src_L0_ofs, &src_L0_pt, 0, 0, avail_pts); - - pte_flags = dst_is_vram ? PTE_UPDATE_FLAG_IS_VRAM : 0; - batch_size += pte_update_size(m, pte_flags, dst, &dst_it, &src_L0, - &dst_L0_ofs, &dst_L0_pt, 0, - avail_pts, avail_pts); + if (copy_only_ccs) { + dst_L0_ofs = src_L0_ofs; + } else { + pte_flags = dst_is_vram ? PTE_UPDATE_FLAG_IS_VRAM : 0; + batch_size += pte_update_size(m, pte_flags, dst, + &dst_it, &src_L0, + &dst_L0_ofs, &dst_L0_pt, + 0, avail_pts, avail_pts); + } if (copy_system_ccs) { xe_assert(xe, type_device); @@ -863,12 +892,12 @@ struct dma_fence *xe_migrate_copy(struct xe_migrate *m, if (src_is_vram && xe_migrate_allow_identity(src_L0, &src_it)) xe_res_next(&src_it, src_L0); else - emit_pte(m, bb, src_L0_pt, src_is_vram, copy_system_ccs, + emit_pte(m, bb, src_L0_pt, src_is_vram, copy_system_ccs || use_comp_pat, &src_it, src_L0, src); if (dst_is_vram && xe_migrate_allow_identity(src_L0, &dst_it)) xe_res_next(&dst_it, src_L0); - else + else if (!copy_only_ccs) emit_pte(m, bb, dst_L0_pt, dst_is_vram, copy_system_ccs, &dst_it, src_L0, dst); @@ -896,11 +925,11 @@ struct dma_fence *xe_migrate_copy(struct xe_migrate *m, goto err; } - xe_sched_job_add_migrate_flush(job, flush_flags); + xe_sched_job_add_migrate_flush(job, flush_flags | MI_INVALIDATE_TLB); if (!fence) { err = xe_sched_job_add_deps(job, src_bo->ttm.base.resv, DMA_RESV_USAGE_BOOKKEEP); - if (!err && src_bo != dst_bo) + if (!err && src_bo->ttm.base.resv != dst_bo->ttm.base.resv) err = xe_sched_job_add_deps(job, dst_bo->ttm.base.resv, DMA_RESV_USAGE_BOOKKEEP); if (err) @@ -940,6 +969,167 @@ err_sync: return fence; } +/** + * xe_migrate_lrc() - Get the LRC from migrate context. + * @migrate: Migrate context. + * + * Return: Pointer to LRC on success, error on failure + */ +struct xe_lrc *xe_migrate_lrc(struct xe_migrate *migrate) +{ + return migrate->q->lrc[0]; +} + +static int emit_flush_invalidate(struct xe_exec_queue *q, u32 *dw, int i, + u32 flags) +{ + struct xe_lrc *lrc = xe_exec_queue_lrc(q); + dw[i++] = MI_FLUSH_DW | MI_INVALIDATE_TLB | MI_FLUSH_DW_OP_STOREDW | + MI_FLUSH_IMM_DW | flags; + dw[i++] = lower_32_bits(xe_lrc_start_seqno_ggtt_addr(lrc)) | + MI_FLUSH_DW_USE_GTT; + dw[i++] = upper_32_bits(xe_lrc_start_seqno_ggtt_addr(lrc)); + dw[i++] = MI_NOOP; + dw[i++] = MI_NOOP; + + return i; +} + +/** + * xe_migrate_ccs_rw_copy() - Copy content of TTM resources. + * @tile: Tile whose migration context is to be used. + * @q: Execution queue to be used along with the migration context. + * @src_bo: The buffer object whose CCS metadata is to be copied. + * @read_write: Whether to create BB commands for the CCS read or write context. + * + * Creates batch buffer instructions to copy CCS metadata from CCS pool to + * memory and vice versa. + * + * This function should only be called for IGPU. + * + * Return: 0 if successful, negative error code on failure.
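To show how the pieces fit together, here is a sketch of a caller building both the CCS read and CCS write batch buffers for one buffer object; xe_migrate_ccs_rw_copy() stashes each resulting BB in src_bo->bb_ccs[read_write]. The helper example_ccs_queue() is a hypothetical stand-in for however the caller obtains the exec queue for a given context and is not an API added by this patch.

/* Illustrative sketch only. */
static int example_build_ccs_bbs(struct xe_tile *tile, struct xe_bo *bo)
{
	struct xe_exec_queue *q;
	int err;

	q = example_ccs_queue(tile, XE_SRIOV_VF_CCS_READ_CTX);	/* hypothetical helper */
	err = xe_migrate_ccs_rw_copy(tile, q, bo, XE_SRIOV_VF_CCS_READ_CTX);
	if (err)
		return err;

	q = example_ccs_queue(tile, XE_SRIOV_VF_CCS_WRITE_CTX);	/* hypothetical helper */
	return xe_migrate_ccs_rw_copy(tile, q, bo, XE_SRIOV_VF_CCS_WRITE_CTX);
}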
+ */ +int xe_migrate_ccs_rw_copy(struct xe_tile *tile, struct xe_exec_queue *q, + struct xe_bo *src_bo, + enum xe_sriov_vf_ccs_rw_ctxs read_write) + +{ + bool src_is_pltt = read_write == XE_SRIOV_VF_CCS_READ_CTX; + bool dst_is_pltt = read_write == XE_SRIOV_VF_CCS_WRITE_CTX; + struct ttm_resource *src = src_bo->ttm.resource; + struct xe_migrate *m = tile->migrate; + struct xe_gt *gt = tile->primary_gt; + u32 batch_size, batch_size_allocated; + struct xe_device *xe = gt_to_xe(gt); + struct xe_res_cursor src_it, ccs_it; + u64 size = xe_bo_size(src_bo); + struct xe_bb *bb = NULL; + u64 src_L0, src_L0_ofs; + u32 src_L0_pt; + int err; + + xe_res_first_sg(xe_bo_sg(src_bo), 0, size, &src_it); + + xe_res_first_sg(xe_bo_sg(src_bo), xe_bo_ccs_pages_start(src_bo), + PAGE_ALIGN(xe_device_ccs_bytes(xe, size)), + &ccs_it); + + /* Calculate Batch buffer size */ + batch_size = 0; + while (size) { + batch_size += 10; /* Flush + ggtt addr + 2 NOP */ + u64 ccs_ofs, ccs_size; + u32 ccs_pt; + + u32 avail_pts = max_mem_transfer_per_pass(xe) / LEVEL0_PAGE_TABLE_ENCODE_SIZE; + + src_L0 = min_t(u64, max_mem_transfer_per_pass(xe), size); + + batch_size += pte_update_size(m, false, src, &src_it, &src_L0, + &src_L0_ofs, &src_L0_pt, 0, 0, + avail_pts); + + ccs_size = xe_device_ccs_bytes(xe, src_L0); + batch_size += pte_update_size(m, 0, NULL, &ccs_it, &ccs_size, &ccs_ofs, + &ccs_pt, 0, avail_pts, avail_pts); + xe_assert(xe, IS_ALIGNED(ccs_it.start, PAGE_SIZE)); + + /* Add copy commands size here */ + batch_size += EMIT_COPY_CCS_DW; + + size -= src_L0; + } + + bb = xe_bb_ccs_new(gt, batch_size, read_write); + if (IS_ERR(bb)) { + drm_err(&xe->drm, "BB allocation failed.\n"); + err = PTR_ERR(bb); + goto err_ret; + } + + batch_size_allocated = batch_size; + size = xe_bo_size(src_bo); + batch_size = 0; + + /* + * Emit PTE and copy commands here. + * The CCS copy command can only support limited size. If the size to be + * copied is more than the limit, divide copy into chunks. So, calculate + * sizes here again before copy command is emitted. + */ + while (size) { + batch_size += 10; /* Flush + ggtt addr + 2 NOP */ + u32 flush_flags = 0; + u64 ccs_ofs, ccs_size; + u32 ccs_pt; + + u32 avail_pts = max_mem_transfer_per_pass(xe) / LEVEL0_PAGE_TABLE_ENCODE_SIZE; + + src_L0 = xe_migrate_res_sizes(m, &src_it); + + batch_size += pte_update_size(m, false, src, &src_it, &src_L0, + &src_L0_ofs, &src_L0_pt, 0, 0, + avail_pts); + + ccs_size = xe_device_ccs_bytes(xe, src_L0); + batch_size += pte_update_size(m, 0, NULL, &ccs_it, &ccs_size, &ccs_ofs, + &ccs_pt, 0, avail_pts, avail_pts); + xe_assert(xe, IS_ALIGNED(ccs_it.start, PAGE_SIZE)); + batch_size += EMIT_COPY_CCS_DW; + + emit_pte(m, bb, src_L0_pt, false, true, &src_it, src_L0, src); + + emit_pte(m, bb, ccs_pt, false, false, &ccs_it, ccs_size, src); + + bb->len = emit_flush_invalidate(q, bb->cs, bb->len, flush_flags); + flush_flags = xe_migrate_ccs_copy(m, bb, src_L0_ofs, src_is_pltt, + src_L0_ofs, dst_is_pltt, + src_L0, ccs_ofs, true); + bb->len = emit_flush_invalidate(q, bb->cs, bb->len, flush_flags); + + size -= src_L0; + } + + xe_assert(xe, (batch_size_allocated == bb->len)); + src_bo->bb_ccs[read_write] = bb; + + return 0; + +err_ret: + return err; +} + +/** + * xe_get_migrate_exec_queue() - Get the execution queue from migrate context. + * @migrate: Migrate context. 
+ * + * Return: Pointer to execution queue on success, error on failure + */ +struct xe_exec_queue *xe_migrate_exec_queue(struct xe_migrate *migrate) +{ + return migrate->q; +} + static void emit_clear_link_copy(struct xe_gt *gt, struct xe_bb *bb, u64 src_ofs, u32 size, u32 pitch) { @@ -1064,7 +1254,7 @@ struct dma_fence *xe_migrate_clear(struct xe_migrate *m, struct xe_device *xe = gt_to_xe(gt); bool clear_only_system_ccs = false; struct dma_fence *fence = NULL; - u64 size = bo->size; + u64 size = xe_bo_size(bo); struct xe_res_cursor src_it; struct ttm_resource *src = dst; int err; @@ -1076,9 +1266,9 @@ struct dma_fence *xe_migrate_clear(struct xe_migrate *m, clear_only_system_ccs = true; if (!clear_vram) - xe_res_first_sg(xe_bo_sg(bo), 0, bo->size, &src_it); + xe_res_first_sg(xe_bo_sg(bo), 0, xe_bo_size(bo), &src_it); else - xe_res_first(src, 0, bo->size, &src_it); + xe_res_first(src, 0, xe_bo_size(bo), &src_it); while (size) { u64 clear_L0_ofs; @@ -1119,11 +1309,13 @@ struct dma_fence *xe_migrate_clear(struct xe_migrate *m, size -= clear_L0; /* Preemption is enabled again by the ring ops. */ - if (clear_vram && xe_migrate_allow_identity(clear_L0, &src_it)) + if (clear_vram && xe_migrate_allow_identity(clear_L0, &src_it)) { xe_res_next(&src_it, clear_L0); - else - emit_pte(m, bb, clear_L0_pt, clear_vram, clear_only_system_ccs, - &src_it, clear_L0, dst); + } else { + emit_pte(m, bb, clear_L0_pt, clear_vram, + clear_only_system_ccs, &src_it, clear_L0, dst); + flush_flags |= MI_INVALIDATE_TLB; + } bb->cs[bb->len++] = MI_BATCH_BUFFER_END; update_idx = bb->len; @@ -1134,7 +1326,7 @@ struct dma_fence *xe_migrate_clear(struct xe_migrate *m, if (xe_migrate_needs_ccs_emit(xe)) { emit_copy_ccs(gt, bb, clear_L0_ofs, true, m->cleared_mem_ofs, false, clear_L0); - flush_flags = MI_FLUSH_DW_CCS; + flush_flags |= MI_FLUSH_DW_CCS; } job = xe_bb_create_migration_job(m->q, bb, @@ -1407,7 +1599,7 @@ __xe_migrate_update_pgtables(struct xe_migrate *m, if (idx == chunk) goto next_cmd; - xe_tile_assert(tile, pt_bo->size == SZ_4K); + xe_tile_assert(tile, xe_bo_size(pt_bo) == SZ_4K); /* Map a PT at most once */ if (pt_bo->update_index < 0) @@ -1469,6 +1661,8 @@ next_cmd: goto err_sa; } + xe_sched_job_add_migrate_flush(job, MI_INVALIDATE_TLB); + if (ops->pre_commit) { pt_update->job = job; err = ops->pre_commit(pt_update); @@ -1553,15 +1747,17 @@ static u32 pte_update_cmd_size(u64 size) u64 entries = DIV_U64_ROUND_UP(size, XE_PAGE_SIZE); XE_WARN_ON(size > MAX_PREEMPTDISABLE_TRANSFER); + /* * MI_STORE_DATA_IMM command is used to update page table. Each - * instruction can update maximumly 0x1ff pte entries. To update - * n (n <= 0x1ff) pte entries, we need: - * 1 dword for the MI_STORE_DATA_IMM command header (opcode etc) - * 2 dword for the page table's physical location - * 2*n dword for value of pte to fill (each pte entry is 2 dwords) + * instruction can update maximumly MAX_PTE_PER_SDI pte entries. 
To + * update n (n <= MAX_PTE_PER_SDI) pte entries, we need: + * + * - 1 dword for the MI_STORE_DATA_IMM command header (opcode etc) + * - 2 dword for the page table's physical location + * - 2*n dword for value of pte to fill (each pte entry is 2 dwords) */ - num_dword = (1 + 2) * DIV_U64_ROUND_UP(entries, 0x1ff); + num_dword = (1 + 2) * DIV_U64_ROUND_UP(entries, MAX_PTE_PER_SDI); num_dword += entries * 2; return num_dword; @@ -1569,7 +1765,8 @@ static u32 pte_update_cmd_size(u64 size) static void build_pt_update_batch_sram(struct xe_migrate *m, struct xe_bb *bb, u32 pt_offset, - dma_addr_t *sram_addr, u32 size) + struct drm_pagemap_addr *sram_addr, + u32 size) { u16 pat_index = tile_to_xe(m->tile)->pat.idx[XE_CACHE_WB]; u32 ptes; @@ -1577,7 +1774,7 @@ static void build_pt_update_batch_sram(struct xe_migrate *m, ptes = DIV_ROUND_UP(size, XE_PAGE_SIZE); while (ptes) { - u32 chunk = min(0x1ffU, ptes); + u32 chunk = min(MAX_PTE_PER_SDI, ptes); bb->cs[bb->len++] = MI_STORE_DATA_IMM | MI_SDI_NUM_QW(chunk); bb->cs[bb->len++] = pt_offset; @@ -1587,14 +1784,18 @@ static void build_pt_update_batch_sram(struct xe_migrate *m, ptes -= chunk; while (chunk--) { - u64 addr = sram_addr[i++] & PAGE_MASK; + u64 addr = sram_addr[i].addr & PAGE_MASK; + xe_tile_assert(m->tile, sram_addr[i].proto == + DRM_INTERCONNECT_SYSTEM); xe_tile_assert(m->tile, addr); addr = m->q->vm->pt_ops->pte_encode_addr(m->tile->xe, addr, pat_index, 0, false, 0); bb->cs[bb->len++] = lower_32_bits(addr); bb->cs[bb->len++] = upper_32_bits(addr); + + i++; } } } @@ -1610,7 +1811,8 @@ enum xe_migrate_copy_dir { static struct dma_fence *xe_migrate_vram(struct xe_migrate *m, unsigned long len, unsigned long sram_offset, - dma_addr_t *sram_addr, u64 vram_addr, + struct drm_pagemap_addr *sram_addr, + u64 vram_addr, const enum xe_migrate_copy_dir dir) { struct xe_gt *gt = m->tile->primary_gt; @@ -1626,6 +1828,7 @@ static struct dma_fence *xe_migrate_vram(struct xe_migrate *m, unsigned int pitch = len >= PAGE_SIZE && !(len & ~PAGE_MASK) ? PAGE_SIZE : 4; int err; + unsigned long i, j; if (drm_WARN_ON(&xe->drm, (len & XE_CACHELINE_MASK) || (sram_offset | vram_addr) & XE_CACHELINE_MASK)) @@ -1642,6 +1845,24 @@ static struct dma_fence *xe_migrate_vram(struct xe_migrate *m, return ERR_PTR(err); } + /* + * If the order of a struct drm_pagemap_addr entry is greater than 0, + * the entry is populated by GPU pagemap but subsequent entries within + * the range of that order are not populated. + * build_pt_update_batch_sram() expects a fully populated array of + * struct drm_pagemap_addr. Ensure this is the case even with higher + * orders. + */ + for (i = 0; i < npages;) { + unsigned int order = sram_addr[i].order; + + for (j = 1; j < NR_PAGES(order) && i + j < npages; j++) + if (!sram_addr[i + j].addr) + sram_addr[i + j].addr = sram_addr[i].addr + j * PAGE_SIZE; + + i += NR_PAGES(order); + } + build_pt_update_batch_sram(m, bb, pt_slot * XE_PAGE_SIZE, sram_addr, len + sram_offset); @@ -1667,7 +1888,7 @@ static struct dma_fence *xe_migrate_vram(struct xe_migrate *m, goto err; } - xe_sched_job_add_migrate_flush(job, 0); + xe_sched_job_add_migrate_flush(job, MI_INVALIDATE_TLB); mutex_lock(&m->job_mutex); xe_sched_job_arm(job); @@ -1692,7 +1913,7 @@ err: * xe_migrate_to_vram() - Migrate to VRAM * @m: The migration context. * @npages: Number of pages to migrate. 
- * @src_addr: Array of dma addresses (source of migrate) + * @src_addr: Array of DMA information (source of migrate) * @dst_addr: Device physical address of VRAM (destination of migrate) * * Copy from an array dma addresses to a VRAM device physical address @@ -1702,7 +1923,7 @@ err: */ struct dma_fence *xe_migrate_to_vram(struct xe_migrate *m, unsigned long npages, - dma_addr_t *src_addr, + struct drm_pagemap_addr *src_addr, u64 dst_addr) { return xe_migrate_vram(m, npages * PAGE_SIZE, 0, src_addr, dst_addr, @@ -1714,7 +1935,7 @@ struct dma_fence *xe_migrate_to_vram(struct xe_migrate *m, * @m: The migration context. * @npages: Number of pages to migrate. * @src_addr: Device physical address of VRAM (source of migrate) - * @dst_addr: Array of dma addresses (destination of migrate) + * @dst_addr: Array of DMA information (destination of migrate) * * Copy from a VRAM device physical address to an array dma addresses * @@ -1724,61 +1945,65 @@ struct dma_fence *xe_migrate_to_vram(struct xe_migrate *m, struct dma_fence *xe_migrate_from_vram(struct xe_migrate *m, unsigned long npages, u64 src_addr, - dma_addr_t *dst_addr) + struct drm_pagemap_addr *dst_addr) { return xe_migrate_vram(m, npages * PAGE_SIZE, 0, dst_addr, src_addr, XE_MIGRATE_COPY_TO_SRAM); } -static void xe_migrate_dma_unmap(struct xe_device *xe, dma_addr_t *dma_addr, +static void xe_migrate_dma_unmap(struct xe_device *xe, + struct drm_pagemap_addr *pagemap_addr, int len, int write) { unsigned long i, npages = DIV_ROUND_UP(len, PAGE_SIZE); for (i = 0; i < npages; ++i) { - if (!dma_addr[i]) + if (!pagemap_addr[i].addr) break; - dma_unmap_page(xe->drm.dev, dma_addr[i], PAGE_SIZE, + dma_unmap_page(xe->drm.dev, pagemap_addr[i].addr, PAGE_SIZE, write ? DMA_TO_DEVICE : DMA_FROM_DEVICE); } - kfree(dma_addr); + kfree(pagemap_addr); } -static dma_addr_t *xe_migrate_dma_map(struct xe_device *xe, - void *buf, int len, int write) +static struct drm_pagemap_addr *xe_migrate_dma_map(struct xe_device *xe, + void *buf, int len, + int write) { - dma_addr_t *dma_addr; + struct drm_pagemap_addr *pagemap_addr; unsigned long i, npages = DIV_ROUND_UP(len, PAGE_SIZE); - dma_addr = kcalloc(npages, sizeof(*dma_addr), GFP_KERNEL); - if (!dma_addr) + pagemap_addr = kcalloc(npages, sizeof(*pagemap_addr), GFP_KERNEL); + if (!pagemap_addr) return ERR_PTR(-ENOMEM); for (i = 0; i < npages; ++i) { dma_addr_t addr; struct page *page; + enum dma_data_direction dir = write ? DMA_TO_DEVICE : + DMA_FROM_DEVICE; if (is_vmalloc_addr(buf)) page = vmalloc_to_page(buf); else page = virt_to_page(buf); - addr = dma_map_page(xe->drm.dev, - page, 0, PAGE_SIZE, - write ? 
DMA_TO_DEVICE : - DMA_FROM_DEVICE); + addr = dma_map_page(xe->drm.dev, page, 0, PAGE_SIZE, dir); if (dma_mapping_error(xe->drm.dev, addr)) goto err_fault; - dma_addr[i] = addr; + pagemap_addr[i] = + drm_pagemap_addr_encode(addr, + DRM_INTERCONNECT_SYSTEM, + 0, dir); buf += PAGE_SIZE; } - return dma_addr; + return pagemap_addr; err_fault: - xe_migrate_dma_unmap(xe, dma_addr, len, write); + xe_migrate_dma_unmap(xe, pagemap_addr, len, write); return ERR_PTR(-EFAULT); } @@ -1807,7 +2032,7 @@ int xe_migrate_access_memory(struct xe_migrate *m, struct xe_bo *bo, struct xe_device *xe = tile_to_xe(tile); struct xe_res_cursor cursor; struct dma_fence *fence = NULL; - dma_addr_t *dma_addr; + struct drm_pagemap_addr *pagemap_addr; unsigned long page_offset = (unsigned long)buf & ~PAGE_MASK; int bytes_left = len, current_page = 0; void *orig_buf = buf; @@ -1815,18 +2040,22 @@ int xe_migrate_access_memory(struct xe_migrate *m, struct xe_bo *bo, xe_bo_assert_held(bo); /* Use bounce buffer for small access and unaligned access */ - if (len & XE_CACHELINE_MASK || - ((uintptr_t)buf | offset) & XE_CACHELINE_MASK) { + if (!IS_ALIGNED(len, XE_CACHELINE_BYTES) || + !IS_ALIGNED((unsigned long)buf + offset, XE_CACHELINE_BYTES)) { int buf_offset = 0; + void *bounce; + int err; + + BUILD_BUG_ON(!is_power_of_2(XE_CACHELINE_BYTES)); + bounce = kmalloc(XE_CACHELINE_BYTES, GFP_KERNEL); + if (!bounce) + return -ENOMEM; /* * Less than ideal for large unaligned access but this should be * fairly rare, can fixup if this becomes common. */ do { - u8 bounce[XE_CACHELINE_BYTES]; - void *ptr = (void *)bounce; - int err; int copy_bytes = min_t(int, bytes_left, XE_CACHELINE_BYTES - (offset & XE_CACHELINE_MASK)); @@ -1835,22 +2064,22 @@ int xe_migrate_access_memory(struct xe_migrate *m, struct xe_bo *bo, err = xe_migrate_access_memory(m, bo, offset & ~XE_CACHELINE_MASK, - (void *)ptr, - sizeof(bounce), 0); + bounce, + XE_CACHELINE_BYTES, 0); if (err) - return err; + break; if (write) { - memcpy(ptr + ptr_offset, buf + buf_offset, copy_bytes); + memcpy(bounce + ptr_offset, buf + buf_offset, copy_bytes); err = xe_migrate_access_memory(m, bo, offset & ~XE_CACHELINE_MASK, - (void *)ptr, - sizeof(bounce), 0); + bounce, + XE_CACHELINE_BYTES, write); if (err) - return err; + break; } else { - memcpy(buf + buf_offset, ptr + ptr_offset, + memcpy(buf + buf_offset, bounce + ptr_offset, copy_bytes); } @@ -1859,14 +2088,15 @@ int xe_migrate_access_memory(struct xe_migrate *m, struct xe_bo *bo, offset += copy_bytes; } while (bytes_left); - return 0; + kfree(bounce); + return err; } - dma_addr = xe_migrate_dma_map(xe, buf, len + page_offset, write); - if (IS_ERR(dma_addr)) - return PTR_ERR(dma_addr); + pagemap_addr = xe_migrate_dma_map(xe, buf, len + page_offset, write); + if (IS_ERR(pagemap_addr)) + return PTR_ERR(pagemap_addr); - xe_res_first(bo->ttm.resource, offset, bo->size - offset, &cursor); + xe_res_first(bo->ttm.resource, offset, xe_bo_size(bo) - offset, &cursor); do { struct dma_fence *__fence; @@ -1880,21 +2110,30 @@ int xe_migrate_access_memory(struct xe_migrate *m, struct xe_bo *bo, else current_bytes = min_t(int, bytes_left, cursor.size); - if (fence) - dma_fence_put(fence); + if (current_bytes & ~PAGE_MASK) { + int pitch = 4; + + current_bytes = min_t(int, current_bytes, + round_down(S16_MAX * pitch, + XE_CACHELINE_BYTES)); + } __fence = xe_migrate_vram(m, current_bytes, (unsigned long)buf & ~PAGE_MASK, - dma_addr + current_page, + &pagemap_addr[current_page], vram_addr, write ? 
XE_MIGRATE_COPY_TO_VRAM : XE_MIGRATE_COPY_TO_SRAM); if (IS_ERR(__fence)) { - if (fence) + if (fence) { dma_fence_wait(fence, false); + dma_fence_put(fence); + } fence = __fence; goto out_err; } + + dma_fence_put(fence); fence = __fence; buf += current_bytes; @@ -1909,10 +2148,46 @@ int xe_migrate_access_memory(struct xe_migrate *m, struct xe_bo *bo, dma_fence_put(fence); out_err: - xe_migrate_dma_unmap(xe, dma_addr, len + page_offset, write); + xe_migrate_dma_unmap(xe, pagemap_addr, len + page_offset, write); return IS_ERR(fence) ? PTR_ERR(fence) : 0; } +/** + * xe_migrate_job_lock() - Lock migrate job lock + * @m: The migration context. + * @q: Queue associated with the operation which requires a lock + * + * Lock the migrate job lock if the queue is a migration queue, otherwise + * assert the VM's dma-resv is held (user queue's have own locking). + */ +void xe_migrate_job_lock(struct xe_migrate *m, struct xe_exec_queue *q) +{ + bool is_migrate = q == m->q; + + if (is_migrate) + mutex_lock(&m->job_mutex); + else + xe_vm_assert_held(q->vm); /* User queues VM's should be locked */ +} + +/** + * xe_migrate_job_unlock() - Unlock migrate job lock + * @m: The migration context. + * @q: Queue associated with the operation which requires a lock + * + * Unlock the migrate job lock if the queue is a migration queue, otherwise + * assert the VM's dma-resv is held (user queue's have own locking). + */ +void xe_migrate_job_unlock(struct xe_migrate *m, struct xe_exec_queue *q) +{ + bool is_migrate = q == m->q; + + if (is_migrate) + mutex_unlock(&m->job_mutex); + else + xe_vm_assert_held(q->vm); /* User queues VM's should be locked */ +} + #if IS_ENABLED(CONFIG_DRM_XE_KUNIT_TEST) #include "tests/xe_migrate.c" #endif diff --git a/drivers/gpu/drm/xe/xe_migrate.h b/drivers/gpu/drm/xe/xe_migrate.h index fb9839c1bae0..4fad324b6253 100644 --- a/drivers/gpu/drm/xe/xe_migrate.h +++ b/drivers/gpu/drm/xe/xe_migrate.h @@ -9,11 +9,13 @@ #include <linux/types.h> struct dma_fence; +struct drm_pagemap_addr; struct iosys_map; struct ttm_resource; struct xe_bo; struct xe_gt; +struct xe_tlb_inval_job; struct xe_exec_queue; struct xe_migrate; struct xe_migrate_pt_update; @@ -24,6 +26,8 @@ struct xe_vm; struct xe_vm_pgtable_update; struct xe_vma; +enum xe_sriov_vf_ccs_rw_ctxs; + /** * struct xe_migrate_pt_update_ops - Callbacks for the * xe_migrate_update_pgtables() function. @@ -89,21 +93,30 @@ struct xe_migrate_pt_update { struct xe_vma_ops *vops; /** @job: The job if a GPU page-table update. NULL otherwise */ struct xe_sched_job *job; + /** + * @ijob: The TLB invalidation job for primary GT. NULL otherwise + */ + struct xe_tlb_inval_job *ijob; + /** + * @mjob: The TLB invalidation job for media GT. 
NULL otherwise + */ + struct xe_tlb_inval_job *mjob; /** @tile_id: Tile ID of the update */ u8 tile_id; }; -struct xe_migrate *xe_migrate_init(struct xe_tile *tile); +struct xe_migrate *xe_migrate_alloc(struct xe_tile *tile); +int xe_migrate_init(struct xe_migrate *m); struct dma_fence *xe_migrate_to_vram(struct xe_migrate *m, unsigned long npages, - dma_addr_t *src_addr, + struct drm_pagemap_addr *src_addr, u64 dst_addr); struct dma_fence *xe_migrate_from_vram(struct xe_migrate *m, unsigned long npages, u64 src_addr, - dma_addr_t *dst_addr); + struct drm_pagemap_addr *dst_addr); struct dma_fence *xe_migrate_copy(struct xe_migrate *m, struct xe_bo *src_bo, @@ -112,6 +125,12 @@ struct dma_fence *xe_migrate_copy(struct xe_migrate *m, struct ttm_resource *dst, bool copy_only_ccs); +int xe_migrate_ccs_rw_copy(struct xe_tile *tile, struct xe_exec_queue *q, + struct xe_bo *src_bo, + enum xe_sriov_vf_ccs_rw_ctxs read_write); + +struct xe_lrc *xe_migrate_lrc(struct xe_migrate *migrate); +struct xe_exec_queue *xe_migrate_exec_queue(struct xe_migrate *migrate); int xe_migrate_access_memory(struct xe_migrate *m, struct xe_bo *bo, unsigned long offset, void *buf, int len, int write); @@ -133,5 +152,7 @@ xe_migrate_update_pgtables(struct xe_migrate *m, void xe_migrate_wait(struct xe_migrate *m); -struct xe_exec_queue *xe_tile_migrate_exec_queue(struct xe_tile *tile); +void xe_migrate_job_lock(struct xe_migrate *m, struct xe_exec_queue *q); +void xe_migrate_job_unlock(struct xe_migrate *m, struct xe_exec_queue *q); + #endif diff --git a/drivers/gpu/drm/xe/xe_mmio.c b/drivers/gpu/drm/xe/xe_mmio.c index 7357458bc0d2..ef6f3ea573a2 100644 --- a/drivers/gpu/drm/xe/xe_mmio.c +++ b/drivers/gpu/drm/xe/xe_mmio.c @@ -22,6 +22,9 @@ #include "xe_macros.h" #include "xe_sriov.h" #include "xe_trace.h" +#include "xe_wa.h" + +#include "generated/xe_device_wa_oob.h" static void tiles_fini(void *arg) { @@ -64,35 +67,6 @@ static void mmio_multi_tile_setup(struct xe_device *xe, size_t tile_mmio_size) if (xe->info.tile_count == 1) return; - /* Possibly override number of tile based on configuration register */ - if (!xe->info.skip_mtcfg) { - struct xe_mmio *mmio = xe_root_tile_mmio(xe); - u8 tile_count; - u32 mtcfg; - - /* - * Although the per-tile mmio regs are not yet initialized, this - * is fine as it's going to the root tile's mmio, that's - * guaranteed to be initialized earlier in xe_mmio_probe_early() - */ - mtcfg = xe_mmio_read32(mmio, XEHP_MTCFG_ADDR); - tile_count = REG_FIELD_GET(TILE_COUNT, mtcfg) + 1; - - if (tile_count < xe->info.tile_count) { - drm_info(&xe->drm, "tile_count: %d, reduced_tile_count %d\n", - xe->info.tile_count, tile_count); - xe->info.tile_count = tile_count; - - /* - * FIXME: Needs some work for standalone media, but - * should be impossible with multi-tile for now: - * multi-tile platform with standalone media doesn't - * exist - */ - xe->info.gt_count = xe->info.tile_count; - } - } - for_each_remote_tile(tile, xe, id) xe_mmio_init(&tile->mmio, tile, xe->mmio.regs + id * tile_mmio_size, SZ_4M); } @@ -163,7 +137,7 @@ static void mmio_flush_pending_writes(struct xe_mmio *mmio) #define DUMMY_REG_OFFSET 0x130030 int i; - if (mmio->tile->xe->info.platform != XE_LUNARLAKE) + if (!XE_DEVICE_WA(mmio->tile->xe, 15015404425)) return; /* 4 dummy writes */ @@ -176,7 +150,6 @@ u8 xe_mmio_read8(struct xe_mmio *mmio, struct xe_reg reg) u32 addr = xe_mmio_adjusted_addr(mmio, reg.addr); u8 val; - /* Wa_15015404425 */ mmio_flush_pending_writes(mmio); val = readb(mmio->regs + addr); @@ -190,7 +163,6 @@ u16 
xe_mmio_read16(struct xe_mmio *mmio, struct xe_reg reg) u32 addr = xe_mmio_adjusted_addr(mmio, reg.addr); u16 val; - /* Wa_15015404425 */ mmio_flush_pending_writes(mmio); val = readw(mmio->regs + addr); @@ -217,7 +189,6 @@ u32 xe_mmio_read32(struct xe_mmio *mmio, struct xe_reg reg) u32 addr = xe_mmio_adjusted_addr(mmio, reg.addr); u32 val; - /* Wa_15015404425 */ mmio_flush_pending_writes(mmio); if (!reg.vf && IS_SRIOV_VF(mmio->tile->xe)) diff --git a/drivers/gpu/drm/xe/xe_mmio_gem.c b/drivers/gpu/drm/xe/xe_mmio_gem.c new file mode 100644 index 000000000000..9a97c4387e4f --- /dev/null +++ b/drivers/gpu/drm/xe/xe_mmio_gem.c @@ -0,0 +1,226 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2025 Intel Corporation + */ + +#include "xe_mmio_gem.h" + +#include <drm/drm_drv.h> +#include <drm/drm_gem.h> +#include <drm/drm_managed.h> + +#include "xe_device_types.h" + +/** + * DOC: Exposing MMIO regions to userspace + * + * In certain cases, the driver may allow userspace to mmap a portion of the hardware registers. + * + * This can be done as follows: + * 1. Call xe_mmio_gem_create() to create a GEM object with an mmap-able fake offset. + * 2. Use xe_mmio_gem_mmap_offset() on the created GEM object to retrieve the fake offset. + * 3. Provide the fake offset to userspace. + * 4. Userspace can call mmap with the fake offset. The length provided to mmap + * must match the size of the GEM object. + * 5. When the region is no longer needed, call xe_mmio_gem_destroy() to release the GEM object. + * + * NOTE: The exposed MMIO region must be page-aligned with regards to its BAR offset and size. + * + * WARNING: Exposing MMIO regions to userspace can have security and stability implications. + * Make sure not to expose any sensitive registers. + */ + +static void xe_mmio_gem_free(struct drm_gem_object *); +static int xe_mmio_gem_mmap(struct drm_gem_object *, struct vm_area_struct *); +static vm_fault_t xe_mmio_gem_vm_fault(struct vm_fault *); + +struct xe_mmio_gem { + struct drm_gem_object base; + phys_addr_t phys_addr; +}; + +static const struct vm_operations_struct vm_ops = { + .open = drm_gem_vm_open, + .close = drm_gem_vm_close, + .fault = xe_mmio_gem_vm_fault, +}; + +static const struct drm_gem_object_funcs xe_mmio_gem_funcs = { + .free = xe_mmio_gem_free, + .mmap = xe_mmio_gem_mmap, + .vm_ops = &vm_ops, +}; + +static inline struct xe_mmio_gem *to_xe_mmio_gem(struct drm_gem_object *obj) +{ + return container_of(obj, struct xe_mmio_gem, base); +} + +/** + * xe_mmio_gem_create - Expose an MMIO region to userspace + * @xe: The xe device + * @file: DRM file descriptor + * @phys_addr: Start of the exposed MMIO region + * @size: The size of the exposed MMIO region + * + * This function creates a GEM object that exposes an MMIO region with an mmap-able + * fake offset. 
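Putting the numbered steps of the DOC block above together, a sketch of the assumed kernel-side flow; example_expose_regs(), reg_phys and reg_size are illustrative placeholders rather than code from this patch.

/* Illustrative sketch: expose a page-aligned, non-sensitive register window. */
static int example_expose_regs(struct xe_device *xe, struct drm_file *file,
			       phys_addr_t reg_phys, size_t reg_size,
			       u64 *mmap_offset)
{
	struct xe_mmio_gem *gem;

	gem = xe_mmio_gem_create(xe, file, reg_phys, reg_size);	/* step 1 */
	if (IS_ERR(gem))
		return PTR_ERR(gem);

	*mmap_offset = xe_mmio_gem_mmap_offset(gem);	/* steps 2-3 */

	/*
	 * Step 4 happens in userspace: mmap(NULL, reg_size, PROT_READ | PROT_WRITE,
	 * MAP_SHARED, drm_fd, *mmap_offset), with length equal to the GEM size.
	 * Step 5, when the region is no longer needed: xe_mmio_gem_destroy(gem).
	 */
	return 0;
}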
+ * + * See: "Exposing MMIO regions to userspace" + */ +struct xe_mmio_gem *xe_mmio_gem_create(struct xe_device *xe, struct drm_file *file, + phys_addr_t phys_addr, size_t size) +{ + struct xe_mmio_gem *obj; + struct drm_gem_object *base; + int err; + + if ((phys_addr % PAGE_SIZE != 0) || (size % PAGE_SIZE != 0)) + return ERR_PTR(-EINVAL); + + obj = kzalloc(sizeof(*obj), GFP_KERNEL); + if (!obj) + return ERR_PTR(-ENOMEM); + + base = &obj->base; + base->funcs = &xe_mmio_gem_funcs; + obj->phys_addr = phys_addr; + + drm_gem_private_object_init(&xe->drm, base, size); + + err = drm_gem_create_mmap_offset(base); + if (err) + goto free_gem; + + err = drm_vma_node_allow(&base->vma_node, file); + if (err) + goto free_gem; + + return obj; + +free_gem: + xe_mmio_gem_free(base); + return ERR_PTR(err); +} + +/** + * xe_mmio_gem_mmap_offset - Return the mmap-able fake offset + * @gem: the GEM object created with xe_mmio_gem_create() + * + * This function returns the mmap-able fake offset allocated during + * xe_mmio_gem_create(). + * + * See: "Exposing MMIO regions to userspace" + */ +u64 xe_mmio_gem_mmap_offset(struct xe_mmio_gem *gem) +{ + return drm_vma_node_offset_addr(&gem->base.vma_node); +} + +static void xe_mmio_gem_free(struct drm_gem_object *base) +{ + struct xe_mmio_gem *obj = to_xe_mmio_gem(base); + + drm_gem_object_release(base); + kfree(obj); +} + +/** + * xe_mmio_gem_destroy - Destroy the GEM object that exposes an MMIO region + * @gem: the GEM object to destroy + * + * This function releases resources associated with the GEM object created by + * xe_mmio_gem_create(). + * + * See: "Exposing MMIO regions to userspace" + */ +void xe_mmio_gem_destroy(struct xe_mmio_gem *gem) +{ + xe_mmio_gem_free(&gem->base); +} + +static int xe_mmio_gem_mmap(struct drm_gem_object *base, struct vm_area_struct *vma) +{ + if (vma->vm_end - vma->vm_start != base->size) + return -EINVAL; + + if ((vma->vm_flags & VM_SHARED) == 0) + return -EINVAL; + + /* Set vm_pgoff (used as a fake buffer offset by DRM) to 0 */ + vma->vm_pgoff = 0; + vma->vm_page_prot = pgprot_noncached(vm_get_page_prot(vma->vm_flags)); + vm_flags_set(vma, VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP | + VM_DONTCOPY | VM_NORESERVE); + + /* Defer actual mapping to the fault handler. 
*/ + return 0; +} + +static void xe_mmio_gem_release_dummy_page(struct drm_device *dev, void *res) +{ + __free_page((struct page *)res); +} + +static vm_fault_t xe_mmio_gem_vm_fault_dummy_page(struct vm_area_struct *vma) +{ + struct drm_gem_object *base = vma->vm_private_data; + struct drm_device *dev = base->dev; + vm_fault_t ret = VM_FAULT_NOPAGE; + struct page *page; + unsigned long pfn; + unsigned long i; + + page = alloc_page(GFP_KERNEL | __GFP_ZERO); + if (!page) + return VM_FAULT_OOM; + + if (drmm_add_action_or_reset(dev, xe_mmio_gem_release_dummy_page, page)) + return VM_FAULT_OOM; + + pfn = page_to_pfn(page); + + /* Map the entire VMA to the same dummy page */ + for (i = 0; i < base->size; i += PAGE_SIZE) { + unsigned long addr = vma->vm_start + i; + + ret = vmf_insert_pfn(vma, addr, pfn); + if (ret & VM_FAULT_ERROR) + break; + } + + return ret; +} + +static vm_fault_t xe_mmio_gem_vm_fault(struct vm_fault *vmf) +{ + struct vm_area_struct *vma = vmf->vma; + struct drm_gem_object *base = vma->vm_private_data; + struct xe_mmio_gem *obj = to_xe_mmio_gem(base); + struct drm_device *dev = base->dev; + vm_fault_t ret = VM_FAULT_NOPAGE; + unsigned long i; + int idx; + + if (!drm_dev_enter(dev, &idx)) { + /* + * Provide a dummy page to avoid SIGBUS for events such as hot-unplug. + * This gives the userspace the option to recover instead of crashing. + * It is assumed the userspace will receive the notification via some + * other channel (e.g. drm uevent). + */ + return xe_mmio_gem_vm_fault_dummy_page(vma); + } + + for (i = 0; i < base->size; i += PAGE_SIZE) { + unsigned long addr = vma->vm_start + i; + unsigned long phys_addr = obj->phys_addr + i; + + ret = vmf_insert_pfn(vma, addr, PHYS_PFN(phys_addr)); + if (ret & VM_FAULT_ERROR) + break; + } + + drm_dev_exit(idx); + return ret; +} diff --git a/drivers/gpu/drm/xe/xe_mmio_gem.h b/drivers/gpu/drm/xe/xe_mmio_gem.h new file mode 100644 index 000000000000..4b76d5586ebb --- /dev/null +++ b/drivers/gpu/drm/xe/xe_mmio_gem.h @@ -0,0 +1,20 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2025 Intel Corporation + */ + +#ifndef _XE_MMIO_GEM_H_ +#define _XE_MMIO_GEM_H_ + +#include <linux/types.h> + +struct drm_file; +struct xe_device; +struct xe_mmio_gem; + +struct xe_mmio_gem *xe_mmio_gem_create(struct xe_device *xe, struct drm_file *file, + phys_addr_t phys_addr, size_t size); +u64 xe_mmio_gem_mmap_offset(struct xe_mmio_gem *gem); +void xe_mmio_gem_destroy(struct xe_mmio_gem *gem); + +#endif /* _XE_MMIO_GEM_H_ */ diff --git a/drivers/gpu/drm/xe/xe_module.c b/drivers/gpu/drm/xe/xe_module.c index e332f3142435..d08338fc3bc1 100644 --- a/drivers/gpu/drm/xe/xe_module.c +++ b/drivers/gpu/drm/xe/xe_module.c @@ -19,31 +19,45 @@ #include "xe_sched_job.h" #if IS_ENABLED(CONFIG_DRM_XE_DEBUG) -#define DEFAULT_GUC_LOG_LEVEL 3 +#define DEFAULT_GUC_LOG_LEVEL 3 #else -#define DEFAULT_GUC_LOG_LEVEL 1 +#define DEFAULT_GUC_LOG_LEVEL 1 #endif +#define DEFAULT_PROBE_DISPLAY true +#define DEFAULT_VRAM_BAR_SIZE 0 +#define DEFAULT_FORCE_PROBE CONFIG_DRM_XE_FORCE_PROBE +#define DEFAULT_MAX_VFS ~0 +#define DEFAULT_MAX_VFS_STR "unlimited" +#define DEFAULT_WEDGED_MODE 1 +#define DEFAULT_SVM_NOTIFIER_SIZE 512 + struct xe_modparam xe_modparam = { - .probe_display = true, - .guc_log_level = DEFAULT_GUC_LOG_LEVEL, - .force_probe = CONFIG_DRM_XE_FORCE_PROBE, - .wedged_mode = 1, - .svm_notifier_size = 512, + .probe_display = DEFAULT_PROBE_DISPLAY, + .guc_log_level = DEFAULT_GUC_LOG_LEVEL, + .force_probe = DEFAULT_FORCE_PROBE, +#ifdef CONFIG_PCI_IOV + .max_vfs = 
DEFAULT_MAX_VFS, +#endif + .wedged_mode = DEFAULT_WEDGED_MODE, + .svm_notifier_size = DEFAULT_SVM_NOTIFIER_SIZE, /* the rest are 0 by default */ }; module_param_named(svm_notifier_size, xe_modparam.svm_notifier_size, uint, 0600); -MODULE_PARM_DESC(svm_notifier_size, "Set the svm notifier size(in MiB), must be power of 2"); +MODULE_PARM_DESC(svm_notifier_size, "Set the svm notifier size in MiB, must be power of 2 " + "[default=" __stringify(DEFAULT_SVM_NOTIFIER_SIZE) "]"); module_param_named_unsafe(force_execlist, xe_modparam.force_execlist, bool, 0444); MODULE_PARM_DESC(force_execlist, "Force Execlist submission"); module_param_named(probe_display, xe_modparam.probe_display, bool, 0444); -MODULE_PARM_DESC(probe_display, "Probe display HW, otherwise it's left untouched (default: true)"); +MODULE_PARM_DESC(probe_display, "Probe display HW, otherwise it's left untouched " + "[default=" __stringify(DEFAULT_PROBE_DISPLAY) "])"); module_param_named(vram_bar_size, xe_modparam.force_vram_bar_size, int, 0600); -MODULE_PARM_DESC(vram_bar_size, "Set the vram bar size (in MiB) - <0=disable-resize, 0=max-needed-size[default], >0=force-size"); +MODULE_PARM_DESC(vram_bar_size, "Set the vram bar size in MiB (<0=disable-resize, 0=max-needed-size, >0=force-size " + "[default=" __stringify(DEFAULT_VRAM_BAR_SIZE) "])"); module_param_named(guc_log_level, xe_modparam.guc_log_level, int, 0600); MODULE_PARM_DESC(guc_log_level, "GuC firmware logging level (0=disable, 1=normal, 2..5=verbose-levels " @@ -63,18 +77,21 @@ MODULE_PARM_DESC(gsc_firmware_path, module_param_named_unsafe(force_probe, xe_modparam.force_probe, charp, 0400); MODULE_PARM_DESC(force_probe, - "Force probe options for specified devices. See CONFIG_DRM_XE_FORCE_PROBE for details."); + "Force probe options for specified devices. See CONFIG_DRM_XE_FORCE_PROBE for details " + "[default=" DEFAULT_FORCE_PROBE "])"); #ifdef CONFIG_PCI_IOV module_param_named(max_vfs, xe_modparam.max_vfs, uint, 0400); MODULE_PARM_DESC(max_vfs, "Limit number of Virtual Functions (VFs) that could be managed. 
" - "(0 = no VFs [default]; N = allow up to N VFs)"); + "(0=no VFs; N=allow up to N VFs " + "[default=" DEFAULT_MAX_VFS_STR "])"); #endif module_param_named_unsafe(wedged_mode, xe_modparam.wedged_mode, int, 0600); MODULE_PARM_DESC(wedged_mode, - "Module's default policy for the wedged mode - 0=never, 1=upon-critical-errors[default], 2=upon-any-hang"); + "Module's default policy for the wedged mode (0=never, 1=upon-critical-errors, 2=upon-any-hang " + "[default=" __stringify(DEFAULT_WEDGED_MODE) "])"); static int xe_check_nomodeset(void) { @@ -118,24 +135,17 @@ static const struct init_funcs init_funcs[] = { }, }; -static int __init xe_call_init_func(unsigned int i) +static int __init xe_call_init_func(const struct init_funcs *func) { - if (WARN_ON(i >= ARRAY_SIZE(init_funcs))) - return 0; - if (!init_funcs[i].init) - return 0; - - return init_funcs[i].init(); + if (func->init) + return func->init(); + return 0; } -static void xe_call_exit_func(unsigned int i) +static void xe_call_exit_func(const struct init_funcs *func) { - if (WARN_ON(i >= ARRAY_SIZE(init_funcs))) - return; - if (!init_funcs[i].exit) - return; - - init_funcs[i].exit(); + if (func->exit) + func->exit(); } static int __init xe_init(void) @@ -143,10 +153,12 @@ static int __init xe_init(void) int err, i; for (i = 0; i < ARRAY_SIZE(init_funcs); i++) { - err = xe_call_init_func(i); + err = xe_call_init_func(init_funcs + i); if (err) { + pr_info("%s: module_init aborted at %ps %pe\n", + DRIVER_NAME, init_funcs[i].init, ERR_PTR(err)); while (i--) - xe_call_exit_func(i); + xe_call_exit_func(init_funcs + i); return err; } } @@ -159,7 +171,7 @@ static void __exit xe_exit(void) int i; for (i = ARRAY_SIZE(init_funcs) - 1; i >= 0; i--) - xe_call_exit_func(i); + xe_call_exit_func(init_funcs + i); } module_init(xe_init); diff --git a/drivers/gpu/drm/xe/xe_nvm.c b/drivers/gpu/drm/xe/xe_nvm.c new file mode 100644 index 000000000000..33f4ac82fc80 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_nvm.c @@ -0,0 +1,170 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright(c) 2019-2025, Intel Corporation. All rights reserved. 
+ */ + +#include <linux/intel_dg_nvm_aux.h> +#include <linux/pci.h> + +#include "xe_device.h" +#include "xe_device_types.h" +#include "xe_mmio.h" +#include "xe_nvm.h" +#include "regs/xe_gsc_regs.h" +#include "xe_sriov.h" + +#define GEN12_GUNIT_NVM_BASE 0x00102040 +#define GEN12_DEBUG_NVM_BASE 0x00101018 + +#define GEN12_CNTL_PROTECTED_NVM_REG 0x0010100C + +#define GEN12_GUNIT_NVM_SIZE 0x80 +#define GEN12_DEBUG_NVM_SIZE 0x4 + +#define NVM_NON_POSTED_ERASE_CHICKEN_BIT BIT(13) + +#define HECI_FW_STATUS_2_NVM_ACCESS_MODE BIT(3) + +static const struct intel_dg_nvm_region regions[INTEL_DG_NVM_REGIONS] = { + [0] = { .name = "DESCRIPTOR", }, + [2] = { .name = "GSC", }, + [9] = { .name = "PADDING", }, + [11] = { .name = "OptionROM", }, + [12] = { .name = "DAM", }, +}; + +static void xe_nvm_release_dev(struct device *dev) +{ + struct auxiliary_device *aux = container_of(dev, struct auxiliary_device, dev); + struct intel_dg_nvm_dev *nvm = container_of(aux, struct intel_dg_nvm_dev, aux_dev); + + kfree(nvm); +} + +static bool xe_nvm_non_posted_erase(struct xe_device *xe) +{ + struct xe_mmio *mmio = xe_root_tile_mmio(xe); + + if (xe->info.platform != XE_BATTLEMAGE) + return false; + return !(xe_mmio_read32(mmio, XE_REG(GEN12_CNTL_PROTECTED_NVM_REG)) & + NVM_NON_POSTED_ERASE_CHICKEN_BIT); +} + +static bool xe_nvm_writable_override(struct xe_device *xe) +{ + struct xe_mmio *mmio = xe_root_tile_mmio(xe); + bool writable_override; + resource_size_t base; + + switch (xe->info.platform) { + case XE_BATTLEMAGE: + base = DG2_GSC_HECI2_BASE; + break; + case XE_PVC: + base = PVC_GSC_HECI2_BASE; + break; + case XE_DG2: + base = DG2_GSC_HECI2_BASE; + break; + case XE_DG1: + base = DG1_GSC_HECI2_BASE; + break; + default: + drm_err(&xe->drm, "Unknown platform\n"); + return true; + } + + writable_override = + !(xe_mmio_read32(mmio, HECI_FWSTS2(base)) & + HECI_FW_STATUS_2_NVM_ACCESS_MODE); + if (writable_override) + drm_info(&xe->drm, "NVM access overridden by jumper\n"); + return writable_override; +} + +int xe_nvm_init(struct xe_device *xe) +{ + struct pci_dev *pdev = to_pci_dev(xe->drm.dev); + struct auxiliary_device *aux_dev; + struct intel_dg_nvm_dev *nvm; + int ret; + + if (!xe->info.has_gsc_nvm) + return 0; + + /* No access to internal NVM from VFs */ + if (IS_SRIOV_VF(xe)) + return 0; + + /* Nvm pointer should be NULL here */ + if (WARN_ON(xe->nvm)) + return -EFAULT; + + xe->nvm = kzalloc(sizeof(*nvm), GFP_KERNEL); + if (!xe->nvm) + return -ENOMEM; + + nvm = xe->nvm; + + nvm->writable_override = xe_nvm_writable_override(xe); + nvm->non_posted_erase = xe_nvm_non_posted_erase(xe); + nvm->bar.parent = &pdev->resource[0]; + nvm->bar.start = GEN12_GUNIT_NVM_BASE + pdev->resource[0].start; + nvm->bar.end = nvm->bar.start + GEN12_GUNIT_NVM_SIZE - 1; + nvm->bar.flags = IORESOURCE_MEM; + nvm->bar.desc = IORES_DESC_NONE; + nvm->regions = regions; + + nvm->bar2.parent = &pdev->resource[0]; + nvm->bar2.start = GEN12_DEBUG_NVM_BASE + pdev->resource[0].start; + nvm->bar2.end = nvm->bar2.start + GEN12_DEBUG_NVM_SIZE - 1; + nvm->bar2.flags = IORESOURCE_MEM; + nvm->bar2.desc = IORES_DESC_NONE; + + aux_dev = &nvm->aux_dev; + + aux_dev->name = "nvm"; + aux_dev->id = (pci_domain_nr(pdev->bus) << 16) | pci_dev_id(pdev); + aux_dev->dev.parent = &pdev->dev; + aux_dev->dev.release = xe_nvm_release_dev; + + ret = auxiliary_device_init(aux_dev); + if (ret) { + drm_err(&xe->drm, "xe-nvm aux init failed %d\n", ret); + goto err; + } + + ret = auxiliary_device_add(aux_dev); + if (ret) { + drm_err(&xe->drm, "xe-nvm aux add failed %d\n", 
ret); + auxiliary_device_uninit(aux_dev); + goto err; + } + return 0; + +err: + kfree(nvm); + xe->nvm = NULL; + return ret; +} + +void xe_nvm_fini(struct xe_device *xe) +{ + struct intel_dg_nvm_dev *nvm = xe->nvm; + + if (!xe->info.has_gsc_nvm) + return; + + /* No access to internal NVM from VFs */ + if (IS_SRIOV_VF(xe)) + return; + + /* Nvm pointer should not be NULL here */ + if (WARN_ON(!nvm)) + return; + + auxiliary_device_delete(&nvm->aux_dev); + auxiliary_device_uninit(&nvm->aux_dev); + xe->nvm = NULL; +} diff --git a/drivers/gpu/drm/xe/xe_nvm.h b/drivers/gpu/drm/xe/xe_nvm.h new file mode 100644 index 000000000000..7f3d5f57bed0 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_nvm.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright(c) 2019-2025 Intel Corporation. All rights reserved. + */ + +#ifndef __XE_NVM_H__ +#define __XE_NVM_H__ + +struct xe_device; + +int xe_nvm_init(struct xe_device *xe); + +void xe_nvm_fini(struct xe_device *xe); + +#endif diff --git a/drivers/gpu/drm/xe/xe_oa.c b/drivers/gpu/drm/xe/xe_oa.c index 4829ed46a8b4..a4894eb0d7f3 100644 --- a/drivers/gpu/drm/xe/xe_oa.c +++ b/drivers/gpu/drm/xe/xe_oa.c @@ -403,7 +403,7 @@ static int xe_oa_append_reports(struct xe_oa_stream *stream, char __user *buf, static void xe_oa_init_oa_buffer(struct xe_oa_stream *stream) { u32 gtt_offset = xe_bo_ggtt_addr(stream->oa_buffer.bo); - int size_exponent = __ffs(stream->oa_buffer.bo->size); + int size_exponent = __ffs(xe_bo_size(stream->oa_buffer.bo)); u32 oa_buf = gtt_offset | OAG_OABUFFER_MEMORY_SELECT; struct xe_mmio *mmio = &stream->gt->mmio; unsigned long flags; @@ -435,7 +435,7 @@ static void xe_oa_init_oa_buffer(struct xe_oa_stream *stream) spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags); /* Zero out the OA buffer since we rely on zero report id and timestamp fields */ - memset(stream->oa_buffer.vaddr, 0, stream->oa_buffer.bo->size); + memset(stream->oa_buffer.vaddr, 0, xe_bo_size(stream->oa_buffer.bo)); } static u32 __format_to_oactrl(const struct xe_oa_format *format, int counter_sel_mask) @@ -822,7 +822,7 @@ static void xe_oa_disable_metric_set(struct xe_oa_stream *stream) u32 sqcnt1; /* Enable thread stall DOP gating and EU DOP gating. */ - if (XE_WA(stream->gt, 1508761755)) { + if (XE_GT_WA(stream->gt, 1508761755)) { xe_gt_mcr_multicast_write(stream->gt, ROW_CHICKEN, _MASKED_BIT_DISABLE(STALL_DOP_GATING_DISABLE)); xe_gt_mcr_multicast_write(stream->gt, ROW_CHICKEN2, @@ -883,9 +883,9 @@ static int xe_oa_alloc_oa_buffer(struct xe_oa_stream *stream, size_t size) { struct xe_bo *bo; - bo = xe_bo_create_pin_map(stream->oa->xe, stream->gt->tile, NULL, - size, ttm_bo_type_kernel, - XE_BO_FLAG_SYSTEM | XE_BO_FLAG_GGTT); + bo = xe_bo_create_pin_map_novm(stream->oa->xe, stream->gt->tile, + size, ttm_bo_type_kernel, + XE_BO_FLAG_SYSTEM | XE_BO_FLAG_GGTT, false); if (IS_ERR(bo)) return PTR_ERR(bo); @@ -1065,7 +1065,7 @@ static u32 oag_report_ctx_switches(const struct xe_oa_stream *stream) static u32 oag_buf_size_select(const struct xe_oa_stream *stream) { return _MASKED_FIELD(OAG_OA_DEBUG_BUF_SIZE_SELECT, - stream->oa_buffer.bo->size > SZ_16M ? + xe_bo_size(stream->oa_buffer.bo) > SZ_16M ? OAG_OA_DEBUG_BUF_SIZE_SELECT : 0); } @@ -1079,7 +1079,7 @@ static int xe_oa_enable_metric_set(struct xe_oa_stream *stream) * EU NOA signals behave incorrectly if EU clock gating is enabled. * Disable thread stall DOP gating and EU DOP gating. 
*/ - if (XE_WA(stream->gt, 1508761755)) { + if (XE_GT_WA(stream->gt, 1508761755)) { xe_gt_mcr_multicast_write(stream->gt, ROW_CHICKEN, _MASKED_BIT_ENABLE(STALL_DOP_GATING_DISABLE)); xe_gt_mcr_multicast_write(stream->gt, ROW_CHICKEN2, @@ -1582,7 +1582,7 @@ static long xe_oa_status_locked(struct xe_oa_stream *stream, unsigned long arg) static long xe_oa_info_locked(struct xe_oa_stream *stream, unsigned long arg) { - struct drm_xe_oa_stream_info info = { .oa_buf_size = stream->oa_buffer.bo->size, }; + struct drm_xe_oa_stream_info info = { .oa_buf_size = xe_bo_size(stream->oa_buffer.bo), }; void __user *uaddr = (void __user *)arg; if (copy_to_user(uaddr, &info, sizeof(info))) @@ -1668,7 +1668,7 @@ static int xe_oa_mmap(struct file *file, struct vm_area_struct *vma) } /* Can mmap the entire OA buffer or nothing (no partial OA buffer mmaps) */ - if (vma->vm_end - vma->vm_start != stream->oa_buffer.bo->size) { + if (vma->vm_end - vma->vm_start != xe_bo_size(stream->oa_buffer.bo)) { drm_dbg(&stream->oa->xe->drm, "Wrong mmap size, must be OA buffer size\n"); return -EINVAL; } @@ -1754,7 +1754,7 @@ static int xe_oa_stream_init(struct xe_oa_stream *stream, * GuC reset of engines causes OA to lose configuration * state. Prevent this by overriding GUCRC mode. */ - if (XE_WA(stream->gt, 1509372804)) { + if (XE_GT_WA(stream->gt, 1509372804)) { ret = xe_guc_pc_override_gucrc_mode(>->uc.guc.pc, SLPC_GUCRC_MODE_GUCRC_NO_RC6); if (ret) @@ -1886,7 +1886,7 @@ u32 xe_oa_timestamp_frequency(struct xe_gt *gt) { u32 reg, shift; - if (XE_WA(gt, 18013179988) || XE_WA(gt, 14015568240)) { + if (XE_GT_WA(gt, 18013179988) || XE_GT_WA(gt, 14015568240)) { xe_pm_runtime_get(gt_to_xe(gt)); reg = xe_mmio_read32(>->mmio, RPM_CONFIG0); xe_pm_runtime_put(gt_to_xe(gt)); @@ -1941,7 +1941,7 @@ static int xe_oa_assign_hwe(struct xe_oa *oa, struct xe_oa_open_param *param) /* If not provided, OA unit defaults to OA unit 0 as per uapi */ if (!param->oa_unit) - param->oa_unit = &xe_device_get_gt(oa->xe, 0)->oa.oa_unit[0]; + param->oa_unit = &xe_root_mmio_gt(oa->xe)->oa.oa_unit[0]; /* When we have an exec_q, get hwe from the exec_q */ if (param->exec_q) { @@ -2493,7 +2493,7 @@ int xe_oa_register(struct xe_device *xe) static u32 num_oa_units_per_gt(struct xe_gt *gt) { - if (!xe_gt_is_media_type(gt) || GRAPHICS_VER(gt_to_xe(gt)) < 20) + if (xe_gt_is_main_type(gt) || GRAPHICS_VER(gt_to_xe(gt)) < 20) return 1; else if (!IS_DGFX(gt_to_xe(gt))) return XE_OAM_UNIT_SCMI_0 + 1; /* SAG + SCMI_0 */ @@ -2506,7 +2506,7 @@ static u32 __hwe_oam_unit(struct xe_hw_engine *hwe) if (GRAPHICS_VERx100(gt_to_xe(hwe->gt)) < 1270) return XE_OA_UNIT_INVALID; - xe_gt_WARN_ON(hwe->gt, !xe_gt_is_media_type(hwe->gt)); + xe_gt_WARN_ON(hwe->gt, xe_gt_is_main_type(hwe->gt)); if (GRAPHICS_VER(gt_to_xe(hwe->gt)) < 20) return 0; @@ -2589,7 +2589,7 @@ static void __xe_oa_init_oa_units(struct xe_gt *gt) for (i = 0; i < num_units; i++) { struct xe_oa_unit *u = >->oa.oa_unit[i]; - if (!xe_gt_is_media_type(gt)) { + if (xe_gt_is_main_type(gt)) { u->regs = __oag_regs(); u->type = DRM_XE_OA_UNIT_TYPE_OAG; } else { diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c index 89814b32e585..9a6df79fc5b6 100644 --- a/drivers/gpu/drm/xe/xe_pci.c +++ b/drivers/gpu/drm/xe/xe_pci.c @@ -17,6 +17,8 @@ #include "display/xe_display.h" #include "regs/xe_gt_regs.h" +#include "regs/xe_regs.h" +#include "xe_configfs.h" #include "xe_device.h" #include "xe_drv.h" #include "xe_gt.h" @@ -38,43 +40,6 @@ enum toggle_d3cold { D3COLD_ENABLE, }; -struct xe_subplatform_desc { - enum 
xe_subplatform subplatform; - const char *name; - const u16 *pciidlist; -}; - -struct xe_device_desc { - /* Should only ever be set for platforms without GMD_ID */ - const struct xe_ip *pre_gmdid_graphics_ip; - /* Should only ever be set for platforms without GMD_ID */ - const struct xe_ip *pre_gmdid_media_ip; - - const char *platform_name; - const struct xe_subplatform_desc *subplatforms; - - enum xe_platform platform; - - u8 dma_mask_size; - u8 max_remote_tiles:2; - - u8 require_force_probe:1; - u8 is_dgfx:1; - - u8 has_display:1; - u8 has_fan_control:1; - u8 has_heci_gscfi:1; - u8 has_heci_cscfi:1; - u8 has_llc:1; - u8 has_mbx_power_limits:1; - u8 has_pxp:1; - u8 has_sriov:1; - u8 needs_scratch:1; - u8 skip_guc_pc:1; - u8 skip_mtcfg:1; - u8 skip_pcode:1; -}; - __diag_push(); __diag_ignore_all("-Woverride-init", "Allow field overrides in table"); @@ -92,7 +57,7 @@ static const struct xe_graphics_desc graphics_xelp = { }; #define XE_HP_FEATURES \ - .has_range_tlb_invalidation = true, \ + .has_range_tlb_inval = true, \ .va_bits = 48, \ .vm_max_level = 3 @@ -140,8 +105,7 @@ static const struct xe_graphics_desc graphics_xelpg = { .has_asid = 1, \ .has_atomic_enable_pte_bit = 1, \ .has_flat_ccs = 1, \ - .has_indirect_ring_state = 1, \ - .has_range_tlb_invalidation = 1, \ + .has_range_tlb_inval = 1, \ .has_usm = 1, \ .has_64bit_timestamp = 1, \ .va_bits = 48, \ @@ -184,6 +148,7 @@ static const struct xe_ip graphics_ips[] = { { 2004, "Xe2_LPG", &graphics_xe2 }, { 3000, "Xe3_LPG", &graphics_xe2 }, { 3001, "Xe3_LPG", &graphics_xe2 }, + { 3003, "Xe3_LPG", &graphics_xe2 }, }; /* Pre-GMDID Media IPs */ @@ -196,6 +161,7 @@ static const struct xe_ip media_ips[] = { { 1301, "Xe2_HPM", &media_xelpmp }, { 2000, "Xe2_LPM", &media_xelpmp }, { 3000, "Xe3_LPM", &media_xelpmp }, + { 3002, "Xe3_LPM", &media_xelpmp }, }; static const struct xe_device_desc tgl_desc = { @@ -205,6 +171,8 @@ static const struct xe_device_desc tgl_desc = { .dma_mask_size = 39, .has_display = true, .has_llc = true, + .has_sriov = true, + .max_gt_per_tile = 1, .require_force_probe = true, }; @@ -215,6 +183,7 @@ static const struct xe_device_desc rkl_desc = { .dma_mask_size = 39, .has_display = true, .has_llc = true, + .max_gt_per_tile = 1, .require_force_probe = true, }; @@ -227,6 +196,8 @@ static const struct xe_device_desc adl_s_desc = { .dma_mask_size = 39, .has_display = true, .has_llc = true, + .has_sriov = true, + .max_gt_per_tile = 1, .require_force_probe = true, .subplatforms = (const struct xe_subplatform_desc[]) { { XE_SUBPLATFORM_ALDERLAKE_S_RPLS, "RPLS", adls_rpls_ids }, @@ -243,6 +214,8 @@ static const struct xe_device_desc adl_p_desc = { .dma_mask_size = 39, .has_display = true, .has_llc = true, + .has_sriov = true, + .max_gt_per_tile = 1, .require_force_probe = true, .subplatforms = (const struct xe_subplatform_desc[]) { { XE_SUBPLATFORM_ALDERLAKE_P_RPLU, "RPLU", adlp_rplu_ids }, @@ -257,6 +230,8 @@ static const struct xe_device_desc adl_n_desc = { .dma_mask_size = 39, .has_display = true, .has_llc = true, + .has_sriov = true, + .max_gt_per_tile = 1, .require_force_probe = true, }; @@ -270,7 +245,9 @@ static const struct xe_device_desc dg1_desc = { PLATFORM(DG1), .dma_mask_size = 39, .has_display = true, + .has_gsc_nvm = 1, .has_heci_gscfi = 1, + .max_gt_per_tile = 1, .require_force_probe = true, }; @@ -281,6 +258,7 @@ static const u16 dg2_g12_ids[] = { INTEL_DG2_G12_IDS(NOP), 0 }; #define DG2_FEATURES \ DGFX_FEATURES, \ PLATFORM(DG2), \ + .has_gsc_nvm = 1, \ .has_heci_gscfi = 1, \ .subplatforms = (const struct 
xe_subplatform_desc[]) { \ { XE_SUBPLATFORM_DG2_G10, "G10", dg2_g10_ids }, \ @@ -293,16 +271,19 @@ static const struct xe_device_desc ats_m_desc = { .pre_gmdid_graphics_ip = &graphics_ip_xehpg, .pre_gmdid_media_ip = &media_ip_xehpm, .dma_mask_size = 46, + .max_gt_per_tile = 1, .require_force_probe = true, DG2_FEATURES, .has_display = false, + .has_sriov = true, }; static const struct xe_device_desc dg2_desc = { .pre_gmdid_graphics_ip = &graphics_ip_xehpg, .pre_gmdid_media_ip = &media_ip_xehpm, .dma_mask_size = 46, + .max_gt_per_tile = 1, .require_force_probe = true, DG2_FEATURES, @@ -317,7 +298,9 @@ static const __maybe_unused struct xe_device_desc pvc_desc = { PLATFORM(PVC), .dma_mask_size = 52, .has_display = false, + .has_gsc_nvm = 1, .has_heci_gscfi = 1, + .max_gt_per_tile = 1, .max_remote_tiles = 1, .require_force_probe = true, .has_mbx_power_limits = false, @@ -330,6 +313,7 @@ static const struct xe_device_desc mtl_desc = { .dma_mask_size = 46, .has_display = true, .has_pxp = true, + .max_gt_per_tile = 2, }; static const struct xe_device_desc lnl_desc = { @@ -337,6 +321,7 @@ static const struct xe_device_desc lnl_desc = { .dma_mask_size = 46, .has_display = true, .has_pxp = true, + .max_gt_per_tile = 2, .needs_scratch = true, }; @@ -347,7 +332,11 @@ static const struct xe_device_desc bmg_desc = { .has_display = true, .has_fan_control = true, .has_mbx_power_limits = true, + .has_gsc_nvm = 1, .has_heci_cscfi = 1, + .has_late_bind = true, + .has_sriov = true, + .max_gt_per_tile = 2, .needs_scratch = true, }; @@ -356,7 +345,7 @@ static const struct xe_device_desc ptl_desc = { .dma_mask_size = 46, .has_display = true, .has_sriov = true, - .require_force_probe = true, + .max_gt_per_tile = 2, .needs_scratch = true, }; @@ -522,6 +511,26 @@ static void read_gmdid(struct xe_device *xe, enum xe_gmdid_type type, u32 *ver, *revid = REG_FIELD_GET(GMD_ID_REVID, val); } +static const struct xe_ip *find_graphics_ip(unsigned int verx100) +{ + KUNIT_STATIC_STUB_REDIRECT(find_graphics_ip, verx100); + + for (int i = 0; i < ARRAY_SIZE(graphics_ips); i++) + if (graphics_ips[i].verx100 == verx100) + return &graphics_ips[i]; + return NULL; +} + +static const struct xe_ip *find_media_ip(unsigned int verx100) +{ + KUNIT_STATIC_STUB_REDIRECT(find_media_ip, verx100); + + for (int i = 0; i < ARRAY_SIZE(media_ips); i++) + if (media_ips[i].verx100 == verx100) + return &media_ips[i]; + return NULL; +} + /* * Read IP version from hardware and select graphics/media IP descriptors * based on the result. 
@@ -539,14 +548,7 @@ static void handle_gmdid(struct xe_device *xe, read_gmdid(xe, GMDID_GRAPHICS, &ver, graphics_revid); - for (int i = 0; i < ARRAY_SIZE(graphics_ips); i++) { - if (ver == graphics_ips[i].verx100) { - *graphics_ip = &graphics_ips[i]; - - break; - } - } - + *graphics_ip = find_graphics_ip(ver); if (!*graphics_ip) { drm_err(&xe->drm, "Hardware reports unknown graphics version %u.%02u\n", ver / 100, ver % 100); @@ -557,14 +559,7 @@ static void handle_gmdid(struct xe_device *xe, if (ver == 0) return; - for (int i = 0; i < ARRAY_SIZE(media_ips); i++) { - if (ver == media_ips[i].verx100) { - *media_ip = &media_ips[i]; - - break; - } - } - + *media_ip = find_media_ip(ver); if (!*media_ip) { drm_err(&xe->drm, "Hardware reports unknown media version %u.%02u\n", ver / 100, ver % 100); @@ -590,8 +585,10 @@ static int xe_info_init_early(struct xe_device *xe, xe->info.is_dgfx = desc->is_dgfx; xe->info.has_fan_control = desc->has_fan_control; xe->info.has_mbx_power_limits = desc->has_mbx_power_limits; + xe->info.has_gsc_nvm = desc->has_gsc_nvm; xe->info.has_heci_gscfi = desc->has_heci_gscfi; xe->info.has_heci_cscfi = desc->has_heci_cscfi; + xe->info.has_late_bind = desc->has_late_bind; xe->info.has_llc = desc->has_llc; xe->info.has_pxp = desc->has_pxp; xe->info.has_sriov = desc->has_sriov; @@ -603,6 +600,10 @@ static int xe_info_init_early(struct xe_device *xe, xe->info.probe_display = IS_ENABLED(CONFIG_DRM_XE_DISPLAY) && xe_modparam.probe_display && desc->has_display; + + xe_assert(xe, desc->max_gt_per_tile > 0); + xe_assert(xe, desc->max_gt_per_tile <= XE_MAX_GT_PER_TILE); + xe->info.max_gt_per_tile = desc->max_gt_per_tile; xe->info.tile_count = 1 + desc->max_remote_tiles; err = xe_tile_init_early(xe_device_get_root_tile(xe), xe, 0); @@ -613,6 +614,44 @@ static int xe_info_init_early(struct xe_device *xe, } /* + * Possibly override number of tile based on configuration register. + */ +static void xe_info_probe_tile_count(struct xe_device *xe) +{ + struct xe_mmio *mmio; + u8 tile_count; + u32 mtcfg; + + KUNIT_STATIC_STUB_REDIRECT(xe_info_probe_tile_count, xe); + + /* + * Probe for tile count only for platforms that support multiple + * tiles. + */ + if (xe->info.tile_count == 1) + return; + + if (xe->info.skip_mtcfg) + return; + + mmio = xe_root_tile_mmio(xe); + + /* + * Although the per-tile mmio regs are not yet initialized, this + * is fine as it's going to the root tile's mmio, that's + * guaranteed to be initialized earlier in xe_mmio_probe_early() + */ + mtcfg = xe_mmio_read32(mmio, XEHP_MTCFG_ADDR); + tile_count = REG_FIELD_GET(TILE_COUNT, mtcfg) + 1; + + if (tile_count < xe->info.tile_count) { + drm_info(&xe->drm, "tile_count: %d, reduced_tile_count %d\n", + xe->info.tile_count, tile_count); + xe->info.tile_count = tile_count; + } +} + +/* * Initialize device info content that does require knowledge about * graphics / media IP version. 
* Make sure that GT / tile structures allocated by the driver match the data @@ -682,10 +721,12 @@ static int xe_info_init(struct xe_device *xe, /* Runtime detection may change this later */ xe->info.has_flat_ccs = graphics_desc->has_flat_ccs; - xe->info.has_range_tlb_invalidation = graphics_desc->has_range_tlb_invalidation; + xe->info.has_range_tlb_inval = graphics_desc->has_range_tlb_inval; xe->info.has_usm = graphics_desc->has_usm; xe->info.has_64bit_timestamp = graphics_desc->has_64bit_timestamp; + xe_info_probe_tile_count(xe); + for_each_remote_tile(tile, xe, id) { int err; @@ -701,12 +742,18 @@ static int xe_info_init(struct xe_device *xe, * All of these together determine the overall GT count. */ for_each_tile(tile, xe, id) { + int err; + gt = tile->primary_gt; - gt->info.id = xe->info.gt_count++; gt->info.type = XE_GT_TYPE_MAIN; + gt->info.id = tile->id * xe->info.max_gt_per_tile; gt->info.has_indirect_ring_state = graphics_desc->has_indirect_ring_state; gt->info.engine_mask = graphics_desc->hw_engine_mask; + err = xe_tile_alloc_vram(tile); + if (err) + return err; + if (MEDIA_VER(xe) < 13 && media_desc) gt->info.engine_mask |= media_desc->hw_engine_mask; @@ -723,19 +770,18 @@ static int xe_info_init(struct xe_device *xe, gt = tile->media_gt; gt->info.type = XE_GT_TYPE_MEDIA; + gt->info.id = tile->id * xe->info.max_gt_per_tile + 1; gt->info.has_indirect_ring_state = media_desc->has_indirect_ring_state; gt->info.engine_mask = media_desc->hw_engine_mask; - - /* - * FIXME: At the moment multi-tile and standalone media are - * mutually exclusive on current platforms. We'll need to - * come up with a better way to number GTs if we ever wind - * up with platforms that support both together. - */ - drm_WARN_ON(&xe->drm, id != 0); - gt->info.id = xe->info.gt_count++; } + /* + * Now that we have tiles and GTs defined, let's loop over valid GTs + * in order to define gt_count. + */ + for_each_gt(gt, xe, id) + xe->info.gt_count++; + return 0; } @@ -746,7 +792,7 @@ static void xe_pci_remove(struct pci_dev *pdev) if (IS_SRIOV_PF(xe)) xe_pci_sriov_configure(pdev, 0); - if (xe_survivability_mode_is_enabled(xe)) + if (xe_survivability_mode_is_boot_enabled(xe)) return; xe_device_remove(xe); @@ -779,6 +825,8 @@ static int xe_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent) struct xe_device *xe; int err; + xe_configfs_check_device(pdev); + if (desc->require_force_probe && !id_forced(pdev->device)) { dev_info(&pdev->dev, "Your graphics device %04x is not officially supported\n" @@ -819,6 +867,8 @@ static int xe_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent) if (err) return err; + xe_vram_resize_bar(xe); + err = xe_device_probe_early(xe); /* * In Boot Survivability mode, no drm card is exposed and driver @@ -826,7 +876,7 @@ static int xe_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent) * flashed through mei. 
Return success, if survivability mode * is enabled due to pcode failure or configfs being set */ - if (xe_survivability_mode_is_enabled(xe)) + if (xe_survivability_mode_is_boot_enabled(xe)) return 0; if (err) @@ -920,7 +970,7 @@ static int xe_pci_suspend(struct device *dev) struct xe_device *xe = pdev_to_xe_device(pdev); int err; - if (xe_survivability_mode_is_enabled(xe)) + if (xe_survivability_mode_is_boot_enabled(xe)) return -EBUSY; err = xe_pm_suspend(xe); diff --git a/drivers/gpu/drm/xe/xe_pci_sriov.c b/drivers/gpu/drm/xe/xe_pci_sriov.c index 8813efdcafbb..af05db07162e 100644 --- a/drivers/gpu/drm/xe/xe_pci_sriov.c +++ b/drivers/gpu/drm/xe/xe_pci_sriov.c @@ -3,6 +3,10 @@ * Copyright © 2023-2024 Intel Corporation */ +#include <linux/bitops.h> +#include <linux/pci.h> + +#include "regs/xe_bars.h" #include "xe_assert.h" #include "xe_device.h" #include "xe_gt_sriov_pf_config.h" @@ -12,6 +16,7 @@ #include "xe_pci_sriov.h" #include "xe_pm.h" #include "xe_sriov.h" +#include "xe_sriov_pf.h" #include "xe_sriov_pf_helpers.h" #include "xe_sriov_printk.h" @@ -127,6 +132,18 @@ static void pf_engine_activity_stats(struct xe_device *xe, unsigned int num_vfs, } } +static int resize_vf_vram_bar(struct xe_device *xe, int num_vfs) +{ + struct pci_dev *pdev = to_pci_dev(xe->drm.dev); + u32 sizes; + + sizes = pci_iov_vf_bar_get_sizes(pdev, VF_LMEM_BAR, num_vfs); + if (!sizes) + return 0; + + return pci_iov_vf_bar_set_size(pdev, VF_LMEM_BAR, __fls(sizes)); +} + static int pf_enable_vfs(struct xe_device *xe, int num_vfs) { struct pci_dev *pdev = to_pci_dev(xe->drm.dev); @@ -138,6 +155,10 @@ static int pf_enable_vfs(struct xe_device *xe, int num_vfs) xe_assert(xe, num_vfs <= total_vfs); xe_sriov_dbg(xe, "enabling %u VF%s\n", num_vfs, str_plural(num_vfs)); + err = xe_sriov_pf_wait_ready(xe); + if (err) + goto out; + /* * We must hold additional reference to the runtime PM to keep PF in D0 * during VFs lifetime, as our VFs do not implement the PM capability. 
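Before pci_enable_sriov(), the resize_vf_vram_bar() helper above asks the device which VF LMEM BAR sizes it supports and requests the largest one. A small worked sketch of the selection step, assuming the conventional resizable-BAR encoding in which bit n of the sizes bitmask advertises a 2^n MB BAR; the example values are made up:

        u32 sizes = BIT(8) | BIT(9) | BIT(10); /* device advertises 256 MB, 512 MB and 1 GB */
        int largest = __fls(sizes);            /* highest set bit -> 10 */

        /* pci_iov_vf_bar_set_size(pdev, VF_LMEM_BAR, largest) then requests 2^10 MB = 1 GB */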
@@ -153,6 +174,12 @@ static int pf_enable_vfs(struct xe_device *xe, int num_vfs) if (err < 0) goto failed; + if (IS_DGFX(xe)) { + err = resize_vf_vram_bar(xe, num_vfs); + if (err) + xe_sriov_info(xe, "Failed to set VF LMEM BAR size: %d\n", err); + } + err = pci_enable_sriov(pdev, num_vfs); if (err < 0) goto failed; @@ -169,7 +196,7 @@ static int pf_enable_vfs(struct xe_device *xe, int num_vfs) failed: pf_unprovision_vfs(xe, num_vfs); xe_pm_runtime_put(xe); - +out: xe_sriov_notice(xe, "Failed to enable %u VF%s (%pe)\n", num_vfs, str_plural(num_vfs), ERR_PTR(err)); return err; diff --git a/drivers/gpu/drm/xe/xe_pci_types.h b/drivers/gpu/drm/xe/xe_pci_types.h index ca6b10d35573..9b9766a3baa3 100644 --- a/drivers/gpu/drm/xe/xe_pci_types.h +++ b/drivers/gpu/drm/xe/xe_pci_types.h @@ -8,6 +8,48 @@ #include <linux/types.h> +#include "xe_platform_types.h" + +struct xe_subplatform_desc { + enum xe_subplatform subplatform; + const char *name; + const u16 *pciidlist; +}; + +struct xe_device_desc { + /* Should only ever be set for platforms without GMD_ID */ + const struct xe_ip *pre_gmdid_graphics_ip; + /* Should only ever be set for platforms without GMD_ID */ + const struct xe_ip *pre_gmdid_media_ip; + + const char *platform_name; + const struct xe_subplatform_desc *subplatforms; + + enum xe_platform platform; + + u8 dma_mask_size; + u8 max_remote_tiles:2; + u8 max_gt_per_tile:2; + + u8 require_force_probe:1; + u8 is_dgfx:1; + + u8 has_display:1; + u8 has_fan_control:1; + u8 has_gsc_nvm:1; + u8 has_heci_gscfi:1; + u8 has_heci_cscfi:1; + u8 has_late_bind:1; + u8 has_llc:1; + u8 has_mbx_power_limits:1; + u8 has_pxp:1; + u8 has_sriov:1; + u8 needs_scratch:1; + u8 skip_guc_pc:1; + u8 skip_mtcfg:1; + u8 skip_pcode:1; +}; + struct xe_graphics_desc { u8 va_bits; u8 vm_max_level; @@ -19,7 +61,7 @@ struct xe_graphics_desc { u8 has_atomic_enable_pte_bit:1; u8 has_flat_ccs:1; u8 has_indirect_ring_state:1; - u8 has_range_tlb_invalidation:1; + u8 has_range_tlb_inval:1; u8 has_usm:1; u8 has_64bit_timestamp:1; }; diff --git a/drivers/gpu/drm/xe/xe_pcode.c b/drivers/gpu/drm/xe/xe_pcode.c index 9189117fe825..6a7ddb9005f9 100644 --- a/drivers/gpu/drm/xe/xe_pcode.c +++ b/drivers/gpu/drm/xe/xe_pcode.c @@ -336,3 +336,33 @@ int xe_pcode_probe_early(struct xe_device *xe) return xe_pcode_ready(xe, false); } ALLOW_ERROR_INJECTION(xe_pcode_probe_early, ERRNO); /* See xe_pci_probe */ + +/* Helpers with drm device. 
These should only be called by the display side */ +#if IS_ENABLED(CONFIG_DRM_XE_DISPLAY) + +int intel_pcode_read(struct drm_device *drm, u32 mbox, u32 *val, u32 *val1) +{ + struct xe_device *xe = to_xe_device(drm); + struct xe_tile *tile = xe_device_get_root_tile(xe); + + return xe_pcode_read(tile, mbox, val, val1); +} + +int intel_pcode_write_timeout(struct drm_device *drm, u32 mbox, u32 val, int timeout_ms) +{ + struct xe_device *xe = to_xe_device(drm); + struct xe_tile *tile = xe_device_get_root_tile(xe); + + return xe_pcode_write_timeout(tile, mbox, val, timeout_ms); +} + +int intel_pcode_request(struct drm_device *drm, u32 mbox, u32 request, + u32 reply_mask, u32 reply, int timeout_base_ms) +{ + struct xe_device *xe = to_xe_device(drm); + struct xe_tile *tile = xe_device_get_root_tile(xe); + + return xe_pcode_request(tile, mbox, request, reply_mask, reply, timeout_base_ms); +} + +#endif diff --git a/drivers/gpu/drm/xe/xe_pcode.h b/drivers/gpu/drm/xe/xe_pcode.h index de38f44f3201..a5584c1c75f9 100644 --- a/drivers/gpu/drm/xe/xe_pcode.h +++ b/drivers/gpu/drm/xe/xe_pcode.h @@ -7,8 +7,10 @@ #define _XE_PCODE_H_ #include <linux/types.h> -struct xe_tile; + +struct drm_device; struct xe_device; +struct xe_tile; void xe_pcode_init(struct xe_tile *tile); int xe_pcode_probe_early(struct xe_device *xe); @@ -32,4 +34,12 @@ int xe_pcode_request(struct xe_tile *tile, u32 mbox, u32 request, | FIELD_PREP(PCODE_MB_PARAM1, param1)\ | FIELD_PREP(PCODE_MB_PARAM2, param2)) +/* Helpers with drm device */ +int intel_pcode_read(struct drm_device *drm, u32 mbox, u32 *val, u32 *val1); +int intel_pcode_write_timeout(struct drm_device *drm, u32 mbox, u32 val, int timeout_ms); +#define intel_pcode_write(drm, mbox, val) \ + intel_pcode_write_timeout((drm), (mbox), (val), 1) +int intel_pcode_request(struct drm_device *drm, u32 mbox, u32 request, + u32 reply_mask, u32 reply, int timeout_base_ms); + #endif diff --git a/drivers/gpu/drm/xe/xe_pcode_api.h b/drivers/gpu/drm/xe/xe_pcode_api.h index 0befdea77db1..92bfcba51e19 100644 --- a/drivers/gpu/drm/xe/xe_pcode_api.h +++ b/drivers/gpu/drm/xe/xe_pcode_api.h @@ -50,6 +50,21 @@ #define READ_PL_FROM_FW 0x1 #define READ_PL_FROM_PCODE 0x0 +#define PCODE_LATE_BINDING 0x5C +#define GET_CAPABILITY_STATUS 0x0 +#define V1_FAN_SUPPORTED REG_BIT(0) +#define VR_PARAMS_SUPPORTED REG_BIT(3) +#define V1_FAN_PROVISIONED REG_BIT(16) +#define VR_PARAMS_PROVISIONED REG_BIT(19) +#define GET_VERSION_LOW 0x1 +#define GET_VERSION_HIGH 0x2 +#define MAJOR_VERSION_MASK REG_GENMASK(31, 16) +#define MINOR_VERSION_MASK REG_GENMASK(15, 0) +#define HOTFIX_VERSION_MASK REG_GENMASK(31, 16) +#define BUILD_VERSION_MASK REG_GENMASK(15, 0) +#define FAN_TABLE 1 +#define VR_CONFIG 2 + #define PCODE_FREQUENCY_CONFIG 0x6e /* Frequency Config Sub Commands (param1) */ #define PCODE_MBOX_FC_SC_READ_FUSED_P0 0x0 diff --git a/drivers/gpu/drm/xe/xe_pm.c b/drivers/gpu/drm/xe/xe_pm.c index ff749edc005b..2c5a44377994 100644 --- a/drivers/gpu/drm/xe/xe_pm.c +++ b/drivers/gpu/drm/xe/xe_pm.c @@ -18,11 +18,15 @@ #include "xe_device.h" #include "xe_ggtt.h" #include "xe_gt.h" -#include "xe_guc.h" +#include "xe_gt_idle.h" +#include "xe_i2c.h" #include "xe_irq.h" +#include "xe_late_bind_fw.h" #include "xe_pcode.h" #include "xe_pxp.h" +#include "xe_sriov_vf_ccs.h" #include "xe_trace.h" +#include "xe_vm.h" #include "xe_wa.h" /** @@ -126,6 +130,8 @@ int xe_pm_suspend(struct xe_device *xe) if (err) goto err; + xe_late_bind_wait_for_worker_completion(&xe->late_bind); + for_each_gt(gt, xe, id) xe_gt_suspend_prepare(gt); @@ -134,7 
+140,7 @@ int xe_pm_suspend(struct xe_device *xe) /* FIXME: Super racey... */ err = xe_bo_evict_all(xe); if (err) - goto err_pxp; + goto err_display; for_each_gt(gt, xe, id) { err = xe_gt_suspend(gt); @@ -146,12 +152,13 @@ int xe_pm_suspend(struct xe_device *xe) xe_display_pm_suspend_late(xe); + xe_i2c_pm_suspend(xe); + drm_dbg(&xe->drm, "Device suspended\n"); return 0; err_display: xe_display_pm_resume(xe); -err_pxp: xe_pxp_pm_resume(xe->pxp); err: drm_dbg(&xe->drm, "Device suspend failed %d\n", err); @@ -174,6 +181,9 @@ int xe_pm_resume(struct xe_device *xe) drm_dbg(&xe->drm, "Resuming device\n"); trace_xe_pm_resume(xe, __builtin_return_address(0)); + for_each_gt(gt, xe, id) + xe_gt_idle_disable_c6(gt); + for_each_tile(tile, xe, id) xe_wa_apply_tile_workarounds(tile); @@ -191,6 +201,8 @@ int xe_pm_resume(struct xe_device *xe) if (err) goto err; + xe_i2c_pm_resume(xe, true); + xe_irq_resume(xe); for_each_gt(gt, xe, id) @@ -204,6 +216,11 @@ int xe_pm_resume(struct xe_device *xe) xe_pxp_pm_resume(xe->pxp); + if (IS_VF_CCS_READY(xe)) + xe_sriov_vf_ccs_register_context(xe); + + xe_late_bind_fw_load(&xe->late_bind); + drm_dbg(&xe->drm, "Device resumed\n"); return 0; err: @@ -239,6 +256,10 @@ static void xe_pm_runtime_init(struct xe_device *xe) { struct device *dev = xe->drm.dev; + /* Our current VFs do not support RPM. so, disable it */ + if (IS_SRIOV_VF(xe)) + return; + /* * Disable the system suspend direct complete optimization. * We need to ensure that the regular device suspend/resume functions @@ -286,6 +307,19 @@ static u32 vram_threshold_value(struct xe_device *xe) return DEFAULT_VRAM_THRESHOLD; } +static void xe_pm_wake_rebind_workers(struct xe_device *xe) +{ + struct xe_vm *vm, *next; + + mutex_lock(&xe->rebind_resume_lock); + list_for_each_entry_safe(vm, next, &xe->rebind_resume_list, + preempt.pm_activate_link) { + list_del_init(&vm->preempt.pm_activate_link); + xe_vm_resume_rebind_worker(vm); + } + mutex_unlock(&xe->rebind_resume_lock); +} + static int xe_pm_notifier_callback(struct notifier_block *nb, unsigned long action, void *data) { @@ -295,30 +329,30 @@ static int xe_pm_notifier_callback(struct notifier_block *nb, switch (action) { case PM_HIBERNATION_PREPARE: case PM_SUSPEND_PREPARE: + reinit_completion(&xe->pm_block); xe_pm_runtime_get(xe); err = xe_bo_evict_all_user(xe); - if (err) { + if (err) drm_dbg(&xe->drm, "Notifier evict user failed (%d)\n", err); - xe_pm_runtime_put(xe); - break; - } err = xe_bo_notifier_prepare_all_pinned(xe); - if (err) { + if (err) drm_dbg(&xe->drm, "Notifier prepare pin failed (%d)\n", err); - xe_pm_runtime_put(xe); - } + /* + * Keep the runtime pm reference until post hibernation / post suspend to + * avoid a runtime suspend interfering with evicted objects or backup + * allocations. 
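The notifier now keeps the runtime PM reference across the whole suspend window and pairs it with the new xe->pm_block completion and rebind_resume_list. A hedged sketch of how a VM rebind worker could park itself against that gate and rely on xe_pm_wake_rebind_workers() to restart it on resume; the helper name is hypothetical and the real logic lives in xe_vm.c:

static bool xe_vm_park_for_suspend(struct xe_vm *vm)
{
        struct xe_device *xe = vm->xe;
        bool parked = false;

        mutex_lock(&xe->rebind_resume_lock);
        if (!try_wait_for_completion(&xe->pm_block)) {
                /* suspend in flight: defer until xe_pm_wake_rebind_workers() */
                list_add_tail(&vm->preempt.pm_activate_link,
                              &xe->rebind_resume_list);
                parked = true;
        }
        mutex_unlock(&xe->rebind_resume_lock);

        return parked;
}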
+ */ break; case PM_POST_HIBERNATION: case PM_POST_SUSPEND: + complete_all(&xe->pm_block); + xe_pm_wake_rebind_workers(xe); xe_bo_notifier_unprepare_all_pinned(xe); xe_pm_runtime_put(xe); break; } - if (err) - return NOTIFY_BAD; - return NOTIFY_DONE; } @@ -340,6 +374,14 @@ int xe_pm_init(struct xe_device *xe) if (err) return err; + err = drmm_mutex_init(&xe->drm, &xe->rebind_resume_lock); + if (err) + goto err_unregister; + + init_completion(&xe->pm_block); + complete_all(&xe->pm_block); + INIT_LIST_HEAD(&xe->rebind_resume_list); + /* For now suspend/resume is only allowed with GuC */ if (!xe_device_uc_enabled(xe)) return 0; @@ -363,6 +405,10 @@ static void xe_pm_runtime_fini(struct xe_device *xe) { struct device *dev = xe->drm.dev; + /* Our current VFs do not support RPM. so, disable it */ + if (IS_SRIOV_VF(xe)) + return; + pm_runtime_get_sync(dev); pm_runtime_forbid(dev); } @@ -488,6 +534,8 @@ int xe_pm_runtime_suspend(struct xe_device *xe) xe_display_pm_runtime_suspend_late(xe); + xe_i2c_pm_suspend(xe); + xe_rpm_lockmap_release(xe); xe_pm_write_callback_task(xe, NULL); return 0; @@ -519,6 +567,9 @@ int xe_pm_runtime_resume(struct xe_device *xe) xe_rpm_lockmap_acquire(xe); + for_each_gt(gt, xe, id) + xe_gt_idle_disable_c6(gt); + if (xe->d3cold.allowed) { err = xe_pcode_ready(xe, true); if (err) @@ -535,6 +586,8 @@ int xe_pm_runtime_resume(struct xe_device *xe) goto out; } + xe_i2c_pm_resume(xe, xe->d3cold.allowed); + xe_irq_resume(xe); for_each_gt(gt, xe, id) @@ -550,6 +603,12 @@ int xe_pm_runtime_resume(struct xe_device *xe) xe_pxp_pm_resume(xe->pxp); + if (IS_VF_CCS_READY(xe)) + xe_sriov_vf_ccs_register_context(xe); + + if (xe->d3cold.allowed) + xe_late_bind_fw_load(&xe->late_bind); + out: xe_rpm_lockmap_release(xe); xe_pm_write_callback_task(xe, NULL); @@ -753,11 +812,13 @@ void xe_pm_assert_unbounded_bridge(struct xe_device *xe) } /** - * xe_pm_set_vram_threshold - Set a vram threshold for allowing/blocking D3Cold + * xe_pm_set_vram_threshold - Set a VRAM threshold for allowing/blocking D3Cold * @xe: xe device instance - * @threshold: VRAM size in bites for the D3cold threshold + * @threshold: VRAM size in MiB for the D3cold threshold * - * Returns 0 for success, negative error code otherwise. + * Return: + * * 0 - success + * * -EINVAL - invalid argument */ int xe_pm_set_vram_threshold(struct xe_device *xe, u32 threshold) { diff --git a/drivers/gpu/drm/xe/xe_pmu.c b/drivers/gpu/drm/xe/xe_pmu.c index 69df0e3520a5..cab51d826345 100644 --- a/drivers/gpu/drm/xe/xe_pmu.c +++ b/drivers/gpu/drm/xe/xe_pmu.c @@ -157,10 +157,13 @@ static bool event_gt_forcewake(struct perf_event *event) return true; } -static bool event_supported(struct xe_pmu *pmu, unsigned int gt, +static bool event_supported(struct xe_pmu *pmu, unsigned int gt_id, unsigned int id) { - if (gt >= XE_MAX_GT_PER_TILE) + struct xe_device *xe = container_of(pmu, typeof(*xe), pmu); + struct xe_gt *gt = xe_device_get_gt(xe, gt_id); + + if (!gt) return false; return id < sizeof(pmu->supported_events) * BITS_PER_BYTE && diff --git a/drivers/gpu/drm/xe/xe_printk.h b/drivers/gpu/drm/xe/xe_printk.h new file mode 100644 index 000000000000..c5be2385aa95 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_printk.h @@ -0,0 +1,129 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2025 Intel Corporation + */ + +#ifndef _XE_PRINTK_H_ +#define _XE_PRINTK_H_ + +#include <drm/drm_print.h> + +#include "xe_device_types.h" + +#define __XE_PRINTK_FMT(_xe, _fmt, _args...) _fmt, ##_args + +#define xe_printk(_xe, _level, _fmt, ...) 
\ + drm_##_level(&(_xe)->drm, __XE_PRINTK_FMT((_xe), _fmt, ## __VA_ARGS__)) + +#define xe_err(_xe, _fmt, ...) \ + xe_printk((_xe), err, _fmt, ##__VA_ARGS__) + +#define xe_err_once(_xe, _fmt, ...) \ + xe_printk((_xe), err_once, _fmt, ##__VA_ARGS__) + +#define xe_err_ratelimited(_xe, _fmt, ...) \ + xe_printk((_xe), err_ratelimited, _fmt, ##__VA_ARGS__) + +#define xe_warn(_xe, _fmt, ...) \ + xe_printk((_xe), warn, _fmt, ##__VA_ARGS__) + +#define xe_notice(_xe, _fmt, ...) \ + xe_printk((_xe), notice, _fmt, ##__VA_ARGS__) + +#define xe_info(_xe, _fmt, ...) \ + xe_printk((_xe), info, _fmt, ##__VA_ARGS__) + +#define xe_dbg(_xe, _fmt, ...) \ + xe_printk((_xe), dbg, _fmt, ##__VA_ARGS__) + +#define xe_WARN_type(_xe, _type, _condition, _fmt, ...) \ + drm_WARN##_type(&(_xe)->drm, _condition, _fmt, ## __VA_ARGS__) + +#define xe_WARN(_xe, _condition, _fmt, ...) \ + xe_WARN_type((_xe),, _condition, __XE_PRINTK_FMT((_xe), _fmt, ## __VA_ARGS__)) + +#define xe_WARN_ONCE(_xe, _condition, _fmt, ...) \ + xe_WARN_type((_xe), _ONCE, _condition, __XE_PRINTK_FMT((_xe), _fmt, ## __VA_ARGS__)) + +#define xe_WARN_ON(_xe, _condition) \ + xe_WARN((_xe), _condition, "%s(%s)", "WARN_ON", __stringify(_condition)) + +#define xe_WARN_ON_ONCE(_xe, _condition) \ + xe_WARN_ONCE((_xe), _condition, "%s(%s)", "WARN_ON_ONCE", __stringify(_condition)) + +static inline void __xe_printfn_err(struct drm_printer *p, struct va_format *vaf) +{ + struct xe_device *xe = p->arg; + + xe_err(xe, "%pV", vaf); +} + +static inline void __xe_printfn_info(struct drm_printer *p, struct va_format *vaf) +{ + struct xe_device *xe = p->arg; + + xe_info(xe, "%pV", vaf); +} + +static inline void __xe_printfn_dbg(struct drm_printer *p, struct va_format *vaf) +{ + struct xe_device *xe = p->arg; + struct drm_printer ddp; + + /* + * The original xe_dbg() callsite annotations are useless here, + * redirect to the tweaked drm_dbg_printer() instead. + */ + ddp = drm_dbg_printer(&xe->drm, DRM_UT_DRIVER, NULL); + ddp.origin = p->origin; + + drm_printf(&ddp, __XE_PRINTK_FMT(xe, "%pV", vaf)); +} + +/** + * xe_err_printer - Construct a &drm_printer that outputs to xe_err() + * @xe: the &xe_device pointer to use in xe_err() + * + * Return: The &drm_printer object. + */ +static inline struct drm_printer xe_err_printer(struct xe_device *xe) +{ + struct drm_printer p = { + .printfn = __xe_printfn_err, + .arg = xe, + }; + return p; +} + +/** + * xe_info_printer - Construct a &drm_printer that outputs to xe_info() + * @xe: the &xe_device pointer to use in xe_info() + * + * Return: The &drm_printer object. + */ +static inline struct drm_printer xe_info_printer(struct xe_device *xe) +{ + struct drm_printer p = { + .printfn = __xe_printfn_info, + .arg = xe, + }; + return p; +} + +/** + * xe_dbg_printer - Construct a &drm_printer that outputs like xe_dbg() + * @xe: the &xe_device pointer to use in xe_dbg() + * + * Return: The &drm_printer object. 
+ */ +static inline struct drm_printer xe_dbg_printer(struct xe_device *xe) +{ + struct drm_printer p = { + .printfn = __xe_printfn_dbg, + .arg = xe, + .origin = (const void *)_THIS_IP_, + }; + return p; +} + +#endif diff --git a/drivers/gpu/drm/xe/xe_psmi.c b/drivers/gpu/drm/xe/xe_psmi.c new file mode 100644 index 000000000000..45d142191d60 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_psmi.c @@ -0,0 +1,294 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2025 Intel Corporation + */ + +#include <linux/debugfs.h> + +#include "xe_bo.h" +#include "xe_device.h" +#include "xe_configfs.h" +#include "xe_psmi.h" + +/* + * PSMI capture support + * + * Requirement for PSMI capture is to have a physically contiguous buffer. The + * PSMI tool owns doing all necessary configuration (MMIO register writes are + * done from user-space). However, KMD needs to provide the PSMI tool with the + * required physical address of the base of PSMI buffer in case of VRAM. + * + * VRAM backed PSMI buffer: + * Buffer is allocated as GEM object and with XE_BO_CREATE_PINNED_BIT flag which + * creates a contiguous allocation. The physical address is returned from + * psmi_debugfs_capture_addr_show(). PSMI tool can mmap the buffer via the + * PCIBAR through sysfs. + * + * SYSTEM memory backed PSMI buffer: + * Interface here does not support allocating from SYSTEM memory region. The + * PSMI tool needs to allocate memory themselves using hugetlbfs. In order to + * get the physical address, user-space can query /proc/[pid]/pagemap. As an + * alternative, CMA debugfs could also be used to allocate reserved CMA memory. + */ + +static bool psmi_enabled(struct xe_device *xe) +{ + return xe_configfs_get_psmi_enabled(to_pci_dev(xe->drm.dev)); +} + +static void psmi_free_object(struct xe_bo *bo) +{ + xe_bo_lock(bo, NULL); + xe_bo_unpin(bo); + xe_bo_unlock(bo); + xe_bo_put(bo); +} + +/* + * Free PSMI capture buffer objects. + */ +static void psmi_cleanup(struct xe_device *xe) +{ + unsigned long id, region_mask = xe->psmi.region_mask; + struct xe_bo *bo; + + for_each_set_bit(id, &region_mask, + ARRAY_SIZE(xe->psmi.capture_obj)) { + /* smem should never be set */ + xe_assert(xe, id); + + bo = xe->psmi.capture_obj[id]; + if (bo) { + psmi_free_object(bo); + xe->psmi.capture_obj[id] = NULL; + } + } +} + +static struct xe_bo *psmi_alloc_object(struct xe_device *xe, + unsigned int id, size_t bo_size) +{ + struct xe_tile *tile; + + if (!id || !bo_size) + return NULL; + + tile = &xe->tiles[id - 1]; + + /* VRAM: Allocate GEM object for the capture buffer */ + return xe_bo_create_pin_range_novm(xe, tile, bo_size, 0, ~0ull, + ttm_bo_type_kernel, + XE_BO_FLAG_VRAM_IF_DGFX(tile) | + XE_BO_FLAG_PINNED | + XE_BO_FLAG_PINNED_LATE_RESTORE | + XE_BO_FLAG_NEEDS_CPU_ACCESS); +} + +/* + * Allocate PSMI capture buffer objects (via debugfs set function), based on + * which regions the user has selected in region_mask. @size: size in bytes + * (should be power of 2) + * + * Always release/free the current buffer objects before attempting to allocate + * new ones. Size == 0 will free all current buffers. + * + * Note, we don't write any registers as the capture tool is already configuring + * all PSMI registers itself via mmio space.
+ */ +static int psmi_resize_object(struct xe_device *xe, size_t size) +{ + unsigned long id, region_mask = xe->psmi.region_mask; + struct xe_bo *bo = NULL; + int err = 0; + + /* if resizing, free currently allocated buffers first */ + psmi_cleanup(xe); + + /* can set size to 0, in which case, now done */ + if (!size) + return 0; + + for_each_set_bit(id, &region_mask, + ARRAY_SIZE(xe->psmi.capture_obj)) { + /* smem should never be set */ + xe_assert(xe, id); + + bo = psmi_alloc_object(xe, id, size); + if (IS_ERR(bo)) { + err = PTR_ERR(bo); + break; + } + xe->psmi.capture_obj[id] = bo; + + drm_info(&xe->drm, + "PSMI capture size requested: %zu bytes, allocated: %lu:%zu\n", + size, id, bo ? xe_bo_size(bo) : 0); + } + + /* on error, reverse what was allocated */ + if (err) + psmi_cleanup(xe); + + return err; +} + +/* + * Returns an address for the capture tool to use to find start of capture + * buffer. Capture tool requires the capability to have a buffer allocated per + * each tile (VRAM region), thus we return an address for each region. + */ +static int psmi_debugfs_capture_addr_show(struct seq_file *m, void *data) +{ + struct xe_device *xe = m->private; + unsigned long id, region_mask; + struct xe_bo *bo; + u64 val; + + region_mask = xe->psmi.region_mask; + for_each_set_bit(id, &region_mask, + ARRAY_SIZE(xe->psmi.capture_obj)) { + /* smem should never be set */ + xe_assert(xe, id); + + /* VRAM region */ + bo = xe->psmi.capture_obj[id]; + if (!bo) + continue; + + /* pinned, so don't need bo_lock */ + val = __xe_bo_addr(bo, 0, PAGE_SIZE); + seq_printf(m, "%ld: 0x%llx\n", id, val); + } + + return 0; +} + +/* + * Return capture buffer size, using the size from first allocated object that + * is found. This works because all objects must be of the same size. + */ +static int psmi_debugfs_capture_size_get(void *data, u64 *val) +{ + unsigned long id, region_mask; + struct xe_device *xe = data; + struct xe_bo *bo; + + region_mask = xe->psmi.region_mask; + for_each_set_bit(id, &region_mask, + ARRAY_SIZE(xe->psmi.capture_obj)) { + /* smem should never be set */ + xe_assert(xe, id); + + bo = xe->psmi.capture_obj[id]; + if (bo) { + *val = xe_bo_size(bo); + return 0; + } + } + + /* no capture objects are allocated */ + *val = 0; + + return 0; +} + +/* + * Set size of PSMI capture buffer. This triggers the allocation of capture + * buffer in each memory region as specified with prior write to + * psmi_capture_region_mask. + */ +static int psmi_debugfs_capture_size_set(void *data, u64 val) +{ + struct xe_device *xe = data; + + /* user must have specified at least one region */ + if (!xe->psmi.region_mask) + return -EINVAL; + + return psmi_resize_object(xe, val); +} + +static int psmi_debugfs_capture_region_mask_get(void *data, u64 *val) +{ + struct xe_device *xe = data; + + *val = xe->psmi.region_mask; + + return 0; +} + +/* + * Select VRAM regions for multi-tile devices, only allowed when buffer is not + * currently allocated.
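Put together, the three new debugfs files give a capture tool a simple two-step flow: pick VRAM regions, then size the buffers (writing 0 frees them again). A hedged userspace sketch, assuming the standard DRM debugfs location /sys/kernel/debug/dri/<minor>/ and omitting error handling:

#include <stdio.h>

int main(void)
{
        FILE *f;

        /* bits 1..N select VRAM of tile N-1; bit 0 (SMEM) is rejected with -EOPNOTSUPP */
        f = fopen("/sys/kernel/debug/dri/0/psmi_capture_region_mask", "w");
        fprintf(f, "0x6\n");                    /* VRAM0 | VRAM1 */
        fclose(f);

        /* allocate one 256 MiB capture buffer per selected region */
        f = fopen("/sys/kernel/debug/dri/0/psmi_capture_size", "w");
        fprintf(f, "%llu\n", 256ULL << 20);
        fclose(f);

        return 0;
}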
+ */ +static int psmi_debugfs_capture_region_mask_set(void *data, u64 region_mask) +{ + struct xe_device *xe = data; + u64 size = 0; + + /* SMEM is not supported (see comments at top of file) */ + if (region_mask & 0x1) + return -EOPNOTSUPP; + + /* input bitmask should contain only valid TTM regions */ + if (!region_mask || region_mask & ~xe->info.mem_region_mask) + return -EINVAL; + + /* only allow setting mask if buffer is not yet allocated */ + psmi_debugfs_capture_size_get(xe, &size); + if (size) + return -EBUSY; + + xe->psmi.region_mask = region_mask; + + return 0; +} + +DEFINE_SHOW_ATTRIBUTE(psmi_debugfs_capture_addr); + +DEFINE_DEBUGFS_ATTRIBUTE(psmi_debugfs_capture_region_mask_fops, + psmi_debugfs_capture_region_mask_get, + psmi_debugfs_capture_region_mask_set, + "0x%llx\n"); + +DEFINE_DEBUGFS_ATTRIBUTE(psmi_debugfs_capture_size_fops, + psmi_debugfs_capture_size_get, + psmi_debugfs_capture_size_set, + "%lld\n"); + +void xe_psmi_debugfs_register(struct xe_device *xe) +{ + struct drm_minor *minor; + + if (!psmi_enabled(xe)) + return; + + minor = xe->drm.primary; + if (!minor->debugfs_root) + return; + + debugfs_create_file("psmi_capture_addr", + 0400, minor->debugfs_root, xe, + &psmi_debugfs_capture_addr_fops); + + debugfs_create_file("psmi_capture_region_mask", + 0600, minor->debugfs_root, xe, + &psmi_debugfs_capture_region_mask_fops); + + debugfs_create_file("psmi_capture_size", + 0600, minor->debugfs_root, xe, + &psmi_debugfs_capture_size_fops); +} + +static void psmi_fini(void *arg) +{ + psmi_cleanup(arg); +} + +int xe_psmi_init(struct xe_device *xe) +{ + if (!psmi_enabled(xe)) + return 0; + + return devm_add_action(xe->drm.dev, psmi_fini, xe); +} diff --git a/drivers/gpu/drm/xe/xe_psmi.h b/drivers/gpu/drm/xe/xe_psmi.h new file mode 100644 index 000000000000..b1dfba80d893 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_psmi.h @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2025 Intel Corporation + */ + +#ifndef _XE_PSMI_H_ +#define _XE_PSMI_H_ + +struct xe_device; + +int xe_psmi_init(struct xe_device *xe); +void xe_psmi_debugfs_register(struct xe_device *xe); + +#endif diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c index c8e63bd23300..07f96bda638a 100644 --- a/drivers/gpu/drm/xe/xe_pt.c +++ b/drivers/gpu/drm/xe/xe_pt.c @@ -13,16 +13,17 @@ #include "xe_drm_client.h" #include "xe_exec_queue.h" #include "xe_gt.h" -#include "xe_gt_tlb_invalidation.h" #include "xe_migrate.h" #include "xe_pt_types.h" #include "xe_pt_walk.h" #include "xe_res_cursor.h" #include "xe_sched_job.h" -#include "xe_sync.h" #include "xe_svm.h" +#include "xe_sync.h" +#include "xe_tlb_inval_job.h" #include "xe_trace.h" #include "xe_ttm_stolen_mgr.h" +#include "xe_userptr.h" #include "xe_vm.h" struct xe_pt_dir { @@ -69,7 +70,7 @@ static u64 __xe_pt_empty_pte(struct xe_tile *tile, struct xe_vm *vm, if (level > MAX_HUGEPTE_LEVEL) return vm->pt_ops->pde_encode_bo(vm->scratch_pt[id][level - 1]->bo, - 0, pat_index); + 0); return vm->pt_ops->pte_encode_addr(xe, 0, pat_index, level, IS_DGFX(xe), 0) | XE_PTE_NULL; @@ -88,6 +89,7 @@ static void xe_pt_free(struct xe_pt *pt) * @vm: The vm to create for. * @tile: The tile to create for. * @level: The page-table level. + * @exec: The drm_exec object used to lock the vm. * * Allocate and initialize a single struct xe_pt metadata structure. Also * create the corresponding page-table bo, but don't initialize it. If the @@ -99,7 +101,7 @@ static void xe_pt_free(struct xe_pt *pt) * error. 
*/ struct xe_pt *xe_pt_create(struct xe_vm *vm, struct xe_tile *tile, - unsigned int level) + unsigned int level, struct drm_exec *exec) { struct xe_pt *pt; struct xe_bo *bo; @@ -123,9 +125,11 @@ struct xe_pt *xe_pt_create(struct xe_vm *vm, struct xe_tile *tile, bo_flags |= XE_BO_FLAG_PINNED_LATE_RESTORE; pt->level = level; + + drm_WARN_ON(&vm->xe->drm, IS_ERR_OR_NULL(exec)); bo = xe_bo_create_pin_map(vm->xe, tile, vm, SZ_4K, ttm_bo_type_kernel, - bo_flags); + bo_flags, exec); if (IS_ERR(bo)) { err = PTR_ERR(bo); goto err_kfree; @@ -518,7 +522,7 @@ xe_pt_stage_bind_entry(struct xe_ptw *parent, pgoff_t offset, { struct xe_pt_stage_bind_walk *xe_walk = container_of(walk, typeof(*xe_walk), base); - u16 pat_index = xe_walk->vma->pat_index; + u16 pat_index = xe_walk->vma->attr.pat_index; struct xe_pt *xe_parent = container_of(parent, typeof(*xe_parent), base); struct xe_vm *vm = xe_walk->vm; struct xe_pt *xe_child; @@ -589,7 +593,8 @@ xe_pt_stage_bind_entry(struct xe_ptw *parent, pgoff_t offset, if (covers || !*child) { u64 flags = 0; - xe_child = xe_pt_create(xe_walk->vm, xe_walk->tile, level - 1); + xe_child = xe_pt_create(xe_walk->vm, xe_walk->tile, level - 1, + xe_vm_validation_exec(vm)); if (IS_ERR(xe_child)) return PTR_ERR(xe_child); @@ -616,7 +621,7 @@ xe_pt_stage_bind_entry(struct xe_ptw *parent, pgoff_t offset, xe_child->is_compact = true; } - pte = vm->pt_ops->pde_encode_bo(xe_child->bo, 0, pat_index) | flags; + pte = vm->pt_ops->pde_encode_bo(xe_child->bo, 0) | flags; ret = xe_pt_insert_entry(xe_walk, xe_parent, offset, xe_child, pte); } @@ -640,28 +645,31 @@ static const struct xe_pt_walk_ops xe_pt_stage_bind_ops = { * - In all other cases device atomics will be disabled with AE=0 until an application * request differently using a ioctl like madvise. */ -static bool xe_atomic_for_vram(struct xe_vm *vm) +static bool xe_atomic_for_vram(struct xe_vm *vm, struct xe_vma *vma) { + if (vma->attr.atomic_access == DRM_XE_ATOMIC_CPU) + return false; + return true; } -static bool xe_atomic_for_system(struct xe_vm *vm, struct xe_bo *bo) +static bool xe_atomic_for_system(struct xe_vm *vm, struct xe_vma *vma) { struct xe_device *xe = vm->xe; + struct xe_bo *bo = xe_vma_bo(vma); - if (!xe->info.has_device_atomics_on_smem) + if (!xe->info.has_device_atomics_on_smem || + vma->attr.atomic_access == DRM_XE_ATOMIC_CPU) return false; + if (vma->attr.atomic_access == DRM_XE_ATOMIC_DEVICE) + return true; + /* * If a SMEM+LMEM allocation is backed by SMEM, a device * atomics will cause a gpu page fault and which then * gets migrated to LMEM, bind such allocations with * device atomics enabled. - * - * TODO: Revisit this. Perhaps add something like a - * fault_on_atomics_in_system UAPI flag. - * Note that this also prohibits GPU atomics in LR mode for - * userptr and system memory on DGFX. */ return (!IS_DGFX(xe) || (!xe_vm_in_lr_mode(vm) || (bo && xe_bo_has_single_placement(bo)))); @@ -725,7 +733,7 @@ xe_pt_stage_bind(struct xe_tile *tile, struct xe_vma *vma, return -EAGAIN; } if (xe_svm_range_has_dma_mapping(range)) { - xe_res_first_dma(range->base.dma_addr, 0, + xe_res_first_dma(range->base.pages.dma_addr, 0, range->base.itree.last + 1 - range->base.itree.start, &curs); xe_svm_range_debug(range, "BIND PREPARE - MIXED"); @@ -744,8 +752,8 @@ xe_pt_stage_bind(struct xe_tile *tile, struct xe_vma *vma, goto walk_pt; if (vma->gpuva.flags & XE_VMA_ATOMIC_PTE_BIT) { - xe_walk.default_vram_pte = xe_atomic_for_vram(vm) ? XE_USM_PPGTT_PTE_AE : 0; - xe_walk.default_system_pte = xe_atomic_for_system(vm, bo) ? 
+ xe_walk.default_vram_pte = xe_atomic_for_vram(vm, vma) ? XE_USM_PPGTT_PTE_AE : 0; + xe_walk.default_system_pte = xe_atomic_for_system(vm, vma) ? XE_USM_PPGTT_PTE_AE : 0; } @@ -756,8 +764,8 @@ xe_pt_stage_bind(struct xe_tile *tile, struct xe_vma *vma, if (!xe_vma_is_null(vma) && !range) { if (xe_vma_is_userptr(vma)) - xe_res_first_sg(to_userptr_vma(vma)->userptr.sg, 0, - xe_vma_size(vma), &curs); + xe_res_first_dma(to_userptr_vma(vma)->userptr.pages.dma_addr, 0, + xe_vma_size(vma), &curs); else if (xe_bo_is_vram(bo) || xe_bo_is_stolen(bo)) xe_res_first(bo->ttm.resource, xe_vma_bo_offset(vma), xe_vma_size(vma), &curs); @@ -910,7 +918,7 @@ bool xe_pt_zap_ptes(struct xe_tile *tile, struct xe_vma *vma) if (xe_vma_bo(vma)) xe_bo_assert_held(xe_vma_bo(vma)); else if (xe_vma_is_userptr(vma)) - lockdep_assert_held(&xe_vma_vm(vma)->userptr.notifier_lock); + lockdep_assert_held(&xe_vma_vm(vma)->svm.gpusvm.notifier_lock); if (!(pt_mask & BIT(tile->id))) return false; @@ -950,7 +958,19 @@ bool xe_pt_zap_ptes_range(struct xe_tile *tile, struct xe_vm *vm, struct xe_pt *pt = vm->pt_root[tile->id]; u8 pt_mask = (range->tile_present & ~range->tile_invalidated); - xe_svm_assert_in_notifier(vm); + /* + * Locking rules: + * + * - notifier_lock (write): full protection against page table changes + * and MMU notifier invalidations. + * + * - notifier_lock (read) + vm_lock (write): combined protection against + * invalidations and concurrent page table modifications. (e.g., madvise) + * + */ + lockdep_assert(lockdep_is_held_type(&vm->svm.gpusvm.notifier_lock, 0) || + (lockdep_is_held_type(&vm->svm.gpusvm.notifier_lock, 1) && + lockdep_is_held_type(&vm->lock, 0))); if (!(pt_mask & BIT(tile->id))) return false; @@ -1033,7 +1053,7 @@ static void xe_pt_commit_locks_assert(struct xe_vma *vma) xe_pt_commit_prepare_locks_assert(vma); if (xe_vma_is_userptr(vma)) - lockdep_assert_held_read(&vm->userptr.notifier_lock); + xe_svm_assert_held_read(vm); } static void xe_pt_commit(struct xe_vma *vma, @@ -1261,6 +1281,8 @@ static int op_add_deps(struct xe_vm *vm, struct xe_vma_op *op, } static int xe_pt_vm_dependencies(struct xe_sched_job *job, + struct xe_tlb_inval_job *ijob, + struct xe_tlb_inval_job *mjob, struct xe_vm *vm, struct xe_vma_ops *vops, struct xe_vm_pgtable_update_ops *pt_update_ops, @@ -1328,6 +1350,20 @@ static int xe_pt_vm_dependencies(struct xe_sched_job *job, for (i = 0; job && !err && i < vops->num_syncs; i++) err = xe_sync_entry_add_deps(&vops->syncs[i], job); + if (job) { + if (ijob) { + err = xe_tlb_inval_job_alloc_dep(ijob); + if (err) + return err; + } + + if (mjob) { + err = xe_tlb_inval_job_alloc_dep(mjob); + if (err) + return err; + } + } + return err; } @@ -1339,10 +1375,12 @@ static int xe_pt_pre_commit(struct xe_migrate_pt_update *pt_update) struct xe_vm_pgtable_update_ops *pt_update_ops = &vops->pt_update_ops[pt_update->tile_id]; - return xe_pt_vm_dependencies(pt_update->job, vm, pt_update->vops, + return xe_pt_vm_dependencies(pt_update->job, pt_update->ijob, + pt_update->mjob, vm, pt_update->vops, pt_update_ops, rftree); } +#if IS_ENABLED(CONFIG_DRM_GPUSVM) #ifdef CONFIG_DRM_XE_USERPTR_INVAL_INJECT static bool xe_pt_userptr_inject_eagain(struct xe_userptr_vma *uvma) @@ -1373,7 +1411,7 @@ static int vma_check_userptr(struct xe_vm *vm, struct xe_vma *vma, struct xe_userptr_vma *uvma; unsigned long notifier_seq; - lockdep_assert_held_read(&vm->userptr.notifier_lock); + xe_svm_assert_held_read(vm); if (!xe_vma_is_userptr(vma)) return 0; @@ -1382,7 +1420,7 @@ static int vma_check_userptr(struct 
xe_vm *vm, struct xe_vma *vma, if (xe_pt_userptr_inject_eagain(uvma)) xe_vma_userptr_force_invalidate(uvma); - notifier_seq = uvma->userptr.notifier_seq; + notifier_seq = uvma->userptr.pages.notifier_seq; if (!mmu_interval_read_retry(&uvma->userptr.notifier, notifier_seq)) @@ -1398,12 +1436,12 @@ static int vma_check_userptr(struct xe_vm *vm, struct xe_vma *vma, return 0; } -static int op_check_userptr(struct xe_vm *vm, struct xe_vma_op *op, - struct xe_vm_pgtable_update_ops *pt_update) +static int op_check_svm_userptr(struct xe_vm *vm, struct xe_vma_op *op, + struct xe_vm_pgtable_update_ops *pt_update) { int err = 0; - lockdep_assert_held_read(&vm->userptr.notifier_lock); + xe_svm_assert_held_read(vm); switch (op->base.op) { case DRM_GPUVA_OP_MAP: @@ -1421,64 +1459,10 @@ static int op_check_userptr(struct xe_vm *vm, struct xe_vma_op *op, case DRM_GPUVA_OP_UNMAP: break; case DRM_GPUVA_OP_PREFETCH: - err = vma_check_userptr(vm, gpuva_to_vma(op->base.prefetch.va), - pt_update); - break; - default: - drm_warn(&vm->xe->drm, "NOT POSSIBLE"); - } - - return err; -} - -static int xe_pt_userptr_pre_commit(struct xe_migrate_pt_update *pt_update) -{ - struct xe_vm *vm = pt_update->vops->vm; - struct xe_vma_ops *vops = pt_update->vops; - struct xe_vm_pgtable_update_ops *pt_update_ops = - &vops->pt_update_ops[pt_update->tile_id]; - struct xe_vma_op *op; - int err; - - err = xe_pt_pre_commit(pt_update); - if (err) - return err; - - down_read(&vm->userptr.notifier_lock); - - list_for_each_entry(op, &vops->list, link) { - err = op_check_userptr(vm, op, pt_update_ops); - if (err) { - up_read(&vm->userptr.notifier_lock); - break; - } - } - - return err; -} - -#if IS_ENABLED(CONFIG_DRM_XE_GPUSVM) -static int xe_pt_svm_pre_commit(struct xe_migrate_pt_update *pt_update) -{ - struct xe_vm *vm = pt_update->vops->vm; - struct xe_vma_ops *vops = pt_update->vops; - struct xe_vma_op *op; - unsigned long i; - int err; - - err = xe_pt_pre_commit(pt_update); - if (err) - return err; - - xe_svm_notifier_lock(vm); - - list_for_each_entry(op, &vops->list, link) { - struct xe_svm_range *range = NULL; - - if (op->subop == XE_VMA_SUBOP_UNMAP_RANGE) - continue; + if (xe_vma_is_cpu_addr_mirror(gpuva_to_vma(op->base.prefetch.va))) { + struct xe_svm_range *range = op->map_range.range; + unsigned long i; - if (op->base.op == DRM_GPUVA_OP_PREFETCH) { xe_assert(vm->xe, xe_vma_is_cpu_addr_mirror(gpuva_to_vma(op->base.prefetch.va))); xa_for_each(&op->prefetch_range.range, i, range) { @@ -1486,97 +1470,62 @@ static int xe_pt_svm_pre_commit(struct xe_migrate_pt_update *pt_update) if (!xe_svm_range_pages_valid(range)) { xe_svm_range_debug(range, "PRE-COMMIT - RETRY"); - xe_svm_notifier_unlock(vm); return -ENODATA; } } } else { + err = vma_check_userptr(vm, gpuva_to_vma(op->base.prefetch.va), pt_update); + } + break; +#if IS_ENABLED(CONFIG_DRM_XE_GPUSVM) + case DRM_GPUVA_OP_DRIVER: + if (op->subop == XE_VMA_SUBOP_MAP_RANGE) { + struct xe_svm_range *range = op->map_range.range; + xe_assert(vm->xe, xe_vma_is_cpu_addr_mirror(op->map_range.vma)); - xe_assert(vm->xe, op->subop == XE_VMA_SUBOP_MAP_RANGE); - range = op->map_range.range; xe_svm_range_debug(range, "PRE-COMMIT"); if (!xe_svm_range_pages_valid(range)) { xe_svm_range_debug(range, "PRE-COMMIT - RETRY"); - xe_svm_notifier_unlock(vm); return -EAGAIN; } } - } - - return 0; -} + break; #endif - -struct invalidation_fence { - struct xe_gt_tlb_invalidation_fence base; - struct xe_gt *gt; - struct dma_fence *fence; - struct dma_fence_cb cb; - struct work_struct work; - u64 start; - u64 
end; - u32 asid; -}; - -static void invalidation_fence_cb(struct dma_fence *fence, - struct dma_fence_cb *cb) -{ - struct invalidation_fence *ifence = - container_of(cb, struct invalidation_fence, cb); - struct xe_device *xe = gt_to_xe(ifence->gt); - - trace_xe_gt_tlb_invalidation_fence_cb(xe, &ifence->base); - if (!ifence->fence->error) { - queue_work(system_wq, &ifence->work); - } else { - ifence->base.base.error = ifence->fence->error; - xe_gt_tlb_invalidation_fence_signal(&ifence->base); + default: + drm_warn(&vm->xe->drm, "NOT POSSIBLE"); } - dma_fence_put(ifence->fence); -} -static void invalidation_fence_work_func(struct work_struct *w) -{ - struct invalidation_fence *ifence = - container_of(w, struct invalidation_fence, work); - struct xe_device *xe = gt_to_xe(ifence->gt); - - trace_xe_gt_tlb_invalidation_fence_work_func(xe, &ifence->base); - xe_gt_tlb_invalidation_range(ifence->gt, &ifence->base, ifence->start, - ifence->end, ifence->asid); + return err; } -static void invalidation_fence_init(struct xe_gt *gt, - struct invalidation_fence *ifence, - struct dma_fence *fence, - u64 start, u64 end, u32 asid) +static int xe_pt_svm_userptr_pre_commit(struct xe_migrate_pt_update *pt_update) { - int ret; - - trace_xe_gt_tlb_invalidation_fence_create(gt_to_xe(gt), &ifence->base); + struct xe_vm *vm = pt_update->vops->vm; + struct xe_vma_ops *vops = pt_update->vops; + struct xe_vm_pgtable_update_ops *pt_update_ops = + &vops->pt_update_ops[pt_update->tile_id]; + struct xe_vma_op *op; + int err; - xe_gt_tlb_invalidation_fence_init(gt, &ifence->base, false); + err = xe_pt_pre_commit(pt_update); + if (err) + return err; - ifence->fence = fence; - ifence->gt = gt; - ifence->start = start; - ifence->end = end; - ifence->asid = asid; + xe_svm_notifier_lock(vm); - INIT_WORK(&ifence->work, invalidation_fence_work_func); - ret = dma_fence_add_callback(fence, &ifence->cb, invalidation_fence_cb); - if (ret == -ENOENT) { - dma_fence_put(ifence->fence); /* Usually dropped in CB */ - invalidation_fence_work_func(&ifence->work); - } else if (ret) { - dma_fence_put(&ifence->base.base); /* Caller ref */ - dma_fence_put(&ifence->base.base); /* Creation ref */ + list_for_each_entry(op, &vops->list, link) { + err = op_check_svm_userptr(vm, op, pt_update_ops); + if (err) { + xe_svm_notifier_unlock(vm); + break; + } } - xe_gt_assert(gt, !ret || ret == -ENOENT); + return err; } +#endif struct xe_pt_stage_unbind_walk { /** @base: The pagewalk base-class. 
*/ @@ -1879,7 +1828,7 @@ static int bind_op_prepare(struct xe_vm *vm, struct xe_tile *tile, xe_vma_start(vma), xe_vma_end(vma)); ++pt_update_ops->current_op; - pt_update_ops->needs_userptr_lock |= xe_vma_is_userptr(vma); + pt_update_ops->needs_svm_lock |= xe_vma_is_userptr(vma); /* * If rebind, we have to invalidate TLB on !LR vms to invalidate @@ -1987,7 +1936,7 @@ static int unbind_op_prepare(struct xe_tile *tile, xe_pt_update_ops_rfence_interval(pt_update_ops, xe_vma_start(vma), xe_vma_end(vma)); ++pt_update_ops->current_op; - pt_update_ops->needs_userptr_lock |= xe_vma_is_userptr(vma); + pt_update_ops->needs_svm_lock |= xe_vma_is_userptr(vma); pt_update_ops->needs_invalidation = true; xe_pt_commit_prepare_unbind(vma, pt_op->entries, pt_op->num_entries); @@ -2073,7 +2022,7 @@ static int op_prepare(struct xe_vm *vm, case DRM_GPUVA_OP_MAP: if ((!op->map.immediate && xe_vm_in_fault_mode(vm) && !op->map.invalidate_on_bind) || - op->map.is_cpu_addr_mirror) + (op->map.vma_flags & XE_VMA_SYSTEM_ALLOCATOR)) break; err = bind_op_prepare(vm, tile, pt_update_ops, op->map.vma, @@ -2235,7 +2184,7 @@ static void bind_op_commit(struct xe_vm *vm, struct xe_tile *tile, vma->tile_invalidated & ~BIT(tile->id)); vma->tile_staged &= ~BIT(tile->id); if (xe_vma_is_userptr(vma)) { - lockdep_assert_held_read(&vm->userptr.notifier_lock); + xe_svm_assert_held_read(vm); to_userptr_vma(vma)->userptr.initial_bind = true; } @@ -2271,7 +2220,7 @@ static void unbind_op_commit(struct xe_vm *vm, struct xe_tile *tile, if (!vma->tile_present) { list_del_init(&vma->combined_links.rebind); if (xe_vma_is_userptr(vma)) { - lockdep_assert_held_read(&vm->userptr.notifier_lock); + xe_svm_assert_held_read(vm); spin_lock(&vm->userptr.invalidated_lock); list_del_init(&to_userptr_vma(vma)->userptr.invalidate_link); @@ -2303,7 +2252,7 @@ static void op_commit(struct xe_vm *vm, switch (op->base.op) { case DRM_GPUVA_OP_MAP: if ((!op->map.immediate && xe_vm_in_fault_mode(vm)) || - op->map.is_cpu_addr_mirror) + (op->map.vma_flags & XE_VMA_SYSTEM_ALLOCATOR)) break; bind_op_commit(vm, tile, pt_update_ops, op->map.vma, fence, @@ -2374,22 +2323,25 @@ static const struct xe_migrate_pt_update_ops migrate_ops = { .pre_commit = xe_pt_pre_commit, }; -static const struct xe_migrate_pt_update_ops userptr_migrate_ops = { +#if IS_ENABLED(CONFIG_DRM_GPUSVM) +static const struct xe_migrate_pt_update_ops svm_userptr_migrate_ops = { .populate = xe_vm_populate_pgtable, .clear = xe_migrate_clear_pgtable_callback, - .pre_commit = xe_pt_userptr_pre_commit, -}; - -#if IS_ENABLED(CONFIG_DRM_XE_GPUSVM) -static const struct xe_migrate_pt_update_ops svm_migrate_ops = { - .populate = xe_vm_populate_pgtable, - .clear = xe_migrate_clear_pgtable_callback, - .pre_commit = xe_pt_svm_pre_commit, + .pre_commit = xe_pt_svm_userptr_pre_commit, }; #else -static const struct xe_migrate_pt_update_ops svm_migrate_ops; +static const struct xe_migrate_pt_update_ops svm_userptr_migrate_ops; #endif +static struct xe_dep_scheduler *to_dep_scheduler(struct xe_exec_queue *q, + struct xe_gt *gt) +{ + if (xe_gt_is_media_type(gt)) + return q->tlb_inval[XE_EXEC_QUEUE_TLB_INVAL_MEDIA_GT].dep_scheduler; + + return q->tlb_inval[XE_EXEC_QUEUE_TLB_INVAL_PRIMARY_GT].dep_scheduler; +} + /** * xe_pt_update_ops_run() - Run PT update operations * @tile: Tile of PT update operations @@ -2407,8 +2359,8 @@ xe_pt_update_ops_run(struct xe_tile *tile, struct xe_vma_ops *vops) struct xe_vm *vm = vops->vm; struct xe_vm_pgtable_update_ops *pt_update_ops = &vops->pt_update_ops[tile->id]; - struct dma_fence 
*fence; - struct invalidation_fence *ifence = NULL, *mfence = NULL; + struct dma_fence *fence, *ifence, *mfence; + struct xe_tlb_inval_job *ijob = NULL, *mjob = NULL; struct dma_fence **fences = NULL; struct dma_fence_array *cf = NULL; struct xe_range_fence *rfence; @@ -2416,9 +2368,7 @@ xe_pt_update_ops_run(struct xe_tile *tile, struct xe_vma_ops *vops) int err = 0, i; struct xe_migrate_pt_update update = { .ops = pt_update_ops->needs_svm_lock ? - &svm_migrate_ops : - pt_update_ops->needs_userptr_lock ? - &userptr_migrate_ops : + &svm_userptr_migrate_ops : &migrate_ops, .vops = vops, .tile_id = tile->id, @@ -2440,26 +2390,45 @@ xe_pt_update_ops_run(struct xe_tile *tile, struct xe_vma_ops *vops) #endif if (pt_update_ops->needs_invalidation) { - ifence = kzalloc(sizeof(*ifence), GFP_KERNEL); - if (!ifence) { - err = -ENOMEM; + struct xe_exec_queue *q = pt_update_ops->q; + struct xe_dep_scheduler *dep_scheduler = + to_dep_scheduler(q, tile->primary_gt); + + ijob = xe_tlb_inval_job_create(q, &tile->primary_gt->tlb_inval, + dep_scheduler, + pt_update_ops->start, + pt_update_ops->last, + vm->usm.asid); + if (IS_ERR(ijob)) { + err = PTR_ERR(ijob); goto kill_vm_tile1; } + update.ijob = ijob; + if (tile->media_gt) { - mfence = kzalloc(sizeof(*ifence), GFP_KERNEL); - if (!mfence) { - err = -ENOMEM; - goto free_ifence; + dep_scheduler = to_dep_scheduler(q, tile->media_gt); + + mjob = xe_tlb_inval_job_create(q, + &tile->media_gt->tlb_inval, + dep_scheduler, + pt_update_ops->start, + pt_update_ops->last, + vm->usm.asid); + if (IS_ERR(mjob)) { + err = PTR_ERR(mjob); + goto free_ijob; } + update.mjob = mjob; + fences = kmalloc_array(2, sizeof(*fences), GFP_KERNEL); if (!fences) { err = -ENOMEM; - goto free_ifence; + goto free_ijob; } cf = dma_fence_array_alloc(2); if (!cf) { err = -ENOMEM; - goto free_ifence; + goto free_ijob; } } } @@ -2467,7 +2436,7 @@ xe_pt_update_ops_run(struct xe_tile *tile, struct xe_vma_ops *vops) rfence = kzalloc(sizeof(*rfence), GFP_KERNEL); if (!rfence) { err = -ENOMEM; - goto free_ifence; + goto free_ijob; } fence = xe_migrate_update_pgtables(tile->migrate, &update); @@ -2491,30 +2460,31 @@ xe_pt_update_ops_run(struct xe_tile *tile, struct xe_vma_ops *vops) pt_update_ops->last, fence)) dma_fence_wait(fence, false); - /* tlb invalidation must be done before signaling rebind */ - if (ifence) { - if (mfence) - dma_fence_get(fence); - invalidation_fence_init(tile->primary_gt, ifence, fence, - pt_update_ops->start, - pt_update_ops->last, vm->usm.asid); - if (mfence) { - invalidation_fence_init(tile->media_gt, mfence, fence, - pt_update_ops->start, - pt_update_ops->last, vm->usm.asid); - fences[0] = &ifence->base.base; - fences[1] = &mfence->base.base; + /* tlb invalidation must be done before signaling unbind/rebind */ + if (ijob) { + struct dma_fence *__fence; + + ifence = xe_tlb_inval_job_push(ijob, tile->migrate, fence); + __fence = ifence; + + if (mjob) { + fences[0] = ifence; + mfence = xe_tlb_inval_job_push(mjob, tile->migrate, + fence); + fences[1] = mfence; + dma_fence_array_init(cf, 2, fences, vm->composite_fence_ctx, vm->composite_fence_seqno++, false); - fence = &cf->base; - } else { - fence = &ifence->base.base; + __fence = &cf->base; } + + dma_fence_put(fence); + fence = __fence; } - if (!mfence) { + if (!mjob) { dma_resv_add_fence(xe_vm_resv(vm), fence, pt_update_ops->wait_vm_bookkeep ? 
DMA_RESV_USAGE_KERNEL : @@ -2523,35 +2493,36 @@ xe_pt_update_ops_run(struct xe_tile *tile, struct xe_vma_ops *vops) list_for_each_entry(op, &vops->list, link) op_commit(vops->vm, tile, pt_update_ops, op, fence, NULL); } else { - dma_resv_add_fence(xe_vm_resv(vm), &ifence->base.base, + dma_resv_add_fence(xe_vm_resv(vm), ifence, pt_update_ops->wait_vm_bookkeep ? DMA_RESV_USAGE_KERNEL : DMA_RESV_USAGE_BOOKKEEP); - dma_resv_add_fence(xe_vm_resv(vm), &mfence->base.base, + dma_resv_add_fence(xe_vm_resv(vm), mfence, pt_update_ops->wait_vm_bookkeep ? DMA_RESV_USAGE_KERNEL : DMA_RESV_USAGE_BOOKKEEP); list_for_each_entry(op, &vops->list, link) - op_commit(vops->vm, tile, pt_update_ops, op, - &ifence->base.base, &mfence->base.base); + op_commit(vops->vm, tile, pt_update_ops, op, ifence, + mfence); } if (pt_update_ops->needs_svm_lock) xe_svm_notifier_unlock(vm); - if (pt_update_ops->needs_userptr_lock) - up_read(&vm->userptr.notifier_lock); + + xe_tlb_inval_job_put(mjob); + xe_tlb_inval_job_put(ijob); return fence; free_rfence: kfree(rfence); -free_ifence: +free_ijob: kfree(cf); kfree(fences); - kfree(mfence); - kfree(ifence); + xe_tlb_inval_job_put(mjob); + xe_tlb_inval_job_put(ijob); kill_vm_tile1: if (err != -EAGAIN && err != -ENODATA && tile->id) xe_vm_kill(vops->vm, false); diff --git a/drivers/gpu/drm/xe/xe_pt.h b/drivers/gpu/drm/xe/xe_pt.h index 5ecf003d513c..4daeebaab5a1 100644 --- a/drivers/gpu/drm/xe/xe_pt.h +++ b/drivers/gpu/drm/xe/xe_pt.h @@ -10,6 +10,7 @@ #include "xe_pt_types.h" struct dma_fence; +struct drm_exec; struct xe_bo; struct xe_device; struct xe_exec_queue; @@ -29,7 +30,7 @@ struct xe_vma_ops; unsigned int xe_pt_shift(unsigned int level); struct xe_pt *xe_pt_create(struct xe_vm *vm, struct xe_tile *tile, - unsigned int level); + unsigned int level, struct drm_exec *exec); void xe_pt_populate_empty(struct xe_tile *tile, struct xe_vm *vm, struct xe_pt *pt); diff --git a/drivers/gpu/drm/xe/xe_pt_types.h b/drivers/gpu/drm/xe/xe_pt_types.h index 69eab6f37cfe..881f01e14db8 100644 --- a/drivers/gpu/drm/xe/xe_pt_types.h +++ b/drivers/gpu/drm/xe/xe_pt_types.h @@ -45,8 +45,7 @@ struct xe_pt_ops { u64 (*pte_encode_addr)(struct xe_device *xe, u64 addr, u16 pat_index, u32 pt_level, bool devmem, u64 flags); - u64 (*pde_encode_bo)(struct xe_bo *bo, u64 bo_offset, - u16 pat_index); + u64 (*pde_encode_bo)(struct xe_bo *bo, u64 bo_offset); }; struct xe_pt_entry { @@ -106,8 +105,6 @@ struct xe_vm_pgtable_update_ops { u32 current_op; /** @needs_svm_lock: Needs SVM lock */ bool needs_svm_lock; - /** @needs_userptr_lock: Needs userptr lock */ - bool needs_userptr_lock; /** @needs_invalidation: Needs invalidation */ bool needs_invalidation; /** diff --git a/drivers/gpu/drm/xe/xe_pxp.c b/drivers/gpu/drm/xe/xe_pxp.c index 3d62008c99f1..bdbdbbf6a678 100644 --- a/drivers/gpu/drm/xe/xe_pxp.c +++ b/drivers/gpu/drm/xe/xe_pxp.c @@ -688,6 +688,7 @@ start: return ret; } +ALLOW_ERROR_INJECTION(xe_pxp_exec_queue_add, ERRNO); static void __pxp_exec_queue_remove(struct xe_pxp *pxp, struct xe_exec_queue *q, bool lock) { diff --git a/drivers/gpu/drm/xe/xe_pxp_submit.c b/drivers/gpu/drm/xe/xe_pxp_submit.c index d92ec0f515b0..e60526e30030 100644 --- a/drivers/gpu/drm/xe/xe_pxp_submit.c +++ b/drivers/gpu/drm/xe/xe_pxp_submit.c @@ -54,8 +54,9 @@ static int allocate_vcs_execution_resources(struct xe_pxp *pxp) * Each termination is 16 DWORDS, so 4K is enough to contain a * termination for each sessions. 
*/ - bo = xe_bo_create_pin_map(xe, tile, NULL, SZ_4K, ttm_bo_type_kernel, - XE_BO_FLAG_SYSTEM | XE_BO_FLAG_PINNED | XE_BO_FLAG_GGTT); + bo = xe_bo_create_pin_map_novm(xe, tile, SZ_4K, ttm_bo_type_kernel, + XE_BO_FLAG_SYSTEM | XE_BO_FLAG_PINNED | XE_BO_FLAG_GGTT, + false); if (IS_ERR(bo)) { err = PTR_ERR(bo); goto out_queue; @@ -87,7 +88,9 @@ static int allocate_gsc_client_resources(struct xe_gt *gt, { struct xe_tile *tile = gt_to_tile(gt); struct xe_device *xe = tile_to_xe(tile); + struct xe_validation_ctx ctx; struct xe_hw_engine *hwe; + struct drm_exec exec; struct xe_vm *vm; struct xe_bo *bo; struct xe_exec_queue *q; @@ -101,20 +104,31 @@ static int allocate_gsc_client_resources(struct xe_gt *gt, xe_assert(xe, hwe); /* PXP instructions must be issued from PPGTT */ - vm = xe_vm_create(xe, XE_VM_FLAG_GSC); + vm = xe_vm_create(xe, XE_VM_FLAG_GSC, NULL); if (IS_ERR(vm)) return PTR_ERR(vm); /* We allocate a single object for the batch and the in/out memory */ - xe_vm_lock(vm, false); - bo = xe_bo_create_pin_map(xe, tile, vm, PXP_BB_SIZE + inout_size * 2, - ttm_bo_type_kernel, - XE_BO_FLAG_SYSTEM | XE_BO_FLAG_PINNED | XE_BO_FLAG_NEEDS_UC); - xe_vm_unlock(vm); - if (IS_ERR(bo)) { - err = PTR_ERR(bo); - goto vm_out; + + xe_validation_guard(&ctx, &xe->val, &exec, (struct xe_val_flags){}, err) { + err = xe_vm_drm_exec_lock(vm, &exec); + drm_exec_retry_on_contention(&exec); + if (err) + break; + + bo = xe_bo_create_pin_map(xe, tile, vm, PXP_BB_SIZE + inout_size * 2, + ttm_bo_type_kernel, + XE_BO_FLAG_SYSTEM | XE_BO_FLAG_PINNED | + XE_BO_FLAG_NEEDS_UC, &exec); + drm_exec_retry_on_contention(&exec); + if (IS_ERR(bo)) { + err = PTR_ERR(bo); + xe_validation_retry_on_oom(&ctx, &err); + break; + } } + if (err) + goto vm_out; fence = xe_vm_bind_kernel_bo(vm, bo, NULL, 0, XE_CACHE_WB); if (IS_ERR(fence)) { diff --git a/drivers/gpu/drm/xe/xe_query.c b/drivers/gpu/drm/xe/xe_query.c index e8e1743dcb1e..2e9ff33ed2fe 100644 --- a/drivers/gpu/drm/xe/xe_query.c +++ b/drivers/gpu/drm/xe/xe_query.c @@ -21,12 +21,14 @@ #include "xe_force_wake.h" #include "xe_ggtt.h" #include "xe_gt.h" +#include "xe_gt_topology.h" #include "xe_guc_hwconfig.h" #include "xe_macros.h" #include "xe_mmio.h" #include "xe_oa.h" #include "xe_pxp.h" #include "xe_ttm_vram_mgr.h" +#include "xe_vram_types.h" #include "xe_wa.h" static const u16 xe_to_user_engine_class[] = { @@ -141,7 +143,7 @@ query_engine_cycles(struct xe_device *xe, return -EINVAL; eci = &resp.eci; - if (eci->gt_id >= XE_MAX_GT_PER_TILE) + if (eci->gt_id >= xe->info.max_gt_per_tile) return -EINVAL; gt = xe_device_get_gt(xe, eci->gt_id); @@ -274,8 +276,7 @@ static int query_mem_regions(struct xe_device *xe, mem_regions->mem_regions[0].instance = 0; mem_regions->mem_regions[0].min_page_size = PAGE_SIZE; mem_regions->mem_regions[0].total_size = man->size << PAGE_SHIFT; - if (perfmon_capable()) - mem_regions->mem_regions[0].used = ttm_resource_manager_usage(man); + mem_regions->mem_regions[0].used = ttm_resource_manager_usage(man); mem_regions->num_mem_regions = 1; for (i = XE_PL_VRAM0; i <= XE_PL_VRAM1; ++i) { @@ -291,13 +292,11 @@ static int query_mem_regions(struct xe_device *xe, mem_regions->mem_regions[mem_regions->num_mem_regions].total_size = man->size; - if (perfmon_capable()) { - xe_ttm_vram_get_used(man, - &mem_regions->mem_regions - [mem_regions->num_mem_regions].used, - &mem_regions->mem_regions - [mem_regions->num_mem_regions].cpu_visible_used); - } + xe_ttm_vram_get_used(man, + &mem_regions->mem_regions + [mem_regions->num_mem_regions].used, + 
&mem_regions->mem_regions + [mem_regions->num_mem_regions].cpu_visible_used); mem_regions->mem_regions[mem_regions->num_mem_regions].cpu_visible_size = xe_ttm_vram_get_cpu_visible_size(man); @@ -337,7 +336,7 @@ static int query_config(struct xe_device *xe, struct drm_xe_device_query *query) config->num_params = num_params; config->info[DRM_XE_QUERY_CONFIG_REV_AND_DEVICE_ID] = xe->info.devid | (xe->info.revid << 16); - if (xe_device_get_root_tile(xe)->mem.vram.usable_size) + if (xe->mem.vram) config->info[DRM_XE_QUERY_CONFIG_FLAGS] |= DRM_XE_QUERY_CONFIG_FLAG_HAS_VRAM; if (xe->info.has_usm && IS_ENABLED(CONFIG_DRM_XE_GPUSVM)) @@ -368,6 +367,7 @@ static int query_gt_list(struct xe_device *xe, struct drm_xe_device_query *query struct drm_xe_query_gt_list __user *query_ptr = u64_to_user_ptr(query->data); struct drm_xe_query_gt_list *gt_list; + int iter = 0; u8 id; if (query->size == 0) { @@ -385,12 +385,12 @@ static int query_gt_list(struct xe_device *xe, struct drm_xe_device_query *query for_each_gt(gt, xe, id) { if (xe_gt_is_media_type(gt)) - gt_list->gt_list[id].type = DRM_XE_QUERY_GT_TYPE_MEDIA; + gt_list->gt_list[iter].type = DRM_XE_QUERY_GT_TYPE_MEDIA; else - gt_list->gt_list[id].type = DRM_XE_QUERY_GT_TYPE_MAIN; - gt_list->gt_list[id].tile_id = gt_to_tile(gt)->id; - gt_list->gt_list[id].gt_id = gt->info.id; - gt_list->gt_list[id].reference_clock = gt->info.reference_clock; + gt_list->gt_list[iter].type = DRM_XE_QUERY_GT_TYPE_MAIN; + gt_list->gt_list[iter].tile_id = gt_to_tile(gt)->id; + gt_list->gt_list[iter].gt_id = gt->info.id; + gt_list->gt_list[iter].reference_clock = gt->info.reference_clock; /* * The mem_regions indexes in the mask below need to * directly identify the struct @@ -406,19 +406,21 @@ static int query_gt_list(struct xe_device *xe, struct drm_xe_device_query *query * assumption. */ if (!IS_DGFX(xe)) - gt_list->gt_list[id].near_mem_regions = 0x1; + gt_list->gt_list[iter].near_mem_regions = 0x1; else - gt_list->gt_list[id].near_mem_regions = - BIT(gt_to_tile(gt)->id) << 1; - gt_list->gt_list[id].far_mem_regions = xe->info.mem_region_mask ^ - gt_list->gt_list[id].near_mem_regions; + gt_list->gt_list[iter].near_mem_regions = + BIT(gt_to_tile(gt)->mem.vram->id) << 1; + gt_list->gt_list[iter].far_mem_regions = xe->info.mem_region_mask ^ + gt_list->gt_list[iter].near_mem_regions; - gt_list->gt_list[id].ip_ver_major = + gt_list->gt_list[iter].ip_ver_major = REG_FIELD_GET(GMD_ID_ARCH_MASK, gt->info.gmdid); - gt_list->gt_list[id].ip_ver_minor = + gt_list->gt_list[iter].ip_ver_minor = REG_FIELD_GET(GMD_ID_RELEASE_MASK, gt->info.gmdid); - gt_list->gt_list[id].ip_ver_rev = + gt_list->gt_list[iter].ip_ver_rev = REG_FIELD_GET(GMD_ID_REVID, gt->info.gmdid); + + iter++; } if (copy_to_user(query_ptr, gt_list, size)) { @@ -473,7 +475,7 @@ static size_t calc_topo_query_size(struct xe_device *xe) sizeof_field(struct xe_gt, fuse_topo.eu_mask_per_dss); /* L3bank mask may not be available for some GTs */ - if (!XE_WA(gt, no_media_l3)) + if (xe_gt_topology_report_l3(gt)) query_size += sizeof(struct drm_xe_query_topology_mask) + sizeof_field(struct xe_gt, fuse_topo.l3_bank_mask); } @@ -536,7 +538,7 @@ static int query_gt_topology(struct xe_device *xe, * mask, then it's better to omit L3 from the query rather than * reporting bogus or zeroed information to userspace. 
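The query_gt_list() hunk above stops indexing the output array by the GT id and instead advances a separate iter counter, so the returned entries stay densely packed even when GT ids are sparse. A generic sketch of that pattern (illustrative names, not the driver's structures):

	/* Copy a sparse, id-indexed table into a dense, user-visible array. */
	struct entry { bool present; int payload; };

	static int pack_entries(const struct entry *src, int num_ids,
				struct entry *dst, int dst_len)
	{
		int iter = 0;

		for (int id = 0; id < num_ids; id++) {
			if (!src[id].present)
				continue;	/* hole in the id space */
			if (iter >= dst_len)
				return -1;
			dst[iter++] = src[id];	/* index by iter, not by id */
		}
		return iter;	/* count of contiguous valid entries */
	}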
*/ - if (!XE_WA(gt, no_media_l3)) { + if (xe_gt_topology_report_l3(gt)) { topo.type = DRM_XE_TOPO_L3_BANK; err = copy_mask(&query_ptr, &topo, gt->fuse_topo.l3_bank_mask, sizeof(gt->fuse_topo.l3_bank_mask)); @@ -745,10 +747,8 @@ static int query_eu_stall(struct xe_device *xe, u32 num_rates; int ret; - if (!xe_eu_stall_supported_on_platform(xe)) { - drm_dbg(&xe->drm, "EU stall monitoring is not supported on this platform\n"); + if (!xe_eu_stall_supported_on_platform(xe)) return -ENODEV; - } array_size = xe_eu_stall_get_sampling_rates(&num_rates, &rates); size = sizeof(struct drm_xe_query_eu_stall) + array_size; diff --git a/drivers/gpu/drm/xe/xe_res_cursor.h b/drivers/gpu/drm/xe/xe_res_cursor.h index d1a403cfb628..4e00008b7081 100644 --- a/drivers/gpu/drm/xe/xe_res_cursor.h +++ b/drivers/gpu/drm/xe/xe_res_cursor.h @@ -55,8 +55,8 @@ struct xe_res_cursor { u32 mem_type; /** @sgl: Scatterlist for cursor */ struct scatterlist *sgl; - /** @dma_addr: Current element in a struct drm_pagemap_device_addr array */ - const struct drm_pagemap_device_addr *dma_addr; + /** @dma_addr: Current element in a struct drm_pagemap_addr array */ + const struct drm_pagemap_addr *dma_addr; /** @mm: Buddy allocator for VRAM cursor */ struct drm_buddy *mm; /** @@ -170,7 +170,7 @@ static inline void __xe_res_sg_next(struct xe_res_cursor *cur) */ static inline void __xe_res_dma_next(struct xe_res_cursor *cur) { - const struct drm_pagemap_device_addr *addr = cur->dma_addr; + const struct drm_pagemap_addr *addr = cur->dma_addr; u64 start = cur->start; while (start >= cur->dma_seg_size) { @@ -222,14 +222,14 @@ static inline void xe_res_first_sg(const struct sg_table *sg, /** * xe_res_first_dma - initialize a xe_res_cursor with dma_addr array * - * @dma_addr: struct drm_pagemap_device_addr array to walk + * @dma_addr: struct drm_pagemap_addr array to walk * @start: Start of the range * @size: Size of the range * @cur: cursor object to initialize * * Start walking over the range of allocations between @start and @size. 
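The cursor initializer declared just below walks a struct drm_pagemap_addr array (the type renamed from drm_pagemap_device_addr in this series). A usage sketch, assuming the cursor's usual stepping helpers xe_res_dma()/xe_res_next() and its remaining/size members, which are not shown in this hunk:

	static void walk_dma_range(const struct drm_pagemap_addr *addrs,
				   u64 start, u64 size)
	{
		struct xe_res_cursor cur;

		xe_res_first_dma(addrs, start, size, &cur);
		while (cur.remaining) {
			u64 dma = xe_res_dma(&cur);	/* DMA address of the current chunk */
			u64 len = cur.size;		/* contiguous bytes in that chunk */

			/* ... program a copy/mapping for (dma, len) ... */
			xe_res_next(&cur, len);
		}
	}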
*/ -static inline void xe_res_first_dma(const struct drm_pagemap_device_addr *dma_addr, +static inline void xe_res_first_dma(const struct drm_pagemap_addr *dma_addr, u64 start, u64 size, struct xe_res_cursor *cur) { diff --git a/drivers/gpu/drm/xe/xe_ring_ops.c b/drivers/gpu/drm/xe/xe_ring_ops.c index bc1689db4cd7..d71837773d6c 100644 --- a/drivers/gpu/drm/xe/xe_ring_ops.c +++ b/drivers/gpu/drm/xe/xe_ring_ops.c @@ -110,13 +110,14 @@ static int emit_bb_start(u64 batch_addr, u32 ppgtt_flag, u32 *dw, int i) return i; } -static int emit_flush_invalidate(u32 *dw, int i) +static int emit_flush_invalidate(u32 addr, u32 val, u32 flush_flags, u32 *dw, int i) { - dw[i++] = MI_FLUSH_DW | MI_INVALIDATE_TLB | MI_FLUSH_DW_OP_STOREDW | - MI_FLUSH_IMM_DW | MI_FLUSH_DW_STORE_INDEX; - dw[i++] = LRC_PPHWSP_FLUSH_INVAL_SCRATCH_ADDR; - dw[i++] = 0; + dw[i++] = MI_FLUSH_DW | MI_FLUSH_DW_OP_STOREDW | + MI_FLUSH_IMM_DW | (flush_flags & MI_INVALIDATE_TLB) ?: 0; + + dw[i++] = addr | MI_FLUSH_DW_USE_GTT; dw[i++] = 0; + dw[i++] = val; return i; } @@ -178,7 +179,7 @@ static int emit_render_cache_flush(struct xe_sched_job *job, u32 *dw, int i) bool lacks_render = !(gt->info.engine_mask & XE_HW_ENGINE_RCS_MASK); u32 flags; - if (XE_WA(gt, 14016712196)) + if (XE_GT_WA(gt, 14016712196)) i = emit_pipe_control(dw, i, 0, PIPE_CONTROL_DEPTH_CACHE_FLUSH, LRC_PPHWSP_FLUSH_INVAL_SCRATCH_ADDR, 0); @@ -189,7 +190,7 @@ static int emit_render_cache_flush(struct xe_sched_job *job, u32 *dw, int i) PIPE_CONTROL_DC_FLUSH_ENABLE | PIPE_CONTROL_FLUSH_ENABLE); - if (XE_WA(gt, 1409600907)) + if (XE_GT_WA(gt, 1409600907)) flags |= PIPE_CONTROL_DEPTH_STALL; if (lacks_render) @@ -205,7 +206,7 @@ static int emit_pipe_control_to_ring_end(struct xe_hw_engine *hwe, u32 *dw, int if (hwe->class != XE_ENGINE_CLASS_RENDER) return i; - if (XE_WA(hwe->gt, 16020292621)) + if (XE_GT_WA(hwe->gt, 16020292621)) i = emit_pipe_control(dw, i, 0, PIPE_CONTROL_LRI_POST_SYNC, RING_NOPID(hwe->mmio_base).addr, 0); @@ -397,31 +398,26 @@ static void __emit_job_gen12_render_compute(struct xe_sched_job *job, static void emit_migration_job_gen12(struct xe_sched_job *job, struct xe_lrc *lrc, u32 seqno) { + u32 saddr = xe_lrc_start_seqno_ggtt_addr(lrc); u32 dw[MAX_JOB_SIZE_DW], i = 0; i = emit_copy_timestamp(lrc, dw, i); - i = emit_store_imm_ggtt(xe_lrc_start_seqno_ggtt_addr(lrc), - seqno, dw, i); + i = emit_store_imm_ggtt(saddr, seqno, dw, i); dw[i++] = MI_ARB_ON_OFF | MI_ARB_DISABLE; /* Enabled again below */ i = emit_bb_start(job->ptrs[0].batch_addr, BIT(8), dw, i); - if (!IS_SRIOV_VF(gt_to_xe(job->q->gt))) { - /* XXX: Do we need this? Leaving for now. 
*/ - dw[i++] = preparser_disable(true); - i = emit_flush_invalidate(dw, i); - dw[i++] = preparser_disable(false); - } + dw[i++] = preparser_disable(true); + i = emit_flush_invalidate(saddr, seqno, job->migrate_flush_flags, dw, i); + dw[i++] = preparser_disable(false); i = emit_bb_start(job->ptrs[1].batch_addr, BIT(8), dw, i); - dw[i++] = MI_FLUSH_DW | MI_INVALIDATE_TLB | job->migrate_flush_flags | - MI_FLUSH_DW_OP_STOREDW | MI_FLUSH_IMM_DW; - dw[i++] = xe_lrc_seqno_ggtt_addr(lrc) | MI_FLUSH_DW_USE_GTT; - dw[i++] = 0; - dw[i++] = seqno; /* value */ + i = emit_flush_imm_ggtt(xe_lrc_seqno_ggtt_addr(lrc), seqno, + job->migrate_flush_flags, + dw, i); i = emit_user_interrupt(dw, i); diff --git a/drivers/gpu/drm/xe/xe_rtp.c b/drivers/gpu/drm/xe/xe_rtp.c index 29e694bb1219..b5f430d59f80 100644 --- a/drivers/gpu/drm/xe/xe_rtp.c +++ b/drivers/gpu/drm/xe/xe_rtp.c @@ -9,6 +9,7 @@ #include <uapi/drm/xe_drm.h> +#include "xe_configfs.h" #include "xe_gt.h" #include "xe_gt_topology.h" #include "xe_macros.h" @@ -56,37 +57,61 @@ static bool rule_matches(const struct xe_device *xe, xe->info.subplatform == r->subplatform; break; case XE_RTP_MATCH_GRAPHICS_VERSION: + if (drm_WARN_ON(&xe->drm, !gt)) + return false; + match = xe->info.graphics_verx100 == r->ver_start && (!has_samedia(xe) || !xe_gt_is_media_type(gt)); break; case XE_RTP_MATCH_GRAPHICS_VERSION_RANGE: + if (drm_WARN_ON(&xe->drm, !gt)) + return false; + match = xe->info.graphics_verx100 >= r->ver_start && xe->info.graphics_verx100 <= r->ver_end && (!has_samedia(xe) || !xe_gt_is_media_type(gt)); break; case XE_RTP_MATCH_GRAPHICS_VERSION_ANY_GT: + if (drm_WARN_ON(&xe->drm, !gt)) + return false; + match = xe->info.graphics_verx100 == r->ver_start; break; case XE_RTP_MATCH_GRAPHICS_STEP: + if (drm_WARN_ON(&xe->drm, !gt)) + return false; + match = xe->info.step.graphics >= r->step_start && xe->info.step.graphics < r->step_end && (!has_samedia(xe) || !xe_gt_is_media_type(gt)); break; case XE_RTP_MATCH_MEDIA_VERSION: + if (drm_WARN_ON(&xe->drm, !gt)) + return false; + match = xe->info.media_verx100 == r->ver_start && (!has_samedia(xe) || xe_gt_is_media_type(gt)); break; case XE_RTP_MATCH_MEDIA_VERSION_RANGE: + if (drm_WARN_ON(&xe->drm, !gt)) + return false; + match = xe->info.media_verx100 >= r->ver_start && xe->info.media_verx100 <= r->ver_end && (!has_samedia(xe) || xe_gt_is_media_type(gt)); break; case XE_RTP_MATCH_MEDIA_STEP: + if (drm_WARN_ON(&xe->drm, !gt)) + return false; + match = xe->info.step.media >= r->step_start && xe->info.step.media < r->step_end && (!has_samedia(xe) || xe_gt_is_media_type(gt)); break; case XE_RTP_MATCH_MEDIA_VERSION_ANY_GT: + if (drm_WARN_ON(&xe->drm, !gt)) + return false; + match = xe->info.media_verx100 == r->ver_start; break; case XE_RTP_MATCH_INTEGRATED: @@ -108,6 +133,9 @@ static bool rule_matches(const struct xe_device *xe, match = hwe->class != r->engine_class; break; case XE_RTP_MATCH_FUNC: + if (drm_WARN_ON(&xe->drm, !gt)) + return false; + match = r->match_func(gt, hwe); break; default: @@ -186,6 +214,11 @@ static void rtp_get_context(struct xe_rtp_process_ctx *ctx, struct xe_device **xe) { switch (ctx->type) { + case XE_RTP_PROCESS_TYPE_DEVICE: + *hwe = NULL; + *gt = NULL; + *xe = ctx->xe; + break; case XE_RTP_PROCESS_TYPE_GT: *hwe = NULL; *gt = ctx->gt; @@ -326,23 +359,20 @@ bool xe_rtp_match_first_render_or_compute(const struct xe_gt *gt, hwe->engine_id == __ffs(render_compute_mask); } -bool xe_rtp_match_first_gslice_fused_off(const struct xe_gt *gt, - const struct xe_hw_engine *hwe) +bool 
xe_rtp_match_not_sriov_vf(const struct xe_gt *gt, + const struct xe_hw_engine *hwe) { - unsigned int dss_per_gslice = 4; - unsigned int dss; - - if (drm_WARN(>_to_xe(gt)->drm, xe_dss_mask_empty(gt->fuse_topo.g_dss_mask), - "Checking gslice for platform without geometry pipeline\n")) - return false; - - dss = xe_dss_mask_group_ffs(gt->fuse_topo.g_dss_mask, 0, 0); - - return dss >= dss_per_gslice; + return !IS_SRIOV_VF(gt_to_xe(gt)); } -bool xe_rtp_match_not_sriov_vf(const struct xe_gt *gt, +bool xe_rtp_match_psmi_enabled(const struct xe_gt *gt, const struct xe_hw_engine *hwe) { - return !IS_SRIOV_VF(gt_to_xe(gt)); + return xe_configfs_get_psmi_enabled(to_pci_dev(gt_to_xe(gt)->drm.dev)); +} + +bool xe_rtp_match_gt_has_discontiguous_dss_groups(const struct xe_gt *gt, + const struct xe_hw_engine *hwe) +{ + return xe_gt_has_discontiguous_dss_groups(gt); } diff --git a/drivers/gpu/drm/xe/xe_rtp.h b/drivers/gpu/drm/xe/xe_rtp.h index 4fe736a11c42..ac12ddf6cde6 100644 --- a/drivers/gpu/drm/xe/xe_rtp.h +++ b/drivers/gpu/drm/xe/xe_rtp.h @@ -422,7 +422,8 @@ struct xe_reg_sr; #define XE_RTP_PROCESS_CTX_INITIALIZER(arg__) _Generic((arg__), \ struct xe_hw_engine * : (struct xe_rtp_process_ctx){ { (void *)(arg__) }, XE_RTP_PROCESS_TYPE_ENGINE }, \ - struct xe_gt * : (struct xe_rtp_process_ctx){ { (void *)(arg__) }, XE_RTP_PROCESS_TYPE_GT }) + struct xe_gt * : (struct xe_rtp_process_ctx){ { (void *)(arg__) }, XE_RTP_PROCESS_TYPE_GT }, \ + struct xe_device * : (struct xe_rtp_process_ctx){ { (void *)(arg__) }, XE_RTP_PROCESS_TYPE_DEVICE }) void xe_rtp_process_ctx_enable_active_tracking(struct xe_rtp_process_ctx *ctx, unsigned long *active_entries, @@ -466,17 +467,6 @@ bool xe_rtp_match_first_render_or_compute(const struct xe_gt *gt, const struct xe_hw_engine *hwe); /* - * xe_rtp_match_first_gslice_fused_off - Match when first gslice is fused off - * - * @gt: GT structure - * @hwe: Engine instance - * - * Returns: true if first gslice is fused off, false otherwise. 
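With the _Generic initializer above now accepting a struct xe_device *, RTP tables can be processed at device scope; rtp_get_context() then hands back NULL gt/hwe, and the new drm_WARN_ON(!gt) guards in rule_matches() make GT- or engine-scoped rules fail to match rather than dereference a NULL gt. A hedged sketch of a device-scoped caller — the entries table and the processing call are illustrative, only the initializer form comes from this patch:

	static void process_device_rtp(struct xe_device *xe)
	{
		struct xe_rtp_process_ctx ctx = XE_RTP_PROCESS_CTX_INITIALIZER(xe);

		/* 'device_entries' is a hypothetical table of device-scoped rules;
		 * any GT/engine-scoped rule in it would simply not match here. */
		xe_rtp_process(&ctx, device_entries);
	}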
- */ -bool xe_rtp_match_first_gslice_fused_off(const struct xe_gt *gt, - const struct xe_hw_engine *hwe); - -/* * xe_rtp_match_not_sriov_vf - Match when not on SR-IOV VF device * * @gt: GT structure @@ -487,4 +477,10 @@ bool xe_rtp_match_first_gslice_fused_off(const struct xe_gt *gt, bool xe_rtp_match_not_sriov_vf(const struct xe_gt *gt, const struct xe_hw_engine *hwe); +bool xe_rtp_match_psmi_enabled(const struct xe_gt *gt, + const struct xe_hw_engine *hwe); + +bool xe_rtp_match_gt_has_discontiguous_dss_groups(const struct xe_gt *gt, + const struct xe_hw_engine *hwe); + #endif diff --git a/drivers/gpu/drm/xe/xe_rtp_types.h b/drivers/gpu/drm/xe/xe_rtp_types.h index 1b76b947c706..f4cf30e298cf 100644 --- a/drivers/gpu/drm/xe/xe_rtp_types.h +++ b/drivers/gpu/drm/xe/xe_rtp_types.h @@ -110,12 +110,14 @@ struct xe_rtp_entry { }; enum xe_rtp_process_type { + XE_RTP_PROCESS_TYPE_DEVICE, XE_RTP_PROCESS_TYPE_GT, XE_RTP_PROCESS_TYPE_ENGINE, }; struct xe_rtp_process_ctx { union { + struct xe_device *xe; struct xe_gt *gt; struct xe_hw_engine *hwe; }; diff --git a/drivers/gpu/drm/xe/xe_sa.c b/drivers/gpu/drm/xe/xe_sa.c index 1d43e183ca21..fedd017d6dd3 100644 --- a/drivers/gpu/drm/xe/xe_sa.c +++ b/drivers/gpu/drm/xe/xe_sa.c @@ -69,7 +69,6 @@ struct xe_sa_manager *__xe_sa_bo_manager_init(struct xe_tile *tile, u32 size, u3 } sa_manager->bo = bo; sa_manager->is_iomem = bo->vmap.is_iomem; - sa_manager->gpu_addr = xe_bo_ggtt_addr(bo); if (bo->vmap.is_iomem) { sa_manager->cpu_ptr = kvzalloc(managed_size, GFP_KERNEL); diff --git a/drivers/gpu/drm/xe/xe_sa.h b/drivers/gpu/drm/xe/xe_sa.h index 1170ee5a81a8..99dbf0eea540 100644 --- a/drivers/gpu/drm/xe/xe_sa.h +++ b/drivers/gpu/drm/xe/xe_sa.h @@ -7,6 +7,8 @@ #include <linux/sizes.h> #include <linux/types.h> + +#include "xe_bo.h" #include "xe_sa_types.h" struct dma_fence; @@ -43,9 +45,20 @@ to_xe_sa_manager(struct drm_suballoc_manager *mng) return container_of(mng, struct xe_sa_manager, base); } +/** + * xe_sa_manager_gpu_addr - Retrieve GPU address of a back storage BO + * within suballocator. + * @sa_manager: the &xe_sa_manager struct instance + * Return: GGTT address of the back storage BO. 
+ */ +static inline u64 xe_sa_manager_gpu_addr(struct xe_sa_manager *sa_manager) +{ + return xe_bo_ggtt_addr(sa_manager->bo); +} + static inline u64 xe_sa_bo_gpu_addr(struct drm_suballoc *sa) { - return to_xe_sa_manager(sa->manager)->gpu_addr + + return xe_sa_manager_gpu_addr(to_xe_sa_manager(sa->manager)) + drm_suballoc_soffset(sa); } diff --git a/drivers/gpu/drm/xe/xe_sa_types.h b/drivers/gpu/drm/xe/xe_sa_types.h index 2b070ff1292e..cb7238799dcb 100644 --- a/drivers/gpu/drm/xe/xe_sa_types.h +++ b/drivers/gpu/drm/xe/xe_sa_types.h @@ -12,7 +12,6 @@ struct xe_bo; struct xe_sa_manager { struct drm_suballoc_manager base; struct xe_bo *bo; - u64 gpu_addr; void *cpu_ptr; bool is_iomem; }; diff --git a/drivers/gpu/drm/xe/xe_shrinker.c b/drivers/gpu/drm/xe/xe_shrinker.c index 125c836e0ee4..90244fe59b59 100644 --- a/drivers/gpu/drm/xe/xe_shrinker.c +++ b/drivers/gpu/drm/xe/xe_shrinker.c @@ -54,10 +54,10 @@ xe_shrinker_mod_pages(struct xe_shrinker *shrinker, long shrinkable, long purgea write_unlock(&shrinker->lock); } -static s64 xe_shrinker_walk(struct xe_device *xe, - struct ttm_operation_ctx *ctx, - const struct xe_bo_shrink_flags flags, - unsigned long to_scan, unsigned long *scanned) +static s64 __xe_shrinker_walk(struct xe_device *xe, + struct ttm_operation_ctx *ctx, + const struct xe_bo_shrink_flags flags, + unsigned long to_scan, unsigned long *scanned) { unsigned int mem_type; s64 freed = 0, lret; @@ -66,11 +66,15 @@ static s64 xe_shrinker_walk(struct xe_device *xe, struct ttm_resource_manager *man = ttm_manager_type(&xe->ttm, mem_type); struct ttm_bo_lru_cursor curs; struct ttm_buffer_object *ttm_bo; + struct ttm_lru_walk_arg arg = { + .ctx = ctx, + .trylock_only = true, + }; if (!man || !man->use_tt) continue; - ttm_bo_lru_for_each_reserved_guarded(&curs, man, ctx, ttm_bo) { + ttm_bo_lru_for_each_reserved_guarded(&curs, man, &arg, ttm_bo) { if (!ttm_bo_shrink_suitable(ttm_bo, ctx)) continue; @@ -82,6 +86,50 @@ static s64 xe_shrinker_walk(struct xe_device *xe, if (*scanned >= to_scan) break; } + /* Trylocks should never error, just fail. */ + xe_assert(xe, !IS_ERR(ttm_bo)); + } + + return freed; +} + +/* + * Try shrinking idle objects without writeback first, then if not sufficient, + * try also non-idle objects and finally if that's not sufficient either, + * add writeback. This avoids stalls and explicit writebacks with light or + * moderate memory pressure. 
+ */ +static s64 xe_shrinker_walk(struct xe_device *xe, + struct ttm_operation_ctx *ctx, + const struct xe_bo_shrink_flags flags, + unsigned long to_scan, unsigned long *scanned) +{ + bool no_wait_gpu = true; + struct xe_bo_shrink_flags save_flags = flags; + s64 lret, freed; + + swap(no_wait_gpu, ctx->no_wait_gpu); + save_flags.writeback = false; + lret = __xe_shrinker_walk(xe, ctx, save_flags, to_scan, scanned); + swap(no_wait_gpu, ctx->no_wait_gpu); + if (lret < 0 || *scanned >= to_scan) + return lret; + + freed = lret; + if (!ctx->no_wait_gpu) { + lret = __xe_shrinker_walk(xe, ctx, save_flags, to_scan, scanned); + if (lret < 0) + return lret; + freed += lret; + if (*scanned >= to_scan) + return freed; + } + + if (flags.writeback) { + lret = __xe_shrinker_walk(xe, ctx, flags, to_scan, scanned); + if (lret < 0) + return lret; + freed += lret; } return freed; @@ -193,6 +241,7 @@ static unsigned long xe_shrinker_scan(struct shrinker *shrink, struct shrink_con runtime_pm = xe_shrinker_runtime_pm_get(shrinker, true, 0, can_backup); shrink_flags.purge = false; + lret = xe_shrinker_walk(shrinker->xe, &ctx, shrink_flags, nr_to_scan, &nr_scanned); if (lret >= 0) diff --git a/drivers/gpu/drm/xe/xe_sriov.c b/drivers/gpu/drm/xe/xe_sriov.c index a0eab44c0e76..7d2d6de2aabf 100644 --- a/drivers/gpu/drm/xe/xe_sriov.c +++ b/drivers/gpu/drm/xe/xe_sriov.c @@ -15,6 +15,7 @@ #include "xe_sriov.h" #include "xe_sriov_pf.h" #include "xe_sriov_vf.h" +#include "xe_sriov_vf_ccs.h" /** * xe_sriov_mode_to_string - Convert enum value to string. @@ -157,3 +158,17 @@ const char *xe_sriov_function_name(unsigned int n, char *buf, size_t size) strscpy(buf, "PF", size); return buf; } + +/** + * xe_sriov_init_late() - SR-IOV late initialization functions. + * @xe: the &xe_device to initialize + * + * Return: 0 on success or a negative error code on failure. 
+ */ +int xe_sriov_init_late(struct xe_device *xe) +{ + if (IS_SRIOV_VF(xe)) + return xe_sriov_vf_init_late(xe); + + return 0; +} diff --git a/drivers/gpu/drm/xe/xe_sriov.h b/drivers/gpu/drm/xe/xe_sriov.h index 688fbabf08f1..6db45df55615 100644 --- a/drivers/gpu/drm/xe/xe_sriov.h +++ b/drivers/gpu/drm/xe/xe_sriov.h @@ -18,6 +18,7 @@ const char *xe_sriov_function_name(unsigned int n, char *buf, size_t len); void xe_sriov_probe_early(struct xe_device *xe); void xe_sriov_print_info(struct xe_device *xe, struct drm_printer *p); int xe_sriov_init(struct xe_device *xe); +int xe_sriov_init_late(struct xe_device *xe); static inline enum xe_sriov_mode xe_device_sriov_mode(const struct xe_device *xe) { diff --git a/drivers/gpu/drm/xe/xe_sriov_pf.c b/drivers/gpu/drm/xe/xe_sriov_pf.c index 0f721ae17b26..27ddf3cc80e9 100644 --- a/drivers/gpu/drm/xe/xe_sriov_pf.c +++ b/drivers/gpu/drm/xe/xe_sriov_pf.c @@ -3,13 +3,18 @@ * Copyright © 2023-2024 Intel Corporation */ +#include <linux/debugfs.h> +#include <drm/drm_debugfs.h> #include <drm/drm_managed.h> #include "xe_assert.h" #include "xe_device.h" +#include "xe_gt_sriov_pf.h" #include "xe_module.h" #include "xe_sriov.h" #include "xe_sriov_pf.h" +#include "xe_sriov_pf_helpers.h" +#include "xe_sriov_pf_service.h" #include "xe_sriov_printk.h" static unsigned int wanted_max_vfs(struct xe_device *xe) @@ -80,9 +85,48 @@ bool xe_sriov_pf_readiness(struct xe_device *xe) */ int xe_sriov_pf_init_early(struct xe_device *xe) { + int err; + xe_assert(xe, IS_SRIOV_PF(xe)); - return drmm_mutex_init(&xe->drm, &xe->sriov.pf.master_lock); + xe->sriov.pf.vfs = drmm_kcalloc(&xe->drm, 1 + xe_sriov_pf_get_totalvfs(xe), + sizeof(*xe->sriov.pf.vfs), GFP_KERNEL); + if (!xe->sriov.pf.vfs) + return -ENOMEM; + + err = drmm_mutex_init(&xe->drm, &xe->sriov.pf.master_lock); + if (err) + return err; + + xe_sriov_pf_service_init(xe); + + return 0; +} + +/** + * xe_sriov_pf_wait_ready() - Wait until PF is ready to operate. + * @xe: the &xe_device to test + * + * This function can only be called on PF. + * + * Return: 0 on success or a negative error code on failure. + */ +int xe_sriov_pf_wait_ready(struct xe_device *xe) +{ + struct xe_gt *gt; + unsigned int id; + int err; + + if (xe_device_wedged(xe)) + return -ECANCELED; + + for_each_gt(gt, xe, id) { + err = xe_gt_sriov_pf_wait_ready(gt); + if (err) + return err; + } + + return 0; } /** @@ -102,3 +146,45 @@ void xe_sriov_pf_print_vfs_summary(struct xe_device *xe, struct drm_printer *p) drm_printf(p, "supported: %u\n", xe->sriov.pf.driver_max_vfs); drm_printf(p, "enabled: %u\n", pci_num_vf(pdev)); } + +static int simple_show(struct seq_file *m, void *data) +{ + struct drm_printer p = drm_seq_file_printer(m); + struct drm_info_node *node = m->private; + struct dentry *parent = node->dent->d_parent; + struct xe_device *xe = parent->d_inode->i_private; + void (*print)(struct xe_device *, struct drm_printer *) = node->info_ent->data; + + print(xe, &p); + return 0; +} + +static const struct drm_info_list debugfs_list[] = { + { .name = "vfs", .show = simple_show, .data = xe_sriov_pf_print_vfs_summary }, + { .name = "versions", .show = simple_show, .data = xe_sriov_pf_service_print_versions }, +}; + +/** + * xe_sriov_pf_debugfs_register - Register PF debugfs attributes. + * @xe: the &xe_device + * @root: the root &dentry + * + * Prepare debugfs attributes exposed by the PF. 
+ */ +void xe_sriov_pf_debugfs_register(struct xe_device *xe, struct dentry *root) +{ + struct drm_minor *minor = xe->drm.primary; + struct dentry *parent; + + /* + * /sys/kernel/debug/dri/0/ + * ├── pf + * │  ├── ... + */ + parent = debugfs_create_dir("pf", root); + if (IS_ERR(parent)) + return; + parent->d_inode->i_private = xe; + + drm_debugfs_create_files(debugfs_list, ARRAY_SIZE(debugfs_list), parent, minor); +} diff --git a/drivers/gpu/drm/xe/xe_sriov_pf.h b/drivers/gpu/drm/xe/xe_sriov_pf.h index d1220e70e1c0..e3b34f8f5e04 100644 --- a/drivers/gpu/drm/xe/xe_sriov_pf.h +++ b/drivers/gpu/drm/xe/xe_sriov_pf.h @@ -8,12 +8,15 @@ #include <linux/types.h> +struct dentry; struct drm_printer; struct xe_device; #ifdef CONFIG_PCI_IOV bool xe_sriov_pf_readiness(struct xe_device *xe); int xe_sriov_pf_init_early(struct xe_device *xe); +int xe_sriov_pf_wait_ready(struct xe_device *xe); +void xe_sriov_pf_debugfs_register(struct xe_device *xe, struct dentry *root); void xe_sriov_pf_print_vfs_summary(struct xe_device *xe, struct drm_printer *p); #else static inline bool xe_sriov_pf_readiness(struct xe_device *xe) @@ -25,6 +28,10 @@ static inline int xe_sriov_pf_init_early(struct xe_device *xe) { return 0; } + +static inline void xe_sriov_pf_debugfs_register(struct xe_device *xe, struct dentry *root) +{ +} #endif #endif diff --git a/drivers/gpu/drm/xe/xe_sriov_pf_service.c b/drivers/gpu/drm/xe/xe_sriov_pf_service.c new file mode 100644 index 000000000000..eee3b2a1ba41 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_sriov_pf_service.c @@ -0,0 +1,216 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2023-2025 Intel Corporation + */ + +#include "abi/guc_relay_actions_abi.h" + +#include "xe_device_types.h" +#include "xe_sriov.h" +#include "xe_sriov_pf_helpers.h" +#include "xe_sriov_printk.h" + +#include "xe_sriov_pf_service.h" +#include "xe_sriov_pf_service_types.h" + +/** + * xe_sriov_pf_service_init - Early initialization of the SR-IOV PF service. + * @xe: the &xe_device to initialize + * + * Performs early initialization of the SR-IOV PF service. + * + * This function can only be called on PF. + */ +void xe_sriov_pf_service_init(struct xe_device *xe) +{ + BUILD_BUG_ON(!GUC_RELAY_VERSION_BASE_MAJOR && !GUC_RELAY_VERSION_BASE_MINOR); + BUILD_BUG_ON(GUC_RELAY_VERSION_BASE_MAJOR > GUC_RELAY_VERSION_LATEST_MAJOR); + + xe_assert(xe, IS_SRIOV_PF(xe)); + + /* base versions may differ between platforms */ + xe->sriov.pf.service.version.base.major = GUC_RELAY_VERSION_BASE_MAJOR; + xe->sriov.pf.service.version.base.minor = GUC_RELAY_VERSION_BASE_MINOR; + + /* latest version is same for all platforms */ + xe->sriov.pf.service.version.latest.major = GUC_RELAY_VERSION_LATEST_MAJOR; + xe->sriov.pf.service.version.latest.minor = GUC_RELAY_VERSION_LATEST_MINOR; +} + +/* Return: 0 on success or a negative error code on failure. 
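The registration above creates a pf/ directory under the device's DRM debugfs root with "vfs" and "versions" entries, both backed by simple_show(), which recovers the xe device from the parent dentry's i_private. Given the print_pf_version() format further below, the resulting layout and output look roughly like this (version numbers are hypothetical):

	/*
	 * /sys/kernel/debug/dri/0/pf/vfs
	 * /sys/kernel/debug/dri/0/pf/versions
	 *     base:    1.0
	 *     latest:  1.23
	 *     VF1:     1.23   (only VFs with a negotiated version are listed)
	 */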
*/ +static int pf_negotiate_version(struct xe_device *xe, + u32 wanted_major, u32 wanted_minor, + u32 *major, u32 *minor) +{ + struct xe_sriov_pf_service_version base = xe->sriov.pf.service.version.base; + struct xe_sriov_pf_service_version latest = xe->sriov.pf.service.version.latest; + + xe_assert(xe, IS_SRIOV_PF(xe)); + xe_assert(xe, base.major); + xe_assert(xe, base.major <= latest.major); + xe_assert(xe, (base.major < latest.major) || (base.minor <= latest.minor)); + + /* VF doesn't care - return our latest */ + if (wanted_major == VF2PF_HANDSHAKE_MAJOR_ANY && + wanted_minor == VF2PF_HANDSHAKE_MINOR_ANY) { + *major = latest.major; + *minor = latest.minor; + return 0; + } + + /* VF wants newer than our - return our latest */ + if (wanted_major > latest.major) { + *major = latest.major; + *minor = latest.minor; + return 0; + } + + /* VF wants older than min required - reject */ + if (wanted_major < base.major || + (wanted_major == base.major && wanted_minor < base.minor)) { + return -EPERM; + } + + /* previous major - return wanted, as we should still support it */ + if (wanted_major < latest.major) { + /* XXX: we are not prepared for multi-versions yet */ + xe_assert(xe, base.major == latest.major); + return -ENOPKG; + } + + /* same major - return common minor */ + *major = wanted_major; + *minor = min_t(u32, latest.minor, wanted_minor); + return 0; +} + +static void pf_connect(struct xe_device *xe, u32 vfid, u32 major, u32 minor) +{ + xe_sriov_pf_assert_vfid(xe, vfid); + xe_assert(xe, major || minor); + + xe->sriov.pf.vfs[vfid].version.major = major; + xe->sriov.pf.vfs[vfid].version.minor = minor; +} + +static void pf_disconnect(struct xe_device *xe, u32 vfid) +{ + xe_sriov_pf_assert_vfid(xe, vfid); + + xe->sriov.pf.vfs[vfid].version.major = 0; + xe->sriov.pf.vfs[vfid].version.minor = 0; +} + +/** + * xe_sriov_pf_service_is_negotiated - Check if VF has negotiated given ABI version. + * @xe: the &xe_device + * @vfid: the VF identifier + * @major: the major version to check + * @minor: the minor version to check + * + * Performs early initialization of the SR-IOV PF service. + * + * This function can only be called on PF. + * + * Returns: true if VF can use given ABI version functionality. + */ +bool xe_sriov_pf_service_is_negotiated(struct xe_device *xe, u32 vfid, u32 major, u32 minor) +{ + xe_sriov_pf_assert_vfid(xe, vfid); + + return major == xe->sriov.pf.vfs[vfid].version.major && + minor <= xe->sriov.pf.vfs[vfid].version.minor; +} + +/** + * xe_sriov_pf_service_handshake_vf - Confirm a connection with the VF. + * @xe: the &xe_device + * @vfid: the VF identifier + * @wanted_major: the major service version expected by the VF + * @wanted_minor: the minor service version expected by the VF + * @major: the major service version to be used by the VF + * @minor: the minor service version to be used by the VF + * + * Negotiate a VF/PF ABI version to allow VF use the PF services. + * + * This function can only be called on PF. + * + * Return: 0 on success or a negative error code on failure. 
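The negotiation rules in pf_negotiate_version() above collapse to a handful of cases. Worked examples, assuming hypothetical base = 1.0 and latest = 1.23 (the real values come from the GuC relay ABI headers):

	/*
	 * wanted 0.0 (ANY.ANY) -> 1.23   VF does not care, PF offers its latest
	 * wanted 1.5           -> 1.5    same major, common minor
	 * wanted 1.50          -> 1.23   same major, PF caps the minor at its latest
	 * wanted 2.7           -> 1.23   newer major than PF supports, PF offers latest
	 * wanted 0.9           -> -EPERM older than the base (minimum) version
	 *
	 * After a VF has negotiated 1.5, xe_sriov_pf_service_is_negotiated()
	 * returns true for (1, 3) and (1, 5) but false for (1, 6) and (2, 0):
	 * the major must match exactly and the minor must not exceed what was
	 * negotiated.
	 */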
+ */ +int xe_sriov_pf_service_handshake_vf(struct xe_device *xe, u32 vfid, + u32 wanted_major, u32 wanted_minor, + u32 *major, u32 *minor) +{ + int err; + + xe_sriov_dbg_verbose(xe, "VF%u wants ABI version %u.%u\n", + vfid, wanted_major, wanted_minor); + + err = pf_negotiate_version(xe, wanted_major, wanted_minor, major, minor); + + if (err < 0) { + xe_sriov_notice(xe, "VF%u failed to negotiate ABI %u.%u (%pe)\n", + vfid, wanted_major, wanted_minor, ERR_PTR(err)); + pf_disconnect(xe, vfid); + } else { + xe_sriov_dbg(xe, "VF%u negotiated ABI version %u.%u\n", + vfid, *major, *minor); + pf_connect(xe, vfid, *major, *minor); + } + + return err; +} + +/** + * xe_sriov_pf_service_reset_vf - Reset a connection with the VF. + * @xe: the &xe_device + * @vfid: the VF identifier + * + * Reset a VF driver negotiated VF/PF ABI version. + * + * After that point, the VF driver will have to perform new version handshake + * to continue use of the PF services again. + * + * This function can only be called on PF. + */ +void xe_sriov_pf_service_reset_vf(struct xe_device *xe, unsigned int vfid) +{ + pf_disconnect(xe, vfid); +} + +static void print_pf_version(struct drm_printer *p, const char *name, + const struct xe_sriov_pf_service_version *version) +{ + drm_printf(p, "%s:\t%u.%u\n", name, version->major, version->minor); +} + +/** + * xe_sriov_pf_service_print_versions - Print ABI versions negotiated with VFs. + * @xe: the &xe_device + * @p: the &drm_printer + * + * This function is for PF use only. + */ +void xe_sriov_pf_service_print_versions(struct xe_device *xe, struct drm_printer *p) +{ + unsigned int n, total_vfs = xe_sriov_pf_get_totalvfs(xe); + struct xe_sriov_pf_service_version *version; + char name[8]; + + xe_assert(xe, IS_SRIOV_PF(xe)); + + print_pf_version(p, "base", &xe->sriov.pf.service.version.base); + print_pf_version(p, "latest", &xe->sriov.pf.service.version.latest); + + for (n = 1; n <= total_vfs; n++) { + version = &xe->sriov.pf.vfs[n].version; + if (!version->major && !version->minor) + continue; + + print_pf_version(p, xe_sriov_function_name(n, name, sizeof(name)), version); + } +} + +#if IS_BUILTIN(CONFIG_DRM_XE_KUNIT_TEST) +#include "tests/xe_sriov_pf_service_kunit.c" +#endif diff --git a/drivers/gpu/drm/xe/xe_sriov_pf_service.h b/drivers/gpu/drm/xe/xe_sriov_pf_service.h new file mode 100644 index 000000000000..d38c18f5ed10 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_sriov_pf_service.h @@ -0,0 +1,23 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2025 Intel Corporation + */ + +#ifndef _XE_SRIOV_PF_SERVICE_H_ +#define _XE_SRIOV_PF_SERVICE_H_ + +#include <linux/types.h> + +struct drm_printer; +struct xe_device; + +void xe_sriov_pf_service_init(struct xe_device *xe); +void xe_sriov_pf_service_print_versions(struct xe_device *xe, struct drm_printer *p); + +int xe_sriov_pf_service_handshake_vf(struct xe_device *xe, u32 vfid, + u32 wanted_major, u32 wanted_minor, + u32 *major, u32 *minor); +bool xe_sriov_pf_service_is_negotiated(struct xe_device *xe, u32 vfid, u32 major, u32 minor); +void xe_sriov_pf_service_reset_vf(struct xe_device *xe, unsigned int vfid); + +#endif diff --git a/drivers/gpu/drm/xe/xe_sriov_pf_service_types.h b/drivers/gpu/drm/xe/xe_sriov_pf_service_types.h new file mode 100644 index 000000000000..0835dde358c1 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_sriov_pf_service_types.h @@ -0,0 +1,36 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2023-2025 Intel Corporation + */ + +#ifndef _XE_SRIOV_PF_SERVICE_TYPES_H_ +#define _XE_SRIOV_PF_SERVICE_TYPES_H_ + 
+#include <linux/types.h> + +/** + * struct xe_sriov_pf_service_version - VF/PF ABI Version. + * @major: the major version of the VF/PF ABI + * @minor: the minor version of the VF/PF ABI + * + * See `GuC Relay Communication`_. + */ +struct xe_sriov_pf_service_version { + u16 major; + u16 minor; +}; + +/** + * struct xe_sriov_pf_service - Data used by the PF service. + * @version: information about VF/PF ABI versions for current platform. + * @version.base: lowest VF/PF ABI version that could be negotiated with VF. + * @version.latest: latest VF/PF ABI version supported by the PF driver. + */ +struct xe_sriov_pf_service { + struct { + struct xe_sriov_pf_service_version base; + struct xe_sriov_pf_service_version latest; + } version; +}; + +#endif diff --git a/drivers/gpu/drm/xe/xe_sriov_pf_types.h b/drivers/gpu/drm/xe/xe_sriov_pf_types.h new file mode 100644 index 000000000000..956a88f9f213 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_sriov_pf_types.h @@ -0,0 +1,45 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2023-2025 Intel Corporation + */ + +#ifndef _XE_SRIOV_PF_TYPES_H_ +#define _XE_SRIOV_PF_TYPES_H_ + +#include <linux/mutex.h> +#include <linux/types.h> + +#include "xe_sriov_pf_service_types.h" + +/** + * struct xe_sriov_metadata - per-VF device level metadata + */ +struct xe_sriov_metadata { + /** @version: negotiated VF/PF ABI version */ + struct xe_sriov_pf_service_version version; +}; + +/** + * struct xe_device_pf - Xe PF related data + * + * The data in this structure is valid only if driver is running in the + * @XE_SRIOV_MODE_PF mode. + */ +struct xe_device_pf { + /** @device_total_vfs: Maximum number of VFs supported by the device. */ + u16 device_total_vfs; + + /** @driver_max_vfs: Maximum number of VFs supported by the driver. */ + u16 driver_max_vfs; + + /** @master_lock: protects all VFs configurations across GTs */ + struct mutex master_lock; + + /** @service: device level service data. */ + struct xe_sriov_pf_service service; + + /** @vfs: metadata for all VFs. */ + struct xe_sriov_metadata *vfs; +}; + +#endif diff --git a/drivers/gpu/drm/xe/xe_sriov_types.h b/drivers/gpu/drm/xe/xe_sriov_types.h index ca94382a721e..1a138108d139 100644 --- a/drivers/gpu/drm/xe/xe_sriov_types.h +++ b/drivers/gpu/drm/xe/xe_sriov_types.h @@ -7,9 +7,6 @@ #define _XE_SRIOV_TYPES_H_ #include <linux/build_bug.h> -#include <linux/mutex.h> -#include <linux/types.h> -#include <linux/workqueue_types.h> /** * VFID - Virtual Function Identifier @@ -40,37 +37,4 @@ enum xe_sriov_mode { }; static_assert(XE_SRIOV_MODE_NONE); -/** - * struct xe_device_pf - Xe PF related data - * - * The data in this structure is valid only if driver is running in the - * @XE_SRIOV_MODE_PF mode. - */ -struct xe_device_pf { - /** @device_total_vfs: Maximum number of VFs supported by the device. */ - u16 device_total_vfs; - - /** @driver_max_vfs: Maximum number of VFs supported by the driver. */ - u16 driver_max_vfs; - - /** @master_lock: protects all VFs configurations across GTs */ - struct mutex master_lock; -}; - -/** - * struct xe_device_vf - Xe Virtual Function related data - * - * The data in this structure is valid only if driver is running in the - * @XE_SRIOV_MODE_VF mode. 
- */ -struct xe_device_vf { - /** @migration: VF Migration state data */ - struct { - /** @migration.worker: VF migration recovery worker */ - struct work_struct worker; - /** @migration.gt_flags: Per-GT request flags for VF migration recovery */ - unsigned long gt_flags; - } migration; -}; - #endif diff --git a/drivers/gpu/drm/xe/xe_sriov_vf.c b/drivers/gpu/drm/xe/xe_sriov_vf.c index 6526fe450e55..cdd9f8e78b2a 100644 --- a/drivers/gpu/drm/xe/xe_sriov_vf.c +++ b/drivers/gpu/drm/xe/xe_sriov_vf.c @@ -3,6 +3,7 @@ * Copyright © 2023-2024 Intel Corporation */ +#include <drm/drm_debugfs.h> #include <drm/drm_managed.h> #include "xe_assert.h" @@ -10,11 +11,16 @@ #include "xe_gt.h" #include "xe_gt_sriov_printk.h" #include "xe_gt_sriov_vf.h" +#include "xe_guc.h" #include "xe_guc_ct.h" +#include "xe_guc_submit.h" +#include "xe_irq.h" +#include "xe_lrc.h" #include "xe_pm.h" #include "xe_sriov.h" #include "xe_sriov_printk.h" #include "xe_sriov_vf.h" +#include "xe_sriov_vf_ccs.h" #include "xe_tile_sriov_vf.h" /** @@ -124,16 +130,66 @@ * | | | */ -static bool vf_migration_supported(struct xe_device *xe) +/** + * xe_sriov_vf_migration_supported - Report whether SR-IOV VF migration is + * supported or not. + * @xe: the &xe_device to check + * + * Returns: true if VF migration is supported, false otherwise. + */ +bool xe_sriov_vf_migration_supported(struct xe_device *xe) +{ + xe_assert(xe, IS_SRIOV_VF(xe)); + return xe->sriov.vf.migration.enabled; +} + +static void vf_disable_migration(struct xe_device *xe, const char *fmt, ...) +{ + struct va_format vaf; + va_list va_args; + + xe_assert(xe, IS_SRIOV_VF(xe)); + + va_start(va_args, fmt); + vaf.fmt = fmt; + vaf.va = &va_args; + xe_sriov_notice(xe, "migration disabled: %pV\n", &vaf); + va_end(va_args); + + xe->sriov.vf.migration.enabled = false; +} + +static void migration_worker_func(struct work_struct *w); + +static void vf_migration_init_early(struct xe_device *xe) { /* * TODO: Add conditions to allow specific platforms, when they're * supported at production quality. */ - return IS_ENABLED(CONFIG_DRM_XE_DEBUG); -} + if (!IS_ENABLED(CONFIG_DRM_XE_DEBUG)) + return vf_disable_migration(xe, + "experimental feature not available on production builds"); + + if (GRAPHICS_VER(xe) < 20) + return vf_disable_migration(xe, "requires gfx version >= 20, but only %u found", + GRAPHICS_VER(xe)); + + if (!IS_DGFX(xe)) { + struct xe_uc_fw_version guc_version; + + xe_gt_sriov_vf_guc_versions(xe_device_get_gt(xe, 0), NULL, &guc_version); + if (MAKE_GUC_VER_STRUCT(guc_version) < MAKE_GUC_VER(1, 23, 0)) + return vf_disable_migration(xe, + "CCS migration requires GuC ABI >= 1.23 but only %u.%u found", + guc_version.major, guc_version.minor); + } -static void migration_worker_func(struct work_struct *w); + INIT_WORK(&xe->sriov.vf.migration.worker, migration_worker_func); + + xe->sriov.vf.migration.enabled = true; + xe_sriov_dbg(xe, "migration support enabled\n"); +} /** * xe_sriov_vf_init_early - Initialize SR-IOV VF specific data. @@ -141,133 +197,185 @@ static void migration_worker_func(struct work_struct *w); */ void xe_sriov_vf_init_early(struct xe_device *xe) { - INIT_WORK(&xe->sriov.vf.migration.worker, migration_worker_func); - - if (!vf_migration_supported(xe)) - xe_sriov_info(xe, "migration not supported by this module version\n"); + vf_migration_init_early(xe); } /** - * vf_post_migration_requery_guc - Re-query GuC for current VF provisioning. + * vf_post_migration_shutdown - Stop the driver activities after VF migration. 
* @xe: the &xe_device struct instance * - * After migration, we need to re-query all VF configuration to make sure - * they match previous provisioning. Note that most of VF provisioning - * shall be the same, except GGTT range, since GGTT is not virtualized per-VF. - * - * Returns: 0 if the operation completed successfully, or a negative error - * code otherwise. + * After this VM is migrated and assigned to a new VF, it is running on a new + * hardware, and therefore many hardware-dependent states and related structures + * require fixups. Without fixups, the hardware cannot do any work, and therefore + * all GPU pipelines are stalled. + * Stop some of kernel activities to make the fixup process faster. */ -static int vf_post_migration_requery_guc(struct xe_device *xe) +static void vf_post_migration_shutdown(struct xe_device *xe) { struct xe_gt *gt; unsigned int id; - int err, ret = 0; + int ret = 0; for_each_gt(gt, xe, id) { - err = xe_gt_sriov_vf_query_config(gt); - ret = ret ?: err; + xe_guc_submit_pause(>->uc.guc); + ret |= xe_guc_submit_reset_block(>->uc.guc); } - return ret; + if (ret) + drm_info(&xe->drm, "migration recovery encountered ongoing reset\n"); } -static void vf_post_migration_fixup_ctb(struct xe_device *xe) +/** + * vf_post_migration_kickstart - Re-start the driver activities under new hardware. + * @xe: the &xe_device struct instance + * + * After we have finished with all post-migration fixups, restart the driver + * activities to continue feeding the GPU with workloads. + */ +static void vf_post_migration_kickstart(struct xe_device *xe) { struct xe_gt *gt; unsigned int id; - xe_assert(xe, IS_SRIOV_VF(xe)); + /* + * Make sure interrupts on the new HW are properly set. The GuC IRQ + * must be working at this point, since the recovery did started, + * but the rest was not enabled using the procedure from spec. + */ + xe_irq_resume(xe); for_each_gt(gt, xe, id) { - s32 shift = xe_gt_sriov_vf_ggtt_shift(gt); - - xe_guc_ct_fixup_messages_with_ggtt(>->uc.guc.ct, shift); + xe_guc_submit_reset_unblock(>->uc.guc); + xe_guc_submit_unpause(>->uc.guc); } } +static bool gt_vf_post_migration_needed(struct xe_gt *gt) +{ + return test_bit(gt->info.id, >_to_xe(gt)->sriov.vf.migration.gt_flags); +} + /* - * vf_post_migration_imminent - Check if post-restore recovery is coming. + * Notify GuCs marked in flags about resource fixups apply finished. * @xe: the &xe_device struct instance - * - * Return: True if migration recovery worker will soon be running. Any worker currently - * executing does not affect the result. 
+ * @gt_flags: flags marking to which GTs the notification shall be sent */ -static bool vf_post_migration_imminent(struct xe_device *xe) +static int vf_post_migration_notify_resfix_done(struct xe_device *xe, unsigned long gt_flags) { - return xe->sriov.vf.migration.gt_flags != 0 || - work_pending(&xe->sriov.vf.migration.worker); + struct xe_gt *gt; + unsigned int id; + int err = 0; + + for_each_gt(gt, xe, id) { + if (!test_bit(id, >_flags)) + continue; + /* skip asking GuC for RESFIX exit if new recovery request arrived */ + if (gt_vf_post_migration_needed(gt)) + continue; + err = xe_gt_sriov_vf_notify_resfix_done(gt); + if (err) + break; + clear_bit(id, >_flags); + } + + if (gt_flags && !err) + drm_dbg(&xe->drm, "another recovery imminent, skipped some notifications\n"); + return err; } -static bool vf_post_migration_fixup_ggtt_nodes(struct xe_device *xe) +static int vf_get_next_migrated_gt_id(struct xe_device *xe) { - bool need_fixups = false; - struct xe_tile *tile; + struct xe_gt *gt; unsigned int id; - for_each_tile(tile, xe, id) { - struct xe_gt *gt = tile->primary_gt; - s64 shift; - - shift = xe_gt_sriov_vf_ggtt_shift(gt); - if (shift) { - need_fixups = true; - xe_tile_sriov_vf_fixup_ggtt_nodes(tile, shift); - } + for_each_gt(gt, xe, id) { + if (test_and_clear_bit(id, &xe->sriov.vf.migration.gt_flags)) + return id; } - return need_fixups; + return -1; } -/* - * Notify all GuCs about resource fixups apply finished. +static size_t post_migration_scratch_size(struct xe_device *xe) +{ + return max(xe_lrc_reg_size(xe), LRC_WA_BB_SIZE); +} + +/** + * Perform post-migration fixups on a single GT. + * + * After migration, GuC needs to be re-queried for VF configuration to check + * if it matches previous provisioning. Most of VF provisioning shall be the + * same, except GGTT range, since GGTT is not virtualized per-VF. If GGTT + * range has changed, we have to perform fixups - shift all GGTT references + * used anywhere within the driver. After the fixups in this function succeed, + * it is allowed to ask the GuC bound to this GT to continue normal operation. + * + * Returns: 0 if the operation completed successfully, or a negative error + * code otherwise. 
*/ -static void vf_post_migration_notify_resfix_done(struct xe_device *xe) +static int gt_vf_post_migration_fixups(struct xe_gt *gt) { - struct xe_gt *gt; - unsigned int id; + s64 shift; + void *buf; + int err; - for_each_gt(gt, xe, id) { - if (vf_post_migration_imminent(xe)) - goto skip; - xe_gt_sriov_vf_notify_resfix_done(gt); + buf = kmalloc(post_migration_scratch_size(gt_to_xe(gt)), GFP_KERNEL); + if (!buf) + return -ENOMEM; + + err = xe_gt_sriov_vf_query_config(gt); + if (err) + goto out; + + shift = xe_gt_sriov_vf_ggtt_shift(gt); + if (shift) { + xe_tile_sriov_vf_fixup_ggtt_nodes(gt_to_tile(gt), shift); + xe_gt_sriov_vf_default_lrcs_hwsp_rebase(gt); + err = xe_guc_contexts_hwsp_rebase(>->uc.guc, buf); + if (err) + goto out; + xe_guc_jobs_ring_rebase(>->uc.guc); + xe_guc_ct_fixup_messages_with_ggtt(>->uc.guc.ct, shift); } - return; -skip: - drm_dbg(&xe->drm, "another recovery imminent, skipping notifications\n"); +out: + kfree(buf); + return err; } static void vf_post_migration_recovery(struct xe_device *xe) { - bool need_fixups; - int err; + unsigned long fixed_gts = 0; + int id, err; drm_dbg(&xe->drm, "migration recovery in progress\n"); xe_pm_runtime_get(xe); - err = vf_post_migration_requery_guc(xe); - if (vf_post_migration_imminent(xe)) - goto defer; - if (unlikely(err)) - goto fail; - if (!vf_migration_supported(xe)) { - xe_sriov_err(xe, "migration not supported by this module version\n"); + vf_post_migration_shutdown(xe); + + if (!xe_sriov_vf_migration_supported(xe)) { + xe_sriov_err(xe, "migration is not supported\n"); err = -ENOTRECOVERABLE; goto fail; } - need_fixups = vf_post_migration_fixup_ggtt_nodes(xe); - /* FIXME: add the recovery steps */ - if (need_fixups) - vf_post_migration_fixup_ctb(xe); + while (id = vf_get_next_migrated_gt_id(xe), id >= 0) { + struct xe_gt *gt = xe_device_get_gt(xe, id); + + err = gt_vf_post_migration_fixups(gt); + if (err) + goto fail; + + set_bit(id, &fixed_gts); + } + + vf_post_migration_kickstart(xe); + err = vf_post_migration_notify_resfix_done(xe, fixed_gts); + if (err) + goto fail; - vf_post_migration_notify_resfix_done(xe); xe_pm_runtime_put(xe); drm_notice(&xe->drm, "migration recovery ended\n"); return; -defer: - xe_pm_runtime_put(xe); - drm_dbg(&xe->drm, "migration recovery deferred\n"); - return; fail: xe_pm_runtime_put(xe); drm_err(&xe->drm, "migration recovery failed (%pe)\n", ERR_PTR(err)); @@ -282,18 +390,23 @@ static void migration_worker_func(struct work_struct *w) vf_post_migration_recovery(xe); } -static bool vf_ready_to_recovery_on_all_gts(struct xe_device *xe) +/* + * Check if post-restore recovery is coming on any of GTs. + * @xe: the &xe_device struct instance + * + * Return: True if migration recovery worker will soon be running. Any worker currently + * executing does not affect the result. + */ +static bool vf_ready_to_recovery_on_any_gts(struct xe_device *xe) { struct xe_gt *gt; unsigned int id; for_each_gt(gt, xe, id) { - if (!test_bit(id, &xe->sriov.vf.migration.gt_flags)) { - xe_gt_sriov_dbg_verbose(gt, "still not ready to recover\n"); - return false; - } + if (test_bit(id, &xe->sriov.vf.migration.gt_flags)) + return true; } - return true; + return false; } /** @@ -308,14 +421,55 @@ void xe_sriov_vf_start_migration_recovery(struct xe_device *xe) xe_assert(xe, IS_SRIOV_VF(xe)); - if (!vf_ready_to_recovery_on_all_gts(xe)) + if (!vf_ready_to_recovery_on_any_gts(xe)) return; - WRITE_ONCE(xe->sriov.vf.migration.gt_flags, 0); - /* Ensure other threads see that no flags are set now. 
*/ - smp_mb(); - started = queue_work(xe->sriov.wq, &xe->sriov.vf.migration.worker); drm_info(&xe->drm, "VF migration recovery %s\n", started ? "scheduled" : "already in progress"); } + +/** + * xe_sriov_vf_init_late() - SR-IOV VF late initialization functions. + * @xe: the &xe_device to initialize + * + * This function initializes code for CCS migration. + * + * Return: 0 on success or a negative error code on failure. + */ +int xe_sriov_vf_init_late(struct xe_device *xe) +{ + int err = 0; + + if (xe_sriov_vf_migration_supported(xe)) + err = xe_sriov_vf_ccs_init(xe); + + return err; +} + +static int sa_info_vf_ccs(struct seq_file *m, void *data) +{ + struct drm_info_node *node = m->private; + struct xe_device *xe = to_xe_device(node->minor->dev); + struct drm_printer p = drm_seq_file_printer(m); + + xe_sriov_vf_ccs_print(xe, &p); + return 0; +} + +static const struct drm_info_list debugfs_list[] = { + { .name = "sa_info_vf_ccs", .show = sa_info_vf_ccs }, +}; + +/** + * xe_sriov_vf_debugfs_register - Register VF debugfs attributes. + * @xe: the &xe_device + * @root: the root &dentry + * + * Prepare debugfs attributes exposed by the VF. + */ +void xe_sriov_vf_debugfs_register(struct xe_device *xe, struct dentry *root) +{ + drm_debugfs_create_files(debugfs_list, ARRAY_SIZE(debugfs_list), + root, xe->drm.primary); +} diff --git a/drivers/gpu/drm/xe/xe_sriov_vf.h b/drivers/gpu/drm/xe/xe_sriov_vf.h index 7b8622cff2b7..9e752105ec2a 100644 --- a/drivers/gpu/drm/xe/xe_sriov_vf.h +++ b/drivers/gpu/drm/xe/xe_sriov_vf.h @@ -6,9 +6,15 @@ #ifndef _XE_SRIOV_VF_H_ #define _XE_SRIOV_VF_H_ +#include <linux/types.h> + +struct dentry; struct xe_device; void xe_sriov_vf_init_early(struct xe_device *xe); +int xe_sriov_vf_init_late(struct xe_device *xe); void xe_sriov_vf_start_migration_recovery(struct xe_device *xe); +bool xe_sriov_vf_migration_supported(struct xe_device *xe); +void xe_sriov_vf_debugfs_register(struct xe_device *xe, struct dentry *root); #endif diff --git a/drivers/gpu/drm/xe/xe_sriov_vf_ccs.c b/drivers/gpu/drm/xe/xe_sriov_vf_ccs.c new file mode 100644 index 000000000000..8dec616c37c9 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_sriov_vf_ccs.c @@ -0,0 +1,410 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2025 Intel Corporation + */ + +#include "instructions/xe_mi_commands.h" +#include "instructions/xe_gpu_commands.h" +#include "xe_bb.h" +#include "xe_bo.h" +#include "xe_device.h" +#include "xe_exec_queue.h" +#include "xe_exec_queue_types.h" +#include "xe_guc_submit.h" +#include "xe_lrc.h" +#include "xe_migrate.h" +#include "xe_pm.h" +#include "xe_sa.h" +#include "xe_sriov_printk.h" +#include "xe_sriov_vf.h" +#include "xe_sriov_vf_ccs.h" +#include "xe_sriov_vf_ccs_types.h" + +/** + * DOC: VF save/restore of compression Meta Data + * + * VF KMD registers two special contexts/LRCAs. + * + * Save Context/LRCA: contain necessary cmds+page table to trigger Meta data / + * compression control surface (Aka CCS) save in regular System memory in VM. + * + * Restore Context/LRCA: contain necessary cmds+page table to trigger Meta data / + * compression control surface (Aka CCS) Restore from regular System memory in + * VM to corresponding CCS pool. 
+ * + * Below diagram explain steps needed for VF save/Restore of compression Meta Data:: + * + * CCS Save CCS Restore VF KMD Guc BCS + * LRCA LRCA + * | | | | | + * | | | | | + * | Create Save LRCA | | | + * [ ]<----------------------------- [ ] | | + * | | | | | + * | | | | | + * | | | Register save LRCA | | + * | | | with Guc | | + * | | [ ]--------------------------->[ ] | + * | | | | | + * | | Create restore LRCA | | | + * | [ ]<------------------[ ] | | + * | | | | | + * | | | Register restore LRCA | | + * | | | with Guc | | + * | | [ ]--------------------------->[ ] | + * | | | | | + * | | | | | + * | | [ ]------------------------- | | + * | | [ ] Allocate main memory. | | | + * | | [ ] Allocate CCS memory. | | | + * | | [ ] Update Main memory & | | | + * [ ]<------------------------------[ ] CCS pages PPGTT + BB | | | + * | [ ]<------------------[ ] cmds to save & restore.| | | + * | | [ ]<------------------------ | | + * | | | | | + * | | | | | + * | | | | | + * : : : : : + * ---------------------------- VF Paused ------------------------------------- + * | | | | | + * | | | | | + * | | | |Schedule | + * | | | |CCS Save | + * | | | | LRCA | + * | | | [ ]------>[ ] + * | | | | | + * | | | | | + * | | | |CCS save | + * | | | |completed| + * | | | [ ]<------[ ] + * | | | | | + * : : : : : + * ---------------------------- VM Migrated ----------------------------------- + * | | | | | + * | | | | | + * : : : : : + * ---------------------------- VF Resumed ------------------------------------ + * | | | | | + * | | | | | + * | | [ ]-------------- | | + * | | [ ] Fix up GGTT | | | + * | | [ ]<------------- | | + * | | | | | + * | | | | | + * | | | Notify VF_RESFIX_DONE | | + * | | [ ]--------------------------->[ ] | + * | | | | | + * | | | |Schedule | + * | | | |CCS | + * | | | |Restore | + * | | | |LRCA | + * | | | [ ]------>[ ] + * | | | | | + * | | | | | + * | | | |CCS | + * | | | |restore | + * | | | |completed| + * | | | [ ]<------[ ] + * | | | | | + * | | | | | + * | | | VF_RESFIX_DONE complete | | + * | | | notification | | + * | | [ ]<---------------------------[ ] | + * | | | | | + * | | | | | + * : : : : : + * ------------------------- Continue VM restore ------------------------------ + */ + +static u64 get_ccs_bb_pool_size(struct xe_device *xe) +{ + u64 sys_mem_size, ccs_mem_size, ptes, bb_pool_size; + struct sysinfo si; + + si_meminfo(&si); + sys_mem_size = si.totalram * si.mem_unit; + ccs_mem_size = div64_u64(sys_mem_size, NUM_BYTES_PER_CCS_BYTE(xe)); + ptes = DIV_ROUND_UP_ULL(sys_mem_size + ccs_mem_size, XE_PAGE_SIZE); + + /** + * We need below BB size to hold PTE mappings and some DWs for copy + * command. In reality, we need space for many copy commands. So, let + * us allocate double the calculated size which is enough to holds GPU + * instructions for the whole region. + */ + bb_pool_size = ptes * sizeof(u32); + + return round_up(bb_pool_size * 2, SZ_1M); +} + +static int alloc_bb_pool(struct xe_tile *tile, struct xe_sriov_vf_ccs_ctx *ctx) +{ + struct xe_device *xe = tile_to_xe(tile); + struct xe_sa_manager *sa_manager; + u64 bb_pool_size; + int offset, err; + + bb_pool_size = get_ccs_bb_pool_size(xe); + xe_sriov_info(xe, "Allocating %s CCS BB pool size = %lldMB\n", + ctx->ctx_id ? 
"Restore" : "Save", bb_pool_size / SZ_1M); + + sa_manager = xe_sa_bo_manager_init(tile, bb_pool_size, SZ_16); + + if (IS_ERR(sa_manager)) { + xe_sriov_err(xe, "Suballocator init failed with error: %pe\n", + sa_manager); + err = PTR_ERR(sa_manager); + return err; + } + + offset = 0; + xe_map_memset(xe, &sa_manager->bo->vmap, offset, MI_NOOP, + bb_pool_size); + + offset = bb_pool_size - sizeof(u32); + xe_map_wr(xe, &sa_manager->bo->vmap, offset, u32, MI_BATCH_BUFFER_END); + + ctx->mem.ccs_bb_pool = sa_manager; + + return 0; +} + +static void ccs_rw_update_ring(struct xe_sriov_vf_ccs_ctx *ctx) +{ + u64 addr = xe_sa_manager_gpu_addr(ctx->mem.ccs_bb_pool); + struct xe_lrc *lrc = xe_exec_queue_lrc(ctx->mig_q); + u32 dw[10], i = 0; + + dw[i++] = MI_ARB_ON_OFF | MI_ARB_ENABLE; + dw[i++] = MI_BATCH_BUFFER_START | XE_INSTR_NUM_DW(3); + dw[i++] = lower_32_bits(addr); + dw[i++] = upper_32_bits(addr); + dw[i++] = MI_NOOP; + dw[i++] = MI_NOOP; + + xe_lrc_write_ring(lrc, dw, i * sizeof(u32)); + xe_lrc_set_ring_tail(lrc, lrc->ring.tail); +} + +static int register_save_restore_context(struct xe_sriov_vf_ccs_ctx *ctx) +{ + int ctx_type; + + switch (ctx->ctx_id) { + case XE_SRIOV_VF_CCS_READ_CTX: + ctx_type = GUC_CONTEXT_COMPRESSION_SAVE; + break; + case XE_SRIOV_VF_CCS_WRITE_CTX: + ctx_type = GUC_CONTEXT_COMPRESSION_RESTORE; + break; + default: + return -EINVAL; + } + + xe_guc_register_vf_exec_queue(ctx->mig_q, ctx_type); + return 0; +} + +/** + * xe_sriov_vf_ccs_register_context - Register read/write contexts with guc. + * @xe: the &xe_device to register contexts on. + * + * This function registers read and write contexts with Guc. Re-registration + * is needed whenever resuming from pm runtime suspend. + * + * Return: 0 on success. Negative error code on failure. + */ +int xe_sriov_vf_ccs_register_context(struct xe_device *xe) +{ + enum xe_sriov_vf_ccs_rw_ctxs ctx_id; + struct xe_sriov_vf_ccs_ctx *ctx; + int err; + + xe_assert(xe, IS_VF_CCS_READY(xe)); + + for_each_ccs_rw_ctx(ctx_id) { + ctx = &xe->sriov.vf.ccs.contexts[ctx_id]; + err = register_save_restore_context(ctx); + if (err) + return err; + } + + return err; +} + +static void xe_sriov_vf_ccs_fini(void *arg) +{ + struct xe_sriov_vf_ccs_ctx *ctx = arg; + struct xe_lrc *lrc = xe_exec_queue_lrc(ctx->mig_q); + + /* + * Make TAIL = HEAD in the ring so that no issues are seen if Guc + * submits this context to HW on VF pause after unbinding device. + */ + xe_lrc_set_ring_tail(lrc, xe_lrc_ring_head(lrc)); + xe_exec_queue_put(ctx->mig_q); +} + +/** + * xe_sriov_vf_ccs_init - Setup LRCA for save & restore. + * @xe: the &xe_device to start recovery on + * + * This function shall be called only by VF. It initializes + * LRCA and suballocator needed for CCS save & restore. + * + * Return: 0 on success. Negative error code on failure. 
+ */ +int xe_sriov_vf_ccs_init(struct xe_device *xe) +{ + struct xe_tile *tile = xe_device_get_root_tile(xe); + enum xe_sriov_vf_ccs_rw_ctxs ctx_id; + struct xe_sriov_vf_ccs_ctx *ctx; + struct xe_exec_queue *q; + u32 flags; + int err; + + xe_assert(xe, IS_SRIOV_VF(xe)); + xe_assert(xe, xe_sriov_vf_migration_supported(xe)); + + if (IS_DGFX(xe) || !xe_device_has_flat_ccs(xe)) + return 0; + + for_each_ccs_rw_ctx(ctx_id) { + ctx = &xe->sriov.vf.ccs.contexts[ctx_id]; + ctx->ctx_id = ctx_id; + + flags = EXEC_QUEUE_FLAG_KERNEL | + EXEC_QUEUE_FLAG_PERMANENT | + EXEC_QUEUE_FLAG_MIGRATE; + q = xe_exec_queue_create_bind(xe, tile, flags, 0); + if (IS_ERR(q)) { + err = PTR_ERR(q); + goto err_ret; + } + ctx->mig_q = q; + + err = alloc_bb_pool(tile, ctx); + if (err) + goto err_free_queue; + + ccs_rw_update_ring(ctx); + + err = register_save_restore_context(ctx); + if (err) + goto err_free_queue; + + err = devm_add_action_or_reset(xe->drm.dev, + xe_sriov_vf_ccs_fini, + ctx); + if (err) + goto err_ret; + } + + xe->sriov.vf.ccs.initialized = 1; + + return 0; + +err_free_queue: + xe_exec_queue_put(q); + +err_ret: + return err; +} + +/** + * xe_sriov_vf_ccs_attach_bo - Insert CCS read write commands in the BO. + * @bo: the &buffer object to which batch buffer commands will be added. + * + * This function shall be called only by VF. It inserts the PTEs and copy + * command instructions in the BO by calling xe_migrate_ccs_rw_copy() + * function. + * + * Returns: 0 if successful, negative error code on failure. + */ +int xe_sriov_vf_ccs_attach_bo(struct xe_bo *bo) +{ + struct xe_device *xe = xe_bo_device(bo); + enum xe_sriov_vf_ccs_rw_ctxs ctx_id; + struct xe_sriov_vf_ccs_ctx *ctx; + struct xe_tile *tile; + struct xe_bb *bb; + int err = 0; + + xe_assert(xe, IS_VF_CCS_READY(xe)); + + tile = xe_device_get_root_tile(xe); + + for_each_ccs_rw_ctx(ctx_id) { + bb = bo->bb_ccs[ctx_id]; + /* bb should be NULL here. Assert if not NULL */ + xe_assert(xe, !bb); + + ctx = &xe->sriov.vf.ccs.contexts[ctx_id]; + err = xe_migrate_ccs_rw_copy(tile, ctx->mig_q, bo, ctx_id); + } + return err; +} + +/** + * xe_sriov_vf_ccs_detach_bo - Remove CCS read write commands from the BO. + * @bo: the &buffer object from which batch buffer commands will be removed. + * + * This function shall be called only by VF. It removes the PTEs and copy + * command instructions from the BO. Make sure to update the BB with MI_NOOP + * before freeing. + * + * Returns: 0 if successful. + */ +int xe_sriov_vf_ccs_detach_bo(struct xe_bo *bo) +{ + struct xe_device *xe = xe_bo_device(bo); + enum xe_sriov_vf_ccs_rw_ctxs ctx_id; + struct xe_bb *bb; + + xe_assert(xe, IS_VF_CCS_READY(xe)); + + if (!xe_bo_has_valid_ccs_bb(bo)) + return 0; + + for_each_ccs_rw_ctx(ctx_id) { + bb = bo->bb_ccs[ctx_id]; + if (!bb) + continue; + + memset(bb->cs, MI_NOOP, bb->len * sizeof(u32)); + xe_bb_free(bb, NULL); + bo->bb_ccs[ctx_id] = NULL; + } + return 0; +} + +/** + * xe_sriov_vf_ccs_print - Print VF CCS details. + * @xe: the &xe_device + * @p: the &drm_printer + * + * This function is for VF use only. + */ +void xe_sriov_vf_ccs_print(struct xe_device *xe, struct drm_printer *p) +{ + struct xe_sa_manager *bb_pool; + enum xe_sriov_vf_ccs_rw_ctxs ctx_id; + + if (!IS_VF_CCS_READY(xe)) + return; + + xe_pm_runtime_get(xe); + + for_each_ccs_rw_ctx(ctx_id) { + bb_pool = xe->sriov.vf.ccs.contexts[ctx_id].mem.ccs_bb_pool; + if (!bb_pool) + break; + + drm_printf(p, "ccs %s bb suballoc info\n", ctx_id ? 
"write" : "read"); + drm_printf(p, "-------------------------\n"); + drm_suballoc_dump_debug_info(&bb_pool->base, p, xe_sa_manager_gpu_addr(bb_pool)); + drm_puts(p, "\n"); + } + + xe_pm_runtime_put(xe); +} diff --git a/drivers/gpu/drm/xe/xe_sriov_vf_ccs.h b/drivers/gpu/drm/xe/xe_sriov_vf_ccs.h new file mode 100644 index 000000000000..0745c0ff0228 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_sriov_vf_ccs.h @@ -0,0 +1,34 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2025 Intel Corporation + */ + +#ifndef _XE_SRIOV_VF_CCS_H_ +#define _XE_SRIOV_VF_CCS_H_ + +#include "xe_device_types.h" +#include "xe_sriov.h" +#include "xe_sriov_vf_ccs_types.h" + +struct drm_printer; +struct xe_device; +struct xe_bo; + +int xe_sriov_vf_ccs_init(struct xe_device *xe); +int xe_sriov_vf_ccs_attach_bo(struct xe_bo *bo); +int xe_sriov_vf_ccs_detach_bo(struct xe_bo *bo); +int xe_sriov_vf_ccs_register_context(struct xe_device *xe); +void xe_sriov_vf_ccs_print(struct xe_device *xe, struct drm_printer *p); + +static inline bool xe_sriov_vf_ccs_ready(struct xe_device *xe) +{ + xe_assert(xe, IS_SRIOV_VF(xe)); + return xe->sriov.vf.ccs.initialized; +} + +#define IS_VF_CCS_READY(xe) ({ \ + struct xe_device *xe__ = (xe); \ + IS_SRIOV_VF(xe__) && xe_sriov_vf_ccs_ready(xe__); \ + }) + +#endif diff --git a/drivers/gpu/drm/xe/xe_sriov_vf_ccs_types.h b/drivers/gpu/drm/xe/xe_sriov_vf_ccs_types.h new file mode 100644 index 000000000000..22c499943d2a --- /dev/null +++ b/drivers/gpu/drm/xe/xe_sriov_vf_ccs_types.h @@ -0,0 +1,51 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2025 Intel Corporation + */ + +#ifndef _XE_SRIOV_VF_CCS_TYPES_H_ +#define _XE_SRIOV_VF_CCS_TYPES_H_ + +#include <linux/types.h> + +#define for_each_ccs_rw_ctx(id__) \ + for ((id__) = 0; (id__) < XE_SRIOV_VF_CCS_CTX_COUNT; (id__)++) + +enum xe_sriov_vf_ccs_rw_ctxs { + XE_SRIOV_VF_CCS_READ_CTX, + XE_SRIOV_VF_CCS_WRITE_CTX, + XE_SRIOV_VF_CCS_CTX_COUNT +}; + +struct xe_migrate; +struct xe_sa_manager; + +/** + * struct xe_sriov_vf_ccs_ctx - VF CCS migration context data. + */ +struct xe_sriov_vf_ccs_ctx { + /** @ctx_id: Id to which context it belongs to */ + enum xe_sriov_vf_ccs_rw_ctxs ctx_id; + + /** @mig_q: exec queues used for migration */ + struct xe_exec_queue *mig_q; + + /** @mem: memory data */ + struct { + /** @mem.ccs_bb_pool: Pool from which batch buffers are allocated. */ + struct xe_sa_manager *ccs_bb_pool; + } mem; +}; + +/** + * struct xe_sriov_vf_ccs - The VF CCS migration support data. + */ +struct xe_sriov_vf_ccs { + /** @contexts: CCS read and write contexts for VF. */ + struct xe_sriov_vf_ccs_ctx contexts[XE_SRIOV_VF_CCS_CTX_COUNT]; + + /** @initialized: Initialization of VF CCS is completed or not. */ + bool initialized; +}; + +#endif diff --git a/drivers/gpu/drm/xe/xe_sriov_vf_types.h b/drivers/gpu/drm/xe/xe_sriov_vf_types.h new file mode 100644 index 000000000000..426cc5841958 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_sriov_vf_types.h @@ -0,0 +1,51 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2023-2025 Intel Corporation + */ + +#ifndef _XE_SRIOV_VF_TYPES_H_ +#define _XE_SRIOV_VF_TYPES_H_ + +#include <linux/types.h> +#include <linux/workqueue_types.h> + +#include "xe_sriov_vf_ccs_types.h" + +/** + * struct xe_sriov_vf_relay_version - PF ABI version details. + */ +struct xe_sriov_vf_relay_version { + /** @major: major version. */ + u16 major; + /** @minor: minor version. 
*/ + u16 minor; +}; + +/** + * struct xe_device_vf - Xe Virtual Function related data + * + * The data in this structure is valid only if driver is running in the + * @XE_SRIOV_MODE_VF mode. + */ +struct xe_device_vf { + /** @pf_version: negotiated VF/PF ABI version. */ + struct xe_sriov_vf_relay_version pf_version; + + /** @migration: VF Migration state data */ + struct { + /** @migration.worker: VF migration recovery worker */ + struct work_struct worker; + /** @migration.gt_flags: Per-GT request flags for VF migration recovery */ + unsigned long gt_flags; + /** + * @migration.enabled: flag indicating if migration support + * was enabled or not due to missing prerequisites + */ + bool enabled; + } migration; + + /** @ccs: VF CCS state data */ + struct xe_sriov_vf_ccs ccs; +}; + +#endif diff --git a/drivers/gpu/drm/xe/xe_step.c b/drivers/gpu/drm/xe/xe_step.c index c77b5c317fa0..10e88f2c9615 100644 --- a/drivers/gpu/drm/xe/xe_step.c +++ b/drivers/gpu/drm/xe/xe_step.c @@ -5,6 +5,7 @@ #include "xe_step.h" +#include <kunit/visibility.h> #include <linux/bitfield.h> #include "xe_device.h" @@ -255,3 +256,4 @@ const char *xe_step_name(enum xe_step step) return "**"; } } +EXPORT_SYMBOL_IF_KUNIT(xe_step_name); diff --git a/drivers/gpu/drm/xe/xe_survivability_mode.c b/drivers/gpu/drm/xe/xe_survivability_mode.c index 1f710b3fc599..1662bfddd4bc 100644 --- a/drivers/gpu/drm/xe/xe_survivability_mode.c +++ b/drivers/gpu/drm/xe/xe_survivability_mode.c @@ -14,6 +14,7 @@ #include "xe_device.h" #include "xe_gt.h" #include "xe_heci_gsc.h" +#include "xe_i2c.h" #include "xe_mmio.h" #include "xe_pcode_api.h" #include "xe_vsec.h" @@ -21,15 +22,18 @@ #define MAX_SCRATCH_MMIO 8 /** - * DOC: Xe Boot Survivability + * DOC: Survivability Mode * - * Boot Survivability is a software based workflow for recovering a system in a failed boot state + * Survivability Mode is a software based workflow for recovering a system in a failed boot state * Here system recoverability is concerned with recovering the firmware responsible for boot. * - * This is implemented by loading the driver with bare minimum (no drm card) to allow the firmware - * to be flashed through mei and collect telemetry. The driver's probe flow is modified - * such that it enters survivability mode when pcode initialization is incomplete and boot status - * denotes a failure. + * Boot Survivability + * =================== + * + * Boot Survivability is implemented by loading the driver with bare minimum (no drm card) to allow + * the firmware to be flashed through mei driver and collect telemetry. The driver's probe flow is + * modified such that it enters survivability mode when pcode initialization is incomplete and boot + * status denotes a failure. * * Survivability mode can also be entered manually using the survivability mode attribute available * through configfs which is beneficial in several usecases. It can be used to address scenarios @@ -40,12 +44,14 @@ * * # echo 1 > /sys/kernel/config/xe/0000:03:00.0/survivability_mode * + * It is the responsibility of the user to clear the mode once firmware flash is complete. 
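As an aside on the configfs workflow described above: enabling the mode is shown earlier with the echo 1 command, and clearing it after a successful firmware flash would presumably be the symmetric write of 0 to the same attribute; treat the exact command as an assumption and refer to the xe_configfs documentation for the authoritative behaviour:

        # echo 0 > /sys/kernel/config/xe/0000:03:00.0/survivability_mode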
+ * * Refer :ref:`xe_configfs` for more details on how to use configfs * * Survivability mode is indicated by the below admin-only readable sysfs which provides additional * debug information:: * - * /sys/bus/pci/devices/<device>/surivability_mode + * /sys/bus/pci/devices/<device>/survivability_mode * * Capability Information: * Provides boot status @@ -55,6 +61,22 @@ * Provides history of previous failures * Auxiliary Information * Certain failures may have information in addition to postcode information + * + * Runtime Survivability + * ===================== + * + * Certain runtime firmware errors can cause the device to enter a wedged state + * (:ref:`xe-device-wedging`) requiring a firmware flash to restore normal operation. + * Runtime Survivability Mode indicates that a firmware flash is necessary to recover the device and + * is indicated by the presence of survivability mode sysfs:: + * + * /sys/bus/pci/devices/<device>/survivability_mode + * + * Survivability mode sysfs provides information about the type of survivability mode. + * + * When such errors occur, userspace is notified with the drm device wedged uevent and runtime + * survivability mode. User can then initiate a firmware flash using userspace tools like fwupd + * to restore device to normal operation. */ static u32 aux_history_offset(u32 reg_value) @@ -120,6 +142,14 @@ static void log_survivability_info(struct pci_dev *pdev) } } +static int check_boot_failure(struct xe_device *xe) +{ + struct xe_survivability *survivability = &xe->survivability; + + return survivability->boot_status == NON_CRITICAL_FAILURE || + survivability->boot_status == CRITICAL_FAILURE; +} + static ssize_t survivability_mode_show(struct device *dev, struct device_attribute *attr, char *buff) { @@ -129,6 +159,12 @@ static ssize_t survivability_mode_show(struct device *dev, struct xe_survivability_info *info = survivability->info; int index = 0, count = 0; + count += sysfs_emit_at(buff, count, "Survivability mode type: %s\n", + survivability->type ? 
"Runtime" : "Boot"); + + if (!check_boot_failure(xe)) + return count; + for (index = 0; index < MAX_SCRATCH_MMIO; index++) { if (info[index].reg) count += sysfs_emit_at(buff, count, "%s: 0x%x - 0x%x\n", info[index].name, @@ -146,16 +182,14 @@ static void xe_survivability_mode_fini(void *arg) struct pci_dev *pdev = to_pci_dev(xe->drm.dev); struct device *dev = &pdev->dev; - xe_configfs_clear_survivability_mode(pdev); sysfs_remove_file(&dev->kobj, &dev_attr_survivability_mode.attr); } -static int enable_survivability_mode(struct pci_dev *pdev) +static int create_survivability_sysfs(struct pci_dev *pdev) { struct device *dev = &pdev->dev; struct xe_device *xe = pdev_to_xe_device(pdev); - struct xe_survivability *survivability = &xe->survivability; - int ret = 0; + int ret; /* create survivability mode sysfs */ ret = sysfs_create_file(&dev->kobj, &dev_attr_survivability_mode.attr); @@ -169,35 +203,72 @@ static int enable_survivability_mode(struct pci_dev *pdev) if (ret) return ret; + return 0; +} + +static int enable_boot_survivability_mode(struct pci_dev *pdev) +{ + struct device *dev = &pdev->dev; + struct xe_device *xe = pdev_to_xe_device(pdev); + struct xe_survivability *survivability = &xe->survivability; + int ret = 0; + + ret = create_survivability_sysfs(pdev); + if (ret) + return ret; + /* Make sure xe_heci_gsc_init() knows about survivability mode */ survivability->mode = true; ret = xe_heci_gsc_init(xe); - if (ret) { - /* - * But if it fails, device can't enter survivability - * so move it back for correct error handling - */ - survivability->mode = false; - return ret; - } + if (ret) + goto err; xe_vsec_init(xe); + ret = xe_i2c_probe(xe); + if (ret) + goto err; + dev_err(dev, "In Survivability Mode\n"); return 0; + +err: + survivability->mode = false; + return ret; +} + +static int init_survivability_mode(struct xe_device *xe) +{ + struct xe_survivability *survivability = &xe->survivability; + struct xe_survivability_info *info; + + survivability->size = MAX_SCRATCH_MMIO; + + info = devm_kcalloc(xe->drm.dev, survivability->size, sizeof(*info), + GFP_KERNEL); + if (!info) + return -ENOMEM; + + survivability->info = info; + + populate_survivability_info(xe); + + return 0; } /** - * xe_survivability_mode_is_enabled - check if survivability mode is enabled + * xe_survivability_mode_is_boot_enabled- check if boot survivability mode is enabled * @xe: xe device instance * - * Returns true if in survivability mode, false otherwise + * Returns true if in boot survivability mode of type, else false */ -bool xe_survivability_mode_is_enabled(struct xe_device *xe) +bool xe_survivability_mode_is_boot_enabled(struct xe_device *xe) { - return xe->survivability.mode; + struct xe_survivability *survivability = &xe->survivability; + + return survivability->mode && survivability->type == XE_SURVIVABILITY_TYPE_BOOT; } /** @@ -218,19 +289,10 @@ bool xe_survivability_mode_is_requested(struct xe_device *xe) u32 data; bool survivability_mode; - if (!IS_DGFX(xe) || IS_SRIOV_VF(xe)) + if (!IS_DGFX(xe) || IS_SRIOV_VF(xe) || xe->info.platform < XE_BATTLEMAGE) return false; survivability_mode = xe_configfs_get_survivability_mode(pdev); - - if (xe->info.platform < XE_BATTLEMAGE) { - if (survivability_mode) { - dev_err(&pdev->dev, "Survivability Mode is not supported on this card\n"); - xe_configfs_clear_survivability_mode(pdev); - } - return false; - } - /* Enable survivability mode if set via configfs */ if (survivability_mode) return true; @@ -238,44 +300,78 @@ bool xe_survivability_mode_is_requested(struct 
xe_device *xe) data = xe_mmio_read32(mmio, PCODE_SCRATCH(0)); survivability->boot_status = REG_FIELD_GET(BOOT_STATUS, data); - return survivability->boot_status == NON_CRITICAL_FAILURE || - survivability->boot_status == CRITICAL_FAILURE; + return check_boot_failure(xe); } /** - * xe_survivability_mode_enable - Initialize and enable the survivability mode + * xe_survivability_mode_runtime_enable - Initialize and enable runtime survivability mode * @xe: xe device instance * - * Initialize survivability information and enable survivability mode + * Initialize survivability information and enable runtime survivability mode. + * Runtime survivability mode is enabled when certain errors cause the device to be + * in non-recoverable state. The device is declared wedged with the appropriate + * recovery method and survivability mode sysfs exposed to userspace * - * Return: 0 if survivability mode is enabled or not requested; negative error - * code otherwise. + * Return: 0 if runtime survivability mode is enabled, negative error code otherwise. */ -int xe_survivability_mode_enable(struct xe_device *xe) +int xe_survivability_mode_runtime_enable(struct xe_device *xe) { struct xe_survivability *survivability = &xe->survivability; - struct xe_survivability_info *info; struct pci_dev *pdev = to_pci_dev(xe->drm.dev); + int ret; - if (!xe_survivability_mode_is_requested(xe)) - return 0; + if (!IS_DGFX(xe) || IS_SRIOV_VF(xe) || xe->info.platform < XE_BATTLEMAGE) { + dev_err(&pdev->dev, "Runtime Survivability Mode not supported\n"); + return -EINVAL; + } - survivability->size = MAX_SCRATCH_MMIO; + ret = init_survivability_mode(xe); + if (ret) + return ret; - info = devm_kcalloc(xe->drm.dev, survivability->size, sizeof(*info), - GFP_KERNEL); - if (!info) - return -ENOMEM; + ret = create_survivability_sysfs(pdev); + if (ret) + dev_err(&pdev->dev, "Failed to create survivability mode sysfs\n"); - survivability->info = info; + survivability->type = XE_SURVIVABILITY_TYPE_RUNTIME; + dev_err(&pdev->dev, "Runtime Survivability mode enabled\n"); - populate_survivability_info(xe); + xe_device_set_wedged_method(xe, DRM_WEDGE_RECOVERY_VENDOR); + xe_device_declare_wedged(xe); + dev_err(&pdev->dev, "Firmware flash required, Please refer to the userspace documentation for more details!\n"); + + return 0; +} + +/** + * xe_survivability_mode_boot_enable - Initialize and enable boot survivability mode + * @xe: xe device instance + * + * Initialize survivability information and enable boot survivability mode + * + * Return: 0 if boot survivability mode is enabled or not requested, negative error + * code otherwise. 
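To show how userspace could tell the two modes apart via the admin-readable sysfs described above, here is a minimal sketch that parses the "Survivability mode type:" line emitted by survivability_mode_show(). The PCI address is a hypothetical placeholder, and the fwupd mention is only an example of a flashing tool, as noted earlier in this documentation.

        #include <stdio.h>
        #include <string.h>

        int main(void)
        {
                /* Hypothetical BDF; substitute the real PCI address of the affected device. */
                const char *path = "/sys/bus/pci/devices/0000:03:00.0/survivability_mode";
                char line[128];
                FILE *f = fopen(path, "r");

                if (!f)
                        return 1;       /* file absent: device is not in survivability mode */

                if (fgets(line, sizeof(line), f) && strstr(line, "Runtime"))
                        printf("runtime survivability: firmware flash (e.g. via fwupd) needed\n");
                else
                        printf("boot survivability mode\n");

                fclose(f);
                return 0;
        }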
+ */ +int xe_survivability_mode_boot_enable(struct xe_device *xe) +{ + struct xe_survivability *survivability = &xe->survivability; + struct pci_dev *pdev = to_pci_dev(xe->drm.dev); + int ret; + + if (!xe_survivability_mode_is_requested(xe)) + return 0; + + ret = init_survivability_mode(xe); + if (ret) + return ret; - /* Only log debug information and exit if it is a critical failure */ + /* Log breadcrumbs but do not enter survivability mode for Critical boot errors */ if (survivability->boot_status == CRITICAL_FAILURE) { log_survivability_info(pdev); return -ENXIO; } - return enable_survivability_mode(pdev); + survivability->type = XE_SURVIVABILITY_TYPE_BOOT; + + return enable_boot_survivability_mode(pdev); } diff --git a/drivers/gpu/drm/xe/xe_survivability_mode.h b/drivers/gpu/drm/xe/xe_survivability_mode.h index 02231c2bf008..1cc94226aa82 100644 --- a/drivers/gpu/drm/xe/xe_survivability_mode.h +++ b/drivers/gpu/drm/xe/xe_survivability_mode.h @@ -10,8 +10,9 @@ struct xe_device; -int xe_survivability_mode_enable(struct xe_device *xe); -bool xe_survivability_mode_is_enabled(struct xe_device *xe); +int xe_survivability_mode_boot_enable(struct xe_device *xe); +int xe_survivability_mode_runtime_enable(struct xe_device *xe); +bool xe_survivability_mode_is_boot_enabled(struct xe_device *xe); bool xe_survivability_mode_is_requested(struct xe_device *xe); #endif /* _XE_SURVIVABILITY_MODE_H_ */ diff --git a/drivers/gpu/drm/xe/xe_survivability_mode_types.h b/drivers/gpu/drm/xe/xe_survivability_mode_types.h index 19d433e253df..cd65a5d167c9 100644 --- a/drivers/gpu/drm/xe/xe_survivability_mode_types.h +++ b/drivers/gpu/drm/xe/xe_survivability_mode_types.h @@ -9,6 +9,11 @@ #include <linux/limits.h> #include <linux/types.h> +enum xe_survivability_type { + XE_SURVIVABILITY_TYPE_BOOT, + XE_SURVIVABILITY_TYPE_RUNTIME, +}; + struct xe_survivability_info { char name[NAME_MAX]; u32 reg; @@ -30,6 +35,9 @@ struct xe_survivability { /** @mode: boolean to indicate survivability mode */ bool mode; + + /** @type: survivability type */ + enum xe_survivability_type type; }; #endif /* _XE_SURVIVABILITY_MODE_TYPES_H_ */ diff --git a/drivers/gpu/drm/xe/xe_svm.c b/drivers/gpu/drm/xe/xe_svm.c index 26418e9bdff0..129e7818565c 100644 --- a/drivers/gpu/drm/xe/xe_svm.c +++ b/drivers/gpu/drm/xe/xe_svm.c @@ -3,16 +3,21 @@ * Copyright © 2024 Intel Corporation */ +#include <drm/drm_drv.h> + #include "xe_bo.h" +#include "xe_exec_queue_types.h" #include "xe_gt_stats.h" -#include "xe_gt_tlb_invalidation.h" #include "xe_migrate.h" #include "xe_module.h" +#include "xe_pm.h" #include "xe_pt.h" #include "xe_svm.h" +#include "xe_tile.h" #include "xe_ttm_vram_mgr.h" #include "xe_vm.h" #include "xe_vm_types.h" +#include "xe_vram_types.h" static bool xe_svm_range_in_vram(struct xe_svm_range *range) { @@ -21,9 +26,9 @@ static bool xe_svm_range_in_vram(struct xe_svm_range *range) * memory. 
*/ - struct drm_gpusvm_range_flags flags = { + struct drm_gpusvm_pages_flags flags = { /* Pairs with WRITE_ONCE in drm_gpusvm.c */ - .__flags = READ_ONCE(range->base.flags.__flags), + .__flags = READ_ONCE(range->base.pages.flags.__flags), }; return flags.has_devmem_pages; @@ -45,15 +50,15 @@ static struct xe_vm *range_to_vm(struct drm_gpusvm_range *r) return gpusvm_to_vm(r->gpusvm); } -#define range_debug(r__, operaton__) \ +#define range_debug(r__, operation__) \ vm_dbg(&range_to_vm(&(r__)->base)->xe->drm, \ "%s: asid=%u, gpusvm=%p, vram=%d,%d, seqno=%lu, " \ "start=0x%014lx, end=0x%014lx, size=%lu", \ - (operaton__), range_to_vm(&(r__)->base)->usm.asid, \ + (operation__), range_to_vm(&(r__)->base)->usm.asid, \ (r__)->base.gpusvm, \ xe_svm_range_in_vram((r__)) ? 1 : 0, \ xe_svm_range_has_vram_binding((r__)) ? 1 : 0, \ - (r__)->base.notifier_seq, \ + (r__)->base.pages.notifier_seq, \ xe_svm_range_start((r__)), xe_svm_range_end((r__)), \ xe_svm_range_size((r__))) @@ -62,11 +67,6 @@ void xe_svm_range_debug(struct xe_svm_range *range, const char *operation) range_debug(range, operation); } -static void *xe_svm_devm_owner(struct xe_device *xe) -{ - return xe; -} - static struct drm_gpusvm_range * xe_svm_range_alloc(struct drm_gpusvm *gpusvm) { @@ -108,6 +108,11 @@ xe_svm_garbage_collector_add_range(struct xe_vm *vm, struct xe_svm_range *range, &vm->svm.garbage_collector.work); } +static void xe_svm_tlb_inval_count_stats_incr(struct xe_gt *gt) +{ + xe_gt_stats_incr(gt, XE_GT_STATS_ID_SVM_TLB_INVAL_COUNT, 1); +} + static u8 xe_svm_range_notifier_event_begin(struct xe_vm *vm, struct drm_gpusvm_range *r, const struct mmu_notifier_range *mmu_range, @@ -124,7 +129,7 @@ xe_svm_range_notifier_event_begin(struct xe_vm *vm, struct drm_gpusvm_range *r, range_debug(range, "NOTIFIER"); /* Skip if already unmapped or if no binding exist */ - if (range->base.flags.unmapped || !range->tile_present) + if (range->base.pages.flags.unmapped || !range->tile_present) return 0; range_debug(range, "NOTIFIER - EXECUTE"); @@ -140,13 +145,19 @@ xe_svm_range_notifier_event_begin(struct xe_vm *vm, struct drm_gpusvm_range *r, */ for_each_tile(tile, xe, id) if (xe_pt_zap_ptes_range(tile, vm, range)) { - tile_mask |= BIT(id); /* * WRITE_ONCE pairs with READ_ONCE in * xe_vm_has_valid_gpu_mapping() */ WRITE_ONCE(range->tile_invalidated, range->tile_invalidated | BIT(id)); + + if (!(tile_mask & BIT(id))) { + xe_svm_tlb_inval_count_stats_incr(tile->primary_gt); + if (tile->media_gt) + xe_svm_tlb_inval_count_stats_incr(tile->media_gt); + tile_mask |= BIT(id); + } } return tile_mask; @@ -166,6 +177,24 @@ xe_svm_range_notifier_event_end(struct xe_vm *vm, struct drm_gpusvm_range *r, mmu_range); } +static s64 xe_svm_stats_ktime_us_delta(ktime_t start) +{ + return IS_ENABLED(CONFIG_DEBUG_FS) ? + ktime_us_delta(ktime_get(), start) : 0; +} + +static void xe_svm_tlb_inval_us_stats_incr(struct xe_gt *gt, ktime_t start) +{ + s64 us_delta = xe_svm_stats_ktime_us_delta(start); + + xe_gt_stats_incr(gt, XE_GT_STATS_ID_SVM_TLB_INVAL_US, us_delta); +} + +static ktime_t xe_svm_stats_ktime_get(void) +{ + return IS_ENABLED(CONFIG_DEBUG_FS) ? 
ktime_get() : 0; +} + static void xe_svm_invalidate(struct drm_gpusvm *gpusvm, struct drm_gpusvm_notifier *notifier, const struct mmu_notifier_range *mmu_range) @@ -173,8 +202,10 @@ static void xe_svm_invalidate(struct drm_gpusvm *gpusvm, struct xe_vm *vm = gpusvm_to_vm(gpusvm); struct xe_device *xe = vm->xe; struct drm_gpusvm_range *r, *first; + struct xe_tile *tile; + ktime_t start = xe_svm_stats_ktime_get(); u64 adj_start = mmu_range->start, adj_end = mmu_range->end; - u8 tile_mask = 0; + u8 tile_mask = 0, id; long err; xe_svm_assert_in_notifier(vm); @@ -220,13 +251,20 @@ static void xe_svm_invalidate(struct drm_gpusvm *gpusvm, xe_device_wmb(xe); - err = xe_vm_range_tilemask_tlb_invalidation(vm, adj_start, adj_end, tile_mask); + err = xe_vm_range_tilemask_tlb_inval(vm, adj_start, adj_end, tile_mask); WARN_ON_ONCE(err); range_notifier_event_end: r = first; drm_gpusvm_for_each_range(r, notifier, adj_start, adj_end) xe_svm_range_notifier_event_end(vm, r, mmu_range); + for_each_tile(tile, xe, id) { + if (tile_mask & BIT(id)) { + xe_svm_tlb_inval_us_stats_incr(tile->primary_gt, start); + if (tile->media_gt) + xe_svm_tlb_inval_us_stats_incr(tile->media_gt, start); + } + } } static int __xe_svm_garbage_collector(struct xe_vm *vm, @@ -248,24 +286,78 @@ static int __xe_svm_garbage_collector(struct xe_vm *vm, return 0; } +static int xe_svm_range_set_default_attr(struct xe_vm *vm, u64 range_start, u64 range_end) +{ + struct xe_vma *vma; + struct xe_vma_mem_attr default_attr = { + .preferred_loc = { + .devmem_fd = DRM_XE_PREFERRED_LOC_DEFAULT_DEVICE, + .migration_policy = DRM_XE_MIGRATE_ALL_PAGES, + }, + .atomic_access = DRM_XE_ATOMIC_UNDEFINED, + }; + int err = 0; + + vma = xe_vm_find_vma_by_addr(vm, range_start); + if (!vma) + return -EINVAL; + + if (!(vma->gpuva.flags & XE_VMA_MADV_AUTORESET)) { + drm_dbg(&vm->xe->drm, "Skipping madvise reset for vma.\n"); + return 0; + } + + if (xe_vma_has_default_mem_attrs(vma)) + return 0; + + vm_dbg(&vm->xe->drm, "Existing VMA start=0x%016llx, vma_end=0x%016llx", + xe_vma_start(vma), xe_vma_end(vma)); + + if (xe_vma_start(vma) == range_start && xe_vma_end(vma) == range_end) { + default_attr.pat_index = vma->attr.default_pat_index; + default_attr.default_pat_index = vma->attr.default_pat_index; + vma->attr = default_attr; + } else { + vm_dbg(&vm->xe->drm, "Split VMA start=0x%016llx, vma_end=0x%016llx", + range_start, range_end); + err = xe_vm_alloc_cpu_addr_mirror_vma(vm, range_start, range_end - range_start); + if (err) { + drm_warn(&vm->xe->drm, "VMA SPLIT failed: %pe\n", ERR_PTR(err)); + xe_vm_kill(vm, true); + return err; + } + } + + /* + * On call from xe_svm_handle_pagefault original VMA might be changed + * signal this to lookup for VMA again. 
+ */ + return -EAGAIN; +} + static int xe_svm_garbage_collector(struct xe_vm *vm) { struct xe_svm_range *range; - int err; + u64 range_start; + u64 range_end; + int err, ret = 0; lockdep_assert_held_write(&vm->lock); if (xe_vm_is_closed_or_banned(vm)) return -ENOENT; - spin_lock(&vm->svm.garbage_collector.lock); for (;;) { + spin_lock(&vm->svm.garbage_collector.lock); range = list_first_entry_or_null(&vm->svm.garbage_collector.range_list, typeof(*range), garbage_collector_link); if (!range) break; + range_start = xe_svm_range_start(range); + range_end = xe_svm_range_end(range); + list_del(&range->garbage_collector_link); spin_unlock(&vm->svm.garbage_collector.lock); @@ -278,11 +370,17 @@ static int xe_svm_garbage_collector(struct xe_vm *vm) return err; } - spin_lock(&vm->svm.garbage_collector.lock); + err = xe_svm_range_set_default_attr(vm, range_start, range_end); + if (err) { + if (err == -EAGAIN) + ret = -EAGAIN; + else + return err; + } } spin_unlock(&vm->svm.garbage_collector.lock); - return 0; + return ret; } static void xe_svm_garbage_collector_work_func(struct work_struct *w) @@ -295,28 +393,22 @@ static void xe_svm_garbage_collector_work_func(struct work_struct *w) up_write(&vm->lock); } -#if IS_ENABLED(CONFIG_DRM_XE_DEVMEM_MIRROR) +#if IS_ENABLED(CONFIG_DRM_XE_PAGEMAP) static struct xe_vram_region *page_to_vr(struct page *page) { return container_of(page_pgmap(page), struct xe_vram_region, pagemap); } -static struct xe_tile *vr_to_tile(struct xe_vram_region *vr) -{ - return container_of(vr, struct xe_tile, mem.vram); -} - static u64 xe_vram_region_page_to_dpa(struct xe_vram_region *vr, struct page *page) { u64 dpa; - struct xe_tile *tile = vr_to_tile(vr); u64 pfn = page_to_pfn(page); u64 offset; - xe_tile_assert(tile, is_device_private_page(page)); - xe_tile_assert(tile, (pfn << PAGE_SHIFT) >= vr->hpa_base); + xe_assert(vr->xe, is_device_private_page(page)); + xe_assert(vr->xe, (pfn << PAGE_SHIFT) >= vr->hpa_base); offset = (pfn << PAGE_SHIFT) - vr->hpa_base; dpa = vr->dpa_base + offset; @@ -329,17 +421,74 @@ enum xe_svm_copy_dir { XE_SVM_COPY_TO_SRAM, }; -static int xe_svm_copy(struct page **pages, dma_addr_t *dma_addr, +static void xe_svm_copy_kb_stats_incr(struct xe_gt *gt, + const enum xe_svm_copy_dir dir, + int kb) +{ + if (dir == XE_SVM_COPY_TO_VRAM) + xe_gt_stats_incr(gt, XE_GT_STATS_ID_SVM_DEVICE_COPY_KB, kb); + else + xe_gt_stats_incr(gt, XE_GT_STATS_ID_SVM_CPU_COPY_KB, kb); +} + +static void xe_svm_copy_us_stats_incr(struct xe_gt *gt, + const enum xe_svm_copy_dir dir, + unsigned long npages, + ktime_t start) +{ + s64 us_delta = xe_svm_stats_ktime_us_delta(start); + + if (dir == XE_SVM_COPY_TO_VRAM) { + switch (npages) { + case 1: + xe_gt_stats_incr(gt, XE_GT_STATS_ID_SVM_4K_DEVICE_COPY_US, + us_delta); + break; + case 16: + xe_gt_stats_incr(gt, XE_GT_STATS_ID_SVM_64K_DEVICE_COPY_US, + us_delta); + break; + case 512: + xe_gt_stats_incr(gt, XE_GT_STATS_ID_SVM_2M_DEVICE_COPY_US, + us_delta); + break; + } + xe_gt_stats_incr(gt, XE_GT_STATS_ID_SVM_DEVICE_COPY_US, + us_delta); + } else { + switch (npages) { + case 1: + xe_gt_stats_incr(gt, XE_GT_STATS_ID_SVM_4K_CPU_COPY_US, + us_delta); + break; + case 16: + xe_gt_stats_incr(gt, XE_GT_STATS_ID_SVM_64K_CPU_COPY_US, + us_delta); + break; + case 512: + xe_gt_stats_incr(gt, XE_GT_STATS_ID_SVM_2M_CPU_COPY_US, + us_delta); + break; + } + xe_gt_stats_incr(gt, XE_GT_STATS_ID_SVM_CPU_COPY_US, + us_delta); + } +} + +static int xe_svm_copy(struct page **pages, + struct drm_pagemap_addr *pagemap_addr, unsigned long npages, const enum 
xe_svm_copy_dir dir) { struct xe_vram_region *vr = NULL; - struct xe_tile *tile; + struct xe_gt *gt = NULL; + struct xe_device *xe; struct dma_fence *fence = NULL; unsigned long i; #define XE_VRAM_ADDR_INVALID ~0x0ull u64 vram_addr = XE_VRAM_ADDR_INVALID; int err = 0, pos = 0; bool sram = dir == XE_SVM_COPY_TO_SRAM; + ktime_t start = xe_svm_stats_ktime_get(); /* * This flow is complex: it locates physically contiguous device pages, @@ -361,12 +510,13 @@ static int xe_svm_copy(struct page **pages, dma_addr_t *dma_addr, last = (i + 1) == npages; /* No CPU page and no device pages queue'd to copy */ - if (!dma_addr[i] && vram_addr == XE_VRAM_ADDR_INVALID) + if (!pagemap_addr[i].addr && vram_addr == XE_VRAM_ADDR_INVALID) continue; if (!vr && spage) { vr = page_to_vr(spage); - tile = vr_to_tile(vr); + gt = xe_migrate_exec_queue(vr->migrate)->gt; + xe = vr->xe; } XE_WARN_ON(spage && page_to_vr(spage) != vr); @@ -375,7 +525,7 @@ static int xe_svm_copy(struct page **pages, dma_addr_t *dma_addr, * first device page, check if physical contiguous on subsequent * device pages. */ - if (dma_addr[i] && spage) { + if (pagemap_addr[i].addr && spage) { __vram_addr = xe_vram_region_page_to_dpa(vr, spage); if (vram_addr == XE_VRAM_ADDR_INVALID) { vram_addr = __vram_addr; @@ -383,6 +533,14 @@ static int xe_svm_copy(struct page **pages, dma_addr_t *dma_addr, } match = vram_addr + PAGE_SIZE * (i - pos) == __vram_addr; + /* Expected with contiguous memory */ + xe_assert(vr->xe, match); + + if (pagemap_addr[i].order) { + i += NR_PAGES(pagemap_addr[i].order) - 1; + chunk = (i - pos) == (XE_MIGRATE_CHUNK_SIZE / PAGE_SIZE); + last = (i + 1) == npages; + } } /* @@ -397,21 +555,26 @@ static int xe_svm_copy(struct page **pages, dma_addr_t *dma_addr, int incr = (match && last) ? 
1 : 0; if (vram_addr != XE_VRAM_ADDR_INVALID) { + xe_svm_copy_kb_stats_incr(gt, dir, + (i - pos + incr) * + (PAGE_SIZE / SZ_1K)); if (sram) { - vm_dbg(&tile->xe->drm, + vm_dbg(&xe->drm, "COPY TO SRAM - 0x%016llx -> 0x%016llx, NPAGES=%ld", - vram_addr, (u64)dma_addr[pos], i - pos + incr); - __fence = xe_migrate_from_vram(tile->migrate, + vram_addr, + (u64)pagemap_addr[pos].addr, i - pos + incr); + __fence = xe_migrate_from_vram(vr->migrate, i - pos + incr, vram_addr, - dma_addr + pos); + &pagemap_addr[pos]); } else { - vm_dbg(&tile->xe->drm, + vm_dbg(&xe->drm, "COPY TO VRAM - 0x%016llx -> 0x%016llx, NPAGES=%ld", - (u64)dma_addr[pos], vram_addr, i - pos + incr); - __fence = xe_migrate_to_vram(tile->migrate, + (u64)pagemap_addr[pos].addr, vram_addr, + i - pos + incr); + __fence = xe_migrate_to_vram(vr->migrate, i - pos + incr, - dma_addr + pos, + &pagemap_addr[pos], vram_addr); } if (IS_ERR(__fence)) { @@ -424,7 +587,7 @@ static int xe_svm_copy(struct page **pages, dma_addr_t *dma_addr, } /* Setup physical address of next device page */ - if (dma_addr[i] && spage) { + if (pagemap_addr[i].addr && spage) { vram_addr = __vram_addr; pos = i; } else { @@ -433,19 +596,21 @@ static int xe_svm_copy(struct page **pages, dma_addr_t *dma_addr, /* Extra mismatched device page, copy it */ if (!match && last && vram_addr != XE_VRAM_ADDR_INVALID) { + xe_svm_copy_kb_stats_incr(gt, dir, + (PAGE_SIZE / SZ_1K)); if (sram) { - vm_dbg(&tile->xe->drm, + vm_dbg(&xe->drm, "COPY TO SRAM - 0x%016llx -> 0x%016llx, NPAGES=%d", - vram_addr, (u64)dma_addr[pos], 1); - __fence = xe_migrate_from_vram(tile->migrate, 1, + vram_addr, (u64)pagemap_addr[pos].addr, 1); + __fence = xe_migrate_from_vram(vr->migrate, 1, vram_addr, - dma_addr + pos); + &pagemap_addr[pos]); } else { - vm_dbg(&tile->xe->drm, + vm_dbg(&xe->drm, "COPY TO VRAM - 0x%016llx -> 0x%016llx, NPAGES=%d", - (u64)dma_addr[pos], vram_addr, 1); - __fence = xe_migrate_to_vram(tile->migrate, 1, - dma_addr + pos, + (u64)pagemap_addr[pos].addr, vram_addr, 1); + __fence = xe_migrate_to_vram(vr->migrate, 1, + &pagemap_addr[pos], vram_addr); } if (IS_ERR(__fence)) { @@ -466,33 +631,45 @@ err_out: dma_fence_put(fence); } + /* + * XXX: We can't derive the GT here (or anywhere in this functions, but + * compute always uses the primary GT so accumlate stats on the likely + * GT of the fault. 
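The switch cases 1, 16 and 512 used by the copy-time statistics helpers above correspond to the 4K, 64K and 2M SVM fault chunk sizes, assuming 4 KiB pages; the tiny sketch below just makes that mapping explicit.

        #include <stdio.h>

        int main(void)
        {
                const unsigned long page = 4096;        /* assuming 4 KiB pages */

                printf("SZ_4K  -> %lu page(s)\n", 4096UL / page);              /* case 1   */
                printf("SZ_64K -> %lu pages\n",   65536UL / page);             /* case 16  */
                printf("SZ_2M  -> %lu pages\n",   (2UL * 1024 * 1024) / page); /* case 512 */
                return 0;
        }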
+ */ + if (gt) + xe_svm_copy_us_stats_incr(gt, dir, npages, start); + return err; #undef XE_MIGRATE_CHUNK_SIZE #undef XE_VRAM_ADDR_INVALID } -static int xe_svm_copy_to_devmem(struct page **pages, dma_addr_t *dma_addr, +static int xe_svm_copy_to_devmem(struct page **pages, + struct drm_pagemap_addr *pagemap_addr, unsigned long npages) { - return xe_svm_copy(pages, dma_addr, npages, XE_SVM_COPY_TO_VRAM); + return xe_svm_copy(pages, pagemap_addr, npages, XE_SVM_COPY_TO_VRAM); } -static int xe_svm_copy_to_ram(struct page **pages, dma_addr_t *dma_addr, +static int xe_svm_copy_to_ram(struct page **pages, + struct drm_pagemap_addr *pagemap_addr, unsigned long npages) { - return xe_svm_copy(pages, dma_addr, npages, XE_SVM_COPY_TO_SRAM); + return xe_svm_copy(pages, pagemap_addr, npages, XE_SVM_COPY_TO_SRAM); } -static struct xe_bo *to_xe_bo(struct drm_gpusvm_devmem *devmem_allocation) +static struct xe_bo *to_xe_bo(struct drm_pagemap_devmem *devmem_allocation) { return container_of(devmem_allocation, struct xe_bo, devmem_allocation); } -static void xe_svm_devmem_release(struct drm_gpusvm_devmem *devmem_allocation) +static void xe_svm_devmem_release(struct drm_pagemap_devmem *devmem_allocation) { struct xe_bo *bo = to_xe_bo(devmem_allocation); + struct xe_device *xe = xe_bo_device(bo); xe_bo_put_async(bo); + xe_pm_runtime_put(xe); } static u64 block_offset_to_pfn(struct xe_vram_region *vr, u64 offset) @@ -500,12 +677,12 @@ static u64 block_offset_to_pfn(struct xe_vram_region *vr, u64 offset) return PHYS_PFN(offset + vr->hpa_base); } -static struct drm_buddy *tile_to_buddy(struct xe_tile *tile) +static struct drm_buddy *vram_to_buddy(struct xe_vram_region *vram) { - return &tile->mem.vram.ttm.mm; + return &vram->ttm.mm; } -static int xe_svm_populate_devmem_pfn(struct drm_gpusvm_devmem *devmem_allocation, +static int xe_svm_populate_devmem_pfn(struct drm_pagemap_devmem *devmem_allocation, unsigned long npages, unsigned long *pfn) { struct xe_bo *bo = to_xe_bo(devmem_allocation); @@ -516,8 +693,7 @@ static int xe_svm_populate_devmem_pfn(struct drm_gpusvm_devmem *devmem_allocatio list_for_each_entry(block, blocks, link) { struct xe_vram_region *vr = block->private; - struct xe_tile *tile = vr_to_tile(vr); - struct drm_buddy *buddy = tile_to_buddy(tile); + struct drm_buddy *buddy = vram_to_buddy(vr); u64 block_pfn = block_offset_to_pfn(vr, drm_buddy_block_offset(block)); int i; @@ -528,7 +704,7 @@ static int xe_svm_populate_devmem_pfn(struct drm_gpusvm_devmem *devmem_allocatio return 0; } -static const struct drm_gpusvm_devmem_ops gpusvm_devmem_ops = { +static const struct drm_pagemap_devmem_ops dpagemap_devmem_ops = { .devmem_release = xe_svm_devmem_release, .populate_devmem_pfn = xe_svm_populate_devmem_pfn, .copy_to_devmem = xe_svm_copy_to_devmem, @@ -561,22 +737,25 @@ int xe_svm_init(struct xe_vm *vm) { int err; - spin_lock_init(&vm->svm.garbage_collector.lock); - INIT_LIST_HEAD(&vm->svm.garbage_collector.range_list); - INIT_WORK(&vm->svm.garbage_collector.work, - xe_svm_garbage_collector_work_func); - - err = drm_gpusvm_init(&vm->svm.gpusvm, "Xe SVM", &vm->xe->drm, - current->mm, xe_svm_devm_owner(vm->xe), 0, - vm->size, xe_modparam.svm_notifier_size * SZ_1M, - &gpusvm_ops, fault_chunk_sizes, - ARRAY_SIZE(fault_chunk_sizes)); - if (err) - return err; - - drm_gpusvm_driver_set_lock(&vm->svm.gpusvm, &vm->lock); + if (vm->flags & XE_VM_FLAG_FAULT_MODE) { + spin_lock_init(&vm->svm.garbage_collector.lock); + INIT_LIST_HEAD(&vm->svm.garbage_collector.range_list); + 
INIT_WORK(&vm->svm.garbage_collector.work, + xe_svm_garbage_collector_work_func); + + err = drm_gpusvm_init(&vm->svm.gpusvm, "Xe SVM", &vm->xe->drm, + current->mm, 0, vm->size, + xe_modparam.svm_notifier_size * SZ_1M, + &gpusvm_ops, fault_chunk_sizes, + ARRAY_SIZE(fault_chunk_sizes)); + drm_gpusvm_driver_set_lock(&vm->svm.gpusvm, &vm->lock); + } else { + err = drm_gpusvm_init(&vm->svm.gpusvm, "Xe SVM (simple)", + &vm->xe->drm, NULL, 0, 0, 0, NULL, + NULL, 0); + } - return 0; + return err; } /** @@ -647,7 +826,7 @@ bool xe_svm_range_validate(struct xe_vm *vm, xe_svm_notifier_lock(vm); ret = (range->tile_present & ~range->tile_invalidated & tile_mask) == tile_mask && - (devmem_preferred == range->base.flags.has_devmem_pages); + (devmem_preferred == range->base.pages.flags.has_devmem_pages); xe_svm_notifier_unlock(vm); @@ -676,75 +855,60 @@ u64 xe_svm_find_vma_start(struct xe_vm *vm, u64 start, u64 end, struct xe_vma *v min(end, xe_vma_end(vma))); } -#if IS_ENABLED(CONFIG_DRM_XE_DEVMEM_MIRROR) -static struct xe_vram_region *tile_to_vr(struct xe_tile *tile) +#if IS_ENABLED(CONFIG_DRM_XE_PAGEMAP) +static int xe_drm_pagemap_populate_mm(struct drm_pagemap *dpagemap, + unsigned long start, unsigned long end, + struct mm_struct *mm, + unsigned long timeslice_ms) { - return &tile->mem.vram; -} - -/** - * xe_svm_alloc_vram()- Allocate device memory pages for range, - * migrating existing data. - * @vm: The VM. - * @tile: tile to allocate vram from - * @range: SVM range - * @ctx: DRM GPU SVM context - * - * Return: 0 on success, error code on failure. - */ -int xe_svm_alloc_vram(struct xe_vm *vm, struct xe_tile *tile, - struct xe_svm_range *range, - const struct drm_gpusvm_ctx *ctx) -{ - struct mm_struct *mm = vm->svm.gpusvm.mm; - struct xe_vram_region *vr = tile_to_vr(tile); + struct xe_vram_region *vr = container_of(dpagemap, typeof(*vr), dpagemap); + struct xe_device *xe = vr->xe; + struct device *dev = xe->drm.dev; struct drm_buddy_block *block; + struct xe_validation_ctx vctx; struct list_head *blocks; + struct drm_exec exec; struct xe_bo *bo; - ktime_t end = 0; - int err; - - range_debug(range, "ALLOCATE VRAM"); - - if (!mmget_not_zero(mm)) - return -EFAULT; - mmap_read_lock(mm); - -retry: - bo = xe_bo_create_locked(tile_to_xe(tile), NULL, NULL, - xe_svm_range_size(range), - ttm_bo_type_device, - XE_BO_FLAG_VRAM_IF_DGFX(tile) | - XE_BO_FLAG_CPU_ADDR_MIRROR); - if (IS_ERR(bo)) { - err = PTR_ERR(bo); - if (xe_vm_validate_should_retry(NULL, err, &end)) - goto retry; - goto unlock; - } + int err = 0, idx; + + if (!drm_dev_enter(&xe->drm, &idx)) + return -ENODEV; + + xe_pm_runtime_get(xe); + + xe_validation_guard(&vctx, &xe->val, &exec, (struct xe_val_flags) {}, err) { + bo = xe_bo_create_locked(xe, NULL, NULL, end - start, + ttm_bo_type_device, + (IS_DGFX(xe) ? 
XE_BO_FLAG_VRAM(vr) : XE_BO_FLAG_SYSTEM) | + XE_BO_FLAG_CPU_ADDR_MIRROR, &exec); + drm_exec_retry_on_contention(&exec); + if (IS_ERR(bo)) { + err = PTR_ERR(bo); + xe_validation_retry_on_oom(&vctx, &err); + break; + } - drm_gpusvm_devmem_init(&bo->devmem_allocation, - vm->xe->drm.dev, mm, - &gpusvm_devmem_ops, - &tile->mem.vram.dpagemap, - xe_svm_range_size(range)); + drm_pagemap_devmem_init(&bo->devmem_allocation, dev, mm, + &dpagemap_devmem_ops, dpagemap, end - start); - blocks = &to_xe_ttm_vram_mgr_resource(bo->ttm.resource)->blocks; - list_for_each_entry(block, blocks, link) - block->private = vr; + blocks = &to_xe_ttm_vram_mgr_resource(bo->ttm.resource)->blocks; + list_for_each_entry(block, blocks, link) + block->private = vr; - xe_bo_get(bo); - err = drm_gpusvm_migrate_to_devmem(&vm->svm.gpusvm, &range->base, - &bo->devmem_allocation, ctx); - if (err) - xe_svm_devmem_release(&bo->devmem_allocation); + xe_bo_get(bo); - xe_bo_unlock(bo); - xe_bo_put(bo); - -unlock: - mmap_read_unlock(mm); - mmput(mm); + /* Ensure the device has a pm ref while there are device pages active. */ + xe_pm_runtime_get_noresume(xe); + err = drm_pagemap_migrate_to_devmem(&bo->devmem_allocation, mm, + start, end, timeslice_ms, + xe_svm_devm_owner(xe)); + if (err) + xe_svm_devmem_release(&bo->devmem_allocation); + xe_bo_unlock(bo); + xe_bo_put(bo); + } + xe_pm_runtime_put(xe); + drm_dev_exit(idx); return err; } @@ -772,17 +936,17 @@ bool xe_svm_range_needs_migrate_to_vram(struct xe_svm_range *range, struct xe_vm struct xe_vm *vm = range_to_vm(&range->base); u64 range_size = xe_svm_range_size(range); - if (!range->base.flags.migrate_devmem || !preferred_region_is_vram) + if (!range->base.pages.flags.migrate_devmem || !preferred_region_is_vram) return false; xe_assert(vm->xe, IS_DGFX(vm->xe)); - if (preferred_region_is_vram && xe_svm_range_in_vram(range)) { + if (xe_svm_range_in_vram(range)) { drm_info(&vm->xe->drm, "Range is already in VRAM\n"); return false; } - if (preferred_region_is_vram && range_size < SZ_64K && !supports_4K_migration(vm->xe)) { + if (range_size < SZ_64K && !supports_4K_migration(vm->xe)) { drm_dbg(&vm->xe->drm, "Platform doesn't support SZ_4K range migration\n"); return false; } @@ -790,40 +954,78 @@ bool xe_svm_range_needs_migrate_to_vram(struct xe_svm_range *range, struct xe_vm return true; } -/** - * xe_svm_handle_pagefault() - SVM handle page fault - * @vm: The VM. - * @vma: The CPU address mirror VMA. - * @gt: The gt upon the fault occurred. - * @fault_addr: The GPU fault address. - * @atomic: The fault atomic access bit. - * - * Create GPU bindings for a SVM page fault. Optionally migrate to device - * memory. - * - * Return: 0 on success, negative error code on error. 
- */ -int xe_svm_handle_pagefault(struct xe_vm *vm, struct xe_vma *vma, - struct xe_gt *gt, u64 fault_addr, - bool atomic) +#define DECL_SVM_RANGE_COUNT_STATS(elem, stat) \ +static void xe_svm_range_##elem##_count_stats_incr(struct xe_gt *gt, \ + struct xe_svm_range *range) \ +{ \ + switch (xe_svm_range_size(range)) { \ + case SZ_4K: \ + xe_gt_stats_incr(gt, XE_GT_STATS_ID_SVM_4K_##stat##_COUNT, 1); \ + break; \ + case SZ_64K: \ + xe_gt_stats_incr(gt, XE_GT_STATS_ID_SVM_64K_##stat##_COUNT, 1); \ + break; \ + case SZ_2M: \ + xe_gt_stats_incr(gt, XE_GT_STATS_ID_SVM_2M_##stat##_COUNT, 1); \ + break; \ + } \ +} \ + +DECL_SVM_RANGE_COUNT_STATS(fault, PAGEFAULT) +DECL_SVM_RANGE_COUNT_STATS(valid_fault, VALID_PAGEFAULT) +DECL_SVM_RANGE_COUNT_STATS(migrate, MIGRATE) + +#define DECL_SVM_RANGE_US_STATS(elem, stat) \ +static void xe_svm_range_##elem##_us_stats_incr(struct xe_gt *gt, \ + struct xe_svm_range *range, \ + ktime_t start) \ +{ \ + s64 us_delta = xe_svm_stats_ktime_us_delta(start); \ +\ + switch (xe_svm_range_size(range)) { \ + case SZ_4K: \ + xe_gt_stats_incr(gt, XE_GT_STATS_ID_SVM_4K_##stat##_US, \ + us_delta); \ + break; \ + case SZ_64K: \ + xe_gt_stats_incr(gt, XE_GT_STATS_ID_SVM_64K_##stat##_US, \ + us_delta); \ + break; \ + case SZ_2M: \ + xe_gt_stats_incr(gt, XE_GT_STATS_ID_SVM_2M_##stat##_US, \ + us_delta); \ + break; \ + } \ +} \ + +DECL_SVM_RANGE_US_STATS(migrate, MIGRATE) +DECL_SVM_RANGE_US_STATS(get_pages, GET_PAGES) +DECL_SVM_RANGE_US_STATS(bind, BIND) +DECL_SVM_RANGE_US_STATS(fault, PAGEFAULT) + +static int __xe_svm_handle_pagefault(struct xe_vm *vm, struct xe_vma *vma, + struct xe_gt *gt, u64 fault_addr, + bool need_vram) { + int devmem_possible = IS_DGFX(vm->xe) && + IS_ENABLED(CONFIG_DRM_XE_PAGEMAP); struct drm_gpusvm_ctx ctx = { .read_only = xe_vma_read_only(vma), - .devmem_possible = IS_DGFX(vm->xe) && - IS_ENABLED(CONFIG_DRM_XE_DEVMEM_MIRROR), - .check_pages_threshold = IS_DGFX(vm->xe) && - IS_ENABLED(CONFIG_DRM_XE_DEVMEM_MIRROR) ? SZ_64K : 0, - .devmem_only = atomic && IS_DGFX(vm->xe) && - IS_ENABLED(CONFIG_DRM_XE_DEVMEM_MIRROR), - .timeslice_ms = atomic && IS_DGFX(vm->xe) && - IS_ENABLED(CONFIG_DRM_XE_DEVMEM_MIRROR) ? + .devmem_possible = devmem_possible, + .check_pages_threshold = devmem_possible ? SZ_64K : 0, + .devmem_only = need_vram && devmem_possible, + .timeslice_ms = need_vram && devmem_possible ? vm->xe->atomic_svm_timeslice_ms : 0, + .device_private_page_owner = xe_svm_devm_owner(vm->xe), }; + struct xe_validation_ctx vctx; + struct drm_exec exec; struct xe_svm_range *range; struct dma_fence *fence; + struct drm_pagemap *dpagemap; struct xe_tile *tile = gt_to_tile(gt); int migrate_try_count = ctx.devmem_only ? 
3 : 1; - ktime_t end = 0; + ktime_t start = xe_svm_stats_ktime_get(), bind_start, get_pages_start; int err; lockdep_assert_held_write(&vm->lock); @@ -837,29 +1039,58 @@ retry: if (err) return err; + dpagemap = xe_vma_resolve_pagemap(vma, tile); + if (!dpagemap && !ctx.devmem_only) + ctx.device_private_page_owner = NULL; range = xe_svm_range_find_or_insert(vm, fault_addr, vma, &ctx); if (IS_ERR(range)) return PTR_ERR(range); - if (ctx.devmem_only && !range->base.flags.migrate_devmem) - return -EACCES; + xe_svm_range_fault_count_stats_incr(gt, range); - if (xe_svm_range_is_valid(range, tile, ctx.devmem_only)) - return 0; + if (ctx.devmem_only && !range->base.pages.flags.migrate_devmem) { + err = -EACCES; + goto out; + } + + if (xe_svm_range_is_valid(range, tile, ctx.devmem_only)) { + xe_svm_range_valid_fault_count_stats_incr(gt, range); + range_debug(range, "PAGE FAULT - VALID"); + goto out; + } range_debug(range, "PAGE FAULT"); if (--migrate_try_count >= 0 && - xe_svm_range_needs_migrate_to_vram(range, vma, IS_DGFX(vm->xe))) { - err = xe_svm_alloc_vram(vm, tile, range, &ctx); + xe_svm_range_needs_migrate_to_vram(range, vma, !!dpagemap || ctx.devmem_only)) { + ktime_t migrate_start = xe_svm_stats_ktime_get(); + + /* TODO : For multi-device dpagemap will be used to find the + * remote tile and remote device. Will need to modify + * xe_svm_alloc_vram to use dpagemap for future multi-device + * support. + */ + xe_svm_range_migrate_count_stats_incr(gt, range); + err = xe_svm_alloc_vram(tile, range, &ctx); + xe_svm_range_migrate_us_stats_incr(gt, range, migrate_start); ctx.timeslice_ms <<= 1; /* Double timeslice if we have to retry */ if (err) { if (migrate_try_count || !ctx.devmem_only) { drm_dbg(&vm->xe->drm, "VRAM allocation failed, falling back to retrying fault, asid=%u, errno=%pe\n", vm->usm.asid, ERR_PTR(err)); - goto retry; + + /* + * In the devmem-only case, mixed mappings may + * be found. The get_pages function will fix + * these up to a single location, allowing the + * page fault handler to make forward progress. 
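For a feel of the retry behaviour above: in the devmem-only case the fault handler allows three migration attempts, and the migration timeslice doubles on every retried fault. The sketch below only illustrates that doubling, not the exact control flow, and the 5 ms starting value standing in for atomic_svm_timeslice_ms is an assumption.

        #include <stdio.h>

        int main(void)
        {
                unsigned int timeslice_ms = 5;          /* assumed starting timeslice */
                int migrate_try_count = 3;              /* devmem-only case above */

                while (migrate_try_count-- >= 0) {
                        printf("attempt with timeslice %u ms\n", timeslice_ms);
                        timeslice_ms <<= 1;             /* doubled on every retry */
                }
                return 0;
        }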
+ */ + if (ctx.devmem_only) + goto get_pages; + else + goto retry; } else { drm_err(&vm->xe->drm, "VRAM allocation failed, retry count exceeded, asid=%u, errno=%pe\n", @@ -869,6 +1100,9 @@ retry: } } +get_pages: + get_pages_start = xe_svm_stats_ktime_get(); + range_debug(range, "GET PAGES"); err = xe_svm_range_get_pages(vm, range, &ctx); /* Corner where CPU mappings have changed */ @@ -888,37 +1122,89 @@ retry: } if (err) { range_debug(range, "PAGE FAULT - FAIL PAGE COLLECT"); - goto err_out; + goto out; } + xe_svm_range_get_pages_us_stats_incr(gt, range, get_pages_start); range_debug(range, "PAGE FAULT - BIND"); -retry_bind: - xe_vm_lock(vm, false); - fence = xe_vm_range_rebind(vm, vma, range, BIT(tile->id)); - if (IS_ERR(fence)) { - xe_vm_unlock(vm); - err = PTR_ERR(fence); - if (err == -EAGAIN) { - ctx.timeslice_ms <<= 1; /* Double timeslice if we have to retry */ - range_debug(range, "PAGE FAULT - RETRY BIND"); - goto retry; + bind_start = xe_svm_stats_ktime_get(); + xe_validation_guard(&vctx, &vm->xe->val, &exec, (struct xe_val_flags) {}, err) { + err = xe_vm_drm_exec_lock(vm, &exec); + drm_exec_retry_on_contention(&exec); + + xe_vm_set_validation_exec(vm, &exec); + fence = xe_vm_range_rebind(vm, vma, range, BIT(tile->id)); + xe_vm_set_validation_exec(vm, NULL); + if (IS_ERR(fence)) { + drm_exec_retry_on_contention(&exec); + err = PTR_ERR(fence); + xe_validation_retry_on_oom(&vctx, &err); + xe_svm_range_bind_us_stats_incr(gt, range, bind_start); + break; } - if (xe_vm_validate_should_retry(NULL, err, &end)) - goto retry_bind; - goto err_out; } - xe_vm_unlock(vm); + if (err) + goto err_out; dma_fence_wait(fence, false); dma_fence_put(fence); + xe_svm_range_bind_us_stats_incr(gt, range, bind_start); + +out: + xe_svm_range_fault_us_stats_incr(gt, range, start); + return 0; err_out: + if (err == -EAGAIN) { + ctx.timeslice_ms <<= 1; /* Double timeslice if we have to retry */ + range_debug(range, "PAGE FAULT - RETRY BIND"); + goto retry; + } return err; } /** + * xe_svm_handle_pagefault() - SVM handle page fault + * @vm: The VM. + * @vma: The CPU address mirror VMA. + * @gt: The gt upon the fault occurred. + * @fault_addr: The GPU fault address. + * @atomic: The fault atomic access bit. + * + * Create GPU bindings for a SVM page fault. Optionally migrate to device + * memory. + * + * Return: 0 on success, negative error code on error. + */ +int xe_svm_handle_pagefault(struct xe_vm *vm, struct xe_vma *vma, + struct xe_gt *gt, u64 fault_addr, + bool atomic) +{ + int need_vram, ret; +retry: + need_vram = xe_vma_need_vram_for_atomic(vm->xe, vma, atomic); + if (need_vram < 0) + return need_vram; + + ret = __xe_svm_handle_pagefault(vm, vma, gt, fault_addr, + need_vram ? true : false); + if (ret == -EAGAIN) { + /* + * Retry once on -EAGAIN to re-lookup the VMA, as the original VMA + * may have been split by xe_svm_range_set_default_attr. + */ + vma = xe_vm_find_vma_by_addr(vm, fault_addr); + if (!vma) + return -EINVAL; + + goto retry; + } + return ret; +} + +/** * xe_svm_has_mapping() - SVM has mappings * @vm: The VM. * @start: Start address. @@ -934,6 +1220,41 @@ bool xe_svm_has_mapping(struct xe_vm *vm, u64 start, u64 end) } /** + * xe_svm_unmap_address_range - UNMAP SVM mappings and ranges + * @vm: The VM + * @start: start addr + * @end: end addr + * + * This function UNMAPS svm ranges if start or end address are inside them. 
+ */ +void xe_svm_unmap_address_range(struct xe_vm *vm, u64 start, u64 end) +{ + struct drm_gpusvm_notifier *notifier, *next; + + lockdep_assert_held_write(&vm->lock); + + drm_gpusvm_for_each_notifier_safe(notifier, next, &vm->svm.gpusvm, start, end) { + struct drm_gpusvm_range *range, *__next; + + drm_gpusvm_for_each_range_safe(range, __next, notifier, start, end) { + if (start > drm_gpusvm_range_start(range) || + end < drm_gpusvm_range_end(range)) { + if (IS_DGFX(vm->xe) && xe_svm_range_in_vram(to_xe_range(range))) + drm_gpusvm_range_evict(&vm->svm.gpusvm, range); + drm_gpusvm_range_get(range); + __xe_svm_garbage_collector(vm, to_xe_range(range)); + if (!list_empty(&to_xe_range(range)->garbage_collector_link)) { + spin_lock(&vm->svm.garbage_collector.lock); + list_del(&to_xe_range(range)->garbage_collector_link); + spin_unlock(&vm->svm.garbage_collector.lock); + } + drm_gpusvm_range_put(range); + } + } + } +} + +/** * xe_svm_bo_evict() - SVM evict BO to system memory * @bo: BO to evict * @@ -944,7 +1265,7 @@ bool xe_svm_has_mapping(struct xe_vm *vm, u64 start, u64 end) */ int xe_svm_bo_evict(struct xe_bo *bo) { - return drm_gpusvm_evict_to_ram(&bo->devmem_allocation); + return drm_pagemap_evict_to_ram(&bo->devmem_allocation); } /** @@ -967,7 +1288,7 @@ struct xe_svm_range *xe_svm_range_find_or_insert(struct xe_vm *vm, u64 addr, r = drm_gpusvm_range_find_or_insert(&vm->svm.gpusvm, max(addr, xe_vma_start(vma)), xe_vma_start(vma), xe_vma_end(vma), ctx); if (IS_ERR(r)) - return ERR_PTR(PTR_ERR(r)); + return ERR_CAST(r); return to_xe_range(r); } @@ -997,9 +1318,119 @@ int xe_svm_range_get_pages(struct xe_vm *vm, struct xe_svm_range *range, return err; } -#if IS_ENABLED(CONFIG_DRM_XE_DEVMEM_MIRROR) +/** + * xe_svm_ranges_zap_ptes_in_range - clear ptes of svm ranges in input range + * @vm: Pointer to the xe_vm structure + * @start: Start of the input range + * @end: End of the input range + * + * This function removes the page table entries (PTEs) associated + * with the svm ranges within the given input start and end + * + * Return: tile_mask for which gt's need to be tlb invalidated. + */ +u8 xe_svm_ranges_zap_ptes_in_range(struct xe_vm *vm, u64 start, u64 end) +{ + struct drm_gpusvm_notifier *notifier; + struct xe_svm_range *range; + u64 adj_start, adj_end; + struct xe_tile *tile; + u8 tile_mask = 0; + u8 id; + + lockdep_assert(lockdep_is_held_type(&vm->svm.gpusvm.notifier_lock, 1) && + lockdep_is_held_type(&vm->lock, 0)); + + drm_gpusvm_for_each_notifier(notifier, &vm->svm.gpusvm, start, end) { + struct drm_gpusvm_range *r = NULL; + + adj_start = max(start, drm_gpusvm_notifier_start(notifier)); + adj_end = min(end, drm_gpusvm_notifier_end(notifier)); + drm_gpusvm_for_each_range(r, notifier, adj_start, adj_end) { + range = to_xe_range(r); + for_each_tile(tile, vm->xe, id) { + if (xe_pt_zap_ptes_range(tile, vm, range)) { + tile_mask |= BIT(id); + /* + * WRITE_ONCE pairs with READ_ONCE in + * xe_vm_has_valid_gpu_mapping(). + * Must not fail after setting + * tile_invalidated and before + * TLB invalidation. 
+ */ + WRITE_ONCE(range->tile_invalidated, + range->tile_invalidated | BIT(id)); + } + } + } + } -static struct drm_pagemap_device_addr + return tile_mask; +} + +#if IS_ENABLED(CONFIG_DRM_XE_PAGEMAP) + +static struct drm_pagemap *tile_local_pagemap(struct xe_tile *tile) +{ + return &tile->mem.vram->dpagemap; +} + +/** + * xe_vma_resolve_pagemap - Resolve the appropriate DRM pagemap for a VMA + * @vma: Pointer to the xe_vma structure containing memory attributes + * @tile: Pointer to the xe_tile structure used as fallback for VRAM mapping + * + * This function determines the correct DRM pagemap to use for a given VMA. + * It first checks if a valid devmem_fd is provided in the VMA's preferred + * location. If the devmem_fd is negative, it returns NULL, indicating no + * pagemap is available and smem to be used as preferred location. + * If the devmem_fd is equal to the default faulting + * GT identifier, it returns the VRAM pagemap associated with the tile. + * + * Future support for multi-device configurations may use drm_pagemap_from_fd() + * to resolve pagemaps from arbitrary file descriptors. + * + * Return: A pointer to the resolved drm_pagemap, or NULL if none is applicable. + */ +struct drm_pagemap *xe_vma_resolve_pagemap(struct xe_vma *vma, struct xe_tile *tile) +{ + s32 fd = (s32)vma->attr.preferred_loc.devmem_fd; + + if (fd == DRM_XE_PREFERRED_LOC_DEFAULT_SYSTEM) + return NULL; + + if (fd == DRM_XE_PREFERRED_LOC_DEFAULT_DEVICE) + return IS_DGFX(tile_to_xe(tile)) ? tile_local_pagemap(tile) : NULL; + + /* TODO: Support multi-device with drm_pagemap_from_fd(fd) */ + return NULL; +} + +/** + * xe_svm_alloc_vram()- Allocate device memory pages for range, + * migrating existing data. + * @tile: tile to allocate vram from + * @range: SVM range + * @ctx: DRM GPU SVM context + * + * Return: 0 on success, error code on failure. 
+ */ +int xe_svm_alloc_vram(struct xe_tile *tile, struct xe_svm_range *range, + const struct drm_gpusvm_ctx *ctx) +{ + struct drm_pagemap *dpagemap; + + xe_assert(tile_to_xe(tile), range->base.pages.flags.migrate_devmem); + range_debug(range, "ALLOCATE VRAM"); + + dpagemap = tile_local_pagemap(tile); + return drm_pagemap_populate_mm(dpagemap, xe_svm_range_start(range), + xe_svm_range_end(range), + range->base.gpusvm->mm, + ctx->timeslice_ms); +} + +static struct drm_pagemap_addr xe_drm_pagemap_device_map(struct drm_pagemap *dpagemap, struct device *dev, struct page *page, @@ -1018,11 +1449,12 @@ xe_drm_pagemap_device_map(struct drm_pagemap *dpagemap, prot = 0; } - return drm_pagemap_device_addr_encode(addr, prot, order, dir); + return drm_pagemap_addr_encode(addr, prot, order, dir); } static const struct drm_pagemap_ops xe_drm_pagemap_ops = { .device_map = xe_drm_pagemap_device_map, + .populate_mm = xe_drm_pagemap_populate_mm, }; /** @@ -1054,7 +1486,7 @@ int xe_devm_add(struct xe_tile *tile, struct xe_vram_region *vr) vr->pagemap.range.start = res->start; vr->pagemap.range.end = res->end; vr->pagemap.nr_range = 1; - vr->pagemap.ops = drm_gpusvm_pagemap_ops_get(); + vr->pagemap.ops = drm_pagemap_pagemap_ops_get(); vr->pagemap.owner = xe_svm_devm_owner(xe); addr = devm_memremap_pages(dev, &vr->pagemap); @@ -1075,7 +1507,7 @@ int xe_devm_add(struct xe_tile *tile, struct xe_vram_region *vr) return 0; } #else -int xe_svm_alloc_vram(struct xe_vm *vm, struct xe_tile *tile, +int xe_svm_alloc_vram(struct xe_tile *tile, struct xe_svm_range *range, const struct drm_gpusvm_ctx *ctx) { @@ -1086,6 +1518,11 @@ int xe_devm_add(struct xe_tile *tile, struct xe_vram_region *vr) { return 0; } + +struct drm_pagemap *xe_vma_resolve_pagemap(struct xe_vma *vma, struct xe_tile *tile) +{ + return NULL; +} #endif /** diff --git a/drivers/gpu/drm/xe/xe_svm.h b/drivers/gpu/drm/xe/xe_svm.h index 19ce4f2754a7..0955d2ac8d74 100644 --- a/drivers/gpu/drm/xe/xe_svm.h +++ b/drivers/gpu/drm/xe/xe_svm.h @@ -6,6 +6,20 @@ #ifndef _XE_SVM_H_ #define _XE_SVM_H_ +struct xe_device; + +/** + * xe_svm_devm_owner() - Return the owner of device private memory + * @xe: The xe device. 
+ * + * Return: The owner of this device's device private memory to use in + * hmm_range_fault()- + */ +static inline void *xe_svm_devm_owner(struct xe_device *xe) +{ + return xe; +} + #if IS_ENABLED(CONFIG_DRM_XE_GPUSVM) #include <drm/drm_pagemap.h> @@ -70,8 +84,7 @@ int xe_svm_bo_evict(struct xe_bo *bo); void xe_svm_range_debug(struct xe_svm_range *range, const char *operation); -int xe_svm_alloc_vram(struct xe_vm *vm, struct xe_tile *tile, - struct xe_svm_range *range, +int xe_svm_alloc_vram(struct xe_tile *tile, struct xe_svm_range *range, const struct drm_gpusvm_ctx *ctx); struct xe_svm_range *xe_svm_range_find_or_insert(struct xe_vm *vm, u64 addr, @@ -91,6 +104,12 @@ bool xe_svm_range_validate(struct xe_vm *vm, u64 xe_svm_find_vma_start(struct xe_vm *vm, u64 addr, u64 end, struct xe_vma *vma); +void xe_svm_unmap_address_range(struct xe_vm *vm, u64 start, u64 end); + +u8 xe_svm_ranges_zap_ptes_in_range(struct xe_vm *vm, u64 start, u64 end); + +struct drm_pagemap *xe_vma_resolve_pagemap(struct xe_vma *vma, struct xe_tile *tile); + /** * xe_svm_range_has_dma_mapping() - SVM range has DMA mapping * @range: SVM range @@ -100,7 +119,7 @@ u64 xe_svm_find_vma_start(struct xe_vm *vm, u64 addr, u64 end, struct xe_vma *v static inline bool xe_svm_range_has_dma_mapping(struct xe_svm_range *range) { lockdep_assert_held(&range->base.gpusvm->notifier_lock); - return range->base.flags.has_dma_mapping; + return range->base.pages.flags.has_dma_mapping; } /** @@ -150,21 +169,13 @@ static inline unsigned long xe_svm_range_size(struct xe_svm_range *range) return drm_gpusvm_range_size(&range->base); } -#define xe_svm_assert_in_notifier(vm__) \ - lockdep_assert_held_write(&(vm__)->svm.gpusvm.notifier_lock) - -#define xe_svm_notifier_lock(vm__) \ - drm_gpusvm_notifier_lock(&(vm__)->svm.gpusvm) - -#define xe_svm_notifier_unlock(vm__) \ - drm_gpusvm_notifier_unlock(&(vm__)->svm.gpusvm) - void xe_svm_flush(struct xe_vm *vm); #else #include <linux/interval_tree.h> +#include "xe_vm.h" -struct drm_pagemap_device_addr; +struct drm_pagemap_addr; struct drm_gpusvm_ctx; struct drm_gpusvm_range; struct xe_bo; @@ -179,7 +190,9 @@ struct xe_vram_region; struct xe_svm_range { struct { struct interval_tree_node itree; - const struct drm_pagemap_device_addr *dma_addr; + struct { + const struct drm_pagemap_addr *dma_addr; + } pages; } base; u32 tile_present; u32 tile_invalidated; @@ -199,12 +212,21 @@ int xe_devm_add(struct xe_tile *tile, struct xe_vram_region *vr) static inline int xe_svm_init(struct xe_vm *vm) { +#if IS_ENABLED(CONFIG_DRM_GPUSVM) + return drm_gpusvm_init(&vm->svm.gpusvm, "Xe SVM (simple)", &vm->xe->drm, + NULL, NULL, 0, 0, 0, NULL, NULL, 0); +#else return 0; +#endif } static inline void xe_svm_fini(struct xe_vm *vm) { +#if IS_ENABLED(CONFIG_DRM_GPUSVM) + xe_assert(vm->xe, xe_vm_is_closed(vm)); + drm_gpusvm_fini(&vm->svm.gpusvm); +#endif } static inline @@ -237,10 +259,9 @@ void xe_svm_range_debug(struct xe_svm_range *range, const char *operation) { } -static inline -int xe_svm_alloc_vram(struct xe_vm *vm, struct xe_tile *tile, - struct xe_svm_range *range, - const struct drm_gpusvm_ctx *ctx) +static inline int +xe_svm_alloc_vram(struct xe_tile *tile, struct xe_svm_range *range, + const struct drm_gpusvm_ctx *ctx) { return -EOPNOTSUPP; } @@ -305,19 +326,64 @@ u64 xe_svm_find_vma_start(struct xe_vm *vm, u64 addr, u64 end, struct xe_vma *vm return ULONG_MAX; } -#define xe_svm_assert_in_notifier(...) 
do {} while (0) +static inline +void xe_svm_unmap_address_range(struct xe_vm *vm, u64 start, u64 end) +{ +} + +static inline +u8 xe_svm_ranges_zap_ptes_in_range(struct xe_vm *vm, u64 start, u64 end) +{ + return 0; +} + +static inline +struct drm_pagemap *xe_vma_resolve_pagemap(struct xe_vma *vma, struct xe_tile *tile) +{ + return NULL; +} + +static inline void xe_svm_flush(struct xe_vm *vm) +{ +} #define xe_svm_range_has_dma_mapping(...) false +#endif /* CONFIG_DRM_XE_GPUSVM */ + +#if IS_ENABLED(CONFIG_DRM_GPUSVM) /* Need to support userptr without XE_GPUSVM */ +#define xe_svm_assert_in_notifier(vm__) \ + lockdep_assert_held_write(&(vm__)->svm.gpusvm.notifier_lock) + +#define xe_svm_assert_held_read(vm__) \ + lockdep_assert_held_read(&(vm__)->svm.gpusvm.notifier_lock) + +#define xe_svm_notifier_lock(vm__) \ + drm_gpusvm_notifier_lock(&(vm__)->svm.gpusvm) + +#define xe_svm_notifier_lock_interruptible(vm__) \ + down_read_interruptible(&(vm__)->svm.gpusvm.notifier_lock) + +#define xe_svm_notifier_unlock(vm__) \ + drm_gpusvm_notifier_unlock(&(vm__)->svm.gpusvm) + +#else +#define xe_svm_assert_in_notifier(...) do {} while (0) + +static inline void xe_svm_assert_held_read(struct xe_vm *vm) +{ +} static inline void xe_svm_notifier_lock(struct xe_vm *vm) { } -static inline void xe_svm_notifier_unlock(struct xe_vm *vm) +static inline int xe_svm_notifier_lock_interruptible(struct xe_vm *vm) { + return 0; } -static inline void xe_svm_flush(struct xe_vm *vm) +static inline void xe_svm_notifier_unlock(struct xe_vm *vm) { } -#endif +#endif /* CONFIG_DRM_GPUSVM */ + #endif diff --git a/drivers/gpu/drm/xe/xe_sync.c b/drivers/gpu/drm/xe/xe_sync.c index f87276df18f2..82872a51f098 100644 --- a/drivers/gpu/drm/xe/xe_sync.c +++ b/drivers/gpu/drm/xe/xe_sync.c @@ -77,6 +77,7 @@ static void user_fence_worker(struct work_struct *w) { struct xe_user_fence *ufence = container_of(w, struct xe_user_fence, worker); + WRITE_ONCE(ufence->signalled, 1); if (mmget_not_zero(ufence->mm)) { kthread_use_mm(ufence->mm); if (copy_to_user(ufence->addr, &ufence->value, sizeof(ufence->value))) @@ -91,7 +92,6 @@ static void user_fence_worker(struct work_struct *w) * Wake up waiters only after updating the ufence state, allowing the UMD * to safely reuse the same ufence without encountering -EBUSY errors. */ - WRITE_ONCE(ufence->signalled, 1); wake_up_all(&ufence->xe->ufence_wq); user_fence_put(ufence); } diff --git a/drivers/gpu/drm/xe/xe_tile.c b/drivers/gpu/drm/xe/xe_tile.c index 672faa0b67f1..d49ba3401963 100644 --- a/drivers/gpu/drm/xe/xe_tile.c +++ b/drivers/gpu/drm/xe/xe_tile.c @@ -7,9 +7,11 @@ #include <drm/drm_managed.h> +#include "xe_bo.h" #include "xe_device.h" #include "xe_ggtt.h" #include "xe_gt.h" +#include "xe_memirq.h" #include "xe_migrate.h" #include "xe_pcode.h" #include "xe_sa.h" @@ -18,6 +20,8 @@ #include "xe_tile_sysfs.h" #include "xe_ttm_vram_mgr.h" #include "xe_wa.h" +#include "xe_vram.h" +#include "xe_vram_types.h" /** * DOC: Multi-tile Design @@ -91,6 +95,35 @@ static int xe_tile_alloc(struct xe_tile *tile) if (!tile->mem.ggtt) return -ENOMEM; + tile->migrate = xe_migrate_alloc(tile); + if (!tile->migrate) + return -ENOMEM; + + return 0; +} + +/** + * xe_tile_alloc_vram - Perform per-tile VRAM structs allocation + * @tile: Tile to perform allocations for + * + * Allocates VRAM per-tile data structures using DRM-managed allocations. + * Does not touch the hardware. + * + * Returns -ENOMEM if allocations fail, otherwise 0. 
+ */ +int xe_tile_alloc_vram(struct xe_tile *tile) +{ + struct xe_device *xe = tile_to_xe(tile); + struct xe_vram_region *vram; + + if (!IS_DGFX(xe)) + return 0; + + vram = xe_vram_region_alloc(xe, tile->id, XE_PL_VRAM0 + tile->id); + if (!vram) + return -ENOMEM; + tile->mem.vram = vram; + return 0; } @@ -126,21 +159,6 @@ int xe_tile_init_early(struct xe_tile *tile, struct xe_device *xe, u8 id) } ALLOW_ERROR_INJECTION(xe_tile_init_early, ERRNO); /* See xe_pci_probe() */ -static int tile_ttm_mgr_init(struct xe_tile *tile) -{ - struct xe_device *xe = tile_to_xe(tile); - int err; - - if (tile->mem.vram.usable_size) { - err = xe_ttm_vram_mgr_init(tile, &tile->mem.vram.ttm); - if (err) - return err; - xe->info.mem_region_mask |= BIT(tile->id) << 1; - } - - return 0; -} - /** * xe_tile_init_noalloc - Init tile up to the point where allocations can happen. * @tile: The tile to initialize. @@ -158,22 +176,31 @@ static int tile_ttm_mgr_init(struct xe_tile *tile) int xe_tile_init_noalloc(struct xe_tile *tile) { struct xe_device *xe = tile_to_xe(tile); - int err; - - err = tile_ttm_mgr_init(tile); - if (err) - return err; xe_wa_apply_tile_workarounds(tile); if (xe->info.has_usm && IS_DGFX(xe)) - xe_devm_add(tile, &tile->mem.vram); + xe_devm_add(tile, tile->mem.vram); + + if (IS_DGFX(xe) && !ttm_resource_manager_used(&tile->mem.vram->ttm.manager)) { + int err = xe_ttm_vram_mgr_init(xe, tile->mem.vram); + + if (err) + return err; + xe->info.mem_region_mask |= BIT(tile->mem.vram->id) << 1; + } return xe_tile_sysfs_init(tile); } int xe_tile_init(struct xe_tile *tile) { + int err; + + err = xe_memirq_init(&tile->memirq); + if (err) + return err; + tile->mem.kernel_bb_pool = xe_sa_bo_manager_init(tile, SZ_1M, 16); if (IS_ERR(tile->mem.kernel_bb_pool)) return PTR_ERR(tile->mem.kernel_bb_pool); diff --git a/drivers/gpu/drm/xe/xe_tile.h b/drivers/gpu/drm/xe/xe_tile.h index eb939316d55b..dceb6297aa01 100644 --- a/drivers/gpu/drm/xe/xe_tile.h +++ b/drivers/gpu/drm/xe/xe_tile.h @@ -14,6 +14,13 @@ int xe_tile_init_early(struct xe_tile *tile, struct xe_device *xe, u8 id); int xe_tile_init_noalloc(struct xe_tile *tile); int xe_tile_init(struct xe_tile *tile); +int xe_tile_alloc_vram(struct xe_tile *tile); + void xe_tile_migrate_wait(struct xe_tile *tile); +static inline bool xe_tile_is_root(struct xe_tile *tile) +{ + return tile->id == 0; +} + #endif diff --git a/drivers/gpu/drm/xe/xe_tile_debugfs.c b/drivers/gpu/drm/xe/xe_tile_debugfs.c new file mode 100644 index 000000000000..5523874cba7b --- /dev/null +++ b/drivers/gpu/drm/xe/xe_tile_debugfs.c @@ -0,0 +1,135 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2025 Intel Corporation + */ + +#include <linux/debugfs.h> +#include <drm/drm_debugfs.h> + +#include "xe_pm.h" +#include "xe_sa.h" +#include "xe_tile_debugfs.h" + +static struct xe_tile *node_to_tile(struct drm_info_node *node) +{ + return node->dent->d_parent->d_inode->i_private; +} + +/** + * tile_debugfs_simple_show - A show callback for struct drm_info_list + * @m: the &seq_file + * @data: data used by the drm debugfs helpers + * + * This callback can be used in struct drm_info_list to describe debugfs + * files that are &xe_tile specific. + * + * It is assumed that those debugfs files will be created on directory entry + * which struct dentry d_inode->i_private points to &xe_tile. 
+ * + * /sys/kernel/debug/dri/0/ + * ├── tile0/ # tile = dentry->d_inode->i_private + * │ │ ├── id # tile = dentry->d_parent->d_inode->i_private + * + * This function assumes that &m->private will be set to the &struct + * drm_info_node corresponding to the instance of the info on a given &struct + * drm_minor (see struct drm_info_list.show for details). + * + * This function also assumes that struct drm_info_list.data will point to the + * function code that will actually print a file content:: + * + * int (*print)(struct xe_tile *, struct drm_printer *) + * + * Example:: + * + * int tile_id(struct xe_tile *tile, struct drm_printer *p) + * { + * drm_printf(p, "%u\n", tile->id); + * return 0; + * } + * + * static const struct drm_info_list info[] = { + * { name = "id", .show = tile_debugfs_simple_show, .data = tile_id }, + * }; + * + * dir = debugfs_create_dir("tile0", parent); + * dir->d_inode->i_private = tile; + * drm_debugfs_create_files(info, ARRAY_SIZE(info), dir, minor); + * + * Return: 0 on success or a negative error code on failure. + */ +static int tile_debugfs_simple_show(struct seq_file *m, void *data) +{ + struct drm_printer p = drm_seq_file_printer(m); + struct drm_info_node *node = m->private; + struct xe_tile *tile = node_to_tile(node); + int (*print)(struct xe_tile *, struct drm_printer *) = node->info_ent->data; + + return print(tile, &p); +} + +/** + * tile_debugfs_show_with_rpm - A show callback for struct drm_info_list + * @m: the &seq_file + * @data: data used by the drm debugfs helpers + * + * Similar to tile_debugfs_simple_show() but implicitly takes a RPM ref. + * + * Return: 0 on success or a negative error code on failure. + */ +static int tile_debugfs_show_with_rpm(struct seq_file *m, void *data) +{ + struct drm_info_node *node = m->private; + struct xe_tile *tile = node_to_tile(node); + struct xe_device *xe = tile_to_xe(tile); + int ret; + + xe_pm_runtime_get(xe); + ret = tile_debugfs_simple_show(m, data); + xe_pm_runtime_put(xe); + + return ret; +} + +static int sa_info(struct xe_tile *tile, struct drm_printer *p) +{ + drm_suballoc_dump_debug_info(&tile->mem.kernel_bb_pool->base, p, + xe_sa_manager_gpu_addr(tile->mem.kernel_bb_pool)); + + return 0; +} + +/* only for debugfs files which can be safely used on the VF */ +static const struct drm_info_list vf_safe_debugfs_list[] = { + { "sa_info", .show = tile_debugfs_show_with_rpm, .data = sa_info }, +}; + +/** + * xe_tile_debugfs_register - Register tile's debugfs attributes + * @tile: the &xe_tile to register + * + * Create debugfs sub-directory with a name that includes a tile ID and + * then creates set of debugfs files (attributes) specific to this tile. + */ +void xe_tile_debugfs_register(struct xe_tile *tile) +{ + struct xe_device *xe = tile_to_xe(tile); + struct drm_minor *minor = xe->drm.primary; + struct dentry *root = minor->debugfs_root; + char name[8]; + + snprintf(name, sizeof(name), "tile%u", tile->id); + tile->debugfs = debugfs_create_dir(name, root); + if (IS_ERR(tile->debugfs)) + return; + + /* + * Store the xe_tile pointer as private data of the tile/ directory + * node so other tile specific attributes under that directory may + * refer to it by looking at its parent node private data. 
+ */ + tile->debugfs->d_inode->i_private = tile; + + drm_debugfs_create_files(vf_safe_debugfs_list, + ARRAY_SIZE(vf_safe_debugfs_list), + tile->debugfs, minor); +} diff --git a/drivers/gpu/drm/xe/xe_tile_debugfs.h b/drivers/gpu/drm/xe/xe_tile_debugfs.h new file mode 100644 index 000000000000..0e5f724de37f --- /dev/null +++ b/drivers/gpu/drm/xe/xe_tile_debugfs.h @@ -0,0 +1,13 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2025 Intel Corporation + */ + +#ifndef _XE_TILE_DEBUGFS_H_ +#define _XE_TILE_DEBUGFS_H_ + +struct xe_tile; + +void xe_tile_debugfs_register(struct xe_tile *tile); + +#endif diff --git a/drivers/gpu/drm/xe/xe_tile_printk.h b/drivers/gpu/drm/xe/xe_tile_printk.h new file mode 100644 index 000000000000..63640a42685d --- /dev/null +++ b/drivers/gpu/drm/xe/xe_tile_printk.h @@ -0,0 +1,127 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2025 Intel Corporation + */ + +#ifndef _xe_tile_printk_H_ +#define _xe_tile_printk_H_ + +#include "xe_printk.h" + +#define __XE_TILE_PRINTK_FMT(_tile, _fmt, _args...) "Tile%u: " _fmt, (_tile)->id, ##_args + +#define xe_tile_printk(_tile, _level, _fmt, ...) \ + xe_printk((_tile)->xe, _level, __XE_TILE_PRINTK_FMT((_tile), _fmt, ##__VA_ARGS__)) + +#define xe_tile_err(_tile, _fmt, ...) \ + xe_tile_printk((_tile), err, _fmt, ##__VA_ARGS__) + +#define xe_tile_err_once(_tile, _fmt, ...) \ + xe_tile_printk((_tile), err_once, _fmt, ##__VA_ARGS__) + +#define xe_tile_err_ratelimited(_tile, _fmt, ...) \ + xe_tile_printk((_tile), err_ratelimited, _fmt, ##__VA_ARGS__) + +#define xe_tile_warn(_tile, _fmt, ...) \ + xe_tile_printk((_tile), warn, _fmt, ##__VA_ARGS__) + +#define xe_tile_notice(_tile, _fmt, ...) \ + xe_tile_printk((_tile), notice, _fmt, ##__VA_ARGS__) + +#define xe_tile_info(_tile, _fmt, ...) \ + xe_tile_printk((_tile), info, _fmt, ##__VA_ARGS__) + +#define xe_tile_dbg(_tile, _fmt, ...) \ + xe_tile_printk((_tile), dbg, _fmt, ##__VA_ARGS__) + +#define xe_tile_WARN_type(_tile, _type, _condition, _fmt, ...) \ + xe_WARN##_type((_tile)->xe, _condition, _fmt, ## __VA_ARGS__) + +#define xe_tile_WARN(_tile, _condition, _fmt, ...) \ + xe_tile_WARN_type((_tile),, _condition, __XE_TILE_PRINTK_FMT((_tile), _fmt, ##__VA_ARGS__)) + +#define xe_tile_WARN_ONCE(_tile, _condition, _fmt, ...) \ + xe_tile_WARN_type((_tile), _ONCE, _condition, __XE_TILE_PRINTK_FMT((_tile), _fmt, ##__VA_ARGS__)) + +#define xe_tile_WARN_ON(_tile, _condition) \ + xe_tile_WARN((_tile), _condition, "%s(%s)", "WARN_ON", __stringify(_condition)) + +#define xe_tile_WARN_ON_ONCE(_tile, _condition) \ + xe_tile_WARN_ONCE((_tile), _condition, "%s(%s)", "WARN_ON_ONCE", __stringify(_condition)) + +static inline void __xe_tile_printfn_err(struct drm_printer *p, struct va_format *vaf) +{ + struct xe_tile *tile = p->arg; + + xe_tile_err(tile, "%pV", vaf); +} + +static inline void __xe_tile_printfn_info(struct drm_printer *p, struct va_format *vaf) +{ + struct xe_tile *tile = p->arg; + + xe_tile_info(tile, "%pV", vaf); +} + +static inline void __xe_tile_printfn_dbg(struct drm_printer *p, struct va_format *vaf) +{ + struct xe_tile *tile = p->arg; + struct drm_printer dbg; + + /* + * The original xe_tile_dbg() callsite annotations are useless here, + * redirect to the tweaked xe_dbg_printer() instead. 
+ */ + dbg = xe_dbg_printer(tile->xe); + dbg.origin = p->origin; + + drm_printf(&dbg, __XE_TILE_PRINTK_FMT(tile, "%pV", vaf)); +} + +/** + * xe_tile_err_printer - Construct a &drm_printer that outputs to xe_tile_err() + * @tile: the &xe_tile pointer to use in xe_tile_err() + * + * Return: The &drm_printer object. + */ +static inline struct drm_printer xe_tile_err_printer(struct xe_tile *tile) +{ + struct drm_printer p = { + .printfn = __xe_tile_printfn_err, + .arg = tile, + }; + return p; +} + +/** + * xe_tile_info_printer - Construct a &drm_printer that outputs to xe_tile_info() + * @tile: the &xe_tile pointer to use in xe_tile_info() + * + * Return: The &drm_printer object. + */ +static inline struct drm_printer xe_tile_info_printer(struct xe_tile *tile) +{ + struct drm_printer p = { + .printfn = __xe_tile_printfn_info, + .arg = tile, + }; + return p; +} + +/** + * xe_tile_dbg_printer - Construct a &drm_printer that outputs like xe_tile_dbg() + * @tile: the &xe_tile pointer to use in xe_tile_dbg() + * + * Return: The &drm_printer object. + */ +static inline struct drm_printer xe_tile_dbg_printer(struct xe_tile *tile) +{ + struct drm_printer p = { + .printfn = __xe_tile_printfn_dbg, + .arg = tile, + .origin = (const void *)_THIS_IP_, + }; + return p; +} + +#endif diff --git a/drivers/gpu/drm/xe/xe_tile_sysfs.c b/drivers/gpu/drm/xe/xe_tile_sysfs.c index b804234a6551..9e1236a9ec67 100644 --- a/drivers/gpu/drm/xe/xe_tile_sysfs.c +++ b/drivers/gpu/drm/xe/xe_tile_sysfs.c @@ -44,16 +44,18 @@ int xe_tile_sysfs_init(struct xe_tile *tile) kt->tile = tile; err = kobject_add(&kt->base, &dev->kobj, "tile%d", tile->id); - if (err) { - kobject_put(&kt->base); - return err; - } + if (err) + goto err_object; tile->sysfs = &kt->base; err = xe_vram_freq_sysfs_init(tile); if (err) - return err; + goto err_object; return devm_add_action_or_reset(xe->drm.dev, tile_sysfs_fini, tile); + +err_object: + kobject_put(&kt->base); + return err; } diff --git a/drivers/gpu/drm/xe/xe_tlb_inval.c b/drivers/gpu/drm/xe/xe_tlb_inval.c new file mode 100644 index 000000000000..918a59e686ea --- /dev/null +++ b/drivers/gpu/drm/xe/xe_tlb_inval.c @@ -0,0 +1,433 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2023 Intel Corporation + */ + +#include <drm/drm_managed.h> + +#include "abi/guc_actions_abi.h" +#include "xe_device.h" +#include "xe_force_wake.h" +#include "xe_gt.h" +#include "xe_gt_printk.h" +#include "xe_gt_stats.h" +#include "xe_guc.h" +#include "xe_guc_ct.h" +#include "xe_guc_tlb_inval.h" +#include "xe_mmio.h" +#include "xe_pm.h" +#include "xe_tlb_inval.h" +#include "xe_trace.h" + +/** + * DOC: Xe TLB invalidation + * + * Xe TLB invalidation is implemented in two layers. The first is the frontend + * API, which provides an interface for TLB invalidations to the driver code. + * The frontend handles seqno assignment, synchronization (fences), and the + * timeout mechanism. The frontend is implemented via an embedded structure + * xe_tlb_inval that includes a set of ops hooking into the backend. The backend + * interacts with the hardware (or firmware) to perform the actual invalidation. 
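Given the frontend/backend split described above, a backend only has to supply a struct xe_tlb_inval_ops (declared in xe_tlb_inval_types.h further down) and report completions back through xe_tlb_inval_done_handler(). The sketch below is purely illustrative and every my_inval_* symbol is hypothetical; the only in-tree backend is the GuC one hooked up via xe_guc_tlb_inval_init_early(), which presumably points tlb_inval->ops at its own table in the same way.

	static int my_inval_all(struct xe_tlb_inval *tlb_inval, u32 seqno)
	{
		/*
		 * Kick a full TLB flush tagged with @seqno; once the hardware
		 * acks, the backend reports it via
		 * xe_tlb_inval_done_handler(tlb_inval, seqno).
		 */
		return 0;
	}

	static int my_inval_ppgtt(struct xe_tlb_inval *tlb_inval, u32 seqno,
				  u64 start, u64 end, u32 asid)
	{
		/* Range-based invalidation of [start, end) for @asid. */
		return 0;
	}

	static bool my_inval_initialized(struct xe_tlb_inval *tlb_inval)
	{
		return true;
	}

	static void my_inval_flush(struct xe_tlb_inval *tlb_inval)
	{
	}

	static long my_inval_timeout_delay(struct xe_tlb_inval *tlb_inval)
	{
		return msecs_to_jiffies(500);	/* arbitrary example value */
	}

	static const struct xe_tlb_inval_ops my_inval_ops = {
		.all		= my_inval_all,
		.ggtt		= my_inval_all,		/* reused here for brevity */
		.ppgtt		= my_inval_ppgtt,
		.initialized	= my_inval_initialized,
		.flush		= my_inval_flush,
		.timeout_delay	= my_inval_timeout_delay,
	};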
+ */ + +#define FENCE_STACK_BIT DMA_FENCE_FLAG_USER_BITS + +static void xe_tlb_inval_fence_fini(struct xe_tlb_inval_fence *fence) +{ + if (WARN_ON_ONCE(!fence->tlb_inval)) + return; + + xe_pm_runtime_put(fence->tlb_inval->xe); + fence->tlb_inval = NULL; /* fini() should be called once */ +} + +static void +xe_tlb_inval_fence_signal(struct xe_tlb_inval_fence *fence) +{ + bool stack = test_bit(FENCE_STACK_BIT, &fence->base.flags); + + lockdep_assert_held(&fence->tlb_inval->pending_lock); + + list_del(&fence->link); + trace_xe_tlb_inval_fence_signal(fence->tlb_inval->xe, fence); + xe_tlb_inval_fence_fini(fence); + dma_fence_signal(&fence->base); + if (!stack) + dma_fence_put(&fence->base); +} + +static void +xe_tlb_inval_fence_signal_unlocked(struct xe_tlb_inval_fence *fence) +{ + struct xe_tlb_inval *tlb_inval = fence->tlb_inval; + + spin_lock_irq(&tlb_inval->pending_lock); + xe_tlb_inval_fence_signal(fence); + spin_unlock_irq(&tlb_inval->pending_lock); +} + +static void xe_tlb_inval_fence_timeout(struct work_struct *work) +{ + struct xe_tlb_inval *tlb_inval = container_of(work, struct xe_tlb_inval, + fence_tdr.work); + struct xe_device *xe = tlb_inval->xe; + struct xe_tlb_inval_fence *fence, *next; + long timeout_delay = tlb_inval->ops->timeout_delay(tlb_inval); + + tlb_inval->ops->flush(tlb_inval); + + spin_lock_irq(&tlb_inval->pending_lock); + list_for_each_entry_safe(fence, next, + &tlb_inval->pending_fences, link) { + s64 since_inval_ms = ktime_ms_delta(ktime_get(), + fence->inval_time); + + if (msecs_to_jiffies(since_inval_ms) < timeout_delay) + break; + + trace_xe_tlb_inval_fence_timeout(xe, fence); + drm_err(&xe->drm, + "TLB invalidation fence timeout, seqno=%d recv=%d", + fence->seqno, tlb_inval->seqno_recv); + + fence->base.error = -ETIME; + xe_tlb_inval_fence_signal(fence); + } + if (!list_empty(&tlb_inval->pending_fences)) + queue_delayed_work(system_wq, &tlb_inval->fence_tdr, + timeout_delay); + spin_unlock_irq(&tlb_inval->pending_lock); +} + +/** + * tlb_inval_fini - Clean up TLB invalidation state + * @drm: @drm_device + * @arg: pointer to struct @xe_tlb_inval + * + * Cancel pending fence workers and clean up any additional + * TLB invalidation state. + */ +static void tlb_inval_fini(struct drm_device *drm, void *arg) +{ + struct xe_tlb_inval *tlb_inval = arg; + + xe_tlb_inval_reset(tlb_inval); +} + +/** + * xe_gt_tlb_inval_init - Initialize TLB invalidation state + * @gt: GT structure + * + * Initialize TLB invalidation state, purely software initialization, should + * be called once during driver load. + * + * Return: 0 on success, negative error code on error. 
+ */ +int xe_gt_tlb_inval_init_early(struct xe_gt *gt) +{ + struct xe_device *xe = gt_to_xe(gt); + struct xe_tlb_inval *tlb_inval = >->tlb_inval; + int err; + + tlb_inval->xe = xe; + tlb_inval->seqno = 1; + INIT_LIST_HEAD(&tlb_inval->pending_fences); + spin_lock_init(&tlb_inval->pending_lock); + spin_lock_init(&tlb_inval->lock); + INIT_DELAYED_WORK(&tlb_inval->fence_tdr, xe_tlb_inval_fence_timeout); + + err = drmm_mutex_init(&xe->drm, &tlb_inval->seqno_lock); + if (err) + return err; + + tlb_inval->job_wq = drmm_alloc_ordered_workqueue(&xe->drm, + "gt-tbl-inval-job-wq", + WQ_MEM_RECLAIM); + if (IS_ERR(tlb_inval->job_wq)) + return PTR_ERR(tlb_inval->job_wq); + + /* XXX: Blindly setting up backend to GuC */ + xe_guc_tlb_inval_init_early(>->uc.guc, tlb_inval); + + return drmm_add_action_or_reset(&xe->drm, tlb_inval_fini, tlb_inval); +} + +/** + * xe_tlb_inval_reset() - TLB invalidation reset + * @tlb_inval: TLB invalidation client + * + * Signal any pending invalidation fences, should be called during a GT reset + */ +void xe_tlb_inval_reset(struct xe_tlb_inval *tlb_inval) +{ + struct xe_tlb_inval_fence *fence, *next; + int pending_seqno; + + /* + * we can get here before the backends are even initialized if we're + * wedging very early, in which case there are not going to be any + * pendind fences so we can bail immediately. + */ + if (!tlb_inval->ops->initialized(tlb_inval)) + return; + + /* + * Backend is already disabled at this point. No new TLB requests can + * appear. + */ + + mutex_lock(&tlb_inval->seqno_lock); + spin_lock_irq(&tlb_inval->pending_lock); + cancel_delayed_work(&tlb_inval->fence_tdr); + /* + * We might have various kworkers waiting for TLB flushes to complete + * which are not tracked with an explicit TLB fence, however at this + * stage that will never happen since the backend is already disabled, + * so make sure we signal them here under the assumption that we have + * completed a full GT reset. + */ + if (tlb_inval->seqno == 1) + pending_seqno = TLB_INVALIDATION_SEQNO_MAX - 1; + else + pending_seqno = tlb_inval->seqno - 1; + WRITE_ONCE(tlb_inval->seqno_recv, pending_seqno); + + list_for_each_entry_safe(fence, next, + &tlb_inval->pending_fences, link) + xe_tlb_inval_fence_signal(fence); + spin_unlock_irq(&tlb_inval->pending_lock); + mutex_unlock(&tlb_inval->seqno_lock); +} + +static bool xe_tlb_inval_seqno_past(struct xe_tlb_inval *tlb_inval, int seqno) +{ + int seqno_recv = READ_ONCE(tlb_inval->seqno_recv); + + lockdep_assert_held(&tlb_inval->pending_lock); + + if (seqno - seqno_recv < -(TLB_INVALIDATION_SEQNO_MAX / 2)) + return false; + + if (seqno - seqno_recv > (TLB_INVALIDATION_SEQNO_MAX / 2)) + return true; + + return seqno_recv >= seqno; +} + +static void xe_tlb_inval_fence_prep(struct xe_tlb_inval_fence *fence) +{ + struct xe_tlb_inval *tlb_inval = fence->tlb_inval; + + fence->seqno = tlb_inval->seqno; + trace_xe_tlb_inval_fence_send(tlb_inval->xe, fence); + + spin_lock_irq(&tlb_inval->pending_lock); + fence->inval_time = ktime_get(); + list_add_tail(&fence->link, &tlb_inval->pending_fences); + + if (list_is_singular(&tlb_inval->pending_fences)) + queue_delayed_work(system_wq, &tlb_inval->fence_tdr, + tlb_inval->ops->timeout_delay(tlb_inval)); + spin_unlock_irq(&tlb_inval->pending_lock); + + tlb_inval->seqno = (tlb_inval->seqno + 1) % + TLB_INVALIDATION_SEQNO_MAX; + if (!tlb_inval->seqno) + tlb_inval->seqno = 1; +} + +#define xe_tlb_inval_issue(__tlb_inval, __fence, op, args...) 
\ +({ \ + int __ret; \ + \ + xe_assert((__tlb_inval)->xe, (__tlb_inval)->ops); \ + xe_assert((__tlb_inval)->xe, (__fence)); \ + \ + mutex_lock(&(__tlb_inval)->seqno_lock); \ + xe_tlb_inval_fence_prep((__fence)); \ + __ret = op((__tlb_inval), (__fence)->seqno, ##args); \ + if (__ret < 0) \ + xe_tlb_inval_fence_signal_unlocked((__fence)); \ + mutex_unlock(&(__tlb_inval)->seqno_lock); \ + \ + __ret == -ECANCELED ? 0 : __ret; \ +}) + +/** + * xe_tlb_inval_all() - Issue a TLB invalidation for all TLBs + * @tlb_inval: TLB invalidation client + * @fence: invalidation fence which will be signal on TLB invalidation + * completion + * + * Issue a TLB invalidation for all TLBs. Completion of TLB is asynchronous and + * caller can use the invalidation fence to wait for completion. + * + * Return: 0 on success, negative error code on error + */ +int xe_tlb_inval_all(struct xe_tlb_inval *tlb_inval, + struct xe_tlb_inval_fence *fence) +{ + return xe_tlb_inval_issue(tlb_inval, fence, tlb_inval->ops->all); +} + +/** + * xe_tlb_inval_ggtt() - Issue a TLB invalidation for the GGTT + * @tlb_inval: TLB invalidation client + * + * Issue a TLB invalidation for the GGTT. Completion of TLB is asynchronous and + * caller can use the invalidation fence to wait for completion. + * + * Return: 0 on success, negative error code on error + */ +int xe_tlb_inval_ggtt(struct xe_tlb_inval *tlb_inval) +{ + struct xe_tlb_inval_fence fence, *fence_ptr = &fence; + int ret; + + xe_tlb_inval_fence_init(tlb_inval, fence_ptr, true); + ret = xe_tlb_inval_issue(tlb_inval, fence_ptr, tlb_inval->ops->ggtt); + xe_tlb_inval_fence_wait(fence_ptr); + + return ret; +} + +/** + * xe_tlb_inval_range() - Issue a TLB invalidation for an address range + * @tlb_inval: TLB invalidation client + * @fence: invalidation fence which will be signal on TLB invalidation + * completion + * @start: start address + * @end: end address + * @asid: address space id + * + * Issue a range based TLB invalidation if supported, if not fallback to a full + * TLB invalidation. Completion of TLB is asynchronous and caller can use + * the invalidation fence to wait for completion. + * + * Return: Negative error code on error, 0 on success + */ +int xe_tlb_inval_range(struct xe_tlb_inval *tlb_inval, + struct xe_tlb_inval_fence *fence, u64 start, u64 end, + u32 asid) +{ + return xe_tlb_inval_issue(tlb_inval, fence, tlb_inval->ops->ppgtt, + start, end, asid); +} + +/** + * xe_tlb_inval_vm() - Issue a TLB invalidation for a VM + * @tlb_inval: TLB invalidation client + * @vm: VM to invalidate + * + * Invalidate entire VM's address space + */ +void xe_tlb_inval_vm(struct xe_tlb_inval *tlb_inval, struct xe_vm *vm) +{ + struct xe_tlb_inval_fence fence; + u64 range = 1ull << vm->xe->info.va_bits; + + xe_tlb_inval_fence_init(tlb_inval, &fence, true); + xe_tlb_inval_range(tlb_inval, &fence, 0, range, vm->usm.asid); + xe_tlb_inval_fence_wait(&fence); +} + +/** + * xe_tlb_inval_done_handler() - TLB invalidation done handler + * @tlb_inval: TLB invalidation client + * @seqno: seqno of invalidation that is done + * + * Update recv seqno, signal any TLB invalidation fences, and restart TDR + */ +void xe_tlb_inval_done_handler(struct xe_tlb_inval *tlb_inval, int seqno) +{ + struct xe_device *xe = tlb_inval->xe; + struct xe_tlb_inval_fence *fence, *next; + unsigned long flags; + + /* + * This can also be run both directly from the IRQ handler and also in + * process_g2h_msg(). 
Only one may process any individual CT message, + * however the order they are processed here could result in skipping a + * seqno. To handle that we just process all the seqnos from the last + * seqno_recv up to and including the one in msg[0]. The delta should be + * very small so there shouldn't be much of pending_fences we actually + * need to iterate over here. + * + * From GuC POV we expect the seqnos to always appear in-order, so if we + * see something later in the timeline we can be sure that anything + * appearing earlier has already signalled, just that we have yet to + * officially process the CT message like if racing against + * process_g2h_msg(). + */ + spin_lock_irqsave(&tlb_inval->pending_lock, flags); + if (xe_tlb_inval_seqno_past(tlb_inval, seqno)) { + spin_unlock_irqrestore(&tlb_inval->pending_lock, flags); + return; + } + + WRITE_ONCE(tlb_inval->seqno_recv, seqno); + + list_for_each_entry_safe(fence, next, + &tlb_inval->pending_fences, link) { + trace_xe_tlb_inval_fence_recv(xe, fence); + + if (!xe_tlb_inval_seqno_past(tlb_inval, fence->seqno)) + break; + + xe_tlb_inval_fence_signal(fence); + } + + if (!list_empty(&tlb_inval->pending_fences)) + mod_delayed_work(system_wq, + &tlb_inval->fence_tdr, + tlb_inval->ops->timeout_delay(tlb_inval)); + else + cancel_delayed_work(&tlb_inval->fence_tdr); + + spin_unlock_irqrestore(&tlb_inval->pending_lock, flags); +} + +static const char * +xe_inval_fence_get_driver_name(struct dma_fence *dma_fence) +{ + return "xe"; +} + +static const char * +xe_inval_fence_get_timeline_name(struct dma_fence *dma_fence) +{ + return "tlb_inval_fence"; +} + +static const struct dma_fence_ops inval_fence_ops = { + .get_driver_name = xe_inval_fence_get_driver_name, + .get_timeline_name = xe_inval_fence_get_timeline_name, +}; + +/** + * xe_tlb_inval_fence_init() - Initialize TLB invalidation fence + * @tlb_inval: TLB invalidation client + * @fence: TLB invalidation fence to initialize + * @stack: fence is stack variable + * + * Initialize TLB invalidation fence for use. xe_tlb_inval_fence_fini + * will be automatically called when fence is signalled (all fences must signal), + * even on error. 
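As a usage note for the fence interface documented here: a caller that needs a synchronous invalidation initialises a stack fence, issues the request and waits, exactly as xe_tlb_inval_ggtt() and xe_tlb_inval_vm() above do. A short sketch, assuming a tlb_inval client and an asid are already in hand and using an arbitrary example range:

	struct xe_tlb_inval_fence fence;

	/* stack == true: no extra reference is taken, fini runs on signal */
	xe_tlb_inval_fence_init(tlb_inval, &fence, true);
	xe_tlb_inval_range(tlb_inval, &fence, SZ_64K, SZ_128K, asid);
	xe_tlb_inval_fence_wait(&fence);	/* non-interruptible */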
+ */ +void xe_tlb_inval_fence_init(struct xe_tlb_inval *tlb_inval, + struct xe_tlb_inval_fence *fence, + bool stack) +{ + xe_pm_runtime_get_noresume(tlb_inval->xe); + + spin_lock_irq(&tlb_inval->lock); + dma_fence_init(&fence->base, &inval_fence_ops, &tlb_inval->lock, + dma_fence_context_alloc(1), 1); + spin_unlock_irq(&tlb_inval->lock); + INIT_LIST_HEAD(&fence->link); + if (stack) + set_bit(FENCE_STACK_BIT, &fence->base.flags); + else + dma_fence_get(&fence->base); + fence->tlb_inval = tlb_inval; +} diff --git a/drivers/gpu/drm/xe/xe_tlb_inval.h b/drivers/gpu/drm/xe/xe_tlb_inval.h new file mode 100644 index 000000000000..554634dfd4e2 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_tlb_inval.h @@ -0,0 +1,46 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2025 Intel Corporation + */ + +#ifndef _XE_TLB_INVAL_H_ +#define _XE_TLB_INVAL_H_ + +#include <linux/types.h> + +#include "xe_tlb_inval_types.h" + +struct xe_gt; +struct xe_guc; +struct xe_vm; + +int xe_gt_tlb_inval_init_early(struct xe_gt *gt); + +void xe_tlb_inval_reset(struct xe_tlb_inval *tlb_inval); +int xe_tlb_inval_all(struct xe_tlb_inval *tlb_inval, + struct xe_tlb_inval_fence *fence); +int xe_tlb_inval_ggtt(struct xe_tlb_inval *tlb_inval); +void xe_tlb_inval_vm(struct xe_tlb_inval *tlb_inval, struct xe_vm *vm); +int xe_tlb_inval_range(struct xe_tlb_inval *tlb_inval, + struct xe_tlb_inval_fence *fence, + u64 start, u64 end, u32 asid); + +void xe_tlb_inval_fence_init(struct xe_tlb_inval *tlb_inval, + struct xe_tlb_inval_fence *fence, + bool stack); + +/** + * xe_tlb_inval_fence_wait() - TLB invalidiation fence wait + * @fence: TLB invalidation fence to wait on + * + * Wait on a TLB invalidiation fence until it signals, non interruptable + */ +static inline void +xe_tlb_inval_fence_wait(struct xe_tlb_inval_fence *fence) +{ + dma_fence_wait(&fence->base, false); +} + +void xe_tlb_inval_done_handler(struct xe_tlb_inval *tlb_inval, int seqno); + +#endif /* _XE_TLB_INVAL_ */ diff --git a/drivers/gpu/drm/xe/xe_tlb_inval_job.c b/drivers/gpu/drm/xe/xe_tlb_inval_job.c new file mode 100644 index 000000000000..492def04a559 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_tlb_inval_job.c @@ -0,0 +1,268 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2025 Intel Corporation + */ + +#include "xe_assert.h" +#include "xe_dep_job_types.h" +#include "xe_dep_scheduler.h" +#include "xe_exec_queue.h" +#include "xe_gt_types.h" +#include "xe_tlb_inval.h" +#include "xe_tlb_inval_job.h" +#include "xe_migrate.h" +#include "xe_pm.h" + +/** struct xe_tlb_inval_job - TLB invalidation job */ +struct xe_tlb_inval_job { + /** @dep: base generic dependency Xe job */ + struct xe_dep_job dep; + /** @tlb_inval: TLB invalidation client */ + struct xe_tlb_inval *tlb_inval; + /** @q: exec queue issuing the invalidate */ + struct xe_exec_queue *q; + /** @refcount: ref count of this job */ + struct kref refcount; + /** + * @fence: dma fence to indicate completion. 1 way relationship - job + * can safely reference fence, fence cannot safely reference job. 
+ */ + struct dma_fence *fence; + /** @start: Start address to invalidate */ + u64 start; + /** @end: End address to invalidate */ + u64 end; + /** @asid: Address space ID to invalidate */ + u32 asid; + /** @fence_armed: Fence has been armed */ + bool fence_armed; +}; + +static struct dma_fence *xe_tlb_inval_job_run(struct xe_dep_job *dep_job) +{ + struct xe_tlb_inval_job *job = + container_of(dep_job, typeof(*job), dep); + struct xe_tlb_inval_fence *ifence = + container_of(job->fence, typeof(*ifence), base); + + xe_tlb_inval_range(job->tlb_inval, ifence, job->start, + job->end, job->asid); + + return job->fence; +} + +static void xe_tlb_inval_job_free(struct xe_dep_job *dep_job) +{ + struct xe_tlb_inval_job *job = + container_of(dep_job, typeof(*job), dep); + + /* Pairs with get in xe_tlb_inval_job_push */ + xe_tlb_inval_job_put(job); +} + +static const struct xe_dep_job_ops dep_job_ops = { + .run_job = xe_tlb_inval_job_run, + .free_job = xe_tlb_inval_job_free, +}; + +/** + * xe_tlb_inval_job_create() - TLB invalidation job create + * @q: exec queue issuing the invalidate + * @tlb_inval: TLB invalidation client + * @dep_scheduler: Dependency scheduler for job + * @start: Start address to invalidate + * @end: End address to invalidate + * @asid: Address space ID to invalidate + * + * Create a TLB invalidation job and initialize internal fields. The caller is + * responsible for releasing the creation reference. + * + * Return: TLB invalidation job object on success, ERR_PTR failure + */ +struct xe_tlb_inval_job * +xe_tlb_inval_job_create(struct xe_exec_queue *q, struct xe_tlb_inval *tlb_inval, + struct xe_dep_scheduler *dep_scheduler, u64 start, + u64 end, u32 asid) +{ + struct xe_tlb_inval_job *job; + struct drm_sched_entity *entity = + xe_dep_scheduler_entity(dep_scheduler); + struct xe_tlb_inval_fence *ifence; + int err; + + job = kmalloc(sizeof(*job), GFP_KERNEL); + if (!job) + return ERR_PTR(-ENOMEM); + + job->q = q; + job->tlb_inval = tlb_inval; + job->start = start; + job->end = end; + job->asid = asid; + job->fence_armed = false; + job->dep.ops = &dep_job_ops; + kref_init(&job->refcount); + xe_exec_queue_get(q); /* Pairs with put in xe_tlb_inval_job_destroy */ + + ifence = kmalloc(sizeof(*ifence), GFP_KERNEL); + if (!ifence) { + err = -ENOMEM; + goto err_job; + } + job->fence = &ifence->base; + + err = drm_sched_job_init(&job->dep.drm, entity, 1, NULL, + q->xef ? q->xef->drm->client_id : 0); + if (err) + goto err_fence; + + /* Pairs with put in xe_tlb_inval_job_destroy */ + xe_pm_runtime_get_noresume(gt_to_xe(q->gt)); + + return job; + +err_fence: + kfree(ifence); +err_job: + xe_exec_queue_put(q); + kfree(job); + + return ERR_PTR(err); +} + +static void xe_tlb_inval_job_destroy(struct kref *ref) +{ + struct xe_tlb_inval_job *job = container_of(ref, typeof(*job), + refcount); + struct xe_tlb_inval_fence *ifence = + container_of(job->fence, typeof(*ifence), base); + struct xe_exec_queue *q = job->q; + struct xe_device *xe = gt_to_xe(q->gt); + + if (!job->fence_armed) + kfree(ifence); + else + /* Ref from xe_tlb_inval_fence_init */ + dma_fence_put(job->fence); + + drm_sched_job_cleanup(&job->dep.drm); + kfree(job); + xe_exec_queue_put(q); /* Pairs with get from xe_tlb_inval_job_create */ + xe_pm_runtime_put(xe); /* Pairs with get from xe_tlb_inval_job_create */ +} + +/** + * xe_tlb_inval_alloc_dep() - TLB invalidation job alloc dependency + * @job: TLB invalidation job to alloc dependency for + * + * Allocate storage for a dependency in the TLB invalidation fence. 
This + * function should be called at most once per job and must be paired with + * xe_tlb_inval_job_push being called with a real fence. + * + * Return: 0 on success, -errno on failure + */ +int xe_tlb_inval_job_alloc_dep(struct xe_tlb_inval_job *job) +{ + xe_assert(gt_to_xe(job->q->gt), !xa_load(&job->dep.drm.dependencies, 0)); + might_alloc(GFP_KERNEL); + + return drm_sched_job_add_dependency(&job->dep.drm, + dma_fence_get_stub()); +} + +/** + * xe_tlb_inval_job_push() - TLB invalidation job push + * @job: TLB invalidation job to push + * @m: The migration object being used + * @fence: Dependency for TLB invalidation job + * + * Pushes a TLB invalidation job for execution, using @fence as a dependency. + * Storage for @fence must be preallocated with xe_tlb_inval_job_alloc_dep + * prior to this call if @fence is not signaled. Takes a reference to the job’s + * finished fence, which the caller is responsible for releasing, and return it + * to the caller. This function is safe to be called in the path of reclaim. + * + * Return: Job's finished fence on success, cannot fail + */ +struct dma_fence *xe_tlb_inval_job_push(struct xe_tlb_inval_job *job, + struct xe_migrate *m, + struct dma_fence *fence) +{ + struct xe_tlb_inval_fence *ifence = + container_of(job->fence, typeof(*ifence), base); + + if (!dma_fence_is_signaled(fence)) { + void *ptr; + + /* + * Can be in path of reclaim, hence the preallocation of fence + * storage in xe_tlb_inval_job_alloc_dep. Verify caller did + * this correctly. + */ + xe_assert(gt_to_xe(job->q->gt), + xa_load(&job->dep.drm.dependencies, 0) == + dma_fence_get_stub()); + + dma_fence_get(fence); /* ref released once dependency processed by scheduler */ + ptr = xa_store(&job->dep.drm.dependencies, 0, fence, + GFP_ATOMIC); + xe_assert(gt_to_xe(job->q->gt), !xa_is_err(ptr)); + } + + xe_tlb_inval_job_get(job); /* Pairs with put in free_job */ + job->fence_armed = true; + + /* + * We need the migration lock to protect the job's seqno and the spsc + * queue, only taken on migration queue, user queues protected dma-resv + * VM lock. + */ + xe_migrate_job_lock(m, job->q); + + /* Creation ref pairs with put in xe_tlb_inval_job_destroy */ + xe_tlb_inval_fence_init(job->tlb_inval, ifence, false); + dma_fence_get(job->fence); /* Pairs with put in DRM scheduler */ + + drm_sched_job_arm(&job->dep.drm); + /* + * caller ref, get must be done before job push as it could immediately + * signal and free. + */ + dma_fence_get(&job->dep.drm.s_fence->finished); + drm_sched_entity_push_job(&job->dep.drm); + + xe_migrate_job_unlock(m, job->q); + + /* + * Not using job->fence, as it has its own dma-fence context, which does + * not allow TLB invalidation fences on the same queue, GT tuple to + * be squashed in dma-resv/DRM scheduler. Instead, we use the DRM scheduler + * context and job's finished fence, which enables squashing. + */ + return &job->dep.drm.s_fence->finished; +} + +/** + * xe_tlb_inval_job_get() - Get a reference to TLB invalidation job + * @job: TLB invalidation job object + * + * Increment the TLB invalidation job's reference count + */ +void xe_tlb_inval_job_get(struct xe_tlb_inval_job *job) +{ + kref_get(&job->refcount); +} + +/** + * xe_tlb_inval_job_put() - Put a reference to TLB invalidation job + * @job: TLB invalidation job object + * + * Decrement the TLB invalidation job's reference count, call + * xe_tlb_inval_job_destroy when reference count == 0. Skips decrement if + * input @job is NULL or IS_ERR. 
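Taken together, the job interface above implies the following lifecycle: create the job, preallocate the dependency slot while allocations are still safe, push it with whatever fence must be waited on, then drop the caller's references. A rough sketch with error handling abbreviated; q, tlb_inval, dep_scheduler, m and in_fence are assumed to be already set up by the caller:

	struct xe_tlb_inval_job *job;
	struct dma_fence *out_fence;
	int err;

	job = xe_tlb_inval_job_create(q, tlb_inval, dep_scheduler,
				      start, end, asid);
	if (IS_ERR(job))
		return PTR_ERR(job);

	/* Must be done up front; push() may run in the reclaim path. */
	err = xe_tlb_inval_job_alloc_dep(job);
	if (err) {
		xe_tlb_inval_job_put(job);	/* drops the creation reference */
		return err;
	}

	/* Caller owns a reference to the returned finished fence. */
	out_fence = xe_tlb_inval_job_push(job, m, in_fence);

	/* ... install out_fence in a dma-resv or wait on it ... */

	dma_fence_put(out_fence);
	xe_tlb_inval_job_put(job);	/* creation reference */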
+ */ +void xe_tlb_inval_job_put(struct xe_tlb_inval_job *job) +{ + if (!IS_ERR_OR_NULL(job)) + kref_put(&job->refcount, xe_tlb_inval_job_destroy); +} diff --git a/drivers/gpu/drm/xe/xe_tlb_inval_job.h b/drivers/gpu/drm/xe/xe_tlb_inval_job.h new file mode 100644 index 000000000000..e63edcb26b50 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_tlb_inval_job.h @@ -0,0 +1,33 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2025 Intel Corporation + */ + +#ifndef _XE_TLB_INVAL_JOB_H_ +#define _XE_TLB_INVAL_JOB_H_ + +#include <linux/types.h> + +struct dma_fence; +struct xe_dep_scheduler; +struct xe_exec_queue; +struct xe_tlb_inval; +struct xe_tlb_inval_job; +struct xe_migrate; + +struct xe_tlb_inval_job * +xe_tlb_inval_job_create(struct xe_exec_queue *q, struct xe_tlb_inval *tlb_inval, + struct xe_dep_scheduler *dep_scheduler, + u64 start, u64 end, u32 asid); + +int xe_tlb_inval_job_alloc_dep(struct xe_tlb_inval_job *job); + +struct dma_fence *xe_tlb_inval_job_push(struct xe_tlb_inval_job *job, + struct xe_migrate *m, + struct dma_fence *fence); + +void xe_tlb_inval_job_get(struct xe_tlb_inval_job *job); + +void xe_tlb_inval_job_put(struct xe_tlb_inval_job *job); + +#endif diff --git a/drivers/gpu/drm/xe/xe_tlb_inval_types.h b/drivers/gpu/drm/xe/xe_tlb_inval_types.h new file mode 100644 index 000000000000..8f8b060e9005 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_tlb_inval_types.h @@ -0,0 +1,130 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2023 Intel Corporation + */ + +#ifndef _XE_TLB_INVAL_TYPES_H_ +#define _XE_TLB_INVAL_TYPES_H_ + +#include <linux/workqueue.h> +#include <linux/dma-fence.h> + +struct xe_tlb_inval; + +/** struct xe_tlb_inval_ops - TLB invalidation ops (backend) */ +struct xe_tlb_inval_ops { + /** + * @all: Invalidate all TLBs + * @tlb_inval: TLB invalidation client + * @seqno: Seqno of TLB invalidation + * + * Return 0 on success, -ECANCELED if backend is mid-reset, error on + * failure + */ + int (*all)(struct xe_tlb_inval *tlb_inval, u32 seqno); + + /** + * @ggtt: Invalidate global translation TLBs + * @tlb_inval: TLB invalidation client + * @seqno: Seqno of TLB invalidation + * + * Return 0 on success, -ECANCELED if backend is mid-reset, error on + * failure + */ + int (*ggtt)(struct xe_tlb_inval *tlb_inval, u32 seqno); + + /** + * @ppgtt: Invalidate per-process translation TLBs + * @tlb_inval: TLB invalidation client + * @seqno: Seqno of TLB invalidation + * @start: Start address + * @end: End address + * @asid: Address space ID + * + * Return 0 on success, -ECANCELED if backend is mid-reset, error on + * failure + */ + int (*ppgtt)(struct xe_tlb_inval *tlb_inval, u32 seqno, u64 start, + u64 end, u32 asid); + + /** + * @initialized: Backend is initialized + * @tlb_inval: TLB invalidation client + * + * Return: True if back is initialized, False otherwise + */ + bool (*initialized)(struct xe_tlb_inval *tlb_inval); + + /** + * @flush: Flush pending TLB invalidations + * @tlb_inval: TLB invalidation client + */ + void (*flush)(struct xe_tlb_inval *tlb_inval); + + /** + * @timeout_delay: Timeout delay for TLB invalidation + * @tlb_inval: TLB invalidation client + * + * Return: Timeout delay for TLB invalidation in jiffies + */ + long (*timeout_delay)(struct xe_tlb_inval *tlb_inval); +}; + +/** struct xe_tlb_inval - TLB invalidation client (frontend) */ +struct xe_tlb_inval { + /** @private: Backend private pointer */ + void *private; + /** @xe: Pointer to Xe device */ + struct xe_device *xe; + /** @ops: TLB invalidation ops */ + const struct xe_tlb_inval_ops *ops; 
+ /** @tlb_inval.seqno: TLB invalidation seqno, protected by CT lock */ +#define TLB_INVALIDATION_SEQNO_MAX 0x100000 + int seqno; + /** @tlb_invalidation.seqno_lock: protects @tlb_invalidation.seqno */ + struct mutex seqno_lock; + /** + * @seqno_recv: last received TLB invalidation seqno, protected by + * CT lock + */ + int seqno_recv; + /** + * @pending_fences: list of pending fences waiting TLB invaliations, + * protected CT lock + */ + struct list_head pending_fences; + /** + * @pending_lock: protects @pending_fences and updating @seqno_recv. + */ + spinlock_t pending_lock; + /** + * @fence_tdr: schedules a delayed call to xe_tlb_fence_timeout after + * the timeout interval is over. + */ + struct delayed_work fence_tdr; + /** @job_wq: schedules TLB invalidation jobs */ + struct workqueue_struct *job_wq; + /** @tlb_inval.lock: protects TLB invalidation fences */ + spinlock_t lock; +}; + +/** + * struct xe_tlb_inval_fence - TLB invalidation fence + * + * Optionally passed to xe_tlb_inval* functions and will be signaled upon TLB + * invalidation completion. + */ +struct xe_tlb_inval_fence { + /** @base: dma fence base */ + struct dma_fence base; + /** @tlb_inval: TLB invalidation client which fence belong to */ + struct xe_tlb_inval *tlb_inval; + /** @link: link into list of pending tlb fences */ + struct list_head link; + /** @seqno: seqno of TLB invalidation to signal fence one */ + int seqno; + /** @inval_time: time of TLB invalidation */ + ktime_t inval_time; +}; + +#endif diff --git a/drivers/gpu/drm/xe/xe_trace.h b/drivers/gpu/drm/xe/xe_trace.h index b4a3577df70c..314f42fcbcbd 100644 --- a/drivers/gpu/drm/xe/xe_trace.h +++ b/drivers/gpu/drm/xe/xe_trace.h @@ -14,10 +14,10 @@ #include "xe_exec_queue_types.h" #include "xe_gpu_scheduler_types.h" -#include "xe_gt_tlb_invalidation_types.h" #include "xe_gt_types.h" #include "xe_guc_exec_queue_types.h" #include "xe_sched_job.h" +#include "xe_tlb_inval_types.h" #include "xe_vm.h" #define __dev_name_xe(xe) dev_name((xe)->drm.dev) @@ -25,13 +25,13 @@ #define __dev_name_gt(gt) __dev_name_xe(gt_to_xe((gt))) #define __dev_name_eq(q) __dev_name_gt((q)->gt) -DECLARE_EVENT_CLASS(xe_gt_tlb_invalidation_fence, - TP_PROTO(struct xe_device *xe, struct xe_gt_tlb_invalidation_fence *fence), +DECLARE_EVENT_CLASS(xe_tlb_inval_fence, + TP_PROTO(struct xe_device *xe, struct xe_tlb_inval_fence *fence), TP_ARGS(xe, fence), TP_STRUCT__entry( __string(dev, __dev_name_xe(xe)) - __field(struct xe_gt_tlb_invalidation_fence *, fence) + __field(struct xe_tlb_inval_fence *, fence) __field(int, seqno) ), @@ -45,39 +45,23 @@ DECLARE_EVENT_CLASS(xe_gt_tlb_invalidation_fence, __get_str(dev), __entry->fence, __entry->seqno) ); -DEFINE_EVENT(xe_gt_tlb_invalidation_fence, xe_gt_tlb_invalidation_fence_create, - TP_PROTO(struct xe_device *xe, struct xe_gt_tlb_invalidation_fence *fence), +DEFINE_EVENT(xe_tlb_inval_fence, xe_tlb_inval_fence_send, + TP_PROTO(struct xe_device *xe, struct xe_tlb_inval_fence *fence), TP_ARGS(xe, fence) ); -DEFINE_EVENT(xe_gt_tlb_invalidation_fence, - xe_gt_tlb_invalidation_fence_work_func, - TP_PROTO(struct xe_device *xe, struct xe_gt_tlb_invalidation_fence *fence), +DEFINE_EVENT(xe_tlb_inval_fence, xe_tlb_inval_fence_recv, + TP_PROTO(struct xe_device *xe, struct xe_tlb_inval_fence *fence), TP_ARGS(xe, fence) ); -DEFINE_EVENT(xe_gt_tlb_invalidation_fence, xe_gt_tlb_invalidation_fence_cb, - TP_PROTO(struct xe_device *xe, struct xe_gt_tlb_invalidation_fence *fence), +DEFINE_EVENT(xe_tlb_inval_fence, xe_tlb_inval_fence_signal, + TP_PROTO(struct xe_device 
*xe, struct xe_tlb_inval_fence *fence), TP_ARGS(xe, fence) ); -DEFINE_EVENT(xe_gt_tlb_invalidation_fence, xe_gt_tlb_invalidation_fence_send, - TP_PROTO(struct xe_device *xe, struct xe_gt_tlb_invalidation_fence *fence), - TP_ARGS(xe, fence) -); - -DEFINE_EVENT(xe_gt_tlb_invalidation_fence, xe_gt_tlb_invalidation_fence_recv, - TP_PROTO(struct xe_device *xe, struct xe_gt_tlb_invalidation_fence *fence), - TP_ARGS(xe, fence) -); - -DEFINE_EVENT(xe_gt_tlb_invalidation_fence, xe_gt_tlb_invalidation_fence_signal, - TP_PROTO(struct xe_device *xe, struct xe_gt_tlb_invalidation_fence *fence), - TP_ARGS(xe, fence) -); - -DEFINE_EVENT(xe_gt_tlb_invalidation_fence, xe_gt_tlb_invalidation_fence_timeout, - TP_PROTO(struct xe_device *xe, struct xe_gt_tlb_invalidation_fence *fence), +DEFINE_EVENT(xe_tlb_inval_fence, xe_tlb_inval_fence_timeout, + TP_PROTO(struct xe_device *xe, struct xe_tlb_inval_fence *fence), TP_ARGS(xe, fence) ); diff --git a/drivers/gpu/drm/xe/xe_trace_bo.h b/drivers/gpu/drm/xe/xe_trace_bo.h index ccebd5f0878e..86323cf3be2c 100644 --- a/drivers/gpu/drm/xe/xe_trace_bo.h +++ b/drivers/gpu/drm/xe/xe_trace_bo.h @@ -33,7 +33,7 @@ DECLARE_EVENT_CLASS(xe_bo, TP_fast_assign( __assign_str(dev); - __entry->size = bo->size; + __entry->size = xe_bo_size(bo); __entry->flags = bo->flags; __entry->vm = bo->vm; ), @@ -73,7 +73,7 @@ TRACE_EVENT(xe_bo_move, TP_fast_assign( __entry->bo = bo; - __entry->size = bo->size; + __entry->size = xe_bo_size(bo); __assign_str(new_placement_name); __assign_str(old_placement_name); __assign_str(device_id); diff --git a/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c b/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c index d9c9d2547aad..dc588255674d 100644 --- a/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c +++ b/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c @@ -25,6 +25,7 @@ #include "xe_ttm_stolen_mgr.h" #include "xe_ttm_vram_mgr.h" #include "xe_wa.h" +#include "xe_vram.h" struct xe_ttm_stolen_mgr { struct xe_ttm_vram_mgr base; @@ -82,15 +83,16 @@ static u32 get_wopcm_size(struct xe_device *xe) static s64 detect_bar2_dgfx(struct xe_device *xe, struct xe_ttm_stolen_mgr *mgr) { - struct xe_tile *tile = xe_device_get_root_tile(xe); + struct xe_vram_region *tile_vram = xe_device_get_root_tile(xe)->mem.vram; + resource_size_t tile_io_start = xe_vram_region_io_start(tile_vram); struct xe_mmio *mmio = xe_root_tile_mmio(xe); struct pci_dev *pdev = to_pci_dev(xe->drm.dev); u64 stolen_size, wopcm_size; u64 tile_offset; u64 tile_size; - tile_offset = tile->mem.vram.io_start - xe->mem.vram.io_start; - tile_size = tile->mem.vram.actual_physical_size; + tile_offset = tile_io_start - xe_vram_region_io_start(xe->mem.vram); + tile_size = xe_vram_region_actual_physical_size(tile_vram); /* Use DSM base address instead for stolen memory */ mgr->stolen_base = (xe_mmio_read64_2x32(mmio, DSMBASE) & BDSM_MASK) - tile_offset; @@ -107,7 +109,7 @@ static s64 detect_bar2_dgfx(struct xe_device *xe, struct xe_ttm_stolen_mgr *mgr) /* Verify usage fits in the actual resource available */ if (mgr->stolen_base + stolen_size <= pci_resource_len(pdev, LMEM_BAR)) - mgr->io_base = tile->mem.vram.io_start + mgr->stolen_base; + mgr->io_base = tile_io_start + mgr->stolen_base; /* * There may be few KB of platform dependent reserved memory at the end @@ -164,7 +166,7 @@ static u32 detect_bar2_integrated(struct xe_device *xe, struct xe_ttm_stolen_mgr stolen_size -= wopcm_size; - if (media_gt && XE_WA(media_gt, 14019821291)) { + if (media_gt && XE_GT_WA(media_gt, 14019821291)) { u64 gscpsmi_base = xe_mmio_read64_2x32(&media_gt->mmio, 
GSCPSMI_BASE) & ~GENMASK_ULL(5, 0); diff --git a/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c b/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c index 9e375a40aee9..9175b4a2214b 100644 --- a/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c +++ b/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c @@ -15,6 +15,7 @@ #include "xe_gt.h" #include "xe_res_cursor.h" #include "xe_ttm_vram_mgr.h" +#include "xe_vram_types.h" static inline struct drm_buddy_block * xe_ttm_vram_mgr_first_block(struct list_head *list) @@ -337,13 +338,20 @@ int __xe_ttm_vram_mgr_init(struct xe_device *xe, struct xe_ttm_vram_mgr *mgr, return drmm_add_action_or_reset(&xe->drm, ttm_vram_mgr_fini, mgr); } -int xe_ttm_vram_mgr_init(struct xe_tile *tile, struct xe_ttm_vram_mgr *mgr) +/** + * xe_ttm_vram_mgr_init - initialize TTM VRAM region + * @xe: pointer to Xe device + * @vram: pointer to xe_vram_region that contains the memory region attributes + * + * Initialize the Xe TTM for given @vram region using the given parameters. + * + * Returns 0 for success, negative error code otherwise. + */ +int xe_ttm_vram_mgr_init(struct xe_device *xe, struct xe_vram_region *vram) { - struct xe_device *xe = tile_to_xe(tile); - struct xe_vram_region *vram = &tile->mem.vram; - - return __xe_ttm_vram_mgr_init(xe, mgr, XE_PL_VRAM0 + tile->id, - vram->usable_size, vram->io_size, + return __xe_ttm_vram_mgr_init(xe, &vram->ttm, vram->placement, + xe_vram_region_usable_size(vram), + xe_vram_region_io_size(vram), PAGE_SIZE); } @@ -392,7 +400,7 @@ int xe_ttm_vram_mgr_alloc_sgt(struct xe_device *xe, */ xe_res_first(res, offset, length, &cursor); for_each_sgtable_sg((*sgt), sg, i) { - phys_addr_t phys = cursor.start + tile->mem.vram.io_start; + phys_addr_t phys = cursor.start + xe_vram_region_io_start(tile->mem.vram); size_t size = min_t(u64, cursor.size, SZ_2G); dma_addr_t addr; diff --git a/drivers/gpu/drm/xe/xe_ttm_vram_mgr.h b/drivers/gpu/drm/xe/xe_ttm_vram_mgr.h index cc76050e376d..87b7fae5edba 100644 --- a/drivers/gpu/drm/xe/xe_ttm_vram_mgr.h +++ b/drivers/gpu/drm/xe/xe_ttm_vram_mgr.h @@ -11,11 +11,12 @@ enum dma_data_direction; struct xe_device; struct xe_tile; +struct xe_vram_region; int __xe_ttm_vram_mgr_init(struct xe_device *xe, struct xe_ttm_vram_mgr *mgr, u32 mem_type, u64 size, u64 io_size, u64 default_page_size); -int xe_ttm_vram_mgr_init(struct xe_tile *tile, struct xe_ttm_vram_mgr *mgr); +int xe_ttm_vram_mgr_init(struct xe_device *xe, struct xe_vram_region *vram); int xe_ttm_vram_mgr_alloc_sgt(struct xe_device *xe, struct ttm_resource *res, u64 offset, u64 length, diff --git a/drivers/gpu/drm/xe/xe_tuning.c b/drivers/gpu/drm/xe/xe_tuning.c index 828b45b24c23..a524170a04d0 100644 --- a/drivers/gpu/drm/xe/xe_tuning.c +++ b/drivers/gpu/drm/xe/xe_tuning.c @@ -99,7 +99,7 @@ static const struct xe_rtp_entry_sr engine_tunings[] = { XE_RTP_ACTIONS(SET(SAMPLER_MODE, INDIRECT_STATE_BASE_ADDR_OVERRIDE)) }, { XE_RTP_NAME("Tuning: Disable NULL query for Anyhit Shader"), - XE_RTP_RULES(GRAPHICS_VERSION_RANGE(3000, XE_RTP_END_VERSION_UNDEFINED), + XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2000, XE_RTP_END_VERSION_UNDEFINED), FUNC(xe_rtp_match_first_render_or_compute)), XE_RTP_ACTIONS(SET(RT_CTRL, DIS_NULL_QUERY)) }, diff --git a/drivers/gpu/drm/xe/xe_uc.c b/drivers/gpu/drm/xe/xe_uc.c index 3a8751a8b92d..465bda355443 100644 --- a/drivers/gpu/drm/xe/xe_uc.c +++ b/drivers/gpu/drm/xe/xe_uc.c @@ -33,6 +33,22 @@ uc_to_xe(struct xe_uc *uc) } /* Should be called once at driver load only */ +int xe_uc_init_noalloc(struct xe_uc *uc) +{ + int ret; + + ret = xe_guc_init_noalloc(&uc->guc); + if (ret) + goto err; 
+ + /* HuC and GSC have no early dependencies and will be initialized during xe_uc_init(). */ + return 0; + +err: + xe_gt_err(uc_to_gt(uc), "Failed to early initialize uC (%pe)\n", ERR_PTR(ret)); + return ret; +} + int xe_uc_init(struct xe_uc *uc) { int ret; @@ -56,15 +72,17 @@ int xe_uc_init(struct xe_uc *uc) if (!xe_device_uc_enabled(uc_to_xe(uc))) return 0; - if (IS_SRIOV_VF(uc_to_xe(uc))) - return 0; + if (!IS_SRIOV_VF(uc_to_xe(uc))) { + ret = xe_wopcm_init(&uc->wopcm); + if (ret) + goto err; + } - ret = xe_wopcm_init(&uc->wopcm); + ret = xe_guc_min_load_for_hwconfig(&uc->guc); if (ret) goto err; return 0; - err: xe_gt_err(uc_to_gt(uc), "Failed to initialize uC (%pe)\n", ERR_PTR(ret)); return ret; @@ -126,28 +144,7 @@ int xe_uc_sanitize_reset(struct xe_uc *uc) return uc_reset(uc); } -/** - * xe_uc_init_hwconfig - minimally init Uc, read and parse hwconfig - * @uc: The UC object - * - * Return: 0 on success, negative error code on error. - */ -int xe_uc_init_hwconfig(struct xe_uc *uc) -{ - int ret; - - /* GuC submission not enabled, nothing to do */ - if (!xe_device_uc_enabled(uc_to_xe(uc))) - return 0; - - ret = xe_guc_min_load_for_hwconfig(&uc->guc); - if (ret) - return ret; - - return 0; -} - -static int vf_uc_init_hw(struct xe_uc *uc) +static int vf_uc_load_hw(struct xe_uc *uc) { int err; @@ -161,22 +158,30 @@ static int vf_uc_init_hw(struct xe_uc *uc) err = xe_gt_sriov_vf_connect(uc_to_gt(uc)); if (err) - return err; + goto err_out; uc->guc.submission_state.enabled = true; + err = xe_guc_opt_in_features_enable(&uc->guc); + if (err) + goto err_out; + err = xe_gt_record_default_lrcs(uc_to_gt(uc)); if (err) - return err; + goto err_out; return 0; + +err_out: + xe_guc_sanitize(&uc->guc); + return err; } /* * Should be called during driver load, after every GT reset, and after every * suspend to reload / auth the firmwares. 
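For orientation, the renamed uC entry points settle into roughly the following order. This is reconstructed from the comments above and the xe_uc.h declarations rather than quoted from the actual call sites, so treat it as a sketch (error handling elided):

	/* Driver probe, once: software-only setup, then hwconfig-dependent setup. */
	err = xe_uc_init_noalloc(uc);
	err = xe_uc_init(uc);			/* WOPCM (PF only) + minimal GuC load for hwconfig */
	err = xe_uc_init_post_hwconfig(uc);

	/* Probe, every GT reset and every resume: (re)load and auth the firmwares. */
	err = xe_uc_load_hw(uc);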
*/ -int xe_uc_init_hw(struct xe_uc *uc) +int xe_uc_load_hw(struct xe_uc *uc) { int ret; @@ -185,7 +190,7 @@ int xe_uc_init_hw(struct xe_uc *uc) return 0; if (IS_SRIOV_VF(uc_to_xe(uc))) - return vf_uc_init_hw(uc); + return vf_uc_load_hw(uc); ret = xe_huc_upload(&uc->huc); if (ret) @@ -201,15 +206,15 @@ int xe_uc_init_hw(struct xe_uc *uc) ret = xe_gt_record_default_lrcs(uc_to_gt(uc)); if (ret) - return ret; + goto err_out; ret = xe_guc_post_load_init(&uc->guc); if (ret) - return ret; + goto err_out; ret = xe_guc_pc_start(&uc->guc.pc); if (ret) - return ret; + goto err_out; xe_guc_engine_activity_enable_stats(&uc->guc); @@ -221,11 +226,10 @@ int xe_uc_init_hw(struct xe_uc *uc) xe_gsc_load_start(&uc->gsc); return 0; -} -int xe_uc_fini_hw(struct xe_uc *uc) -{ - return xe_uc_sanitize_reset(uc); +err_out: + xe_guc_sanitize(&uc->guc); + return ret; } int xe_uc_reset_prepare(struct xe_uc *uc) diff --git a/drivers/gpu/drm/xe/xe_uc.h b/drivers/gpu/drm/xe/xe_uc.h index c23e6f5e2514..21c9306098cf 100644 --- a/drivers/gpu/drm/xe/xe_uc.h +++ b/drivers/gpu/drm/xe/xe_uc.h @@ -8,11 +8,10 @@ struct xe_uc; +int xe_uc_init_noalloc(struct xe_uc *uc); int xe_uc_init(struct xe_uc *uc); -int xe_uc_init_hwconfig(struct xe_uc *uc); int xe_uc_init_post_hwconfig(struct xe_uc *uc); -int xe_uc_init_hw(struct xe_uc *uc); -int xe_uc_fini_hw(struct xe_uc *uc); +int xe_uc_load_hw(struct xe_uc *uc); void xe_uc_gucrc_disable(struct xe_uc *uc); int xe_uc_reset_prepare(struct xe_uc *uc); void xe_uc_stop_prepare(struct xe_uc *uc); diff --git a/drivers/gpu/drm/xe/xe_uc_fw.c b/drivers/gpu/drm/xe/xe_uc_fw.c index 6d0869518652..622b76078567 100644 --- a/drivers/gpu/drm/xe/xe_uc_fw.c +++ b/drivers/gpu/drm/xe/xe_uc_fw.c @@ -115,10 +115,11 @@ struct fw_blobs_by_type { #define XE_GT_TYPE_ANY XE_GT_TYPE_UNINITIALIZED #define XE_GUC_FIRMWARE_DEFS(fw_def, mmp_ver, major_ver) \ - fw_def(BATTLEMAGE, GT_TYPE_ANY, major_ver(xe, guc, bmg, 70, 44, 1)) \ - fw_def(LUNARLAKE, GT_TYPE_ANY, major_ver(xe, guc, lnl, 70, 44, 1)) \ + fw_def(PANTHERLAKE, GT_TYPE_ANY, major_ver(xe, guc, ptl, 70, 49, 4)) \ + fw_def(BATTLEMAGE, GT_TYPE_ANY, major_ver(xe, guc, bmg, 70, 49, 4)) \ + fw_def(LUNARLAKE, GT_TYPE_ANY, major_ver(xe, guc, lnl, 70, 45, 2)) \ fw_def(METEORLAKE, GT_TYPE_ANY, major_ver(i915, guc, mtl, 70, 44, 1)) \ - fw_def(DG2, GT_TYPE_ANY, major_ver(i915, guc, dg2, 70, 44, 1)) \ + fw_def(DG2, GT_TYPE_ANY, major_ver(i915, guc, dg2, 70, 45, 2)) \ fw_def(DG1, GT_TYPE_ANY, major_ver(i915, guc, dg1, 70, 44, 1)) \ fw_def(ALDERLAKE_N, GT_TYPE_ANY, major_ver(i915, guc, tgl, 70, 44, 1)) \ fw_def(ALDERLAKE_P, GT_TYPE_ANY, major_ver(i915, guc, adlp, 70, 44, 1)) \ @@ -127,6 +128,7 @@ struct fw_blobs_by_type { fw_def(TIGERLAKE, GT_TYPE_ANY, major_ver(i915, guc, tgl, 70, 44, 1)) #define XE_HUC_FIRMWARE_DEFS(fw_def, mmp_ver, no_ver) \ + fw_def(PANTHERLAKE, GT_TYPE_ANY, no_ver(xe, huc, ptl)) \ fw_def(BATTLEMAGE, GT_TYPE_ANY, no_ver(xe, huc, bmg)) \ fw_def(LUNARLAKE, GT_TYPE_ANY, no_ver(xe, huc, lnl)) \ fw_def(METEORLAKE, GT_TYPE_ANY, no_ver(i915, huc_gsc, mtl)) \ @@ -326,7 +328,7 @@ static void uc_fw_fini(struct drm_device *drm, void *arg) xe_uc_fw_change_status(uc_fw, XE_UC_FIRMWARE_SELECTED); } -static int guc_read_css_info(struct xe_uc_fw *uc_fw, struct uc_css_header *css) +static int guc_read_css_info(struct xe_uc_fw *uc_fw, struct uc_css_guc_info *guc_info) { struct xe_gt *gt = uc_fw_to_gt(uc_fw); struct xe_uc_fw_version *release = &uc_fw->versions.found[XE_UC_FW_VER_RELEASE]; @@ -341,11 +343,12 @@ static int guc_read_css_info(struct xe_uc_fw *uc_fw, struct 
uc_css_header *css) return -EINVAL; } - compatibility->major = FIELD_GET(CSS_SW_VERSION_UC_MAJOR, css->submission_version); - compatibility->minor = FIELD_GET(CSS_SW_VERSION_UC_MINOR, css->submission_version); - compatibility->patch = FIELD_GET(CSS_SW_VERSION_UC_PATCH, css->submission_version); + compatibility->major = FIELD_GET(CSS_SW_VERSION_UC_MAJOR, guc_info->submission_version); + compatibility->minor = FIELD_GET(CSS_SW_VERSION_UC_MINOR, guc_info->submission_version); + compatibility->patch = FIELD_GET(CSS_SW_VERSION_UC_PATCH, guc_info->submission_version); - uc_fw->private_data_size = css->private_data_size; + uc_fw->build_type = FIELD_GET(CSS_UKERNEL_INFO_BUILDTYPE, guc_info->ukernel_info); + uc_fw->private_data_size = guc_info->private_data_size; return 0; } @@ -414,8 +417,8 @@ static int parse_css_header(struct xe_uc_fw *uc_fw, const void *fw_data, size_t css = (struct uc_css_header *)fw_data; /* Check integrity of size values inside CSS header */ - size = (css->header_size_dw - css->key_size_dw - css->modulus_size_dw - - css->exponent_size_dw) * sizeof(u32); + size = (css->header_size_dw - css->rsa_info.key_size_dw - css->rsa_info.modulus_size_dw - + css->rsa_info.exponent_size_dw) * sizeof(u32); if (unlikely(size != sizeof(struct uc_css_header))) { drm_warn(&xe->drm, "%s firmware %s: unexpected header size: %zu != %zu\n", @@ -428,7 +431,7 @@ static int parse_css_header(struct xe_uc_fw *uc_fw, const void *fw_data, size_t uc_fw->ucode_size = (css->size_dw - css->header_size_dw) * sizeof(u32); /* now RSA */ - uc_fw->rsa_size = css->key_size_dw * sizeof(u32); + uc_fw->rsa_size = css->rsa_info.key_size_dw * sizeof(u32); /* At least, it should have header, uCode and RSA. Size of all three. */ size = sizeof(struct uc_css_header) + uc_fw->ucode_size + @@ -441,12 +444,12 @@ static int parse_css_header(struct xe_uc_fw *uc_fw, const void *fw_data, size_t } /* Get version numbers from the CSS header */ - release->major = FIELD_GET(CSS_SW_VERSION_UC_MAJOR, css->sw_version); - release->minor = FIELD_GET(CSS_SW_VERSION_UC_MINOR, css->sw_version); - release->patch = FIELD_GET(CSS_SW_VERSION_UC_PATCH, css->sw_version); + release->major = FIELD_GET(CSS_SW_VERSION_UC_MAJOR, css->guc_info.sw_version); + release->minor = FIELD_GET(CSS_SW_VERSION_UC_MINOR, css->guc_info.sw_version); + release->patch = FIELD_GET(CSS_SW_VERSION_UC_PATCH, css->guc_info.sw_version); if (uc_fw->type == XE_UC_FW_TYPE_GUC) - return guc_read_css_info(uc_fw, css); + return guc_read_css_info(uc_fw, &css->guc_info); return 0; } diff --git a/drivers/gpu/drm/xe/xe_uc_fw_abi.h b/drivers/gpu/drm/xe/xe_uc_fw_abi.h index 87ade41209d0..3c9a63d13032 100644 --- a/drivers/gpu/drm/xe/xe_uc_fw_abi.h +++ b/drivers/gpu/drm/xe/xe_uc_fw_abi.h @@ -44,6 +44,39 @@ * in fw. So driver will load a truncated firmware in this case. 
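
With the CSS header split into rsa_info and guc_info sub-structures (added in the xe_uc_fw_abi.h hunk below), the GuC-specific fields are now read through the guc_info view. A minimal decode sketch, assuming 'guc_info' points at the guc_info member of a parsed header and 'xe' is the device; it only uses the masks defined in this patch:

/* Illustrative sketch, not part of this patch: decoding the release version
 * and build type from the GuC-specific CSS fields.
 */
u8 major = FIELD_GET(CSS_SW_VERSION_UC_MAJOR, guc_info->sw_version);
u8 minor = FIELD_GET(CSS_SW_VERSION_UC_MINOR, guc_info->sw_version);
u8 patch = FIELD_GET(CSS_SW_VERSION_UC_PATCH, guc_info->sw_version);
u32 build = FIELD_GET(CSS_UKERNEL_INFO_BUILDTYPE, guc_info->ukernel_info);

if (build != CSS_UKERNEL_INFO_BUILDTYPE_PROD)
	drm_dbg(&xe->drm, "GuC %u.%u.%u is a pre-production or debug build\n",
		major, minor, patch);
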
*/ +struct uc_css_rsa_info { + u32 key_size_dw; + u32 modulus_size_dw; + u32 exponent_size_dw; +} __packed; + +struct uc_css_guc_info { + u32 time; +#define CSS_TIME_HOUR (0xFF << 0) +#define CSS_TIME_MIN (0xFF << 8) +#define CSS_TIME_SEC (0xFFFF << 16) + u32 reserved0[5]; + u32 sw_version; +#define CSS_SW_VERSION_UC_MAJOR (0xFF << 16) +#define CSS_SW_VERSION_UC_MINOR (0xFF << 8) +#define CSS_SW_VERSION_UC_PATCH (0xFF << 0) + u32 submission_version; + u32 reserved1[11]; + u32 header_info; +#define CSS_HEADER_INFO_SVN (0xFF) +#define CSS_HEADER_INFO_COPY_VALID (0x1 << 31) + u32 private_data_size; + u32 ukernel_info; +#define CSS_UKERNEL_INFO_DEVICEID (0xFFFF << 16) +#define CSS_UKERNEL_INFO_PRODKEY (0xFF << 8) +#define CSS_UKERNEL_INFO_BUILDTYPE (0x3 << 2) +#define CSS_UKERNEL_INFO_BUILDTYPE_PROD 0 +#define CSS_UKERNEL_INFO_BUILDTYPE_PREPROD 1 +#define CSS_UKERNEL_INFO_BUILDTYPE_DEBUG 2 +#define CSS_UKERNEL_INFO_ENCSTATUS (0x1 << 1) +#define CSS_UKERNEL_INFO_COPY_VALID (0x1 << 0) +} __packed; + struct uc_css_header { u32 module_type; /* @@ -52,36 +85,21 @@ struct uc_css_header { */ u32 header_size_dw; u32 header_version; - u32 module_id; + u32 reserved0; u32 module_vendor; u32 date; -#define CSS_DATE_DAY (0xFF << 0) -#define CSS_DATE_MONTH (0xFF << 8) -#define CSS_DATE_YEAR (0xFFFF << 16) +#define CSS_DATE_DAY (0xFF << 0) +#define CSS_DATE_MONTH (0xFF << 8) +#define CSS_DATE_YEAR (0xFFFF << 16) u32 size_dw; /* uCode plus header_size_dw */ - u32 key_size_dw; - u32 modulus_size_dw; - u32 exponent_size_dw; - u32 time; -#define CSS_TIME_HOUR (0xFF << 0) -#define CSS_DATE_MIN (0xFF << 8) -#define CSS_DATE_SEC (0xFFFF << 16) - char username[8]; - char buildnumber[12]; - u32 sw_version; -#define CSS_SW_VERSION_UC_MAJOR (0xFF << 16) -#define CSS_SW_VERSION_UC_MINOR (0xFF << 8) -#define CSS_SW_VERSION_UC_PATCH (0xFF << 0) union { - u32 submission_version; /* only applies to GuC */ - u32 reserved2; + u32 reserved1[3]; + struct uc_css_rsa_info rsa_info; }; - u32 reserved0[12]; union { - u32 private_data_size; /* only applies to GuC */ - u32 reserved1; + u32 reserved2[22]; + struct uc_css_guc_info guc_info; }; - u32 header_info; } __packed; static_assert(sizeof(struct uc_css_header) == 128); @@ -318,4 +336,70 @@ struct gsc_manifest_header { u32 exponent_size; /* in dwords */ } __packed; +/** + * DOC: Late binding Firmware Layout + * + * The Late binding binary starts with FPT header, which contains locations + * of various partitions of the binary. Here we're interested in finding out + * manifest version. To the manifest version, we need to locate CPD header + * one of the entry in CPD header points to manifest header. Manifest header + * contains the version. + * + * +================================================+ + * | FPT Header | + * +================================================+ + * | FPT entries[] | + * | entry1 | + * | ... | + * | entryX | + * | "LTES" | + * | ... | + * | offset >-----------------------------|------o + * +================================================+ | + * | + * +================================================+ | + * | CPD Header |<-----o + * +================================================+ + * | CPD entries[] | + * | entry1 | + * | ... | + * | entryX | + * | "LTES.man" | + * | ... | + * | offset >----------------------------|------o + * +================================================+ | + * | + * +================================================+ | + * | Manifest Header |<-----o + * | ... | + * | FW version | + * | ... 
| + * +================================================+ + */ + +/* FPT Headers */ +struct csc_fpt_header { + u32 header_marker; +#define CSC_FPT_HEADER_MARKER 0x54504624 + u32 num_of_entries; + u8 header_version; + u8 entry_version; + u8 header_length; /* in bytes */ + u8 flags; + u16 ticks_to_add; + u16 tokens_to_add; + u32 uma_size; + u32 crc32; + struct gsc_version fitc_version; +} __packed; + +struct csc_fpt_entry { + u8 name[4]; /* partition name */ + u32 reserved1; + u32 offset; /* offset from beginning of CSE region */ + u32 length; /* partition length in bytes */ + u32 reserved2[3]; + u32 partition_flags; +} __packed; + #endif diff --git a/drivers/gpu/drm/xe/xe_uc_fw_types.h b/drivers/gpu/drm/xe/xe_uc_fw_types.h index 914026015019..77a1dcf8b4ed 100644 --- a/drivers/gpu/drm/xe/xe_uc_fw_types.h +++ b/drivers/gpu/drm/xe/xe_uc_fw_types.h @@ -147,6 +147,9 @@ struct xe_uc_fw { /** @private_data_size: size of private data found in uC css header */ u32 private_data_size; + + /** @build_type: Firmware build type (see CSS_UKERNEL_INFO_BUILDTYPE for definitions) */ + u32 build_type; }; #endif diff --git a/drivers/gpu/drm/xe/xe_userptr.c b/drivers/gpu/drm/xe/xe_userptr.c new file mode 100644 index 000000000000..f16e92cd8090 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_userptr.c @@ -0,0 +1,320 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2025 Intel Corporation + */ + +#include "xe_userptr.h" + +#include <linux/mm.h> + +#include "xe_trace_bo.h" + +/** + * xe_vma_userptr_check_repin() - Advisory check for repin needed + * @uvma: The userptr vma + * + * Check if the userptr vma has been invalidated since last successful + * repin. The check is advisory only and can the function can be called + * without the vm->svm.gpusvm.notifier_lock held. There is no guarantee that the + * vma userptr will remain valid after a lockless check, so typically + * the call needs to be followed by a proper check under the notifier_lock. + * + * Return: 0 if userptr vma is valid, -EAGAIN otherwise; repin recommended. + */ +int xe_vma_userptr_check_repin(struct xe_userptr_vma *uvma) +{ + return mmu_interval_check_retry(&uvma->userptr.notifier, + uvma->userptr.pages.notifier_seq) ? + -EAGAIN : 0; +} + +/** + * __xe_vm_userptr_needs_repin() - Check whether the VM does have userptrs + * that need repinning. + * @vm: The VM. + * + * This function checks for whether the VM has userptrs that need repinning, + * and provides a release-type barrier on the svm.gpusvm.notifier_lock after + * checking. + * + * Return: 0 if there are no userptrs needing repinning, -EAGAIN if there are. + */ +int __xe_vm_userptr_needs_repin(struct xe_vm *vm) +{ + lockdep_assert_held_read(&vm->svm.gpusvm.notifier_lock); + + return (list_empty(&vm->userptr.repin_list) && + list_empty(&vm->userptr.invalidated)) ? 
0 : -EAGAIN; +} + +int xe_vma_userptr_pin_pages(struct xe_userptr_vma *uvma) +{ + struct xe_vma *vma = &uvma->vma; + struct xe_vm *vm = xe_vma_vm(vma); + struct xe_device *xe = vm->xe; + struct drm_gpusvm_ctx ctx = { + .read_only = xe_vma_read_only(vma), + .device_private_page_owner = NULL, + }; + + lockdep_assert_held(&vm->lock); + xe_assert(xe, xe_vma_is_userptr(vma)); + + if (vma->gpuva.flags & XE_VMA_DESTROYED) + return 0; + + return drm_gpusvm_get_pages(&vm->svm.gpusvm, &uvma->userptr.pages, + uvma->userptr.notifier.mm, + &uvma->userptr.notifier, + xe_vma_userptr(vma), + xe_vma_userptr(vma) + xe_vma_size(vma), + &ctx); +} + +static void __vma_userptr_invalidate(struct xe_vm *vm, struct xe_userptr_vma *uvma) +{ + struct xe_userptr *userptr = &uvma->userptr; + struct xe_vma *vma = &uvma->vma; + struct dma_resv_iter cursor; + struct dma_fence *fence; + struct drm_gpusvm_ctx ctx = { + .in_notifier = true, + .read_only = xe_vma_read_only(vma), + }; + long err; + + /* + * Tell exec and rebind worker they need to repin and rebind this + * userptr. + */ + if (!xe_vm_in_fault_mode(vm) && + !(vma->gpuva.flags & XE_VMA_DESTROYED)) { + spin_lock(&vm->userptr.invalidated_lock); + list_move_tail(&userptr->invalidate_link, + &vm->userptr.invalidated); + spin_unlock(&vm->userptr.invalidated_lock); + } + + /* + * Preempt fences turn into schedule disables, pipeline these. + * Note that even in fault mode, we need to wait for binds and + * unbinds to complete, and those are attached as BOOKMARK fences + * to the vm. + */ + dma_resv_iter_begin(&cursor, xe_vm_resv(vm), + DMA_RESV_USAGE_BOOKKEEP); + dma_resv_for_each_fence_unlocked(&cursor, fence) + dma_fence_enable_sw_signaling(fence); + dma_resv_iter_end(&cursor); + + err = dma_resv_wait_timeout(xe_vm_resv(vm), + DMA_RESV_USAGE_BOOKKEEP, + false, MAX_SCHEDULE_TIMEOUT); + XE_WARN_ON(err <= 0); + + if (xe_vm_in_fault_mode(vm) && userptr->initial_bind) { + err = xe_vm_invalidate_vma(vma); + XE_WARN_ON(err); + } + + drm_gpusvm_unmap_pages(&vm->svm.gpusvm, &uvma->userptr.pages, + xe_vma_size(vma) >> PAGE_SHIFT, &ctx); +} + +static bool vma_userptr_invalidate(struct mmu_interval_notifier *mni, + const struct mmu_notifier_range *range, + unsigned long cur_seq) +{ + struct xe_userptr_vma *uvma = container_of(mni, typeof(*uvma), userptr.notifier); + struct xe_vma *vma = &uvma->vma; + struct xe_vm *vm = xe_vma_vm(vma); + + xe_assert(vm->xe, xe_vma_is_userptr(vma)); + trace_xe_vma_userptr_invalidate(vma); + + if (!mmu_notifier_range_blockable(range)) + return false; + + vm_dbg(&xe_vma_vm(vma)->xe->drm, + "NOTIFIER: addr=0x%016llx, range=0x%016llx", + xe_vma_start(vma), xe_vma_size(vma)); + + down_write(&vm->svm.gpusvm.notifier_lock); + mmu_interval_set_seq(mni, cur_seq); + + __vma_userptr_invalidate(vm, uvma); + up_write(&vm->svm.gpusvm.notifier_lock); + trace_xe_vma_userptr_invalidate_complete(vma); + + return true; +} + +static const struct mmu_interval_notifier_ops vma_userptr_notifier_ops = { + .invalidate = vma_userptr_invalidate, +}; + +#if IS_ENABLED(CONFIG_DRM_XE_USERPTR_INVAL_INJECT) +/** + * xe_vma_userptr_force_invalidate() - force invalidate a userptr + * @uvma: The userptr vma to invalidate + * + * Perform a forced userptr invalidation for testing purposes. 
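
The xe_vma_userptr_check_repin() kernel-doc above describes an advisory, lockless check that must be confirmed under the GPU SVM notifier lock before the result is relied upon. A sketch of that pattern from a hypothetical caller; example_pin_and_confirm() is not part of the patch and omits list bookkeeping:

/* Illustrative sketch, not part of this patch: advisory check, pin, then
 * confirm under the notifier lock before publishing the binding.
 */
static int example_pin_and_confirm(struct xe_vm *vm, struct xe_userptr_vma *uvma)
{
	int err;

	if (xe_vma_userptr_check_repin(uvma))	/* lockless, advisory only */
		return -EAGAIN;

	err = xe_vma_userptr_pin_pages(uvma);
	if (err)
		return err;

	down_read(&vm->svm.gpusvm.notifier_lock);
	err = xe_vma_userptr_check_repin(uvma);
	up_read(&vm->svm.gpusvm.notifier_lock);

	return err;	/* -EAGAIN here means the notifier fired again; retry */
}
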
+ */ +void xe_vma_userptr_force_invalidate(struct xe_userptr_vma *uvma) +{ + struct xe_vm *vm = xe_vma_vm(&uvma->vma); + + /* Protect against concurrent userptr pinning */ + lockdep_assert_held(&vm->lock); + /* Protect against concurrent notifiers */ + lockdep_assert_held(&vm->svm.gpusvm.notifier_lock); + /* + * Protect against concurrent instances of this function and + * the critical exec sections + */ + xe_vm_assert_held(vm); + + if (!mmu_interval_read_retry(&uvma->userptr.notifier, + uvma->userptr.pages.notifier_seq)) + uvma->userptr.pages.notifier_seq -= 2; + __vma_userptr_invalidate(vm, uvma); +} +#endif + +int xe_vm_userptr_pin(struct xe_vm *vm) +{ + struct xe_userptr_vma *uvma, *next; + int err = 0; + + xe_assert(vm->xe, !xe_vm_in_fault_mode(vm)); + lockdep_assert_held_write(&vm->lock); + + /* Collect invalidated userptrs */ + spin_lock(&vm->userptr.invalidated_lock); + xe_assert(vm->xe, list_empty(&vm->userptr.repin_list)); + list_for_each_entry_safe(uvma, next, &vm->userptr.invalidated, + userptr.invalidate_link) { + list_del_init(&uvma->userptr.invalidate_link); + list_add_tail(&uvma->userptr.repin_link, + &vm->userptr.repin_list); + } + spin_unlock(&vm->userptr.invalidated_lock); + + /* Pin and move to bind list */ + list_for_each_entry_safe(uvma, next, &vm->userptr.repin_list, + userptr.repin_link) { + err = xe_vma_userptr_pin_pages(uvma); + if (err == -EFAULT) { + list_del_init(&uvma->userptr.repin_link); + /* + * We might have already done the pin once already, but + * then had to retry before the re-bind happened, due + * some other condition in the caller, but in the + * meantime the userptr got dinged by the notifier such + * that we need to revalidate here, but this time we hit + * the EFAULT. In such a case make sure we remove + * ourselves from the rebind list to avoid going down in + * flames. + */ + if (!list_empty(&uvma->vma.combined_links.rebind)) + list_del_init(&uvma->vma.combined_links.rebind); + + /* Wait for pending binds */ + xe_vm_lock(vm, false); + dma_resv_wait_timeout(xe_vm_resv(vm), + DMA_RESV_USAGE_BOOKKEEP, + false, MAX_SCHEDULE_TIMEOUT); + + down_read(&vm->svm.gpusvm.notifier_lock); + err = xe_vm_invalidate_vma(&uvma->vma); + up_read(&vm->svm.gpusvm.notifier_lock); + xe_vm_unlock(vm); + if (err) + break; + } else { + if (err) + break; + + list_del_init(&uvma->userptr.repin_link); + list_move_tail(&uvma->vma.combined_links.rebind, + &vm->rebind_list); + } + } + + if (err) { + down_write(&vm->svm.gpusvm.notifier_lock); + spin_lock(&vm->userptr.invalidated_lock); + list_for_each_entry_safe(uvma, next, &vm->userptr.repin_list, + userptr.repin_link) { + list_del_init(&uvma->userptr.repin_link); + list_move_tail(&uvma->userptr.invalidate_link, + &vm->userptr.invalidated); + } + spin_unlock(&vm->userptr.invalidated_lock); + up_write(&vm->svm.gpusvm.notifier_lock); + } + return err; +} + +/** + * xe_vm_userptr_check_repin() - Check whether the VM might have userptrs + * that need repinning. + * @vm: The VM. + * + * This function does an advisory check for whether the VM has userptrs that + * need repinning. + * + * Return: 0 if there are no indications of userptrs needing repinning, + * -EAGAIN if there are. + */ +int xe_vm_userptr_check_repin(struct xe_vm *vm) +{ + return (list_empty_careful(&vm->userptr.repin_list) && + list_empty_careful(&vm->userptr.invalidated)) ? 
0 : -EAGAIN; +} + +int xe_userptr_setup(struct xe_userptr_vma *uvma, unsigned long start, + unsigned long range) +{ + struct xe_userptr *userptr = &uvma->userptr; + int err; + + INIT_LIST_HEAD(&userptr->invalidate_link); + INIT_LIST_HEAD(&userptr->repin_link); + + err = mmu_interval_notifier_insert(&userptr->notifier, current->mm, + start, range, + &vma_userptr_notifier_ops); + if (err) + return err; + + userptr->pages.notifier_seq = LONG_MAX; + + return 0; +} + +void xe_userptr_remove(struct xe_userptr_vma *uvma) +{ + struct xe_vm *vm = xe_vma_vm(&uvma->vma); + struct xe_userptr *userptr = &uvma->userptr; + + drm_gpusvm_free_pages(&vm->svm.gpusvm, &uvma->userptr.pages, + xe_vma_size(&uvma->vma) >> PAGE_SHIFT); + + /* + * Since userptr pages are not pinned, we can't remove + * the notifier until we're sure the GPU is not accessing + * them anymore + */ + mmu_interval_notifier_remove(&userptr->notifier); +} + +void xe_userptr_destroy(struct xe_userptr_vma *uvma) +{ + struct xe_vm *vm = xe_vma_vm(&uvma->vma); + + spin_lock(&vm->userptr.invalidated_lock); + xe_assert(vm->xe, list_empty(&uvma->userptr.repin_link)); + list_del(&uvma->userptr.invalidate_link); + spin_unlock(&vm->userptr.invalidated_lock); +} diff --git a/drivers/gpu/drm/xe/xe_userptr.h b/drivers/gpu/drm/xe/xe_userptr.h new file mode 100644 index 000000000000..ef801234991e --- /dev/null +++ b/drivers/gpu/drm/xe/xe_userptr.h @@ -0,0 +1,107 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2025 Intel Corporation + */ + +#ifndef _XE_USERPTR_H_ +#define _XE_USERPTR_H_ + +#include <linux/list.h> +#include <linux/mutex.h> +#include <linux/notifier.h> +#include <linux/scatterlist.h> +#include <linux/spinlock.h> + +#include <drm/drm_gpusvm.h> + +struct xe_vm; +struct xe_vma; +struct xe_userptr_vma; + +/** struct xe_userptr_vm - User pointer VM level state */ +struct xe_userptr_vm { + /** + * @userptr.repin_list: list of VMAs which are user pointers, + * and needs repinning. Protected by @lock. + */ + struct list_head repin_list; + /** + * @userptr.invalidated_lock: Protects the + * @userptr.invalidated list. + */ + spinlock_t invalidated_lock; + /** + * @userptr.invalidated: List of invalidated userptrs, not yet + * picked + * up for revalidation. Protected from access with the + * @invalidated_lock. Removing items from the list + * additionally requires @lock in write mode, and adding + * items to the list requires either the @svm.gpusvm.notifier_lock in + * write mode, OR @lock in write mode. + */ + struct list_head invalidated; +}; + +/** struct xe_userptr - User pointer */ +struct xe_userptr { + /** @invalidate_link: Link for the vm::userptr.invalidated list */ + struct list_head invalidate_link; + /** @userptr: link into VM repin list if userptr. */ + struct list_head repin_link; + /** + * @pages: gpusvm pages for this user pointer. + */ + struct drm_gpusvm_pages pages; + /** + * @notifier: MMU notifier for user pointer (invalidation call back) + */ + struct mmu_interval_notifier notifier; + + /** + * @initial_bind: user pointer has been bound at least once. + * write: vm->svm.gpusvm.notifier_lock in read mode and vm->resv held. + * read: vm->svm.gpusvm.notifier_lock in write mode or vm->resv held. 
+ */ + bool initial_bind; +#if IS_ENABLED(CONFIG_DRM_XE_USERPTR_INVAL_INJECT) + u32 divisor; +#endif +}; + +#if IS_ENABLED(CONFIG_DRM_GPUSVM) +void xe_userptr_remove(struct xe_userptr_vma *uvma); +int xe_userptr_setup(struct xe_userptr_vma *uvma, unsigned long start, + unsigned long range); +void xe_userptr_destroy(struct xe_userptr_vma *uvma); + +int xe_vm_userptr_pin(struct xe_vm *vm); +int __xe_vm_userptr_needs_repin(struct xe_vm *vm); +int xe_vm_userptr_check_repin(struct xe_vm *vm); +int xe_vma_userptr_pin_pages(struct xe_userptr_vma *uvma); +int xe_vma_userptr_check_repin(struct xe_userptr_vma *uvma); +#else +static inline void xe_userptr_remove(struct xe_userptr_vma *uvma) {} + +static inline int xe_userptr_setup(struct xe_userptr_vma *uvma, + unsigned long start, unsigned long range) +{ + return -ENODEV; +} + +static inline void xe_userptr_destroy(struct xe_userptr_vma *uvma) {} + +static inline int xe_vm_userptr_pin(struct xe_vm *vm) { return 0; } +static inline int __xe_vm_userptr_needs_repin(struct xe_vm *vm) { return 0; } +static inline int xe_vm_userptr_check_repin(struct xe_vm *vm) { return 0; } +static inline int xe_vma_userptr_pin_pages(struct xe_userptr_vma *uvma) { return -ENODEV; } +static inline int xe_vma_userptr_check_repin(struct xe_userptr_vma *uvma) { return -ENODEV; }; +#endif + +#if IS_ENABLED(CONFIG_DRM_XE_USERPTR_INVAL_INJECT) +void xe_vma_userptr_force_invalidate(struct xe_userptr_vma *uvma); +#else +static inline void xe_vma_userptr_force_invalidate(struct xe_userptr_vma *uvma) +{ +} +#endif +#endif diff --git a/drivers/gpu/drm/xe/xe_validation.c b/drivers/gpu/drm/xe/xe_validation.c new file mode 100644 index 000000000000..826cd09966ef --- /dev/null +++ b/drivers/gpu/drm/xe/xe_validation.c @@ -0,0 +1,278 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2024 Intel Corporation + */ +#include "xe_bo.h" +#include <drm/drm_exec.h> +#include <drm/drm_gem.h> +#include <drm/drm_gpuvm.h> + +#include "xe_assert.h" +#include "xe_validation.h" + +#ifdef CONFIG_DRM_XE_DEBUG +/** + * xe_validation_assert_exec() - Assert that the drm_exec pointer is suitable + * for validation. + * @xe: Pointer to the xe device. + * @exec: The drm_exec pointer to check. + * @obj: Pointer to the object subject to validation. + * + * NULL exec pointers are not allowed. + * For XE_VALIDATION_UNIMPLEMENTED, no checking. + * For XE_VLIDATION_OPT_OUT, check that the caller is a kunit test + * For XE_VALIDATION_UNSUPPORTED, check that the object subject to + * validation is a dma-buf, for which support for ww locking is + * not in place in the dma-buf layer. 
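
The sentinel exec pointers referenced in this kernel-doc are defined in xe_validation.h further down. A brief sketch of how a not-yet-converted call site is assumed to use them ('xe' and 'bo' are placeholder locals):

/* Illustrative sketch, not part of this patch: a path without a real drm_exec
 * transaction passes a sentinel, and the assert accepts it instead of
 * tripping on a NULL or unrelated pointer.
 */
struct drm_exec *exec = XE_VALIDATION_UNIMPLEMENTED;

xe_validation_assert_exec(xe, exec, &bo->ttm.base);
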
+ */ +void xe_validation_assert_exec(const struct xe_device *xe, + const struct drm_exec *exec, + const struct drm_gem_object *obj) +{ + xe_assert(xe, exec); + if (IS_ERR(exec)) { + switch (PTR_ERR(exec)) { + case __XE_VAL_UNIMPLEMENTED: + break; + case __XE_VAL_UNSUPPORTED: + xe_assert(xe, !!obj->dma_buf); + break; +#if IS_ENABLED(CONFIG_KUNIT) + case __XE_VAL_OPT_OUT: + xe_assert(xe, current->kunit_test); + break; +#endif + default: + xe_assert(xe, false); + } + } +} +#endif + +static int xe_validation_lock(struct xe_validation_ctx *ctx) +{ + struct xe_validation_device *val = ctx->val; + int ret = 0; + + if (ctx->val_flags.interruptible) { + if (ctx->request_exclusive) + ret = down_write_killable(&val->lock); + else + ret = down_read_interruptible(&val->lock); + } else { + if (ctx->request_exclusive) + down_write(&val->lock); + else + down_read(&val->lock); + } + + if (!ret) { + ctx->lock_held = true; + ctx->lock_held_exclusive = ctx->request_exclusive; + } + + return ret; +} + +static int xe_validation_trylock(struct xe_validation_ctx *ctx) +{ + struct xe_validation_device *val = ctx->val; + bool locked; + + if (ctx->request_exclusive) + locked = down_write_trylock(&val->lock); + else + locked = down_read_trylock(&val->lock); + + if (locked) { + ctx->lock_held = true; + ctx->lock_held_exclusive = ctx->request_exclusive; + } + + return locked ? 0 : -EWOULDBLOCK; +} + +static void xe_validation_unlock(struct xe_validation_ctx *ctx) +{ + if (!ctx->lock_held) + return; + + if (ctx->lock_held_exclusive) + up_write(&ctx->val->lock); + else + up_read(&ctx->val->lock); + + ctx->lock_held = false; +} + +/** + * xe_validation_ctx_init() - Initialize an xe_validation_ctx + * @ctx: The xe_validation_ctx to initialize. + * @val: The xe_validation_device representing the validation domain. + * @exec: The struct drm_exec to use for the transaction. May be NULL. + * @flags: The flags to use for initialization. + * + * Initialize and lock a an xe_validation transaction using the validation domain + * represented by @val. Also initialize the drm_exec object forwarding parts of + * @flags to the drm_exec initialization. The @flags.exclusive flag should + * typically be set to false to avoid locking out other validators from the + * domain until an OOM is hit. For testing- or final attempt purposes it can, + * however, be set to true. + * + * Return: %0 on success, %-EINTR if interruptible initial locking failed with a + * signal pending. If @flags.no_block is set to true, a failed trylock + * returns %-EWOULDBLOCK. + */ +int xe_validation_ctx_init(struct xe_validation_ctx *ctx, struct xe_validation_device *val, + struct drm_exec *exec, const struct xe_val_flags flags) +{ + int ret; + + ctx->exec = exec; + ctx->val = val; + ctx->lock_held = false; + ctx->lock_held_exclusive = false; + ctx->request_exclusive = flags.exclusive; + ctx->val_flags = flags; + ctx->exec_flags = 0; + ctx->nr = 0; + + if (flags.no_block) + ret = xe_validation_trylock(ctx); + else + ret = xe_validation_lock(ctx); + if (ret) + return ret; + + if (exec) { + if (flags.interruptible) + ctx->exec_flags |= DRM_EXEC_INTERRUPTIBLE_WAIT; + if (flags.exec_ignore_duplicates) + ctx->exec_flags |= DRM_EXEC_IGNORE_DUPLICATES; + drm_exec_init(exec, ctx->exec_flags, ctx->nr); + } + + return 0; +} + +#ifdef CONFIG_DEBUG_WW_MUTEX_SLOWPATH +/* + * This abuses both drm_exec and ww_mutex internals and should be + * replaced by checking for -EDEADLK when we can make TTM + * stop converting -EDEADLK to -ENOMEM. 
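
For callers that must not sleep waiting for the validation domain, the @no_block flag documented above turns the initial locking into a trylock. A short sketch ('ctx', 'exec' and 'xe' are placeholder locals):

/* Illustrative sketch, not part of this patch: non-blocking acquisition of
 * the validation domain; -EWOULDBLOCK means the domain is busy.
 */
err = xe_validation_ctx_init(&ctx, &xe->val, &exec,
			     (struct xe_val_flags) { .no_block = true });
if (err == -EWOULDBLOCK)
	return err;	/* back off and try again later */
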
+ * An alternative is to not have exhaustive eviction with + * CONFIG_DEBUG_WW_MUTEX_SLOWPATH until that happens. + */ +static bool xe_validation_contention_injected(struct drm_exec *exec) +{ + return !!exec->ticket.contending_lock; +} + +#else + +static bool xe_validation_contention_injected(struct drm_exec *exec) +{ + return false; +} + +#endif + +static bool __xe_validation_should_retry(struct xe_validation_ctx *ctx, int ret) +{ + if (ret == -ENOMEM && + ((ctx->request_exclusive && + xe_validation_contention_injected(ctx->exec)) || + !ctx->request_exclusive)) { + ctx->request_exclusive = true; + return true; + } + + return false; +} + +/** + * xe_validation_exec_lock() - Perform drm_gpuvm_exec_lock within a validation + * transaction. + * @ctx: An uninitialized xe_validation_ctx. + * @vm_exec: An initialized struct vm_exec. + * @val: The validation domain. + * + * The drm_gpuvm_exec_lock() function internally initializes its drm_exec + * transaction and therefore doesn't lend itself very well to be using + * xe_validation_ctx_init(). Provide a helper that takes an uninitialized + * xe_validation_ctx and calls drm_gpuvm_exec_lock() with OOM retry. + * + * Return: %0 on success, negative error code on failure. + */ +int xe_validation_exec_lock(struct xe_validation_ctx *ctx, + struct drm_gpuvm_exec *vm_exec, + struct xe_validation_device *val) +{ + int ret; + + memset(ctx, 0, sizeof(*ctx)); + ctx->exec = &vm_exec->exec; + ctx->exec_flags = vm_exec->flags; + ctx->val = val; + if (ctx->exec_flags & DRM_EXEC_INTERRUPTIBLE_WAIT) + ctx->val_flags.interruptible = 1; + if (ctx->exec_flags & DRM_EXEC_IGNORE_DUPLICATES) + ctx->val_flags.exec_ignore_duplicates = 1; +retry: + ret = xe_validation_lock(ctx); + if (ret) + return ret; + + ret = drm_gpuvm_exec_lock(vm_exec); + if (ret) { + xe_validation_unlock(ctx); + if (__xe_validation_should_retry(ctx, ret)) + goto retry; + } + + return ret; +} + +/** + * xe_validation_ctx_fini() - Finalize a validation transaction + * @ctx: The Validation transaction to finalize. + * + * Finalize a validation transaction and its related drm_exec transaction. + */ +void xe_validation_ctx_fini(struct xe_validation_ctx *ctx) +{ + if (ctx->exec) + drm_exec_fini(ctx->exec); + xe_validation_unlock(ctx); +} + +/** + * xe_validation_should_retry() - Determine if a validation transaction should retry + * @ctx: The validation transaction. + * @ret: Pointer to a return value variable. + * + * Determines whether a validation transaction should retry based on the + * internal transaction state and the return value pointed to by @ret. + * If a validation should be retried, the transaction is prepared for that, + * and the validation locked might be re-locked in exclusive mode, and *@ret + * is set to %0. If the re-locking errors, typically due to interruptible + * locking with signal pending, *@ret is instead set to -EINTR and the + * function returns %false. + * + * Return: %true if validation should be retried, %false otherwise. 
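
Taken together, xe_validation_ctx_init(), xe_validation_retry_on_oom() (defined in xe_validation.h below) and xe_validation_ctx_fini() form one transaction. A sketch of a complete round trip, loosely modeled on the preempt rebind-worker conversion later in this patch; the standalone function and the bare BO validation are illustrative assumptions:

/* Illustrative sketch, not part of this patch: one validation transaction
 * with OOM retry. On -ENOMEM the transaction is restarted once in exclusive
 * mode so eviction can proceed without competing validators.
 */
static int example_validate_bo(struct xe_device *xe, struct xe_bo *bo)
{
	struct xe_validation_ctx ctx;
	struct drm_exec exec;
	int err;

	err = xe_validation_ctx_init(&ctx, &xe->val, &exec,
				     (struct xe_val_flags) { .interruptible = true });
	if (err)
		return err;

	drm_exec_until_all_locked(&exec) {
		err = drm_exec_lock_obj(&exec, &bo->ttm.base);
		drm_exec_retry_on_contention(&exec);
		if (err)
			break;

		err = xe_bo_validate(bo, NULL, false, &exec);
		drm_exec_retry_on_contention(&exec);
		xe_validation_retry_on_oom(&ctx, &err);
		if (err)
			break;
	}
	xe_validation_ctx_fini(&ctx);

	return err;
}

The same shape can be written more compactly with xe_validation_guard(), as the xe_vma_destroy_unlocked() hunk in xe_vm.c further down does.
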
+ */ +bool xe_validation_should_retry(struct xe_validation_ctx *ctx, int *ret) +{ + if (__xe_validation_should_retry(ctx, *ret)) { + drm_exec_fini(ctx->exec); + *ret = 0; + if (ctx->request_exclusive != ctx->lock_held_exclusive) { + xe_validation_unlock(ctx); + *ret = xe_validation_lock(ctx); + } + drm_exec_init(ctx->exec, ctx->exec_flags, ctx->nr); + return !*ret; + } + + return false; +} diff --git a/drivers/gpu/drm/xe/xe_validation.h b/drivers/gpu/drm/xe/xe_validation.h new file mode 100644 index 000000000000..fec331d791e7 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_validation.h @@ -0,0 +1,192 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2024 Intel Corporation + */ +#ifndef _XE_VALIDATION_H_ +#define _XE_VALIDATION_H_ + +#include <linux/dma-resv.h> +#include <linux/types.h> +#include <linux/rwsem.h> + +struct drm_exec; +struct drm_gem_object; +struct drm_gpuvm_exec; +struct xe_device; + +#ifdef CONFIG_PROVE_LOCKING +/** + * xe_validation_lockdep() - Assert that a drm_exec locking transaction can + * be initialized at this point. + */ +static inline void xe_validation_lockdep(void) +{ + struct ww_acquire_ctx ticket; + + ww_acquire_init(&ticket, &reservation_ww_class); + ww_acquire_fini(&ticket); +} +#else +static inline void xe_validation_lockdep(void) +{ +} +#endif + +/* + * Various values of the drm_exec pointer where we've not (yet) + * implemented full ww locking. + * + * XE_VALIDATION_UNIMPLEMENTED means implementation is pending. + * A lockdep check is made to assure that a drm_exec locking + * transaction can actually take place where the macro is + * used. If this asserts, the exec pointer needs to be assigned + * higher up in the callchain and passed down. + * + * XE_VALIDATION_UNSUPPORTED is for dma-buf code only where + * the dma-buf layer doesn't support WW locking. + * + * XE_VALIDATION_OPT_OUT is for simplification of kunit tests where + * exhaustive eviction isn't necessary. + */ +#define __XE_VAL_UNIMPLEMENTED -EINVAL +#define XE_VALIDATION_UNIMPLEMENTED (xe_validation_lockdep(), \ + (struct drm_exec *)ERR_PTR(__XE_VAL_UNIMPLEMENTED)) + +#define __XE_VAL_UNSUPPORTED -EOPNOTSUPP +#define XE_VALIDATION_UNSUPPORTED ((struct drm_exec *)ERR_PTR(__XE_VAL_UNSUPPORTED)) + +#define __XE_VAL_OPT_OUT -ENOMEM +#define XE_VALIDATION_OPT_OUT (xe_validation_lockdep(), \ + (struct drm_exec *)ERR_PTR(__XE_VAL_OPT_OUT)) +#ifdef CONFIG_DRM_XE_DEBUG +void xe_validation_assert_exec(const struct xe_device *xe, const struct drm_exec *exec, + const struct drm_gem_object *obj); +#else +#define xe_validation_assert_exec(_xe, _exec, _obj) \ + do { \ + (void)_xe; (void)_exec; (void)_obj; \ + } while (0) +#endif + +/** + * struct xe_validation_device - The domain for exhaustive eviction + * @lock: The lock used to exclude other processes from allocating graphics memory + * + * The struct xe_validation_device represents the domain for which we want to use + * exhaustive eviction. The @lock is typically grabbed in read mode for allocations + * but when graphics memory allocation fails, it is retried with the write mode held. + */ +struct xe_validation_device { + struct rw_semaphore lock; +}; + +/** + * struct xe_val_flags - Flags for xe_validation_ctx_init(). + * @exclusive: Start the validation transaction by locking out all other validators. + * @no_block: Don't block on initialization. + * @interruptible: Block interruptible if blocking. Implies initializing the drm_exec + * context with the DRM_EXEC_INTERRUPTIBLE_WAIT flag. 
+ * @exec_ignore_duplicates: Initialize the drm_exec context with the + * DRM_EXEC_IGNORE_DUPLICATES flag. + */ +struct xe_val_flags { + u32 exclusive :1; + u32 no_block :1; + u32 interruptible :1; + u32 exec_ignore_duplicates :1; +}; + +/** + * struct xe_validation_ctx - A struct drm_exec subclass with support for + * exhaustive eviction + * @exec: The drm_exec object base class. Note that we use a pointer instead of + * embedding to avoid diamond inheritance. + * @val: The exhaustive eviction domain. + * @val_flags: Copy of the struct xe_val_flags passed to xe_validation_ctx_init. + * @lock_held: Whether The domain lock is currently held. + * @lock_held_exclusive: Whether the domain lock is held in exclusive mode. + * @request_exclusive: Whether to lock exclusively (write mode) the next time + * the domain lock is locked. + * @exec_flags: The drm_exec flags used for drm_exec (re-)initialization. + * @nr: The drm_exec nr parameter used for drm_exec (re-)initializaiton. + */ +struct xe_validation_ctx { + struct drm_exec *exec; + struct xe_validation_device *val; + struct xe_val_flags val_flags; + bool lock_held; + bool lock_held_exclusive; + bool request_exclusive; + u32 exec_flags; + unsigned int nr; +}; + +int xe_validation_ctx_init(struct xe_validation_ctx *ctx, struct xe_validation_device *val, + struct drm_exec *exec, const struct xe_val_flags flags); + +int xe_validation_exec_lock(struct xe_validation_ctx *ctx, struct drm_gpuvm_exec *vm_exec, + struct xe_validation_device *val); + +void xe_validation_ctx_fini(struct xe_validation_ctx *ctx); + +bool xe_validation_should_retry(struct xe_validation_ctx *ctx, int *ret); + +/** + * xe_validation_retry_on_oom() - Retry on oom in an xe_validaton transaction + * @_ctx: Pointer to the xe_validation_ctx + * @_ret: The current error value possibly holding -ENOMEM + * + * Use this in way similar to drm_exec_retry_on_contention(). + * If @_ret contains -ENOMEM the tranaction is restarted once in a way that + * blocks other transactions and allows exhastive eviction. If the transaction + * was already restarted once, Just return the -ENOMEM. May also set + * _ret to -EINTR if not retrying and waits are interruptible. + * May only be used within a drm_exec_until_all_locked() loop. + */ +#define xe_validation_retry_on_oom(_ctx, _ret) \ + do { \ + if (xe_validation_should_retry(_ctx, _ret)) \ + goto *__drm_exec_retry_ptr; \ + } while (0) + +/** + * xe_validation_device_init - Initialize a struct xe_validation_device + * @val: The xe_validation_device to init. + */ +static inline void +xe_validation_device_init(struct xe_validation_device *val) +{ + init_rwsem(&val->lock); +} + +/* + * Make guard() and scoped_guard() work with xe_validation_ctx + * so that we can exit transactions without caring about the + * cleanup. + */ +DEFINE_CLASS(xe_validation, struct xe_validation_ctx *, + if (_T) xe_validation_ctx_fini(_T);, + ({_ret = xe_validation_ctx_init(_ctx, _val, _exec, _flags); + _ret ? NULL : _ctx; }), + struct xe_validation_ctx *_ctx, struct xe_validation_device *_val, + struct drm_exec *_exec, const struct xe_val_flags _flags, int _ret); +static inline void *class_xe_validation_lock_ptr(class_xe_validation_t *_T) +{return *_T; } +#define class_xe_validation_is_conditional true + +/** + * xe_validation_guard() - An auto-cleanup xe_validation_ctx transaction + * @_ctx: The xe_validation_ctx. + * @_val: The xe_validation_device. + * @_exec: The struct drm_exec object + * @_flags: Flags for the xe_validation_ctx initialization. 
+ * @_ret: Return in / out parameter. May be set by this macro. Typicall 0 when called. + * + * This macro is will initiate a drm_exec transaction with additional support for + * exhaustive eviction. + */ +#define xe_validation_guard(_ctx, _val, _exec, _flags, _ret) \ + scoped_guard(xe_validation, _ctx, _val, _exec, _flags, _ret) \ + drm_exec_until_all_locked(_exec) + +#endif diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index 04d1a43b81e3..63c65e3d207b 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -28,7 +28,6 @@ #include "xe_drm_client.h" #include "xe_exec_queue.h" #include "xe_gt_pagefault.h" -#include "xe_gt_tlb_invalidation.h" #include "xe_migrate.h" #include "xe_pat.h" #include "xe_pm.h" @@ -38,9 +37,10 @@ #include "xe_res_cursor.h" #include "xe_svm.h" #include "xe_sync.h" +#include "xe_tile.h" +#include "xe_tlb_inval.h" #include "xe_trace_bo.h" #include "xe_wa.h" -#include "xe_hmm.h" static struct drm_gem_object *xe_vm_obj(struct xe_vm *vm) { @@ -48,34 +48,17 @@ static struct drm_gem_object *xe_vm_obj(struct xe_vm *vm) } /** - * xe_vma_userptr_check_repin() - Advisory check for repin needed - * @uvma: The userptr vma + * xe_vm_drm_exec_lock() - Lock the vm's resv with a drm_exec transaction + * @vm: The vm whose resv is to be locked. + * @exec: The drm_exec transaction. * - * Check if the userptr vma has been invalidated since last successful - * repin. The check is advisory only and can the function can be called - * without the vm->userptr.notifier_lock held. There is no guarantee that the - * vma userptr will remain valid after a lockless check, so typically - * the call needs to be followed by a proper check under the notifier_lock. + * Helper to lock the vm's resv as part of a drm_exec transaction. * - * Return: 0 if userptr vma is valid, -EAGAIN otherwise; repin recommended. + * Return: %0 on success. See drm_exec_lock_obj() for error codes. */ -int xe_vma_userptr_check_repin(struct xe_userptr_vma *uvma) +int xe_vm_drm_exec_lock(struct xe_vm *vm, struct drm_exec *exec) { - return mmu_interval_check_retry(&uvma->userptr.notifier, - uvma->userptr.notifier_seq) ? 
- -EAGAIN : 0; -} - -int xe_vma_userptr_pin_pages(struct xe_userptr_vma *uvma) -{ - struct xe_vma *vma = &uvma->vma; - struct xe_vm *vm = xe_vma_vm(vma); - struct xe_device *xe = vm->xe; - - lockdep_assert_held(&vm->lock); - xe_assert(xe, xe_vma_is_userptr(vma)); - - return xe_hmm_userptr_populate_range(uvma, false); + return drm_exec_lock_obj(exec, xe_vm_obj(vm)); } static bool preempt_fences_waiting(struct xe_vm *vm) @@ -227,6 +210,7 @@ int xe_vm_add_compute_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q) .num_fences = 1, }; struct drm_exec *exec = &vm_exec.exec; + struct xe_validation_ctx ctx; struct dma_fence *pfence; int err; bool wait; @@ -234,14 +218,14 @@ int xe_vm_add_compute_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q) xe_assert(vm->xe, xe_vm_in_preempt_fence_mode(vm)); down_write(&vm->lock); - err = drm_gpuvm_exec_lock(&vm_exec); + err = xe_validation_exec_lock(&ctx, &vm_exec, &vm->xe->val); if (err) goto out_up_write; pfence = xe_preempt_fence_create(q, q->lr.context, ++q->lr.seqno); - if (!pfence) { - err = -ENOMEM; + if (IS_ERR(pfence)) { + err = PTR_ERR(pfence); goto out_fini; } @@ -249,7 +233,7 @@ int xe_vm_add_compute_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q) ++vm->preempt.num_exec_queues; q->lr.pfence = pfence; - down_read(&vm->userptr.notifier_lock); + xe_svm_notifier_lock(vm); drm_gpuvm_resv_add_fence(&vm->gpuvm, exec, pfence, DMA_RESV_USAGE_BOOKKEEP, DMA_RESV_USAGE_BOOKKEEP); @@ -263,10 +247,10 @@ int xe_vm_add_compute_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q) if (wait) dma_fence_enable_sw_signaling(pfence); - up_read(&vm->userptr.notifier_lock); + xe_svm_notifier_unlock(vm); out_fini: - drm_exec_fini(exec); + xe_validation_ctx_fini(&ctx); out_up_write: up_write(&vm->lock); @@ -299,25 +283,6 @@ void xe_vm_remove_compute_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q) up_write(&vm->lock); } -/** - * __xe_vm_userptr_needs_repin() - Check whether the VM does have userptrs - * that need repinning. - * @vm: The VM. - * - * This function checks for whether the VM has userptrs that need repinning, - * and provides a release-type barrier on the userptr.notifier_lock after - * checking. - * - * Return: 0 if there are no userptrs needing repinning, -EAGAIN if there are. - */ -int __xe_vm_userptr_needs_repin(struct xe_vm *vm) -{ - lockdep_assert_held_read(&vm->userptr.notifier_lock); - - return (list_empty(&vm->userptr.repin_list) && - list_empty(&vm->userptr.invalidated)) ? 0 : -EAGAIN; -} - #define XE_VM_REBIND_RETRY_TIMEOUT_MS 1000 /** @@ -349,39 +314,6 @@ void xe_vm_kill(struct xe_vm *vm, bool unlocked) /* TODO: Inform user the VM is banned */ } -/** - * xe_vm_validate_should_retry() - Whether to retry after a validate error. - * @exec: The drm_exec object used for locking before validation. - * @err: The error returned from ttm_bo_validate(). - * @end: A ktime_t cookie that should be set to 0 before first use and - * that should be reused on subsequent calls. - * - * With multiple active VMs, under memory pressure, it is possible that - * ttm_bo_validate() run into -EDEADLK and in such case returns -ENOMEM. - * Until ttm properly handles locking in such scenarios, best thing the - * driver can do is retry with a timeout. Check if that is necessary, and - * if so unlock the drm_exec's objects while keeping the ticket to prepare - * for a rerun. - * - * Return: true if a retry after drm_exec_init() is recommended; - * false otherwise. 
- */ -bool xe_vm_validate_should_retry(struct drm_exec *exec, int err, ktime_t *end) -{ - ktime_t cur; - - if (err != -ENOMEM) - return false; - - cur = ktime_get(); - *end = *end ? : ktime_add_ms(cur, XE_VM_REBIND_RETRY_TIMEOUT_MS); - if (!ktime_before(cur, *end)) - return false; - - msleep(20); - return true; -} - static int xe_gpuvm_validate(struct drm_gpuvm_bo *vm_bo, struct drm_exec *exec) { struct xe_vm *vm = gpuvm_to_vm(vm_bo->vm); @@ -393,7 +325,10 @@ static int xe_gpuvm_validate(struct drm_gpuvm_bo *vm_bo, struct drm_exec *exec) list_move_tail(&gpuva_to_vma(gpuva)->combined_links.rebind, &vm->rebind_list); - ret = xe_bo_validate(gem_to_xe_bo(vm_bo->obj), vm, false); + if (!try_wait_for_completion(&vm->xe->pm_block)) + return -EAGAIN; + + ret = xe_bo_validate(gem_to_xe_bo(vm_bo->obj), vm, false, exec); if (ret) return ret; @@ -479,13 +414,40 @@ static int xe_preempt_work_begin(struct drm_exec *exec, struct xe_vm *vm, return xe_vm_validate_rebind(vm, exec, vm->preempt.num_exec_queues); } +static bool vm_suspend_rebind_worker(struct xe_vm *vm) +{ + struct xe_device *xe = vm->xe; + bool ret = false; + + mutex_lock(&xe->rebind_resume_lock); + if (!try_wait_for_completion(&vm->xe->pm_block)) { + ret = true; + list_move_tail(&vm->preempt.pm_activate_link, &xe->rebind_resume_list); + } + mutex_unlock(&xe->rebind_resume_lock); + + return ret; +} + +/** + * xe_vm_resume_rebind_worker() - Resume the rebind worker. + * @vm: The vm whose preempt worker to resume. + * + * Resume a preempt worker that was previously suspended by + * vm_suspend_rebind_worker(). + */ +void xe_vm_resume_rebind_worker(struct xe_vm *vm) +{ + queue_work(vm->xe->ordered_wq, &vm->preempt.rebind_work); +} + static void preempt_rebind_work_func(struct work_struct *w) { struct xe_vm *vm = container_of(w, struct xe_vm, preempt.rebind_work); + struct xe_validation_ctx ctx; struct drm_exec exec; unsigned int fence_count = 0; LIST_HEAD(preempt_fences); - ktime_t end = 0; int err = 0; long wait; int __maybe_unused tries = 0; @@ -502,24 +464,30 @@ static void preempt_rebind_work_func(struct work_struct *w) } retry: + if (!try_wait_for_completion(&vm->xe->pm_block) && vm_suspend_rebind_worker(vm)) { + up_write(&vm->lock); + return; + } + if (xe_vm_userptr_check_repin(vm)) { err = xe_vm_userptr_pin(vm); if (err) goto out_unlock_outer; } - drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 0); + err = xe_validation_ctx_init(&ctx, &vm->xe->val, &exec, + (struct xe_val_flags) {.interruptible = true}); + if (err) + goto out_unlock_outer; drm_exec_until_all_locked(&exec) { bool done = false; err = xe_preempt_work_begin(&exec, vm, &done); drm_exec_retry_on_contention(&exec); + xe_validation_retry_on_oom(&ctx, &err); if (err || done) { - drm_exec_fini(&exec); - if (err && xe_vm_validate_should_retry(&exec, err, &end)) - err = -EAGAIN; - + xe_validation_ctx_fini(&ctx); goto out_unlock_outer; } } @@ -528,7 +496,9 @@ retry: if (err) goto out_unlock; + xe_vm_set_validation_exec(vm, &exec); err = xe_vm_rebind(vm, true); + xe_vm_set_validation_exec(vm, NULL); if (err) goto out_unlock; @@ -546,9 +516,9 @@ retry: (!(__tries)++ || __xe_vm_userptr_needs_repin(__vm)) : \ __xe_vm_userptr_needs_repin(__vm)) - down_read(&vm->userptr.notifier_lock); + xe_svm_notifier_lock(vm); if (retry_required(tries, vm)) { - up_read(&vm->userptr.notifier_lock); + xe_svm_notifier_unlock(vm); err = -EAGAIN; goto out_unlock; } @@ -562,10 +532,10 @@ retry: /* Point of no return. 
*/ arm_preempt_fences(vm, &preempt_fences); resume_and_reinstall_preempt_fences(vm, &exec); - up_read(&vm->userptr.notifier_lock); + xe_svm_notifier_unlock(vm); out_unlock: - drm_exec_fini(&exec); + xe_validation_ctx_fini(&ctx); out_unlock_outer: if (err == -EAGAIN) { trace_xe_vm_rebind_worker_retry(vm); @@ -583,203 +553,6 @@ out_unlock_outer: trace_xe_vm_rebind_worker_exit(vm); } -static void __vma_userptr_invalidate(struct xe_vm *vm, struct xe_userptr_vma *uvma) -{ - struct xe_userptr *userptr = &uvma->userptr; - struct xe_vma *vma = &uvma->vma; - struct dma_resv_iter cursor; - struct dma_fence *fence; - long err; - - /* - * Tell exec and rebind worker they need to repin and rebind this - * userptr. - */ - if (!xe_vm_in_fault_mode(vm) && - !(vma->gpuva.flags & XE_VMA_DESTROYED)) { - spin_lock(&vm->userptr.invalidated_lock); - list_move_tail(&userptr->invalidate_link, - &vm->userptr.invalidated); - spin_unlock(&vm->userptr.invalidated_lock); - } - - /* - * Preempt fences turn into schedule disables, pipeline these. - * Note that even in fault mode, we need to wait for binds and - * unbinds to complete, and those are attached as BOOKMARK fences - * to the vm. - */ - dma_resv_iter_begin(&cursor, xe_vm_resv(vm), - DMA_RESV_USAGE_BOOKKEEP); - dma_resv_for_each_fence_unlocked(&cursor, fence) - dma_fence_enable_sw_signaling(fence); - dma_resv_iter_end(&cursor); - - err = dma_resv_wait_timeout(xe_vm_resv(vm), - DMA_RESV_USAGE_BOOKKEEP, - false, MAX_SCHEDULE_TIMEOUT); - XE_WARN_ON(err <= 0); - - if (xe_vm_in_fault_mode(vm) && userptr->initial_bind) { - err = xe_vm_invalidate_vma(vma); - XE_WARN_ON(err); - } - - xe_hmm_userptr_unmap(uvma); -} - -static bool vma_userptr_invalidate(struct mmu_interval_notifier *mni, - const struct mmu_notifier_range *range, - unsigned long cur_seq) -{ - struct xe_userptr_vma *uvma = container_of(mni, typeof(*uvma), userptr.notifier); - struct xe_vma *vma = &uvma->vma; - struct xe_vm *vm = xe_vma_vm(vma); - - xe_assert(vm->xe, xe_vma_is_userptr(vma)); - trace_xe_vma_userptr_invalidate(vma); - - if (!mmu_notifier_range_blockable(range)) - return false; - - vm_dbg(&xe_vma_vm(vma)->xe->drm, - "NOTIFIER: addr=0x%016llx, range=0x%016llx", - xe_vma_start(vma), xe_vma_size(vma)); - - down_write(&vm->userptr.notifier_lock); - mmu_interval_set_seq(mni, cur_seq); - - __vma_userptr_invalidate(vm, uvma); - up_write(&vm->userptr.notifier_lock); - trace_xe_vma_userptr_invalidate_complete(vma); - - return true; -} - -static const struct mmu_interval_notifier_ops vma_userptr_notifier_ops = { - .invalidate = vma_userptr_invalidate, -}; - -#if IS_ENABLED(CONFIG_DRM_XE_USERPTR_INVAL_INJECT) -/** - * xe_vma_userptr_force_invalidate() - force invalidate a userptr - * @uvma: The userptr vma to invalidate - * - * Perform a forced userptr invalidation for testing purposes. 
- */ -void xe_vma_userptr_force_invalidate(struct xe_userptr_vma *uvma) -{ - struct xe_vm *vm = xe_vma_vm(&uvma->vma); - - /* Protect against concurrent userptr pinning */ - lockdep_assert_held(&vm->lock); - /* Protect against concurrent notifiers */ - lockdep_assert_held(&vm->userptr.notifier_lock); - /* - * Protect against concurrent instances of this function and - * the critical exec sections - */ - xe_vm_assert_held(vm); - - if (!mmu_interval_read_retry(&uvma->userptr.notifier, - uvma->userptr.notifier_seq)) - uvma->userptr.notifier_seq -= 2; - __vma_userptr_invalidate(vm, uvma); -} -#endif - -int xe_vm_userptr_pin(struct xe_vm *vm) -{ - struct xe_userptr_vma *uvma, *next; - int err = 0; - - xe_assert(vm->xe, !xe_vm_in_fault_mode(vm)); - lockdep_assert_held_write(&vm->lock); - - /* Collect invalidated userptrs */ - spin_lock(&vm->userptr.invalidated_lock); - xe_assert(vm->xe, list_empty(&vm->userptr.repin_list)); - list_for_each_entry_safe(uvma, next, &vm->userptr.invalidated, - userptr.invalidate_link) { - list_del_init(&uvma->userptr.invalidate_link); - list_add_tail(&uvma->userptr.repin_link, - &vm->userptr.repin_list); - } - spin_unlock(&vm->userptr.invalidated_lock); - - /* Pin and move to bind list */ - list_for_each_entry_safe(uvma, next, &vm->userptr.repin_list, - userptr.repin_link) { - err = xe_vma_userptr_pin_pages(uvma); - if (err == -EFAULT) { - list_del_init(&uvma->userptr.repin_link); - /* - * We might have already done the pin once already, but - * then had to retry before the re-bind happened, due - * some other condition in the caller, but in the - * meantime the userptr got dinged by the notifier such - * that we need to revalidate here, but this time we hit - * the EFAULT. In such a case make sure we remove - * ourselves from the rebind list to avoid going down in - * flames. - */ - if (!list_empty(&uvma->vma.combined_links.rebind)) - list_del_init(&uvma->vma.combined_links.rebind); - - /* Wait for pending binds */ - xe_vm_lock(vm, false); - dma_resv_wait_timeout(xe_vm_resv(vm), - DMA_RESV_USAGE_BOOKKEEP, - false, MAX_SCHEDULE_TIMEOUT); - - down_read(&vm->userptr.notifier_lock); - err = xe_vm_invalidate_vma(&uvma->vma); - up_read(&vm->userptr.notifier_lock); - xe_vm_unlock(vm); - if (err) - break; - } else { - if (err) - break; - - list_del_init(&uvma->userptr.repin_link); - list_move_tail(&uvma->vma.combined_links.rebind, - &vm->rebind_list); - } - } - - if (err) { - down_write(&vm->userptr.notifier_lock); - spin_lock(&vm->userptr.invalidated_lock); - list_for_each_entry_safe(uvma, next, &vm->userptr.repin_list, - userptr.repin_link) { - list_del_init(&uvma->userptr.repin_link); - list_move_tail(&uvma->userptr.invalidate_link, - &vm->userptr.invalidated); - } - spin_unlock(&vm->userptr.invalidated_lock); - up_write(&vm->userptr.notifier_lock); - } - return err; -} - -/** - * xe_vm_userptr_check_repin() - Check whether the VM might have userptrs - * that need repinning. - * @vm: The VM. - * - * This function does an advisory check for whether the VM has userptrs that - * need repinning. - * - * Return: 0 if there are no indications of userptrs needing repinning, - * -EAGAIN if there are. - */ -int xe_vm_userptr_check_repin(struct xe_vm *vm) -{ - return (list_empty_careful(&vm->userptr.repin_list) && - list_empty_careful(&vm->userptr.invalidated)) ? 
0 : -EAGAIN; -} - static int xe_vma_ops_alloc(struct xe_vma_ops *vops, bool array_of_binds) { int i; @@ -843,6 +616,13 @@ static void xe_vma_ops_incr_pt_update_ops(struct xe_vma_ops *vops, u8 tile_mask, vops->pt_update_ops[i].num_ops += inc_val; } +#define XE_VMA_CREATE_MASK ( \ + XE_VMA_READ_ONLY | \ + XE_VMA_DUMPABLE | \ + XE_VMA_SYSTEM_ALLOCATOR | \ + DRM_GPUVA_SPARSE | \ + XE_VMA_MADV_AUTORESET) + static void xe_vm_populate_rebind(struct xe_vma_op *op, struct xe_vma *vma, u8 tile_mask) { @@ -855,8 +635,7 @@ static void xe_vm_populate_rebind(struct xe_vma_op *op, struct xe_vma *vma, op->base.map.gem.offset = vma->gpuva.gem.offset; op->map.vma = vma; op->map.immediate = true; - op->map.dumpable = vma->gpuva.flags & XE_VMA_DUMPABLE; - op->map.is_null = xe_vma_is_null(vma); + op->map.vma_flags = vma->gpuva.flags & XE_VMA_CREATE_MASK; } static int xe_vm_ops_add_rebind(struct xe_vma_ops *vops, struct xe_vma *vma, @@ -953,7 +732,7 @@ struct dma_fence *xe_vma_rebind(struct xe_vm *vm, struct xe_vma *vma, u8 tile_ma for_each_tile(tile, vm->xe, id) { vops.pt_update_ops[id].wait_vm_bookkeep = true; vops.pt_update_ops[tile->id].q = - xe_tile_migrate_exec_queue(tile); + xe_migrate_exec_queue(tile->migrate); } err = xe_vm_ops_add_rebind(&vops, vma, tile_mask); @@ -1043,7 +822,7 @@ struct dma_fence *xe_vm_range_rebind(struct xe_vm *vm, for_each_tile(tile, vm->xe, id) { vops.pt_update_ops[id].wait_vm_bookkeep = true; vops.pt_update_ops[tile->id].q = - xe_tile_migrate_exec_queue(tile); + xe_migrate_exec_queue(tile->migrate); } err = xe_vm_ops_add_range_rebind(&vops, vma, range, tile_mask); @@ -1126,7 +905,7 @@ struct dma_fence *xe_vm_range_unbind(struct xe_vm *vm, for_each_tile(tile, vm->xe, id) { vops.pt_update_ops[id].wait_vm_bookkeep = true; vops.pt_update_ops[tile->id].q = - xe_tile_migrate_exec_queue(tile); + xe_migrate_exec_queue(tile->migrate); } err = xe_vm_ops_add_range_unbind(&vops, range); @@ -1159,25 +938,18 @@ static void xe_vma_free(struct xe_vma *vma) kfree(vma); } -#define VMA_CREATE_FLAG_READ_ONLY BIT(0) -#define VMA_CREATE_FLAG_IS_NULL BIT(1) -#define VMA_CREATE_FLAG_DUMPABLE BIT(2) -#define VMA_CREATE_FLAG_IS_SYSTEM_ALLOCATOR BIT(3) - static struct xe_vma *xe_vma_create(struct xe_vm *vm, struct xe_bo *bo, u64 bo_offset_or_userptr, u64 start, u64 end, - u16 pat_index, unsigned int flags) + struct xe_vma_mem_attr *attr, + unsigned int flags) { struct xe_vma *vma; struct xe_tile *tile; u8 id; - bool read_only = (flags & VMA_CREATE_FLAG_READ_ONLY); - bool is_null = (flags & VMA_CREATE_FLAG_IS_NULL); - bool dumpable = (flags & VMA_CREATE_FLAG_DUMPABLE); - bool is_cpu_addr_mirror = - (flags & VMA_CREATE_FLAG_IS_SYSTEM_ALLOCATOR); + bool is_null = (flags & DRM_GPUVA_SPARSE); + bool is_cpu_addr_mirror = (flags & XE_VMA_SYSTEM_ALLOCATOR); xe_assert(vm->xe, start < end); xe_assert(vm->xe, end < vm->size); @@ -1198,10 +970,6 @@ static struct xe_vma *xe_vma_create(struct xe_vm *vm, if (!vma) return ERR_PTR(-ENOMEM); - if (is_cpu_addr_mirror) - vma->gpuva.flags |= XE_VMA_SYSTEM_ALLOCATOR; - if (is_null) - vma->gpuva.flags |= DRM_GPUVA_SPARSE; if (bo) vma->gpuva.gem.obj = &bo->ttm.base; } @@ -1212,10 +980,7 @@ static struct xe_vma *xe_vma_create(struct xe_vm *vm, vma->gpuva.vm = &vm->gpuvm; vma->gpuva.va.addr = start; vma->gpuva.va.range = end - start + 1; - if (read_only) - vma->gpuva.flags |= XE_VMA_READ_ONLY; - if (dumpable) - vma->gpuva.flags |= XE_VMA_DUMPABLE; + vma->gpuva.flags = flags; for_each_tile(tile, vm->xe, id) vma->tile_mask |= 0x1 << id; @@ -1223,7 +988,7 @@ static struct xe_vma 
*xe_vma_create(struct xe_vm *vm, if (vm->xe->info.has_atomic_enable_pte_bit) vma->gpuva.flags |= XE_VMA_ATOMIC_PTE_BIT; - vma->pat_index = pat_index; + vma->attr = *attr; if (bo) { struct drm_gpuvm_bo *vm_bo; @@ -1243,25 +1008,17 @@ static struct xe_vma *xe_vma_create(struct xe_vm *vm, drm_gpuvm_bo_put(vm_bo); } else /* userptr or null */ { if (!is_null && !is_cpu_addr_mirror) { - struct xe_userptr *userptr = &to_userptr_vma(vma)->userptr; + struct xe_userptr_vma *uvma = to_userptr_vma(vma); u64 size = end - start + 1; int err; - INIT_LIST_HEAD(&userptr->invalidate_link); - INIT_LIST_HEAD(&userptr->repin_link); vma->gpuva.gem.offset = bo_offset_or_userptr; - mutex_init(&userptr->unmap_mutex); - err = mmu_interval_notifier_insert(&userptr->notifier, - current->mm, - xe_vma_userptr(vma), size, - &vma_userptr_notifier_ops); + err = xe_userptr_setup(uvma, xe_vma_userptr(vma), size); if (err) { xe_vma_free(vma); return ERR_PTR(err); } - - userptr->notifier_seq = LONG_MAX; } xe_vm_get(vm); @@ -1281,18 +1038,8 @@ static void xe_vma_destroy_late(struct xe_vma *vma) if (xe_vma_is_userptr(vma)) { struct xe_userptr_vma *uvma = to_userptr_vma(vma); - struct xe_userptr *userptr = &uvma->userptr; - if (userptr->sg) - xe_hmm_userptr_free_sg(uvma); - - /* - * Since userptr pages are not pinned, we can't remove - * the notifier until we're sure the GPU is not accessing - * them anymore - */ - mmu_interval_notifier_remove(&userptr->notifier); - mutex_destroy(&userptr->unmap_mutex); + xe_userptr_remove(uvma); xe_vm_put(vm); } else if (xe_vma_is_null(vma) || xe_vma_is_cpu_addr_mirror(vma)) { xe_vm_put(vm); @@ -1329,11 +1076,7 @@ static void xe_vma_destroy(struct xe_vma *vma, struct dma_fence *fence) if (xe_vma_is_userptr(vma)) { xe_assert(vm->xe, vma->gpuva.flags & XE_VMA_DESTROYED); - - spin_lock(&vm->userptr.invalidated_lock); - xe_assert(vm->xe, list_empty(&to_userptr_vma(vma)->userptr.repin_link)); - list_del(&to_userptr_vma(vma)->userptr.invalidate_link); - spin_unlock(&vm->userptr.invalidated_lock); + xe_userptr_destroy(to_userptr_vma(vma)); } else if (!xe_vma_is_null(vma) && !xe_vma_is_cpu_addr_mirror(vma)) { xe_bo_assert_held(xe_vma_bo(vma)); @@ -1381,20 +1124,19 @@ int xe_vm_lock_vma(struct drm_exec *exec, struct xe_vma *vma) static void xe_vma_destroy_unlocked(struct xe_vma *vma) { + struct xe_device *xe = xe_vma_vm(vma)->xe; + struct xe_validation_ctx ctx; struct drm_exec exec; - int err; + int err = 0; - drm_exec_init(&exec, 0, 0); - drm_exec_until_all_locked(&exec) { + xe_validation_guard(&ctx, &xe->val, &exec, (struct xe_val_flags) {}, err) { err = xe_vm_lock_vma(&exec, vma); drm_exec_retry_on_contention(&exec); if (XE_WARN_ON(err)) break; + xe_vma_destroy(vma, NULL); } - - xe_vma_destroy(vma, NULL); - - drm_exec_fini(&exec); + xe_assert(xe, !err); } struct xe_vma * @@ -1512,14 +1254,39 @@ static u64 pte_encode_ps(u32 pt_level) return 0; } -static u64 xelp_pde_encode_bo(struct xe_bo *bo, u64 bo_offset, - const u16 pat_index) +static u16 pde_pat_index(struct xe_bo *bo) +{ + struct xe_device *xe = xe_bo_device(bo); + u16 pat_index; + + /* + * We only have two bits to encode the PAT index in non-leaf nodes, but + * these only point to other paging structures so we only need a minimal + * selection of options. The user PAT index is only for encoding leaf + * nodes, where we have use of more bits to do the encoding. The + * non-leaf nodes are instead under driver control so the chosen index + * here should be distict from the user PAT index. 
Also the + * corresponding coherency of the PAT index should be tied to the + * allocation type of the page table (or at least we should pick + * something which is always safe). + */ + if (!xe_bo_is_vram(bo) && bo->ttm.ttm->caching == ttm_cached) + pat_index = xe->pat.idx[XE_CACHE_WB]; + else + pat_index = xe->pat.idx[XE_CACHE_NONE]; + + xe_assert(xe, pat_index <= 3); + + return pat_index; +} + +static u64 xelp_pde_encode_bo(struct xe_bo *bo, u64 bo_offset) { u64 pde; pde = xe_bo_addr(bo, bo_offset, XE_PAGE_SIZE); pde |= XE_PAGE_PRESENT | XE_PAGE_RW; - pde |= pde_encode_pat_index(pat_index); + pde |= pde_encode_pat_index(pde_pat_index(bo)); return pde; } @@ -1594,6 +1361,7 @@ static void vm_destroy_work_func(struct work_struct *w); * @xe: xe device. * @tile: tile to set up for. * @vm: vm to set up for. + * @exec: The struct drm_exec object used to lock the vm resv. * * Sets up a pagetable tree with one page-table per level and a single * leaf PTE. All pagetable entries point to the single page-table or, @@ -1603,16 +1371,19 @@ static void vm_destroy_work_func(struct work_struct *w); * Return: 0 on success, negative error code on error. */ static int xe_vm_create_scratch(struct xe_device *xe, struct xe_tile *tile, - struct xe_vm *vm) + struct xe_vm *vm, struct drm_exec *exec) { u8 id = tile->id; int i; for (i = MAX_HUGEPTE_LEVEL; i < vm->pt_root[id]->level; i++) { - vm->scratch_pt[id][i] = xe_pt_create(vm, tile, i); - if (IS_ERR(vm->scratch_pt[id][i])) - return PTR_ERR(vm->scratch_pt[id][i]); + vm->scratch_pt[id][i] = xe_pt_create(vm, tile, i, exec); + if (IS_ERR(vm->scratch_pt[id][i])) { + int err = PTR_ERR(vm->scratch_pt[id][i]); + vm->scratch_pt[id][i] = NULL; + return err; + } xe_pt_populate_empty(tile, vm, vm->scratch_pt[id][i]); } @@ -1640,9 +1411,26 @@ static void xe_vm_free_scratch(struct xe_vm *vm) } } -struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags) +static void xe_vm_pt_destroy(struct xe_vm *vm) +{ + struct xe_tile *tile; + u8 id; + + xe_vm_assert_held(vm); + + for_each_tile(tile, vm->xe, id) { + if (vm->pt_root[id]) { + xe_pt_destroy(vm->pt_root[id], vm->flags, NULL); + vm->pt_root[id] = NULL; + } + } +} + +struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags, struct xe_file *xef) { struct drm_gem_object *vm_resv_obj; + struct xe_validation_ctx ctx; + struct drm_exec exec; struct xe_vm *vm; int err, number_tiles = 0; struct xe_tile *tile; @@ -1661,9 +1449,10 @@ struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags) vm->xe = xe; vm->size = 1ull << xe->info.va_bits; - vm->flags = flags; + if (xef) + vm->xef = xe_file_get(xef); /** * GSC VMs are kernel-owned, only used for PXP ops and can sometimes be * manipulated under the PXP mutex. 
However, the PXP mutex can be taken @@ -1685,7 +1474,6 @@ struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags) INIT_LIST_HEAD(&vm->userptr.repin_list); INIT_LIST_HEAD(&vm->userptr.invalidated); - init_rwsem(&vm->userptr.notifier_lock); spin_lock_init(&vm->userptr.invalidated_lock); ttm_lru_bulk_move_init(&vm->lru_bulk_move); @@ -1709,13 +1497,12 @@ struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags) if (flags & XE_VM_FLAG_LR_MODE) { INIT_WORK(&vm->preempt.rebind_work, preempt_rebind_work_func); xe_pm_runtime_get_noresume(xe); + INIT_LIST_HEAD(&vm->preempt.pm_activate_link); } - if (flags & XE_VM_FLAG_FAULT_MODE) { - err = xe_svm_init(vm); - if (err) - goto err_no_resv; - } + err = xe_svm_init(vm); + if (err) + goto err_no_resv; vm_resv_obj = drm_gpuvm_resv_object_alloc(&xe->drm); if (!vm_resv_obj) { @@ -1728,49 +1515,68 @@ struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags) drm_gem_object_put(vm_resv_obj); - err = xe_vm_lock(vm, true); - if (err) - goto err_close; - - if (IS_DGFX(xe) && xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K) - vm->flags |= XE_VM_FLAG_64K; - - for_each_tile(tile, xe, id) { - if (flags & XE_VM_FLAG_MIGRATION && - tile->id != XE_VM_FLAG_TILE_ID(flags)) - continue; + err = 0; + xe_validation_guard(&ctx, &xe->val, &exec, (struct xe_val_flags) {.interruptible = true}, + err) { + err = xe_vm_drm_exec_lock(vm, &exec); + drm_exec_retry_on_contention(&exec); - vm->pt_root[id] = xe_pt_create(vm, tile, xe->info.vm_max_level); - if (IS_ERR(vm->pt_root[id])) { - err = PTR_ERR(vm->pt_root[id]); - vm->pt_root[id] = NULL; - goto err_unlock_close; - } - } + if (IS_DGFX(xe) && xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K) + vm->flags |= XE_VM_FLAG_64K; - if (xe_vm_has_scratch(vm)) { for_each_tile(tile, xe, id) { - if (!vm->pt_root[id]) + if (flags & XE_VM_FLAG_MIGRATION && + tile->id != XE_VM_FLAG_TILE_ID(flags)) continue; - err = xe_vm_create_scratch(xe, tile, vm); + vm->pt_root[id] = xe_pt_create(vm, tile, xe->info.vm_max_level, + &exec); + if (IS_ERR(vm->pt_root[id])) { + err = PTR_ERR(vm->pt_root[id]); + vm->pt_root[id] = NULL; + xe_vm_pt_destroy(vm); + drm_exec_retry_on_contention(&exec); + xe_validation_retry_on_oom(&ctx, &err); + break; + } + } + if (err) + break; + + if (xe_vm_has_scratch(vm)) { + for_each_tile(tile, xe, id) { + if (!vm->pt_root[id]) + continue; + + err = xe_vm_create_scratch(xe, tile, vm, &exec); + if (err) { + xe_vm_free_scratch(vm); + xe_vm_pt_destroy(vm); + drm_exec_retry_on_contention(&exec); + xe_validation_retry_on_oom(&ctx, &err); + break; + } + } if (err) - goto err_unlock_close; + break; + vm->batch_invalidate_tlb = true; } - vm->batch_invalidate_tlb = true; - } - if (vm->flags & XE_VM_FLAG_LR_MODE) - vm->batch_invalidate_tlb = false; + if (vm->flags & XE_VM_FLAG_LR_MODE) { + INIT_WORK(&vm->preempt.rebind_work, preempt_rebind_work_func); + vm->batch_invalidate_tlb = false; + } - /* Fill pt_root after allocating scratch tables */ - for_each_tile(tile, xe, id) { - if (!vm->pt_root[id]) - continue; + /* Fill pt_root after allocating scratch tables */ + for_each_tile(tile, xe, id) { + if (!vm->pt_root[id]) + continue; - xe_pt_populate_empty(tile, vm, vm->pt_root[id]); + xe_pt_populate_empty(tile, vm, vm->pt_root[id]); + } } - xe_vm_unlock(vm); + if (err) + goto err_close; /* Kernel migration VM shouldn't have a circular loop.. 
*/ if (!(flags & XE_VM_FLAG_MIGRATION)) { @@ -1794,12 +1600,24 @@ struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags) if (number_tiles > 1) vm->composite_fence_ctx = dma_fence_context_alloc(1); + if (xef && xe->info.has_asid) { + u32 asid; + + down_write(&xe->usm.lock); + err = xa_alloc_cyclic(&xe->usm.asid_to_vm, &asid, vm, + XA_LIMIT(1, XE_MAX_ASID - 1), + &xe->usm.next_asid, GFP_KERNEL); + up_write(&xe->usm.lock); + if (err < 0) + goto err_close; + + vm->usm.asid = asid; + } + trace_xe_vm_create(vm); return vm; -err_unlock_close: - xe_vm_unlock(vm); err_close: xe_vm_close_and_put(vm); return ERR_PTR(err); @@ -1814,6 +1632,8 @@ err_no_resv: for_each_tile(tile, xe, id) xe_range_fence_tree_fini(&vm->rftree[id]); ttm_lru_bulk_move_fini(&xe->ttm, &vm->lru_bulk_move); + if (vm->xef) + xe_file_put(vm->xef); kfree(vm); if (flags & XE_VM_FLAG_LR_MODE) xe_pm_runtime_put(xe); @@ -1850,7 +1670,7 @@ static void xe_vm_close(struct xe_vm *vm) xe_pt_clear(xe, vm->pt_root[id]); for_each_gt(gt, xe, id) - xe_gt_tlb_invalidation_vm(gt, vm); + xe_tlb_inval_vm(>->tlb_inval, vm); } } @@ -1874,8 +1694,12 @@ void xe_vm_close_and_put(struct xe_vm *vm) xe_assert(xe, !vm->preempt.num_exec_queues); xe_vm_close(vm); - if (xe_vm_in_preempt_fence_mode(vm)) + if (xe_vm_in_preempt_fence_mode(vm)) { + mutex_lock(&xe->rebind_resume_lock); + list_del_init(&vm->preempt.pm_activate_link); + mutex_unlock(&xe->rebind_resume_lock); flush_work(&vm->preempt.rebind_work); + } if (xe_vm_in_fault_mode(vm)) xe_svm_close(vm); @@ -1900,9 +1724,9 @@ void xe_vm_close_and_put(struct xe_vm *vm) vma = gpuva_to_vma(gpuva); if (xe_vma_has_no_bo(vma)) { - down_read(&vm->userptr.notifier_lock); + xe_svm_notifier_lock(vm); vma->gpuva.flags |= XE_VMA_DESTROYED; - up_read(&vm->userptr.notifier_lock); + xe_svm_notifier_unlock(vm); } xe_vm_remove_vma(vm, vma); @@ -1926,13 +1750,7 @@ void xe_vm_close_and_put(struct xe_vm *vm) * destroy the pagetables immediately. 
*/ xe_vm_free_scratch(vm); - - for_each_tile(tile, xe, id) { - if (vm->pt_root[id]) { - xe_pt_destroy(vm->pt_root[id], vm->flags, NULL); - vm->pt_root[id] = NULL; - } - } + xe_vm_pt_destroy(vm); xe_vm_unlock(vm); /* @@ -1946,8 +1764,7 @@ void xe_vm_close_and_put(struct xe_vm *vm) xe_vma_destroy_unlocked(vma); } - if (xe_vm_in_fault_mode(vm)) - xe_svm_fini(vm); + xe_svm_fini(vm); up_write(&vm->lock); @@ -2024,8 +1841,7 @@ struct xe_vm *xe_vm_lookup(struct xe_file *xef, u32 id) u64 xe_vm_pdp4_descriptor(struct xe_vm *vm, struct xe_tile *tile) { - return vm->pt_ops->pde_encode_bo(vm->pt_root[tile->id]->bo, 0, - tile_to_xe(tile)->pat.idx[XE_CACHE_WB]); + return vm->pt_ops->pde_encode_bo(vm->pt_root[tile->id]->bo, 0); } static struct xe_exec_queue * @@ -2059,16 +1875,15 @@ int xe_vm_create_ioctl(struct drm_device *dev, void *data, struct xe_device *xe = to_xe_device(dev); struct xe_file *xef = to_xe_file(file); struct drm_xe_vm_create *args = data; - struct xe_tile *tile; struct xe_vm *vm; - u32 id, asid; + u32 id; int err; u32 flags = 0; if (XE_IOCTL_DBG(xe, args->extensions)) return -EINVAL; - if (XE_WA(xe_root_mmio_gt(xe), 14016763929)) + if (XE_GT_WA(xe_root_mmio_gt(xe), 14016763929)) args->flags |= DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE; if (XE_IOCTL_DBG(xe, args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE && @@ -2097,29 +1912,10 @@ int xe_vm_create_ioctl(struct drm_device *dev, void *data, if (args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE) flags |= XE_VM_FLAG_FAULT_MODE; - vm = xe_vm_create(xe, flags); + vm = xe_vm_create(xe, flags, xef); if (IS_ERR(vm)) return PTR_ERR(vm); - if (xe->info.has_asid) { - down_write(&xe->usm.lock); - err = xa_alloc_cyclic(&xe->usm.asid_to_vm, &asid, vm, - XA_LIMIT(1, XE_MAX_ASID - 1), - &xe->usm.next_asid, GFP_KERNEL); - up_write(&xe->usm.lock); - if (err < 0) - goto err_close_and_put; - - vm->usm.asid = asid; - } - - vm->xef = xe_file_get(xef); - - /* Record BO memory for VM pagetable created against client */ - for_each_tile(tile, xe, id) - if (vm->pt_root[id]) - xe_drm_client_add_bo(vm->xef->client, vm->pt_root[id]->bo); - #if IS_ENABLED(CONFIG_DRM_XE_DEBUG_MEM) /* Warning: Security issue - never enable by default */ args->reserved[0] = xe_bo_main_addr(vm->pt_root[0]->bo, XE_PAGE_SIZE); @@ -2169,6 +1965,110 @@ int xe_vm_destroy_ioctl(struct drm_device *dev, void *data, return err; } +static int xe_vm_query_vmas(struct xe_vm *vm, u64 start, u64 end) +{ + struct drm_gpuva *gpuva; + u32 num_vmas = 0; + + lockdep_assert_held(&vm->lock); + drm_gpuvm_for_each_va_range(gpuva, &vm->gpuvm, start, end) + num_vmas++; + + return num_vmas; +} + +static int get_mem_attrs(struct xe_vm *vm, u32 *num_vmas, u64 start, + u64 end, struct drm_xe_mem_range_attr *attrs) +{ + struct drm_gpuva *gpuva; + int i = 0; + + lockdep_assert_held(&vm->lock); + + drm_gpuvm_for_each_va_range(gpuva, &vm->gpuvm, start, end) { + struct xe_vma *vma = gpuva_to_vma(gpuva); + + if (i == *num_vmas) + return -ENOSPC; + + attrs[i].start = xe_vma_start(vma); + attrs[i].end = xe_vma_end(vma); + attrs[i].atomic.val = vma->attr.atomic_access; + attrs[i].pat_index.val = vma->attr.pat_index; + attrs[i].preferred_mem_loc.devmem_fd = vma->attr.preferred_loc.devmem_fd; + attrs[i].preferred_mem_loc.migration_policy = + vma->attr.preferred_loc.migration_policy; + + i++; + } + + *num_vmas = i; + return 0; +} + +int xe_vm_query_vmas_attrs_ioctl(struct drm_device *dev, void *data, struct drm_file *file) +{ + struct xe_device *xe = to_xe_device(dev); + struct xe_file *xef = to_xe_file(file); + struct 
drm_xe_mem_range_attr *mem_attrs; + struct drm_xe_vm_query_mem_range_attr *args = data; + u64 __user *attrs_user = u64_to_user_ptr(args->vector_of_mem_attr); + struct xe_vm *vm; + int err = 0; + + if (XE_IOCTL_DBG(xe, + ((args->num_mem_ranges == 0 && + (attrs_user || args->sizeof_mem_range_attr != 0)) || + (args->num_mem_ranges > 0 && + (!attrs_user || + args->sizeof_mem_range_attr != + sizeof(struct drm_xe_mem_range_attr)))))) + return -EINVAL; + + vm = xe_vm_lookup(xef, args->vm_id); + if (XE_IOCTL_DBG(xe, !vm)) + return -EINVAL; + + err = down_read_interruptible(&vm->lock); + if (err) + goto put_vm; + + attrs_user = u64_to_user_ptr(args->vector_of_mem_attr); + + if (args->num_mem_ranges == 0 && !attrs_user) { + args->num_mem_ranges = xe_vm_query_vmas(vm, args->start, args->start + args->range); + args->sizeof_mem_range_attr = sizeof(struct drm_xe_mem_range_attr); + goto unlock_vm; + } + + mem_attrs = kvmalloc_array(args->num_mem_ranges, args->sizeof_mem_range_attr, + GFP_KERNEL | __GFP_ACCOUNT | + __GFP_RETRY_MAYFAIL | __GFP_NOWARN); + if (!mem_attrs) { + err = args->num_mem_ranges > 1 ? -ENOBUFS : -ENOMEM; + goto unlock_vm; + } + + memset(mem_attrs, 0, args->num_mem_ranges * args->sizeof_mem_range_attr); + err = get_mem_attrs(vm, &args->num_mem_ranges, args->start, + args->start + args->range, mem_attrs); + if (err) + goto free_mem_attrs; + + err = copy_to_user(attrs_user, mem_attrs, + args->sizeof_mem_range_attr * args->num_mem_ranges); + if (err) + err = -EFAULT; + +free_mem_attrs: + kvfree(mem_attrs); +unlock_vm: + up_read(&vm->lock); +put_vm: + xe_vm_put(vm); + return err; +} + static bool vma_matches(struct xe_vma *vma, u64 page_addr) { if (page_addr > xe_vma_end(vma) - 1 || @@ -2207,9 +2107,9 @@ static const u32 region_to_mem_type[] = { static void prep_vma_destroy(struct xe_vm *vm, struct xe_vma *vma, bool post_commit) { - down_read(&vm->userptr.notifier_lock); + xe_svm_notifier_lock(vm); vma->gpuva.flags |= XE_VMA_DESTROYED; - up_read(&vm->userptr.notifier_lock); + xe_svm_notifier_unlock(vm); if (post_commit) xe_vm_remove_vma(vm, vma); } @@ -2316,10 +2216,17 @@ vm_bind_ioctl_ops_create(struct xe_vm *vm, struct xe_vma_ops *vops, switch (operation) { case DRM_XE_VM_BIND_OP_MAP: - case DRM_XE_VM_BIND_OP_MAP_USERPTR: - ops = drm_gpuvm_sm_map_ops_create(&vm->gpuvm, addr, range, - obj, bo_offset_or_userptr); + case DRM_XE_VM_BIND_OP_MAP_USERPTR: { + struct drm_gpuvm_map_req map_req = { + .map.va.addr = addr, + .map.va.range = range, + .map.gem.obj = obj, + .map.gem.offset = bo_offset_or_userptr, + }; + + ops = drm_gpuvm_sm_map_ops_create(&vm->gpuvm, &map_req); break; + } case DRM_XE_VM_BIND_OP_UNMAP: ops = drm_gpuvm_sm_unmap_ops_create(&vm->gpuvm, addr, range); break; @@ -2356,20 +2263,25 @@ vm_bind_ioctl_ops_create(struct xe_vm *vm, struct xe_vma_ops *vops, if (__op->op == DRM_GPUVA_OP_MAP) { op->map.immediate = flags & DRM_XE_VM_BIND_FLAG_IMMEDIATE; - op->map.read_only = - flags & DRM_XE_VM_BIND_FLAG_READONLY; - op->map.is_null = flags & DRM_XE_VM_BIND_FLAG_NULL; - op->map.is_cpu_addr_mirror = flags & - DRM_XE_VM_BIND_FLAG_CPU_ADDR_MIRROR; - op->map.dumpable = flags & DRM_XE_VM_BIND_FLAG_DUMPABLE; + if (flags & DRM_XE_VM_BIND_FLAG_READONLY) + op->map.vma_flags |= XE_VMA_READ_ONLY; + if (flags & DRM_XE_VM_BIND_FLAG_NULL) + op->map.vma_flags |= DRM_GPUVA_SPARSE; + if (flags & DRM_XE_VM_BIND_FLAG_CPU_ADDR_MIRROR) + op->map.vma_flags |= XE_VMA_SYSTEM_ALLOCATOR; + if (flags & DRM_XE_VM_BIND_FLAG_DUMPABLE) + op->map.vma_flags |= XE_VMA_DUMPABLE; + if (flags & 
DRM_XE_VM_BIND_FLAG_MADVISE_AUTORESET) + op->map.vma_flags |= XE_VMA_MADV_AUTORESET; op->map.pat_index = pat_index; op->map.invalidate_on_bind = __xe_vm_needs_clear_scratch_pages(vm, flags); } else if (__op->op == DRM_GPUVA_OP_PREFETCH) { struct xe_vma *vma = gpuva_to_vma(op->base.prefetch.va); + struct xe_tile *tile; struct xe_svm_range *svm_range; struct drm_gpusvm_ctx ctx = {}; - struct xe_tile *tile; + struct drm_pagemap *dpagemap; u8 id, tile_mask = 0; u32 i; @@ -2380,14 +2292,30 @@ vm_bind_ioctl_ops_create(struct xe_vm *vm, struct xe_vma_ops *vops, ctx.read_only = xe_vma_read_only(vma); ctx.devmem_possible = IS_DGFX(vm->xe) && - IS_ENABLED(CONFIG_DRM_XE_DEVMEM_MIRROR); + IS_ENABLED(CONFIG_DRM_XE_PAGEMAP); for_each_tile(tile, vm->xe, id) tile_mask |= 0x1 << id; xa_init_flags(&op->prefetch_range.range, XA_FLAGS_ALLOC); - op->prefetch_range.region = prefetch_region; op->prefetch_range.ranges_count = 0; + tile = NULL; + + if (prefetch_region == DRM_XE_CONSULT_MEM_ADVISE_PREF_LOC) { + dpagemap = xe_vma_resolve_pagemap(vma, + xe_device_get_root_tile(vm->xe)); + /* + * TODO: Once multigpu support is enabled will need + * something to dereference tile from dpagemap. + */ + if (dpagemap) + tile = xe_device_get_root_tile(vm->xe); + } else if (prefetch_region) { + tile = &vm->xe->tiles[region_to_mem_type[prefetch_region] - + XE_PL_VRAM0]; + } + + op->prefetch_range.tile = tile; alloc_next_range: svm_range = xe_svm_range_find_or_insert(vm, addr, vma, &ctx); @@ -2406,7 +2334,7 @@ alloc_next_range: goto unwind_prefetch_ops; } - if (xe_svm_range_validate(vm, svm_range, tile_mask, !!prefetch_region)) { + if (xe_svm_range_validate(vm, svm_range, tile_mask, !!tile)) { xe_svm_range_debug(svm_range, "PREFETCH - RANGE IS VALID"); goto check_next_range; } @@ -2443,9 +2371,10 @@ unwind_prefetch_ops: ALLOW_ERROR_INJECTION(vm_bind_ioctl_ops_create, ERRNO); static struct xe_vma *new_vma(struct xe_vm *vm, struct drm_gpuva_op_map *op, - u16 pat_index, unsigned int flags) + struct xe_vma_mem_attr *attr, unsigned int flags) { struct xe_bo *bo = op->gem.obj ? 
gem_to_xe_bo(op->gem.obj) : NULL; + struct xe_validation_ctx ctx; struct drm_exec exec; struct xe_vma *vma; int err = 0; @@ -2453,9 +2382,9 @@ static struct xe_vma *new_vma(struct xe_vm *vm, struct drm_gpuva_op_map *op, lockdep_assert_held_write(&vm->lock); if (bo) { - drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 0); - drm_exec_until_all_locked(&exec) { - err = 0; + err = 0; + xe_validation_guard(&ctx, &vm->xe->val, &exec, + (struct xe_val_flags) {.interruptible = true}, err) { if (!bo->vm) { err = drm_exec_lock_obj(&exec, xe_vm_obj(vm)); drm_exec_retry_on_contention(&exec); @@ -2464,27 +2393,35 @@ static struct xe_vma *new_vma(struct xe_vm *vm, struct drm_gpuva_op_map *op, err = drm_exec_lock_obj(&exec, &bo->ttm.base); drm_exec_retry_on_contention(&exec); } - if (err) { - drm_exec_fini(&exec); + if (err) return ERR_PTR(err); + + vma = xe_vma_create(vm, bo, op->gem.offset, + op->va.addr, op->va.addr + + op->va.range - 1, attr, flags); + if (IS_ERR(vma)) + return vma; + + if (!bo->vm) { + err = add_preempt_fences(vm, bo); + if (err) { + prep_vma_destroy(vm, vma, false); + xe_vma_destroy(vma, NULL); + } } } + if (err) + return ERR_PTR(err); + } else { + vma = xe_vma_create(vm, NULL, op->gem.offset, + op->va.addr, op->va.addr + + op->va.range - 1, attr, flags); + if (IS_ERR(vma)) + return vma; + + if (xe_vma_is_userptr(vma)) + err = xe_vma_userptr_pin_pages(to_userptr_vma(vma)); } - vma = xe_vma_create(vm, bo, op->gem.offset, - op->va.addr, op->va.addr + - op->va.range - 1, pat_index, flags); - if (IS_ERR(vma)) - goto err_unlock; - - if (xe_vma_is_userptr(vma)) - err = xe_vma_userptr_pin_pages(to_userptr_vma(vma)); - else if (!xe_vma_has_no_bo(vma) && !bo->vm) - err = add_preempt_fences(vm, bo); - -err_unlock: - if (bo) - drm_exec_fini(&exec); - if (err) { prep_vma_destroy(vm, vma, false); xe_vma_destroy_unlocked(vma); @@ -2589,6 +2526,29 @@ static int xe_vma_op_commit(struct xe_vm *vm, struct xe_vma_op *op) return err; } +/** + * xe_vma_has_default_mem_attrs - Check if a VMA has default memory attributes + * @vma: Pointer to the xe_vma structure to check + * + * This function determines whether the given VMA (Virtual Memory Area) + * has its memory attributes set to their default values. Specifically, + * it checks the following conditions: + * + * - `atomic_access` is `DRM_XE_VMA_ATOMIC_UNDEFINED` + * - `pat_index` is equal to `default_pat_index` + * - `preferred_loc.devmem_fd` is `DRM_XE_PREFERRED_LOC_DEFAULT_DEVICE` + * - `preferred_loc.migration_policy` is `DRM_XE_MIGRATE_ALL_PAGES` + * + * Return: true if all attributes are at their default values, false otherwise. + */ +bool xe_vma_has_default_mem_attrs(struct xe_vma *vma) +{ + return (vma->attr.atomic_access == DRM_XE_ATOMIC_UNDEFINED && + vma->attr.pat_index == vma->attr.default_pat_index && + vma->attr.preferred_loc.devmem_fd == DRM_XE_PREFERRED_LOC_DEFAULT_DEVICE && + vma->attr.preferred_loc.migration_policy == DRM_XE_MIGRATE_ALL_PAGES); +} + static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct drm_gpuva_ops *ops, struct xe_vma_ops *vops) { @@ -2615,23 +2575,26 @@ static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct drm_gpuva_ops *ops, switch (op->base.op) { case DRM_GPUVA_OP_MAP: { - flags |= op->map.read_only ? - VMA_CREATE_FLAG_READ_ONLY : 0; - flags |= op->map.is_null ? - VMA_CREATE_FLAG_IS_NULL : 0; - flags |= op->map.dumpable ? - VMA_CREATE_FLAG_DUMPABLE : 0; - flags |= op->map.is_cpu_addr_mirror ? 
- VMA_CREATE_FLAG_IS_SYSTEM_ALLOCATOR : 0; - - vma = new_vma(vm, &op->base.map, op->map.pat_index, + struct xe_vma_mem_attr default_attr = { + .preferred_loc = { + .devmem_fd = DRM_XE_PREFERRED_LOC_DEFAULT_DEVICE, + .migration_policy = DRM_XE_MIGRATE_ALL_PAGES, + }, + .atomic_access = DRM_XE_ATOMIC_UNDEFINED, + .default_pat_index = op->map.pat_index, + .pat_index = op->map.pat_index, + }; + + flags |= op->map.vma_flags & XE_VMA_CREATE_MASK; + + vma = new_vma(vm, &op->base.map, &default_attr, flags); if (IS_ERR(vma)) return PTR_ERR(vma); op->map.vma = vma; if (((op->map.immediate || !xe_vm_in_fault_mode(vm)) && - !op->map.is_cpu_addr_mirror) || + !(op->map.vma_flags & XE_VMA_SYSTEM_ALLOCATOR)) || op->map.invalidate_on_bind) xe_vma_ops_incr_pt_update_ops(vops, op->tile_mask, 1); @@ -2652,27 +2615,20 @@ static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct drm_gpuva_ops *ops, end = op->base.remap.next->va.addr; if (xe_vma_is_cpu_addr_mirror(old) && - xe_svm_has_mapping(vm, start, end)) - return -EBUSY; + xe_svm_has_mapping(vm, start, end)) { + if (vops->flags & XE_VMA_OPS_FLAG_MADVISE) + xe_svm_unmap_address_range(vm, start, end); + else + return -EBUSY; + } op->remap.start = xe_vma_start(old); op->remap.range = xe_vma_size(old); - flags |= op->base.remap.unmap->va->flags & - XE_VMA_READ_ONLY ? - VMA_CREATE_FLAG_READ_ONLY : 0; - flags |= op->base.remap.unmap->va->flags & - DRM_GPUVA_SPARSE ? - VMA_CREATE_FLAG_IS_NULL : 0; - flags |= op->base.remap.unmap->va->flags & - XE_VMA_DUMPABLE ? - VMA_CREATE_FLAG_DUMPABLE : 0; - flags |= xe_vma_is_cpu_addr_mirror(old) ? - VMA_CREATE_FLAG_IS_SYSTEM_ALLOCATOR : 0; - + flags |= op->base.remap.unmap->va->flags & XE_VMA_CREATE_MASK; if (op->base.remap.prev) { vma = new_vma(vm, op->base.remap.prev, - old->pat_index, flags); + &old->attr, flags); if (IS_ERR(vma)) return PTR_ERR(vma); @@ -2702,7 +2658,7 @@ static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct drm_gpuva_ops *ops, if (op->base.remap.next) { vma = new_vma(vm, op->base.remap.next, - old->pat_index, flags); + &old->attr, flags); if (IS_ERR(vma)) return PTR_ERR(vma); @@ -2791,9 +2747,9 @@ static void xe_vma_op_unwind(struct xe_vm *vm, struct xe_vma_op *op, struct xe_vma *vma = gpuva_to_vma(op->base.unmap.va); if (vma) { - down_read(&vm->userptr.notifier_lock); + xe_svm_notifier_lock(vm); vma->gpuva.flags &= ~XE_VMA_DESTROYED; - up_read(&vm->userptr.notifier_lock); + xe_svm_notifier_unlock(vm); if (post_commit) xe_vm_insert_vma(vm, vma); } @@ -2812,9 +2768,9 @@ static void xe_vma_op_unwind(struct xe_vm *vm, struct xe_vma_op *op, xe_vma_destroy_unlocked(op->remap.next); } if (vma) { - down_read(&vm->userptr.notifier_lock); + xe_svm_notifier_lock(vm); vma->gpuva.flags &= ~XE_VMA_DESTROYED; - up_read(&vm->userptr.notifier_lock); + xe_svm_notifier_unlock(vm); if (post_commit) xe_vm_insert_vma(vm, vma); } @@ -2853,7 +2809,7 @@ static void vm_bind_ioctl_ops_unwind(struct xe_vm *vm, } static int vma_lock_and_validate(struct drm_exec *exec, struct xe_vma *vma, - bool validate) + bool res_evict, bool validate) { struct xe_bo *bo = xe_vma_bo(vma); struct xe_vm *vm = xe_vma_vm(vma); @@ -2864,7 +2820,8 @@ static int vma_lock_and_validate(struct drm_exec *exec, struct xe_vma *vma, err = drm_exec_lock_obj(exec, &bo->ttm.base); if (!err && validate) err = xe_bo_validate(bo, vm, - !xe_vm_in_preempt_fence_mode(vm)); + !xe_vm_in_preempt_fence_mode(vm) && + res_evict, exec); } return err; @@ -2887,33 +2844,30 @@ static int check_ufence(struct xe_vma *vma) static int prefetch_ranges(struct xe_vm *vm, struct 
xe_vma_op *op) { - bool devmem_possible = IS_DGFX(vm->xe) && IS_ENABLED(CONFIG_DRM_XE_DEVMEM_MIRROR); + bool devmem_possible = IS_DGFX(vm->xe) && IS_ENABLED(CONFIG_DRM_XE_PAGEMAP); struct xe_vma *vma = gpuva_to_vma(op->base.prefetch.va); + struct xe_tile *tile = op->prefetch_range.tile; int err = 0; struct xe_svm_range *svm_range; struct drm_gpusvm_ctx ctx = {}; - struct xe_tile *tile; unsigned long i; - u32 region; if (!xe_vma_is_cpu_addr_mirror(vma)) return 0; - region = op->prefetch_range.region; - ctx.read_only = xe_vma_read_only(vma); ctx.devmem_possible = devmem_possible; ctx.check_pages_threshold = devmem_possible ? SZ_64K : 0; + ctx.device_private_page_owner = xe_svm_devm_owner(vm->xe); /* TODO: Threading the migration */ xa_for_each(&op->prefetch_range.range, i, svm_range) { - if (!region) + if (!tile) xe_svm_range_migrate_to_smem(vm, svm_range); - if (xe_svm_range_needs_migrate_to_vram(svm_range, vma, region)) { - tile = &vm->xe->tiles[region_to_mem_type[region] - XE_PL_VRAM0]; - err = xe_svm_alloc_vram(vm, tile, svm_range, &ctx); + if (xe_svm_range_needs_migrate_to_vram(svm_range, vma, !!tile)) { + err = xe_svm_alloc_vram(tile, svm_range, &ctx); if (err) { drm_dbg(&vm->xe->drm, "VRAM allocation failed, retry from userspace, asid=%u, gpusvm=%p, errno=%pe\n", vm->usm.asid, &vm->svm.gpusvm, ERR_PTR(err)); @@ -2937,14 +2891,23 @@ static int prefetch_ranges(struct xe_vm *vm, struct xe_vma_op *op) } static int op_lock_and_prep(struct drm_exec *exec, struct xe_vm *vm, - struct xe_vma_op *op) + struct xe_vma_ops *vops, struct xe_vma_op *op) { int err = 0; + bool res_evict; + + /* + * We only allow evicting a BO within the VM if it is not part of an + * array of binds, as an array of binds can evict another BO within the + * bind. + */ + res_evict = !(vops->flags & XE_VMA_OPS_ARRAY_OF_BINDS); switch (op->base.op) { case DRM_GPUVA_OP_MAP: if (!op->map.invalidate_on_bind) err = vma_lock_and_validate(exec, op->map.vma, + res_evict, !xe_vm_in_fault_mode(vm) || op->map.immediate); break; @@ -2955,11 +2918,13 @@ static int op_lock_and_prep(struct drm_exec *exec, struct xe_vm *vm, err = vma_lock_and_validate(exec, gpuva_to_vma(op->base.remap.unmap->va), - false); + res_evict, false); if (!err && op->remap.prev) - err = vma_lock_and_validate(exec, op->remap.prev, true); + err = vma_lock_and_validate(exec, op->remap.prev, + res_evict, true); if (!err && op->remap.next) - err = vma_lock_and_validate(exec, op->remap.next, true); + err = vma_lock_and_validate(exec, op->remap.next, + res_evict, true); break; case DRM_GPUVA_OP_UNMAP: err = check_ufence(gpuva_to_vma(op->base.unmap.va)); @@ -2968,26 +2933,27 @@ static int op_lock_and_prep(struct drm_exec *exec, struct xe_vm *vm, err = vma_lock_and_validate(exec, gpuva_to_vma(op->base.unmap.va), - false); + res_evict, false); break; case DRM_GPUVA_OP_PREFETCH: { struct xe_vma *vma = gpuva_to_vma(op->base.prefetch.va); u32 region; - if (xe_vma_is_cpu_addr_mirror(vma)) - region = op->prefetch_range.region; - else + if (!xe_vma_is_cpu_addr_mirror(vma)) { region = op->prefetch.region; - - xe_assert(vm->xe, region <= ARRAY_SIZE(region_to_mem_type)); + xe_assert(vm->xe, region == DRM_XE_CONSULT_MEM_ADVISE_PREF_LOC || + region <= ARRAY_SIZE(region_to_mem_type)); + } err = vma_lock_and_validate(exec, gpuva_to_vma(op->base.prefetch.va), - false); + res_evict, false); if (!err && !xe_vma_has_no_bo(vma)) err = xe_bo_migrate(xe_vma_bo(vma), - region_to_mem_type[region]); + region_to_mem_type[region], + NULL, + exec); break; } default: @@ -3028,7 +2994,7 @@ static 
int vm_bind_ioctl_ops_lock_and_prep(struct drm_exec *exec, return err; list_for_each_entry(op, &vops->list, link) { - err = op_lock_and_prep(exec, vm, op); + err = op_lock_and_prep(exec, vm, vops, op); if (err) return err; } @@ -3250,35 +3216,37 @@ static void vm_bind_ioctl_ops_fini(struct xe_vm *vm, struct xe_vma_ops *vops, static struct dma_fence *vm_bind_ioctl_ops_execute(struct xe_vm *vm, struct xe_vma_ops *vops) { + struct xe_validation_ctx ctx; struct drm_exec exec; struct dma_fence *fence; - int err; + int err = 0; lockdep_assert_held_write(&vm->lock); - drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT | - DRM_EXEC_IGNORE_DUPLICATES, 0); - drm_exec_until_all_locked(&exec) { + xe_validation_guard(&ctx, &vm->xe->val, &exec, + ((struct xe_val_flags) { + .interruptible = true, + .exec_ignore_duplicates = true, + }), err) { err = vm_bind_ioctl_ops_lock_and_prep(&exec, vm, vops); drm_exec_retry_on_contention(&exec); - if (err) { - fence = ERR_PTR(err); - goto unlock; - } + xe_validation_retry_on_oom(&ctx, &err); + if (err) + return ERR_PTR(err); + xe_vm_set_validation_exec(vm, &exec); fence = ops_execute(vm, vops); + xe_vm_set_validation_exec(vm, NULL); if (IS_ERR(fence)) { if (PTR_ERR(fence) == -ENODATA) vm_bind_ioctl_ops_fini(vm, vops, NULL); - goto unlock; + return fence; } vm_bind_ioctl_ops_fini(vm, vops, fence); } -unlock: - drm_exec_fini(&exec); - return fence; + return err ? ERR_PTR(err) : fence; } ALLOW_ERROR_INJECTION(vm_bind_ioctl_ops_execute, ERRNO); @@ -3288,7 +3256,8 @@ ALLOW_ERROR_INJECTION(vm_bind_ioctl_ops_execute, ERRNO); DRM_XE_VM_BIND_FLAG_NULL | \ DRM_XE_VM_BIND_FLAG_DUMPABLE | \ DRM_XE_VM_BIND_FLAG_CHECK_PXP | \ - DRM_XE_VM_BIND_FLAG_CPU_ADDR_MIRROR) + DRM_XE_VM_BIND_FLAG_CPU_ADDR_MIRROR | \ + DRM_XE_VM_BIND_FLAG_MADVISE_AUTORESET) #ifdef TEST_VM_OPS_ERROR #define SUPPORTED_FLAGS (SUPPORTED_FLAGS_STUB | FORCE_OP_ERROR) @@ -3394,14 +3363,18 @@ static int vm_bind_ioctl_check_args(struct xe_device *xe, struct xe_vm *vm, op == DRM_XE_VM_BIND_OP_MAP_USERPTR) || XE_IOCTL_DBG(xe, coh_mode == XE_COH_NONE && op == DRM_XE_VM_BIND_OP_MAP_USERPTR) || + XE_IOCTL_DBG(xe, op == DRM_XE_VM_BIND_OP_MAP_USERPTR && + !IS_ENABLED(CONFIG_DRM_GPUSVM)) || XE_IOCTL_DBG(xe, obj && op == DRM_XE_VM_BIND_OP_PREFETCH) || XE_IOCTL_DBG(xe, prefetch_region && op != DRM_XE_VM_BIND_OP_PREFETCH) || - XE_IOCTL_DBG(xe, !(BIT(prefetch_region) & - xe->info.mem_region_mask)) || + XE_IOCTL_DBG(xe, (prefetch_region != DRM_XE_CONSULT_MEM_ADVISE_PREF_LOC && + !(BIT(prefetch_region) & xe->info.mem_region_mask))) || XE_IOCTL_DBG(xe, obj && - op == DRM_XE_VM_BIND_OP_UNMAP)) { + op == DRM_XE_VM_BIND_OP_UNMAP) || + XE_IOCTL_DBG(xe, (flags & DRM_XE_VM_BIND_FLAG_MADVISE_AUTORESET) && + (!is_cpu_addr_mirror || op != DRM_XE_VM_BIND_OP_MAP))) { err = -EINVAL; goto free_bind_ops; } @@ -3421,6 +3394,7 @@ static int vm_bind_ioctl_check_args(struct xe_device *xe, struct xe_vm *vm, free_bind_ops: if (args->num_binds > 1) kvfree(*bind_ops); + *bind_ops = NULL; return err; } @@ -3466,9 +3440,9 @@ static int xe_vm_bind_ioctl_validate_bo(struct xe_device *xe, struct xe_bo *bo, { u16 coh_mode; - if (XE_IOCTL_DBG(xe, range > bo->size) || + if (XE_IOCTL_DBG(xe, range > xe_bo_size(bo)) || XE_IOCTL_DBG(xe, obj_offset > - bo->size - range)) { + xe_bo_size(bo) - range)) { return -EINVAL; } @@ -3527,7 +3501,7 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file) struct xe_exec_queue *q = NULL; u32 num_syncs, num_ufence = 0; struct xe_sync_entry *syncs = NULL; - struct drm_xe_vm_bind_op *bind_ops; + struct 
drm_xe_vm_bind_op *bind_ops = NULL; struct xe_vma_ops vops; struct dma_fence *fence; int err; @@ -3545,7 +3519,7 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file) q = xe_exec_queue_lookup(xef, args->exec_queue_id); if (XE_IOCTL_DBG(xe, !q)) { err = -ENOENT; - goto put_vm; + goto free_bind_ops; } if (XE_IOCTL_DBG(xe, !(q->flags & EXEC_QUEUE_FLAG_VM))) { @@ -3591,7 +3565,7 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file) __GFP_RETRY_MAYFAIL | __GFP_NOWARN); if (!ops) { err = -ENOMEM; - goto release_vm_lock; + goto free_bos; } } @@ -3656,6 +3630,8 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file) } xe_vma_ops_init(&vops, vm, q, syncs, num_syncs); + if (args->num_binds > 1) + vops.flags |= XE_VMA_OPS_ARRAY_OF_BINDS; for (i = 0; i < args->num_binds; ++i) { u64 range = bind_ops[i].range; u64 addr = bind_ops[i].addr; @@ -3725,17 +3701,20 @@ free_syncs: put_obj: for (i = 0; i < args->num_binds; ++i) xe_bo_put(bos[i]); + + kvfree(ops); +free_bos: + kvfree(bos); release_vm_lock: up_write(&vm->lock); put_exec_queue: if (q) xe_exec_queue_put(q); -put_vm: - xe_vm_put(vm); - kvfree(bos); - kvfree(ops); +free_bind_ops: if (args->num_binds > 1) kvfree(bind_ops); +put_vm: + xe_vm_put(vm); return err; } @@ -3771,7 +3750,7 @@ struct dma_fence *xe_vm_bind_kernel_bo(struct xe_vm *vm, struct xe_bo *bo, xe_vma_ops_init(&vops, vm, q, NULL, 0); - ops = vm_bind_ioctl_ops_create(vm, &vops, bo, 0, addr, bo->size, + ops = vm_bind_ioctl_ops_create(vm, &vops, bo, 0, addr, xe_bo_size(bo), DRM_XE_VM_BIND_OP_MAP, 0, 0, vm->xe->pat.idx[cache_lvl]); if (IS_ERR(ops)) { @@ -3825,10 +3804,14 @@ release_vm_lock: */ int xe_vm_lock(struct xe_vm *vm, bool intr) { + int ret; + if (intr) - return dma_resv_lock_interruptible(xe_vm_resv(vm), NULL); + ret = dma_resv_lock_interruptible(xe_vm_resv(vm), NULL); + else + ret = dma_resv_lock(xe_vm_resv(vm), NULL); - return dma_resv_lock(xe_vm_resv(vm), NULL); + return ret; } /** @@ -3843,7 +3826,7 @@ void xe_vm_unlock(struct xe_vm *vm) } /** - * xe_vm_range_tilemask_tlb_invalidation - Issue a TLB invalidation on this tilemask for an + * xe_vm_range_tilemask_tlb_inval - Issue a TLB invalidation on this tilemask for an * address range * @vm: The VM * @start: start address @@ -3854,10 +3837,11 @@ void xe_vm_unlock(struct xe_vm *vm) * * Returns 0 for success, negative error code otherwise. 
*/ -int xe_vm_range_tilemask_tlb_invalidation(struct xe_vm *vm, u64 start, - u64 end, u8 tile_mask) +int xe_vm_range_tilemask_tlb_inval(struct xe_vm *vm, u64 start, + u64 end, u8 tile_mask) { - struct xe_gt_tlb_invalidation_fence fence[XE_MAX_TILES_PER_DEVICE * XE_MAX_GT_PER_TILE]; + struct xe_tlb_inval_fence + fence[XE_MAX_TILES_PER_DEVICE * XE_MAX_GT_PER_TILE]; struct xe_tile *tile; u32 fence_id = 0; u8 id; @@ -3867,39 +3851,36 @@ int xe_vm_range_tilemask_tlb_invalidation(struct xe_vm *vm, u64 start, return 0; for_each_tile(tile, vm->xe, id) { - if (tile_mask & BIT(id)) { - xe_gt_tlb_invalidation_fence_init(tile->primary_gt, - &fence[fence_id], true); - - err = xe_gt_tlb_invalidation_range(tile->primary_gt, - &fence[fence_id], - start, - end, - vm->usm.asid); - if (err) - goto wait; - ++fence_id; + if (!(tile_mask & BIT(id))) + continue; - if (!tile->media_gt) - continue; + xe_tlb_inval_fence_init(&tile->primary_gt->tlb_inval, + &fence[fence_id], true); - xe_gt_tlb_invalidation_fence_init(tile->media_gt, - &fence[fence_id], true); + err = xe_tlb_inval_range(&tile->primary_gt->tlb_inval, + &fence[fence_id], start, end, + vm->usm.asid); + if (err) + goto wait; + ++fence_id; - err = xe_gt_tlb_invalidation_range(tile->media_gt, - &fence[fence_id], - start, - end, - vm->usm.asid); - if (err) - goto wait; - ++fence_id; - } + if (!tile->media_gt) + continue; + + xe_tlb_inval_fence_init(&tile->media_gt->tlb_inval, + &fence[fence_id], true); + + err = xe_tlb_inval_range(&tile->media_gt->tlb_inval, + &fence[fence_id], start, end, + vm->usm.asid); + if (err) + goto wait; + ++fence_id; } wait: for (id = 0; id < fence_id; ++id) - xe_gt_tlb_invalidation_fence_wait(&fence[id]); + xe_tlb_inval_fence_wait(&fence[id]); return err; } @@ -3937,13 +3918,13 @@ int xe_vm_invalidate_vma(struct xe_vma *vma) */ if (IS_ENABLED(CONFIG_PROVE_LOCKING)) { if (xe_vma_is_userptr(vma)) { - lockdep_assert(lockdep_is_held_type(&vm->userptr.notifier_lock, 0) || - (lockdep_is_held_type(&vm->userptr.notifier_lock, 1) && + lockdep_assert(lockdep_is_held_type(&vm->svm.gpusvm.notifier_lock, 0) || + (lockdep_is_held_type(&vm->svm.gpusvm.notifier_lock, 1) && lockdep_is_held(&xe_vm_resv(vm)->lock.base))); WARN_ON_ONCE(!mmu_interval_check_retry (&to_userptr_vma(vma)->userptr.notifier, - to_userptr_vma(vma)->userptr.notifier_seq)); + to_userptr_vma(vma)->userptr.pages.notifier_seq)); WARN_ON_ONCE(!dma_resv_test_signaled(xe_vm_resv(vm), DMA_RESV_USAGE_BOOKKEEP)); @@ -3958,8 +3939,8 @@ int xe_vm_invalidate_vma(struct xe_vma *vma) xe_device_wmb(xe); - ret = xe_vm_range_tilemask_tlb_invalidation(xe_vma_vm(vma), xe_vma_start(vma), - xe_vma_end(vma), tile_mask); + ret = xe_vm_range_tilemask_tlb_inval(xe_vma_vm(vma), xe_vma_start(vma), + xe_vma_end(vma), tile_mask); /* WRITE_ONCE pairs with READ_ONCE in xe_vm_has_valid_gpu_mapping() */ WRITE_ONCE(vma->tile_invalidated, vma->tile_mask); @@ -4161,3 +4142,221 @@ void xe_vm_snapshot_free(struct xe_vm_snapshot *snap) } kvfree(snap); } + +/** + * xe_vma_need_vram_for_atomic - Check if VMA needs VRAM migration for atomic operations + * @xe: Pointer to the XE device structure + * @vma: Pointer to the virtual memory area (VMA) structure + * @is_atomic: In pagefault path and atomic operation + * + * This function determines whether the given VMA needs to be migrated to + * VRAM in order to do atomic GPU operation. 
+ * + * Return: + * 1 - Migration to VRAM is required + * 0 - Migration is not required + * -EACCES - Invalid access for atomic memory attr + * + */ +int xe_vma_need_vram_for_atomic(struct xe_device *xe, struct xe_vma *vma, bool is_atomic) +{ + u32 atomic_access = xe_vma_bo(vma) ? xe_vma_bo(vma)->attr.atomic_access : + vma->attr.atomic_access; + + if (!IS_DGFX(xe) || !is_atomic) + return false; + + /* + * NOTE: The checks implemented here are platform-specific. For + * instance, on a device supporting CXL atomics, these would ideally + * work universally without additional handling. + */ + switch (atomic_access) { + case DRM_XE_ATOMIC_DEVICE: + return !xe->info.has_device_atomics_on_smem; + + case DRM_XE_ATOMIC_CPU: + return -EACCES; + + case DRM_XE_ATOMIC_UNDEFINED: + case DRM_XE_ATOMIC_GLOBAL: + default: + return 1; + } +} + +static int xe_vm_alloc_vma(struct xe_vm *vm, + struct drm_gpuvm_map_req *map_req, + bool is_madvise) +{ + struct xe_vma_ops vops; + struct drm_gpuva_ops *ops = NULL; + struct drm_gpuva_op *__op; + unsigned int vma_flags = 0; + bool remap_op = false; + struct xe_vma_mem_attr tmp_attr; + u16 default_pat; + int err; + + lockdep_assert_held_write(&vm->lock); + + if (is_madvise) + ops = drm_gpuvm_madvise_ops_create(&vm->gpuvm, map_req); + else + ops = drm_gpuvm_sm_map_ops_create(&vm->gpuvm, map_req); + + if (IS_ERR(ops)) + return PTR_ERR(ops); + + if (list_empty(&ops->list)) { + err = 0; + goto free_ops; + } + + drm_gpuva_for_each_op(__op, ops) { + struct xe_vma_op *op = gpuva_op_to_vma_op(__op); + struct xe_vma *vma = NULL; + + if (!is_madvise) { + if (__op->op == DRM_GPUVA_OP_UNMAP) { + vma = gpuva_to_vma(op->base.unmap.va); + XE_WARN_ON(!xe_vma_has_default_mem_attrs(vma)); + default_pat = vma->attr.default_pat_index; + vma_flags = vma->gpuva.flags; + } + + if (__op->op == DRM_GPUVA_OP_REMAP) { + vma = gpuva_to_vma(op->base.remap.unmap->va); + default_pat = vma->attr.default_pat_index; + vma_flags = vma->gpuva.flags; + } + + if (__op->op == DRM_GPUVA_OP_MAP) { + op->map.vma_flags |= vma_flags & XE_VMA_CREATE_MASK; + op->map.pat_index = default_pat; + } + } else { + if (__op->op == DRM_GPUVA_OP_REMAP) { + vma = gpuva_to_vma(op->base.remap.unmap->va); + xe_assert(vm->xe, !remap_op); + xe_assert(vm->xe, xe_vma_has_no_bo(vma)); + remap_op = true; + vma_flags = vma->gpuva.flags; + } + + if (__op->op == DRM_GPUVA_OP_MAP) { + xe_assert(vm->xe, remap_op); + remap_op = false; + /* + * In case of madvise ops DRM_GPUVA_OP_MAP is + * always after DRM_GPUVA_OP_REMAP, so ensure + * to propagate the flags from the vma we're + * unmapping. + */ + op->map.vma_flags |= vma_flags & XE_VMA_CREATE_MASK; + } + } + print_op(vm->xe, __op); + } + + xe_vma_ops_init(&vops, vm, NULL, NULL, 0); + + if (is_madvise) + vops.flags |= XE_VMA_OPS_FLAG_MADVISE; + + err = vm_bind_ioctl_ops_parse(vm, ops, &vops); + if (err) + goto unwind_ops; + + xe_vm_lock(vm, false); + + drm_gpuva_for_each_op(__op, ops) { + struct xe_vma_op *op = gpuva_op_to_vma_op(__op); + struct xe_vma *vma; + + if (__op->op == DRM_GPUVA_OP_UNMAP) { + vma = gpuva_to_vma(op->base.unmap.va); + /* There should be no unmap for madvise */ + if (is_madvise) + XE_WARN_ON("UNEXPECTED UNMAP"); + + xe_vma_destroy(vma, NULL); + } else if (__op->op == DRM_GPUVA_OP_REMAP) { + vma = gpuva_to_vma(op->base.remap.unmap->va); + /* In case of madvise ops Store attributes for REMAP UNMAPPED + * VMA, so they can be assigned to newly MAP created vma. 
+ */ + if (is_madvise) + tmp_attr = vma->attr; + + xe_vma_destroy(gpuva_to_vma(op->base.remap.unmap->va), NULL); + } else if (__op->op == DRM_GPUVA_OP_MAP) { + vma = op->map.vma; + /* In case of madvise call, MAP will always be follwed by REMAP. + * Therefore temp_attr will always have sane values, making it safe to + * copy them to new vma. + */ + if (is_madvise) + vma->attr = tmp_attr; + } + } + + xe_vm_unlock(vm); + drm_gpuva_ops_free(&vm->gpuvm, ops); + return 0; + +unwind_ops: + vm_bind_ioctl_ops_unwind(vm, &ops, 1); +free_ops: + drm_gpuva_ops_free(&vm->gpuvm, ops); + return err; +} + +/** + * xe_vm_alloc_madvise_vma - Allocate VMA's with madvise ops + * @vm: Pointer to the xe_vm structure + * @start: Starting input address + * @range: Size of the input range + * + * This function splits existing vma to create new vma for user provided input range + * + * Return: 0 if success + */ +int xe_vm_alloc_madvise_vma(struct xe_vm *vm, uint64_t start, uint64_t range) +{ + struct drm_gpuvm_map_req map_req = { + .map.va.addr = start, + .map.va.range = range, + }; + + lockdep_assert_held_write(&vm->lock); + + vm_dbg(&vm->xe->drm, "MADVISE_OPS_CREATE: addr=0x%016llx, size=0x%016llx", start, range); + + return xe_vm_alloc_vma(vm, &map_req, true); +} + +/** + * xe_vm_alloc_cpu_addr_mirror_vma - Allocate CPU addr mirror vma + * @vm: Pointer to the xe_vm structure + * @start: Starting input address + * @range: Size of the input range + * + * This function splits/merges existing vma to create new vma for user provided input range + * + * Return: 0 if success + */ +int xe_vm_alloc_cpu_addr_mirror_vma(struct xe_vm *vm, uint64_t start, uint64_t range) +{ + struct drm_gpuvm_map_req map_req = { + .map.va.addr = start, + .map.va.range = range, + }; + + lockdep_assert_held_write(&vm->lock); + + vm_dbg(&vm->xe->drm, "CPU_ADDR_MIRROR_VMA_OPS_CREATE: addr=0x%016llx, size=0x%016llx", + start, range); + + return xe_vm_alloc_vma(vm, &map_req, false); +} diff --git a/drivers/gpu/drm/xe/xe_vm.h b/drivers/gpu/drm/xe/xe_vm.h index 3475a118f666..ef8a5019574e 100644 --- a/drivers/gpu/drm/xe/xe_vm.h +++ b/drivers/gpu/drm/xe/xe_vm.h @@ -26,7 +26,7 @@ struct xe_sync_entry; struct xe_svm_range; struct drm_exec; -struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags); +struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags, struct xe_file *xef); struct xe_vm *xe_vm_lookup(struct xe_file *xef, u32 id); int xe_vma_cmp_vma_cb(const void *key, const struct rb_node *node); @@ -66,6 +66,8 @@ static inline bool xe_vm_is_closed_or_banned(struct xe_vm *vm) struct xe_vma * xe_vm_find_overlapping_vma(struct xe_vm *vm, u64 start, u64 range); +bool xe_vma_has_default_mem_attrs(struct xe_vma *vma); + /** * xe_vm_has_scratch() - Whether the vm is configured for scratch PTEs * @vm: The vm @@ -171,6 +173,12 @@ static inline bool xe_vma_is_userptr(struct xe_vma *vma) struct xe_vma *xe_vm_find_vma_by_addr(struct xe_vm *vm, u64 page_addr); +int xe_vma_need_vram_for_atomic(struct xe_device *xe, struct xe_vma *vma, bool is_atomic); + +int xe_vm_alloc_madvise_vma(struct xe_vm *vm, uint64_t addr, uint64_t size); + +int xe_vm_alloc_cpu_addr_mirror_vma(struct xe_vm *vm, uint64_t addr, uint64_t size); + /** * to_userptr_vma() - Return a pointer to an embedding userptr vma * @vma: Pointer to the embedded struct xe_vma @@ -191,7 +199,7 @@ int xe_vm_destroy_ioctl(struct drm_device *dev, void *data, struct drm_file *file); int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file); - +int xe_vm_query_vmas_attrs_ioctl(struct 
drm_device *dev, void *data, struct drm_file *file); void xe_vm_close_and_put(struct xe_vm *vm); static inline bool xe_vm_in_fault_mode(struct xe_vm *vm) @@ -212,12 +220,6 @@ static inline bool xe_vm_in_preempt_fence_mode(struct xe_vm *vm) int xe_vm_add_compute_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q); void xe_vm_remove_compute_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q); -int xe_vm_userptr_pin(struct xe_vm *vm); - -int __xe_vm_userptr_needs_repin(struct xe_vm *vm); - -int xe_vm_userptr_check_repin(struct xe_vm *vm); - int xe_vm_rebind(struct xe_vm *vm, bool rebind_worker); struct dma_fence *xe_vma_rebind(struct xe_vm *vm, struct xe_vma *vma, u8 tile_mask); @@ -228,8 +230,8 @@ struct dma_fence *xe_vm_range_rebind(struct xe_vm *vm, struct dma_fence *xe_vm_range_unbind(struct xe_vm *vm, struct xe_svm_range *range); -int xe_vm_range_tilemask_tlb_invalidation(struct xe_vm *vm, u64 start, - u64 end, u8 tile_mask); +int xe_vm_range_tilemask_tlb_inval(struct xe_vm *vm, u64 start, + u64 end, u8 tile_mask); int xe_vm_invalidate_vma(struct xe_vma *vma); @@ -258,12 +260,6 @@ static inline void xe_vm_reactivate_rebind(struct xe_vm *vm) } } -int xe_vma_userptr_pin_pages(struct xe_userptr_vma *uvma); - -int xe_vma_userptr_check_repin(struct xe_userptr_vma *uvma); - -bool xe_vm_validate_should_retry(struct drm_exec *exec, int err, ktime_t *end); - int xe_vm_lock_vma(struct drm_exec *exec, struct xe_vma *vma); int xe_vm_validate_rebind(struct xe_vm *vm, struct drm_exec *exec, @@ -273,6 +269,8 @@ struct dma_fence *xe_vm_bind_kernel_bo(struct xe_vm *vm, struct xe_bo *bo, struct xe_exec_queue *q, u64 addr, enum xe_cache_level cache_lvl); +void xe_vm_resume_rebind_worker(struct xe_vm *vm); + /** * xe_vm_resv() - Return's the vm's reservation object * @vm: The vm @@ -292,6 +290,8 @@ void xe_vm_kill(struct xe_vm *vm, bool unlocked); */ #define xe_vm_assert_held(vm) dma_resv_assert_held(xe_vm_resv(vm)) +int xe_vm_drm_exec_lock(struct xe_vm *vm, struct drm_exec *exec); + #if IS_ENABLED(CONFIG_DRM_XE_DEBUG_VM) #define vm_dbg drm_dbg #else @@ -315,22 +315,14 @@ void xe_vm_snapshot_free(struct xe_vm_snapshot *snap); * Register this task as currently making bos resident for the vm. Intended * to avoid eviction by the same task of shared bos bound to the vm. * Call with the vm's resv lock held. - * - * Return: A pin cookie that should be used for xe_vm_clear_validating(). */ -static inline struct pin_cookie xe_vm_set_validating(struct xe_vm *vm, - bool allow_res_evict) +static inline void xe_vm_set_validating(struct xe_vm *vm, bool allow_res_evict) { - struct pin_cookie cookie = {}; - if (vm && !allow_res_evict) { xe_vm_assert_held(vm); - cookie = lockdep_pin_lock(&xe_vm_resv(vm)->lock.base); /* Pairs with READ_ONCE in xe_vm_is_validating() */ - WRITE_ONCE(vm->validating, current); + WRITE_ONCE(vm->validation.validating, current); } - - return cookie; } /** @@ -338,19 +330,16 @@ static inline struct pin_cookie xe_vm_set_validating(struct xe_vm *vm, * @vm: Pointer to the vm or NULL * @allow_res_evict: Eviction from @vm was allowed. Must be set to the same * value as for xe_vm_set_validation(). - * @cookie: Cookie obtained from xe_vm_set_validating(). * * Register this task as currently making bos resident for the vm. Intended * to avoid eviction by the same task of shared bos bound to the vm. * Call with the vm's resv lock held. 
*/ -static inline void xe_vm_clear_validating(struct xe_vm *vm, bool allow_res_evict, - struct pin_cookie cookie) +static inline void xe_vm_clear_validating(struct xe_vm *vm, bool allow_res_evict) { if (vm && !allow_res_evict) { - lockdep_unpin_lock(&xe_vm_resv(vm)->lock.base, cookie); /* Pairs with READ_ONCE in xe_vm_is_validating() */ - WRITE_ONCE(vm->validating, NULL); + WRITE_ONCE(vm->validation.validating, NULL); } } @@ -368,7 +357,7 @@ static inline void xe_vm_clear_validating(struct xe_vm *vm, bool allow_res_evict static inline bool xe_vm_is_validating(struct xe_vm *vm) { /* Pairs with WRITE_ONCE in xe_vm_is_validating() */ - if (READ_ONCE(vm->validating) == current) { + if (READ_ONCE(vm->validation.validating) == current) { xe_vm_assert_held(vm); return true; } @@ -376,6 +365,34 @@ static inline bool xe_vm_is_validating(struct xe_vm *vm) } /** + * xe_vm_set_validation_exec() - Accessor to set the drm_exec object + * @vm: The vm we want to register a drm_exec object with. + * @exec: The exec object we want to register. + * + * Set the drm_exec object used to lock the vm's resv. + */ +static inline void xe_vm_set_validation_exec(struct xe_vm *vm, struct drm_exec *exec) +{ + xe_vm_assert_held(vm); + xe_assert(vm->xe, !!exec ^ !!vm->validation._exec); + vm->validation._exec = exec; +} + +/** + * xe_vm_set_validation_exec() - Accessor to read the drm_exec object + * @vm: The vm we want to register a drm_exec object with. + * + * Return: The drm_exec object used to lock the vm's resv. The value + * is a valid pointer, %NULL, or one of the special values defined in + * xe_validation.h. + */ +static inline struct drm_exec *xe_vm_validation_exec(struct xe_vm *vm) +{ + xe_vm_assert_held(vm); + return vm->validation._exec; +} + +/** * xe_vm_has_valid_gpu_mapping() - Advisory helper to check if VMA or SVM range has * a valid GPU mapping * @tile: The tile which the GPU mapping belongs to @@ -394,11 +411,4 @@ static inline bool xe_vm_is_validating(struct xe_vm *vm) #define xe_vm_has_valid_gpu_mapping(tile, tile_present, tile_invalidated) \ ((READ_ONCE(tile_present) & ~READ_ONCE(tile_invalidated)) & BIT((tile)->id)) -#if IS_ENABLED(CONFIG_DRM_XE_USERPTR_INVAL_INJECT) -void xe_vma_userptr_force_invalidate(struct xe_userptr_vma *uvma); -#else -static inline void xe_vma_userptr_force_invalidate(struct xe_userptr_vma *uvma) -{ -} -#endif #endif diff --git a/drivers/gpu/drm/xe/xe_vm_madvise.c b/drivers/gpu/drm/xe/xe_vm_madvise.c new file mode 100644 index 000000000000..cad3cf627c3f --- /dev/null +++ b/drivers/gpu/drm/xe/xe_vm_madvise.c @@ -0,0 +1,431 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2025 Intel Corporation + */ + +#include "xe_vm_madvise.h" + +#include <linux/nospec.h> +#include <drm/xe_drm.h> + +#include "xe_bo.h" +#include "xe_pat.h" +#include "xe_pt.h" +#include "xe_svm.h" + +struct xe_vmas_in_madvise_range { + u64 addr; + u64 range; + struct xe_vma **vmas; + int num_vmas; + bool has_bo_vmas; + bool has_svm_userptr_vmas; +}; + +static int get_vmas(struct xe_vm *vm, struct xe_vmas_in_madvise_range *madvise_range) +{ + u64 addr = madvise_range->addr; + u64 range = madvise_range->range; + + struct xe_vma **__vmas; + struct drm_gpuva *gpuva; + int max_vmas = 8; + + lockdep_assert_held(&vm->lock); + + madvise_range->num_vmas = 0; + madvise_range->vmas = kmalloc_array(max_vmas, sizeof(*madvise_range->vmas), GFP_KERNEL); + if (!madvise_range->vmas) + return -ENOMEM; + + vm_dbg(&vm->xe->drm, "VMA's in range: start=0x%016llx, end=0x%016llx", addr, addr + range); + + 
drm_gpuvm_for_each_va_range(gpuva, &vm->gpuvm, addr, addr + range) { + struct xe_vma *vma = gpuva_to_vma(gpuva); + + if (xe_vma_bo(vma)) + madvise_range->has_bo_vmas = true; + else if (xe_vma_is_cpu_addr_mirror(vma) || xe_vma_is_userptr(vma)) + madvise_range->has_svm_userptr_vmas = true; + + if (madvise_range->num_vmas == max_vmas) { + max_vmas <<= 1; + __vmas = krealloc(madvise_range->vmas, + max_vmas * sizeof(*madvise_range->vmas), + GFP_KERNEL); + if (!__vmas) { + kfree(madvise_range->vmas); + return -ENOMEM; + } + madvise_range->vmas = __vmas; + } + + madvise_range->vmas[madvise_range->num_vmas] = vma; + (madvise_range->num_vmas)++; + } + + if (!madvise_range->num_vmas) + kfree(madvise_range->vmas); + + vm_dbg(&vm->xe->drm, "madvise_range-num_vmas = %d\n", madvise_range->num_vmas); + + return 0; +} + +static void madvise_preferred_mem_loc(struct xe_device *xe, struct xe_vm *vm, + struct xe_vma **vmas, int num_vmas, + struct drm_xe_madvise *op) +{ + int i; + + xe_assert(vm->xe, op->type == DRM_XE_MEM_RANGE_ATTR_PREFERRED_LOC); + + for (i = 0; i < num_vmas; i++) { + /*TODO: Extend attributes to bo based vmas */ + if ((vmas[i]->attr.preferred_loc.devmem_fd == op->preferred_mem_loc.devmem_fd && + vmas[i]->attr.preferred_loc.migration_policy == + op->preferred_mem_loc.migration_policy) || + !xe_vma_is_cpu_addr_mirror(vmas[i])) { + vmas[i]->skip_invalidation = true; + } else { + vmas[i]->skip_invalidation = false; + vmas[i]->attr.preferred_loc.devmem_fd = op->preferred_mem_loc.devmem_fd; + /* Till multi-device support is not added migration_policy + * is of no use and can be ignored. + */ + vmas[i]->attr.preferred_loc.migration_policy = + op->preferred_mem_loc.migration_policy; + } + } +} + +static void madvise_atomic(struct xe_device *xe, struct xe_vm *vm, + struct xe_vma **vmas, int num_vmas, + struct drm_xe_madvise *op) +{ + struct xe_bo *bo; + int i; + + xe_assert(vm->xe, op->type == DRM_XE_MEM_RANGE_ATTR_ATOMIC); + xe_assert(vm->xe, op->atomic.val <= DRM_XE_ATOMIC_CPU); + + for (i = 0; i < num_vmas; i++) { + if (xe_vma_is_userptr(vmas[i]) && + !(op->atomic.val == DRM_XE_ATOMIC_DEVICE && + xe->info.has_device_atomics_on_smem)) { + vmas[i]->skip_invalidation = true; + continue; + } + + if (vmas[i]->attr.atomic_access == op->atomic.val) { + vmas[i]->skip_invalidation = true; + } else { + vmas[i]->skip_invalidation = false; + vmas[i]->attr.atomic_access = op->atomic.val; + } + + bo = xe_vma_bo(vmas[i]); + if (!bo || bo->attr.atomic_access == op->atomic.val) + continue; + + vmas[i]->skip_invalidation = false; + xe_bo_assert_held(bo); + bo->attr.atomic_access = op->atomic.val; + + /* Invalidate cpu page table, so bo can migrate to smem in next access */ + if (xe_bo_is_vram(bo) && + (bo->attr.atomic_access == DRM_XE_ATOMIC_CPU || + bo->attr.atomic_access == DRM_XE_ATOMIC_GLOBAL)) + ttm_bo_unmap_virtual(&bo->ttm); + } +} + +static void madvise_pat_index(struct xe_device *xe, struct xe_vm *vm, + struct xe_vma **vmas, int num_vmas, + struct drm_xe_madvise *op) +{ + int i; + + xe_assert(vm->xe, op->type == DRM_XE_MEM_RANGE_ATTR_PAT); + + for (i = 0; i < num_vmas; i++) { + if (vmas[i]->attr.pat_index == op->pat_index.val) { + vmas[i]->skip_invalidation = true; + } else { + vmas[i]->skip_invalidation = false; + vmas[i]->attr.pat_index = op->pat_index.val; + } + } +} + +typedef void (*madvise_func)(struct xe_device *xe, struct xe_vm *vm, + struct xe_vma **vmas, int num_vmas, + struct drm_xe_madvise *op); + +static const madvise_func madvise_funcs[] = { + [DRM_XE_MEM_RANGE_ATTR_PREFERRED_LOC] = 
madvise_preferred_mem_loc, + [DRM_XE_MEM_RANGE_ATTR_ATOMIC] = madvise_atomic, + [DRM_XE_MEM_RANGE_ATTR_PAT] = madvise_pat_index, +}; + +static u8 xe_zap_ptes_in_madvise_range(struct xe_vm *vm, u64 start, u64 end) +{ + struct drm_gpuva *gpuva; + struct xe_tile *tile; + u8 id, tile_mask = 0; + + lockdep_assert_held_write(&vm->lock); + + /* Wait for pending binds */ + if (dma_resv_wait_timeout(xe_vm_resv(vm), DMA_RESV_USAGE_BOOKKEEP, + false, MAX_SCHEDULE_TIMEOUT) <= 0) + XE_WARN_ON(1); + + drm_gpuvm_for_each_va_range(gpuva, &vm->gpuvm, start, end) { + struct xe_vma *vma = gpuva_to_vma(gpuva); + + if (vma->skip_invalidation || xe_vma_is_null(vma)) + continue; + + if (xe_vma_is_cpu_addr_mirror(vma)) { + tile_mask |= xe_svm_ranges_zap_ptes_in_range(vm, + xe_vma_start(vma), + xe_vma_end(vma)); + } else { + for_each_tile(tile, vm->xe, id) { + if (xe_pt_zap_ptes(tile, vma)) { + tile_mask |= BIT(id); + + /* + * WRITE_ONCE pairs with READ_ONCE + * in xe_vm_has_valid_gpu_mapping() + */ + WRITE_ONCE(vma->tile_invalidated, + vma->tile_invalidated | BIT(id)); + } + } + } + } + + return tile_mask; +} + +static int xe_vm_invalidate_madvise_range(struct xe_vm *vm, u64 start, u64 end) +{ + u8 tile_mask = xe_zap_ptes_in_madvise_range(vm, start, end); + + if (!tile_mask) + return 0; + + xe_device_wmb(vm->xe); + + return xe_vm_range_tilemask_tlb_inval(vm, start, end, tile_mask); +} + +static bool madvise_args_are_sane(struct xe_device *xe, const struct drm_xe_madvise *args) +{ + if (XE_IOCTL_DBG(xe, !args)) + return false; + + if (XE_IOCTL_DBG(xe, !IS_ALIGNED(args->start, SZ_4K))) + return false; + + if (XE_IOCTL_DBG(xe, !IS_ALIGNED(args->range, SZ_4K))) + return false; + + if (XE_IOCTL_DBG(xe, args->range < SZ_4K)) + return false; + + switch (args->type) { + case DRM_XE_MEM_RANGE_ATTR_PREFERRED_LOC: + { + s32 fd = (s32)args->preferred_mem_loc.devmem_fd; + + if (XE_IOCTL_DBG(xe, fd < DRM_XE_PREFERRED_LOC_DEFAULT_SYSTEM)) + return false; + + if (XE_IOCTL_DBG(xe, args->preferred_mem_loc.migration_policy > + DRM_XE_MIGRATE_ONLY_SYSTEM_PAGES)) + return false; + + if (XE_IOCTL_DBG(xe, args->preferred_mem_loc.pad)) + return false; + + if (XE_IOCTL_DBG(xe, args->preferred_mem_loc.reserved)) + return false; + break; + } + case DRM_XE_MEM_RANGE_ATTR_ATOMIC: + if (XE_IOCTL_DBG(xe, args->atomic.val > DRM_XE_ATOMIC_CPU)) + return false; + + if (XE_IOCTL_DBG(xe, args->atomic.pad)) + return false; + + if (XE_IOCTL_DBG(xe, args->atomic.reserved)) + return false; + + break; + case DRM_XE_MEM_RANGE_ATTR_PAT: + { + u16 coh_mode = xe_pat_index_get_coh_mode(xe, args->pat_index.val); + + if (XE_IOCTL_DBG(xe, !coh_mode)) + return false; + + if (XE_WARN_ON(coh_mode > XE_COH_AT_LEAST_1WAY)) + return false; + + if (XE_IOCTL_DBG(xe, args->pat_index.pad)) + return false; + + if (XE_IOCTL_DBG(xe, args->pat_index.reserved)) + return false; + break; + } + default: + if (XE_IOCTL_DBG(xe, 1)) + return false; + } + + if (XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1])) + return false; + + return true; +} + +static bool check_bo_args_are_sane(struct xe_vm *vm, struct xe_vma **vmas, + int num_vmas, u32 atomic_val) +{ + struct xe_device *xe = vm->xe; + struct xe_bo *bo; + int i; + + for (i = 0; i < num_vmas; i++) { + bo = xe_vma_bo(vmas[i]); + if (!bo) + continue; + /* + * NOTE: The following atomic checks are platform-specific. For example, + * if a device supports CXL atomics, these may not be necessary or + * may behave differently. 
+ */ + if (XE_IOCTL_DBG(xe, atomic_val == DRM_XE_ATOMIC_CPU && + !(bo->flags & XE_BO_FLAG_SYSTEM))) + return false; + + if (XE_IOCTL_DBG(xe, atomic_val == DRM_XE_ATOMIC_DEVICE && + !(bo->flags & XE_BO_FLAG_VRAM0) && + !(bo->flags & XE_BO_FLAG_VRAM1) && + !(bo->flags & XE_BO_FLAG_SYSTEM && + xe->info.has_device_atomics_on_smem))) + return false; + + if (XE_IOCTL_DBG(xe, atomic_val == DRM_XE_ATOMIC_GLOBAL && + (!(bo->flags & XE_BO_FLAG_SYSTEM) || + (!(bo->flags & XE_BO_FLAG_VRAM0) && + !(bo->flags & XE_BO_FLAG_VRAM1))))) + return false; + } + return true; +} +/** + * xe_vm_madvise_ioctl - Handle MADVISE ioctl for a VM + * @dev: DRM device pointer + * @data: Pointer to ioctl data (drm_xe_madvise*) + * @file: DRM file pointer + * + * Handles the MADVISE ioctl to provide memory advice for VMAs within the + * input range. + * + * Return: 0 on success or a negative error code on failure. + */ +int xe_vm_madvise_ioctl(struct drm_device *dev, void *data, struct drm_file *file) +{ + struct xe_device *xe = to_xe_device(dev); + struct xe_file *xef = to_xe_file(file); + struct drm_xe_madvise *args = data; + struct xe_vmas_in_madvise_range madvise_range = {.addr = args->start, + .range = args->range, }; + struct xe_vm *vm; + struct drm_exec exec; + int err, attr_type; + + vm = xe_vm_lookup(xef, args->vm_id); + if (XE_IOCTL_DBG(xe, !vm)) + return -EINVAL; + + if (!madvise_args_are_sane(vm->xe, args)) { + err = -EINVAL; + goto put_vm; + } + + xe_svm_flush(vm); + + err = down_write_killable(&vm->lock); + if (err) + goto put_vm; + + if (XE_IOCTL_DBG(xe, xe_vm_is_closed_or_banned(vm))) { + err = -ENOENT; + goto unlock_vm; + } + + err = xe_vm_alloc_madvise_vma(vm, args->start, args->range); + if (err) + goto unlock_vm; + + err = get_vmas(vm, &madvise_range); + if (err || !madvise_range.num_vmas) + goto unlock_vm; + + if (madvise_range.has_bo_vmas) { + if (args->type == DRM_XE_MEM_RANGE_ATTR_ATOMIC) { + if (!check_bo_args_are_sane(vm, madvise_range.vmas, + madvise_range.num_vmas, + args->atomic.val)) { + err = -EINVAL; + goto unlock_vm; + } + } + + drm_exec_init(&exec, DRM_EXEC_IGNORE_DUPLICATES | DRM_EXEC_INTERRUPTIBLE_WAIT, 0); + drm_exec_until_all_locked(&exec) { + for (int i = 0; i < madvise_range.num_vmas; i++) { + struct xe_bo *bo = xe_vma_bo(madvise_range.vmas[i]); + + if (!bo) + continue; + err = drm_exec_lock_obj(&exec, &bo->ttm.base); + drm_exec_retry_on_contention(&exec); + if (err) + goto err_fini; + } + } + } + + if (madvise_range.has_svm_userptr_vmas) { + err = xe_svm_notifier_lock_interruptible(vm); + if (err) + goto err_fini; + } + + attr_type = array_index_nospec(args->type, ARRAY_SIZE(madvise_funcs)); + madvise_funcs[attr_type](xe, vm, madvise_range.vmas, madvise_range.num_vmas, args); + + err = xe_vm_invalidate_madvise_range(vm, args->start, args->start + args->range); + + if (madvise_range.has_svm_userptr_vmas) + xe_svm_notifier_unlock(vm); + +err_fini: + if (madvise_range.has_bo_vmas) + drm_exec_fini(&exec); + kfree(madvise_range.vmas); + madvise_range.vmas = NULL; +unlock_vm: + up_write(&vm->lock); +put_vm: + xe_vm_put(vm); + return err; +} diff --git a/drivers/gpu/drm/xe/xe_vm_madvise.h b/drivers/gpu/drm/xe/xe_vm_madvise.h new file mode 100644 index 000000000000..b0e1fc445f23 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_vm_madvise.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2025 Intel Corporation + */ + +#ifndef _XE_VM_MADVISE_H_ +#define _XE_VM_MADVISE_H_ + +struct drm_device; +struct drm_file; + +int xe_vm_madvise_ioctl(struct drm_device *dev, void *data, + 
struct drm_file *file); + +#endif diff --git a/drivers/gpu/drm/xe/xe_vm_types.h b/drivers/gpu/drm/xe/xe_vm_types.h index bed6088e1bb3..d6e2a0fdd4b3 100644 --- a/drivers/gpu/drm/xe/xe_vm_types.h +++ b/drivers/gpu/drm/xe/xe_vm_types.h @@ -17,6 +17,7 @@ #include "xe_device_types.h" #include "xe_pt_types.h" #include "xe_range_fence.h" +#include "xe_userptr.h" struct xe_bo; struct xe_svm_range; @@ -45,36 +46,44 @@ struct xe_vm_pgtable_update_op; #define XE_VMA_PTE_COMPACT (DRM_GPUVA_USERBITS << 7) #define XE_VMA_DUMPABLE (DRM_GPUVA_USERBITS << 8) #define XE_VMA_SYSTEM_ALLOCATOR (DRM_GPUVA_USERBITS << 9) +#define XE_VMA_MADV_AUTORESET (DRM_GPUVA_USERBITS << 10) + +/** + * struct xe_vma_mem_attr - memory attributes associated with vma + */ +struct xe_vma_mem_attr { + /** @preferred_loc: preferred memory location */ + struct { + /** @preferred_loc.migration_policy: Pages migration policy */ + u32 migration_policy; + + /** + * @preferred_loc.devmem_fd: used for determining pagemap_fd + * requested by user. DRM_XE_PREFERRED_LOC_DEFAULT_SYSTEM and + * DRM_XE_PREFERRED_LOC_DEFAULT_DEVICE mean system memory or + * closest device memory respectively. + */ + u32 devmem_fd; + } preferred_loc; -/** struct xe_userptr - User pointer */ -struct xe_userptr { - /** @invalidate_link: Link for the vm::userptr.invalidated list */ - struct list_head invalidate_link; - /** @userptr: link into VM repin list if userptr. */ - struct list_head repin_link; /** - * @notifier: MMU notifier for user pointer (invalidation call back) + * @atomic_access: The atomic access type for the vma + * See %DRM_XE_VMA_ATOMIC_UNDEFINED, %DRM_XE_VMA_ATOMIC_DEVICE, + * %DRM_XE_VMA_ATOMIC_GLOBAL, and %DRM_XE_VMA_ATOMIC_CPU for possible + * values. These are defined in uapi/drm/xe_drm.h. */ - struct mmu_interval_notifier notifier; - /** @sgt: storage for a scatter gather table */ - struct sg_table sgt; - /** @sg: allocated scatter gather table */ - struct sg_table *sg; - /** @notifier_seq: notifier sequence number */ - unsigned long notifier_seq; - /** @unmap_mutex: Mutex protecting dma-unmapping */ - struct mutex unmap_mutex; + u32 atomic_access; + /** - * @initial_bind: user pointer has been bound at least once. - * write: vm->userptr.notifier_lock in read mode and vm->resv held. - * read: vm->userptr.notifier_lock in write mode or vm->resv held. + * @default_pat_index: The pat index set for the VMA during the first bind by the user. */ - bool initial_bind; - /** @mapped: Whether the @sgt sg-table is dma-mapped. Protected by @unmap_mutex. */ - bool mapped; -#if IS_ENABLED(CONFIG_DRM_XE_USERPTR_INVAL_INJECT) - u32 divisor; -#endif + u16 default_pat_index; + + /** + * @pat_index: The pat index to use when encoding the PTEs for this vma. + * Same as default_pat_index unless overwritten by madvise. + */ + u16 pat_index; }; struct xe_vma { @@ -102,10 +111,10 @@ struct xe_vma { /** * @tile_invalidated: Tile mask of binding are invalidated for this VMA. - * protected by BO's resv and for userptrs, vm->userptr.notifier_lock in - * write mode for writing or vm->userptr.notifier_lock in read mode and + * protected by BO's resv and for userptrs, vm->svm.gpusvm.notifier_lock in + * write mode for writing or vm->svm.gpusvm.notifier_lock in read mode and * the vm->resv. For stable reading, BO's resv or userptr - * vm->userptr.notifier_lock in read mode is required. Can be + * vm->svm.gpusvm.notifier_lock in read mode is required. Can be * opportunistically read with READ_ONCE outside of locks. 
*/ u8 tile_invalidated; @@ -116,7 +125,7 @@ struct xe_vma { /** * @tile_present: Tile mask of binding are present for this VMA. * protected by vm->lock, vm->resv and for userptrs, - * vm->userptr.notifier_lock for writing. Needs either for reading, + * vm->svm.gpusvm.notifier_lock for writing. Needs either for reading, * but if reading is done under the vm->lock only, it needs to be held * in write mode. */ @@ -126,15 +135,22 @@ struct xe_vma { u8 tile_staged; /** - * @pat_index: The pat index to use when encoding the PTEs for this vma. + * @skip_invalidation: Used in madvise to avoid invalidation + * if mem attributes don't change */ - u16 pat_index; + bool skip_invalidation; /** * @ufence: The user fence that was provided with MAP. * Needs to be signalled before UNMAP can be processed. */ struct xe_user_fence *ufence; + + /** + * @attr: The attributes of the vma which determine the migration policy + * and encoding of the PTEs for this vma. + */ + struct xe_vma_mem_attr attr; }; /** @@ -244,33 +260,7 @@ struct xe_vm { const struct xe_pt_ops *pt_ops; /** @userptr: user pointer state */ - struct { - /** - * @userptr.repin_list: list of VMAs which are user pointers, - * and needs repinning. Protected by @lock. - */ - struct list_head repin_list; - /** - * @notifier_lock: protects notifier in write mode and - * submission in read mode. - */ - struct rw_semaphore notifier_lock; - /** - * @userptr.invalidated_lock: Protects the - * @userptr.invalidated list. - */ - spinlock_t invalidated_lock; - /** - * @userptr.invalidated: List of invalidated userptrs, not yet - * picked - * up for revalidation. Protected from access with the - * @invalidated_lock. Removing items from the list - * additionally requires @lock in write mode, and adding - * items to the list requires either the @userptr.notifer_lock in - * write mode, OR @lock in write mode. - */ - struct list_head invalidated; - } userptr; + struct xe_userptr_vm userptr; /** @preempt: preempt state */ struct { @@ -293,6 +283,11 @@ struct xe_vm { * BOs */ struct work_struct rebind_work; + /** + * @preempt.pm_activate_link: Link to list of rebind workers to be + * kicked on resume. + */ + struct list_head pm_activate_link; } preempt; /** @um: unified memory state */ @@ -313,18 +308,34 @@ struct xe_vm { } error_capture; /** + * @validation: Validation data only valid with the vm resv held. + * Note: This is really the task state of the task holding the vm resv, + * and moving forward we should + * come up with a better way of passing this down the call- + * chain. + */ + struct { + /** + * @validation.validating: The task that is currently making bos resident + * for this vm. + * Protected by the VM's resv for writing. Opportunistic reading can be done + * using READ_ONCE. Note: This is a workaround for the + * TTM eviction_valuable() callback not being passed a struct + * ttm_operation_context(). Future work might want to address this. + */ + struct task_struct *validating; + /** + * @validation._exec: The drm_exec context used when locking the vm resv. + * Protected by the vm's resv. + */ + struct drm_exec *_exec; + } validation; + + /** * @tlb_flush_seqno: Required TLB flush seqno for the next exec. * protected by the vm resv. */ u64 tlb_flush_seqno; - /** - * @validating: The task that is currently making bos resident for this vm. - * Protected by the VM's resv for writing. Opportunistic reading can be done - * using READ_ONCE. 
Note: This is a workaround for the - * TTM eviction_valuable() callback not being passed a struct - * ttm_operation_context(). Future work might want to address this. - */ - struct task_struct *validating; /** @batch_invalidate_tlb: Always invalidate TLB before batch start */ bool batch_invalidate_tlb; /** @xef: XE file handle for tracking this VM's drm client */ @@ -335,17 +346,10 @@ struct xe_vm { struct xe_vma_op_map { /** @vma: VMA to map */ struct xe_vma *vma; + unsigned int vma_flags; /** @immediate: Immediate bind */ bool immediate; /** @read_only: Read only */ - bool read_only; - /** @is_null: is NULL binding */ - bool is_null; - /** @is_cpu_addr_mirror: is CPU address mirror binding */ - bool is_cpu_addr_mirror; - /** @dumpable: whether BO is dumped on GPU hang */ - bool dumpable; - /** @invalidate: invalidate the VMA before bind */ bool invalidate_on_bind; /** @pat_index: The pat index to use for this operation. */ u16 pat_index; @@ -395,8 +399,11 @@ struct xe_vma_op_prefetch_range { struct xarray range; /** @ranges_count: number of svm ranges to map */ u32 ranges_count; - /** @region: memory region to prefetch to */ - u32 region; + /** + * @tile: Pointer to the tile structure containing memory to prefetch. + * NULL if prefetch requested region is smem + */ + struct xe_tile *tile; }; /** enum xe_vma_op_flags - flags for VMA operation */ @@ -462,6 +469,8 @@ struct xe_vma_ops { struct xe_vm_pgtable_update_ops pt_update_ops[XE_MAX_TILES_PER_DEVICE]; /** @flag: signify the properties within xe_vma_ops*/ #define XE_VMA_OPS_FLAG_HAS_SVM_PREFETCH BIT(0) +#define XE_VMA_OPS_FLAG_MADVISE BIT(1) +#define XE_VMA_OPS_ARRAY_OF_BINDS BIT(2) u32 flags; #ifdef TEST_VM_OPS_ERROR /** @inject_error: inject error to test error handling */ diff --git a/drivers/gpu/drm/xe/xe_vram.c b/drivers/gpu/drm/xe/xe_vram.c index e421a74fb87c..652df7a5f4f6 100644 --- a/drivers/gpu/drm/xe/xe_vram.c +++ b/drivers/gpu/drm/xe/xe_vram.c @@ -3,6 +3,7 @@ * Copyright © 2021-2024 Intel Corporation */ +#include <kunit/visibility.h> #include <linux/pci.h> #include <drm/drm_managed.h> @@ -19,19 +20,41 @@ #include "xe_mmio.h" #include "xe_module.h" #include "xe_sriov.h" +#include "xe_ttm_vram_mgr.h" #include "xe_vram.h" +#include "xe_vram_types.h" #define BAR_SIZE_SHIFT 20 -static void -_resize_bar(struct xe_device *xe, int resno, resource_size_t size) +/* + * Release all the BARs that could influence/block LMEMBAR resizing, i.e. 
+ * assigned IORESOURCE_MEM_64 BARs + */ +static void release_bars(struct pci_dev *pdev) +{ + struct resource *res; + int i; + + pci_dev_for_each_resource(pdev, res, i) { + /* Resource already un-assigned, do not reset it */ + if (!res->parent) + continue; + + /* No need to release unrelated BARs */ + if (!(res->flags & IORESOURCE_MEM_64)) + continue; + + pci_release_resource(pdev, i); + } +} + +static void resize_bar(struct xe_device *xe, int resno, resource_size_t size) { struct pci_dev *pdev = to_pci_dev(xe->drm.dev); int bar_size = pci_rebar_bytes_to_size(size); int ret; - if (pci_resource_len(pdev, resno)) - pci_release_resource(pdev, resno); + release_bars(pdev); ret = pci_resize_resource(pdev, resno, bar_size); if (ret) { @@ -47,7 +70,7 @@ _resize_bar(struct xe_device *xe, int resno, resource_size_t size) * if force_vram_bar_size is set, attempt to set to the requested size * else set to maximum possible size */ -static void resize_vram_bar(struct xe_device *xe) +void xe_vram_resize_bar(struct xe_device *xe) { int force_vram_bar_size = xe_modparam.force_vram_bar_size; struct pci_dev *pdev = to_pci_dev(xe->drm.dev); @@ -116,7 +139,7 @@ static void resize_vram_bar(struct xe_device *xe) pci_read_config_dword(pdev, PCI_COMMAND, &pci_cmd); pci_write_config_dword(pdev, PCI_COMMAND, pci_cmd & ~PCI_COMMAND_MEMORY); - _resize_bar(xe, LMEM_BAR, rebar_size); + resize_bar(xe, LMEM_BAR, rebar_size); pci_assign_unassigned_bus_resources(pdev->bus); pci_write_config_dword(pdev, PCI_COMMAND, pci_cmd); @@ -136,7 +159,7 @@ static bool resource_is_valid(struct pci_dev *pdev, int bar) return true; } -static int determine_lmem_bar_size(struct xe_device *xe) +static int determine_lmem_bar_size(struct xe_device *xe, struct xe_vram_region *lmem_bar) { struct pci_dev *pdev = to_pci_dev(xe->drm.dev); @@ -145,18 +168,16 @@ static int determine_lmem_bar_size(struct xe_device *xe) return -ENXIO; } - resize_vram_bar(xe); - - xe->mem.vram.io_start = pci_resource_start(pdev, LMEM_BAR); - xe->mem.vram.io_size = pci_resource_len(pdev, LMEM_BAR); - if (!xe->mem.vram.io_size) + lmem_bar->io_start = pci_resource_start(pdev, LMEM_BAR); + lmem_bar->io_size = pci_resource_len(pdev, LMEM_BAR); + if (!lmem_bar->io_size) return -EIO; /* XXX: Need to change when xe link code is ready */ - xe->mem.vram.dpa_base = 0; + lmem_bar->dpa_base = 0; /* set up a map to the total memory area. 
*/ - xe->mem.vram.mapping = ioremap_wc(xe->mem.vram.io_start, xe->mem.vram.io_size); + lmem_bar->mapping = devm_ioremap_wc(&pdev->dev, lmem_bar->io_start, lmem_bar->io_size); return 0; } @@ -278,13 +299,71 @@ static void vram_fini(void *arg) struct xe_tile *tile; int id; - if (xe->mem.vram.mapping) - iounmap(xe->mem.vram.mapping); - - xe->mem.vram.mapping = NULL; + xe->mem.vram->mapping = NULL; for_each_tile(tile, xe, id) - tile->mem.vram.mapping = NULL; + tile->mem.vram->mapping = NULL; +} + +struct xe_vram_region *xe_vram_region_alloc(struct xe_device *xe, u8 id, u32 placement) +{ + struct xe_vram_region *vram; + struct drm_device *drm = &xe->drm; + + xe_assert(xe, id < xe->info.tile_count); + + vram = drmm_kzalloc(drm, sizeof(*vram), GFP_KERNEL); + if (!vram) + return NULL; + + vram->xe = xe; + vram->id = id; + vram->placement = placement; +#if defined(CONFIG_DRM_XE_PAGEMAP) + vram->migrate = xe->tiles[id].migrate; +#endif + return vram; +} + +static void print_vram_region_info(struct xe_device *xe, struct xe_vram_region *vram) +{ + struct drm_device *drm = &xe->drm; + + if (vram->io_size < vram->usable_size) + drm_info(drm, "Small BAR device\n"); + + drm_info(drm, + "VRAM[%u]: Actual physical size %pa, usable size exclude stolen %pa, CPU accessible size %pa\n", + vram->id, &vram->actual_physical_size, &vram->usable_size, &vram->io_size); + drm_info(drm, "VRAM[%u]: DPA range: [%pa-%llx], io range: [%pa-%llx]\n", + vram->id, &vram->dpa_base, vram->dpa_base + (u64)vram->actual_physical_size, + &vram->io_start, vram->io_start + (u64)vram->io_size); +} + +static int vram_region_init(struct xe_device *xe, struct xe_vram_region *vram, + struct xe_vram_region *lmem_bar, u64 offset, u64 usable_size, + u64 region_size, resource_size_t remain_io_size) +{ + /* Check if VRAM region is already initialized */ + if (vram->mapping) + return 0; + + vram->actual_physical_size = region_size; + vram->io_start = lmem_bar->io_start + offset; + vram->io_size = min_t(u64, usable_size, remain_io_size); + + if (!vram->io_size) { + drm_err(&xe->drm, "Tile without any CPU visible VRAM. 
Aborting.\n"); + return -ENODEV; + } + + vram->dpa_base = lmem_bar->dpa_base + offset; + vram->mapping = lmem_bar->mapping + offset; + vram->usable_size = usable_size; + + print_vram_region_info(xe, vram); + + return 0; } /** @@ -298,78 +377,108 @@ static void vram_fini(void *arg) int xe_vram_probe(struct xe_device *xe) { struct xe_tile *tile; - resource_size_t io_size; + struct xe_vram_region lmem_bar; + resource_size_t remain_io_size; u64 available_size = 0; u64 total_size = 0; - u64 tile_offset; - u64 tile_size; - u64 vram_size; int err; u8 id; if (!IS_DGFX(xe)) return 0; - /* Get the size of the root tile's vram for later accessibility comparison */ - tile = xe_device_get_root_tile(xe); - err = tile_vram_size(tile, &vram_size, &tile_size, &tile_offset); + err = determine_lmem_bar_size(xe, &lmem_bar); if (err) return err; + drm_info(&xe->drm, "VISIBLE VRAM: %pa, %pa\n", &lmem_bar.io_start, &lmem_bar.io_size); - err = determine_lmem_bar_size(xe); - if (err) - return err; + remain_io_size = lmem_bar.io_size; - drm_info(&xe->drm, "VISIBLE VRAM: %pa, %pa\n", &xe->mem.vram.io_start, - &xe->mem.vram.io_size); - - io_size = xe->mem.vram.io_size; - - /* tile specific ranges */ for_each_tile(tile, xe, id) { - err = tile_vram_size(tile, &vram_size, &tile_size, &tile_offset); + u64 region_size; + u64 usable_size; + u64 tile_offset; + + err = tile_vram_size(tile, &usable_size, ®ion_size, &tile_offset); if (err) return err; - tile->mem.vram.actual_physical_size = tile_size; - tile->mem.vram.io_start = xe->mem.vram.io_start + tile_offset; - tile->mem.vram.io_size = min_t(u64, vram_size, io_size); + total_size += region_size; + available_size += usable_size; - if (!tile->mem.vram.io_size) { - drm_err(&xe->drm, "Tile without any CPU visible VRAM. Aborting.\n"); - return -ENODEV; + err = vram_region_init(xe, tile->mem.vram, &lmem_bar, tile_offset, usable_size, + region_size, remain_io_size); + if (err) + return err; + + if (total_size > lmem_bar.io_size) { + drm_info(&xe->drm, "VRAM: %pa is larger than resource %pa\n", + &total_size, &lmem_bar.io_size); } - tile->mem.vram.dpa_base = xe->mem.vram.dpa_base + tile_offset; - tile->mem.vram.usable_size = vram_size; - tile->mem.vram.mapping = xe->mem.vram.mapping + tile_offset; + remain_io_size -= min_t(u64, tile->mem.vram->actual_physical_size, remain_io_size); + } - if (tile->mem.vram.io_size < tile->mem.vram.usable_size) - drm_info(&xe->drm, "Small BAR device\n"); - drm_info(&xe->drm, "VRAM[%u, %u]: Actual physical size %pa, usable size exclude stolen %pa, CPU accessible size %pa\n", id, - tile->id, &tile->mem.vram.actual_physical_size, &tile->mem.vram.usable_size, &tile->mem.vram.io_size); - drm_info(&xe->drm, "VRAM[%u, %u]: DPA range: [%pa-%llx], io range: [%pa-%llx]\n", id, tile->id, - &tile->mem.vram.dpa_base, tile->mem.vram.dpa_base + (u64)tile->mem.vram.actual_physical_size, - &tile->mem.vram.io_start, tile->mem.vram.io_start + (u64)tile->mem.vram.io_size); + err = vram_region_init(xe, xe->mem.vram, &lmem_bar, 0, available_size, total_size, + lmem_bar.io_size); + if (err) + return err; - /* calculate total size using tile size to get the correct HW sizing */ - total_size += tile_size; - available_size += vram_size; + return devm_add_action_or_reset(xe->drm.dev, vram_fini, xe); +} - if (total_size > xe->mem.vram.io_size) { - drm_info(&xe->drm, "VRAM: %pa is larger than resource %pa\n", - &total_size, &xe->mem.vram.io_size); - } +/** + * xe_vram_region_io_start - Get the IO start of a VRAM region + * @vram: the VRAM region + * + * Return: the IO start 
of the VRAM region, or 0 if not valid + */ +resource_size_t xe_vram_region_io_start(const struct xe_vram_region *vram) +{ + return vram ? vram->io_start : 0; +} - io_size -= min_t(u64, tile_size, io_size); - } +/** + * xe_vram_region_io_size - Get the IO size of a VRAM region + * @vram: the VRAM region + * + * Return: the IO size of the VRAM region, or 0 if not valid + */ +resource_size_t xe_vram_region_io_size(const struct xe_vram_region *vram) +{ + return vram ? vram->io_size : 0; +} - xe->mem.vram.actual_physical_size = total_size; +/** + * xe_vram_region_dpa_base - Get the DPA base of a VRAM region + * @vram: the VRAM region + * + * Return: the DPA base of the VRAM region, or 0 if not valid + */ +resource_size_t xe_vram_region_dpa_base(const struct xe_vram_region *vram) +{ + return vram ? vram->dpa_base : 0; +} - drm_info(&xe->drm, "Total VRAM: %pa, %pa\n", &xe->mem.vram.io_start, - &xe->mem.vram.actual_physical_size); - drm_info(&xe->drm, "Available VRAM: %pa, %pa\n", &xe->mem.vram.io_start, - &available_size); +/** + * xe_vram_region_usable_size - Get the usable size of a VRAM region + * @vram: the VRAM region + * + * Return: the usable size of the VRAM region, or 0 if not valid + */ +resource_size_t xe_vram_region_usable_size(const struct xe_vram_region *vram) +{ + return vram ? vram->usable_size : 0; +} - return devm_add_action_or_reset(xe->drm.dev, vram_fini, xe); +/** + * xe_vram_region_actual_physical_size - Get the actual physical size of a VRAM region + * @vram: the VRAM region + * + * Return: the actual physical size of the VRAM region, or 0 if not valid + */ +resource_size_t xe_vram_region_actual_physical_size(const struct xe_vram_region *vram) +{ + return vram ? vram->actual_physical_size : 0; } +EXPORT_SYMBOL_IF_KUNIT(xe_vram_region_actual_physical_size); diff --git a/drivers/gpu/drm/xe/xe_vram.h b/drivers/gpu/drm/xe/xe_vram.h index e31cc04ec0db..13505cfb184d 100644 --- a/drivers/gpu/drm/xe/xe_vram.h +++ b/drivers/gpu/drm/xe/xe_vram.h @@ -6,8 +6,20 @@ #ifndef _XE_VRAM_H_ #define _XE_VRAM_H_ +#include <linux/types.h> + struct xe_device; +struct xe_vram_region; +void xe_vram_resize_bar(struct xe_device *xe); int xe_vram_probe(struct xe_device *xe); +struct xe_vram_region *xe_vram_region_alloc(struct xe_device *xe, u8 id, u32 placement); + +resource_size_t xe_vram_region_io_start(const struct xe_vram_region *vram); +resource_size_t xe_vram_region_io_size(const struct xe_vram_region *vram); +resource_size_t xe_vram_region_dpa_base(const struct xe_vram_region *vram); +resource_size_t xe_vram_region_usable_size(const struct xe_vram_region *vram); +resource_size_t xe_vram_region_actual_physical_size(const struct xe_vram_region *vram); + #endif diff --git a/drivers/gpu/drm/xe/xe_vram_freq.c b/drivers/gpu/drm/xe/xe_vram_freq.c index b26e26d73dae..17bc84da4cdc 100644 --- a/drivers/gpu/drm/xe/xe_vram_freq.c +++ b/drivers/gpu/drm/xe/xe_vram_freq.c @@ -34,7 +34,7 @@ static ssize_t max_freq_show(struct device *dev, struct device_attribute *attr, char *buf) { struct xe_tile *tile = dev_to_tile(dev); - u32 val, mbox; + u32 val = 0, mbox; int err; mbox = REG_FIELD_PREP(PCODE_MB_COMMAND, PCODE_FREQUENCY_CONFIG) @@ -56,7 +56,7 @@ static ssize_t min_freq_show(struct device *dev, struct device_attribute *attr, char *buf) { struct xe_tile *tile = dev_to_tile(dev); - u32 val, mbox; + u32 val = 0, mbox; int err; mbox = REG_FIELD_PREP(PCODE_MB_COMMAND, PCODE_FREQUENCY_CONFIG) diff --git a/drivers/gpu/drm/xe/xe_vram_types.h b/drivers/gpu/drm/xe/xe_vram_types.h new file mode 100644 index 
000000000000..83772dcbf1af --- /dev/null +++ b/drivers/gpu/drm/xe/xe_vram_types.h @@ -0,0 +1,85 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2025 Intel Corporation + */ + +#ifndef _XE_VRAM_TYPES_H_ +#define _XE_VRAM_TYPES_H_ + +#if IS_ENABLED(CONFIG_DRM_XE_PAGEMAP) +#include <drm/drm_pagemap.h> +#endif + +#include "xe_ttm_vram_mgr_types.h" + +struct xe_device; +struct xe_migrate; + +/** + * struct xe_vram_region - memory region structure + * This is used to describe a memory region in xe + * device, such as HBM memory or CXL extension memory. + */ +struct xe_vram_region { + /** @xe: Back pointer to xe device */ + struct xe_device *xe; + /** + * @id: VRAM region instance id + * + * The value should be unique for each VRAM region. + */ + u8 id; + /** @io_start: IO start address of this VRAM instance */ + resource_size_t io_start; + /** + * @io_size: IO size of this VRAM instance + * + * This represents how much of this VRAM we can access + * via the CPU through the VRAM BAR. This can be smaller + * than @usable_size, in which case only part of VRAM is CPU + * accessible (typically the first 256M). This + * configuration is known as small-bar. + */ + resource_size_t io_size; + /** @dpa_base: This memory region's DPA (device physical address) base */ + resource_size_t dpa_base; + /** + * @usable_size: usable size of VRAM + * + * Usable size of VRAM excluding reserved portions + * (e.g. stolen mem) + */ + resource_size_t usable_size; + /** + * @actual_physical_size: Actual VRAM size + * + * Actual VRAM size including reserved portions + * (e.g. stolen mem) + */ + resource_size_t actual_physical_size; + /** @mapping: pointer to VRAM mappable space */ + void __iomem *mapping; + /** @ttm: VRAM TTM manager */ + struct xe_ttm_vram_mgr ttm; + /** @placement: TTM placement dedicated for this region */ + u32 placement; +#if IS_ENABLED(CONFIG_DRM_XE_PAGEMAP) + /** @migrate: Back pointer to migrate */ + struct xe_migrate *migrate; + /** @pagemap: Used to remap device memory as ZONE_DEVICE */ + struct dev_pagemap pagemap; + /** + * @dpagemap: The struct drm_pagemap of the ZONE_DEVICE memory + * pages of this tile. 
+ */ + struct drm_pagemap dpagemap; + /** + * @hpa_base: base host physical address + * + * This is generated when remap device memory as ZONE_DEVICE + */ + resource_size_t hpa_base; +#endif +}; + +#endif diff --git a/drivers/gpu/drm/xe/xe_vsec.c b/drivers/gpu/drm/xe/xe_vsec.c index 3e573b0b7ebd..8f23a27871b6 100644 --- a/drivers/gpu/drm/xe/xe_vsec.c +++ b/drivers/gpu/drm/xe/xe_vsec.c @@ -24,6 +24,7 @@ #define BMG_DEVICE_ID 0xE2F8 static struct intel_vsec_header bmg_telemetry = { + .rev = 1, .length = 0x10, .id = VSEC_ID_TELEMETRY, .num_entries = 2, @@ -32,28 +33,19 @@ static struct intel_vsec_header bmg_telemetry = { .offset = BMG_DISCOVERY_OFFSET, }; -static struct intel_vsec_header bmg_punit_crashlog = { +static struct intel_vsec_header bmg_crashlog = { + .rev = 1, .length = 0x10, .id = VSEC_ID_CRASHLOG, - .num_entries = 1, - .entry_size = 4, + .num_entries = 2, + .entry_size = 6, .tbir = 0, .offset = BMG_DISCOVERY_OFFSET + 0x60, }; -static struct intel_vsec_header bmg_oobmsm_crashlog = { - .length = 0x10, - .id = VSEC_ID_CRASHLOG, - .num_entries = 1, - .entry_size = 4, - .tbir = 0, - .offset = BMG_DISCOVERY_OFFSET + 0x78, -}; - static struct intel_vsec_header *bmg_capabilities[] = { &bmg_telemetry, - &bmg_punit_crashlog, - &bmg_oobmsm_crashlog, + &bmg_crashlog, NULL }; diff --git a/drivers/gpu/drm/xe/xe_wa.c b/drivers/gpu/drm/xe/xe_wa.c index 4a76de391abb..cd03891654a1 100644 --- a/drivers/gpu/drm/xe/xe_wa.c +++ b/drivers/gpu/drm/xe/xe_wa.c @@ -10,6 +10,7 @@ #include <linux/compiler_types.h> #include <linux/fault-inject.h> +#include <generated/xe_device_wa_oob.h> #include <generated/xe_wa_oob.h> #include "regs/xe_engine_regs.h" @@ -38,7 +39,8 @@ * Register Immediate commands) once when initializing the device and saved in * the default context. That default context is then used on every context * creation to have a "primed golden context", i.e. a context image that - * already contains the changes needed to all the registers. + * already contains the changes needed to all the registers. See + * drivers/gpu/drm/xe/xe_lrc.c for default context handling. * * - Engine workarounds: the list of these WAs is applied whenever the specific * engine is reset. It's also possible that a set of engine classes share a @@ -47,10 +49,10 @@ * them need to keeep the workaround programming: the approach taken in the * driver is to tie those workarounds to the first compute/render engine that * is registered. When executing with GuC submission, engine resets are - * outside of kernel driver control, hence the list of registers involved in + * outside of kernel driver control, hence the list of registers involved is * written once, on engine initialization, and then passed to GuC, that * saves/restores their values before/after the reset takes place. See - * ``drivers/gpu/drm/xe/xe_guc_ads.c`` for reference. + * drivers/gpu/drm/xe/xe_guc_ads.c for reference. * * - GT workarounds: the list of these WAs is applied whenever these registers * revert to their default values: on GPU reset, suspend/resume [1]_, etc. @@ -65,21 +67,39 @@ * hardware on every HW context restore. These buffers are created and * programmed in the default context so the hardware always go through those * programming sequences when switching contexts. The support for workaround - * batchbuffers is enabled these hardware mechanisms: + * batchbuffers is enabled via these hardware mechanisms: * - * #. 
INDIRECT_CTX: A batchbuffer and an offset are provided in the default - * context, pointing the hardware to jump to that location when that offset - * is reached in the context restore. Workaround batchbuffer in the driver - * currently uses this mechanism for all platforms. + * #. INDIRECT_CTX (also known as **mid context restore bb**): A batchbuffer + * and an offset are provided in the default context, pointing the hardware + * to jump to that location when that offset is reached in the context + * restore. When a context is being restored, this is executed after the + * ring context, in the middle (or beginning) of the engine context image. * - * #. BB_PER_CTX_PTR: A batchbuffer is provided in the default context, - * pointing the hardware to a buffer to continue executing after the - * engine registers are restored in a context restore sequence. This is - * currently not used in the driver. + * #. BB_PER_CTX_PTR (also known as **post context restore bb**): A + * batchbuffer is provided in the default context, pointing the hardware to + * a buffer to continue executing after the engine registers are restored + * in a context restore sequence. + * + * Below is the timeline for a context restore sequence: + * + * .. code:: + * + * INDIRECT_CTX_OFFSET + * |----------->| + * .------------.------------.-------------.------------.--------------.-----------. + * |Ring | Engine | Mid-context | Engine | Post-context | Ring | + * |Restore | Restore (1)| BB Restore | Restore (2)| BB Restore | Execution | + * `------------'------------'-------------'------------'--------------'-----------' * * - Other/OOB: There are WAs that, due to their nature, cannot be applied from * a central place. Those are peppered around the rest of the code, as needed. - * Workarounds related to the display IP are the main example. + * There's a central place to control which workarounds are enabled: + * drivers/gpu/drm/xe/xe_wa_oob.rules for GT workarounds and + * drivers/gpu/drm/xe/xe_device_wa_oob.rules for device/SoC workarounds. + * These files only record which workarounds are enabled: during early device + * initialization those rules are evaluated and recorded by the driver. Then + * later the driver checks with ``XE_GT_WA()`` and ``XE_DEVICE_WA()`` to + * implement them. * * .. [1] Technically, some registers are powercontext saved & restored, so they * survive a suspend/resume. 
In practice, writing them again is not too @@ -285,6 +305,18 @@ static const struct xe_rtp_entry_sr gt_was[] = { XE_RTP_ACTIONS(SET(VDBOX_CGCTL3F10(0), IECPUNIT_CLKGATE_DIS)), XE_RTP_ENTRY_FLAG(FOREACH_ENGINE), }, + { XE_RTP_NAME("16021865536"), + XE_RTP_RULES(MEDIA_VERSION(3002), + ENGINE_CLASS(VIDEO_DECODE)), + XE_RTP_ACTIONS(SET(VDBOX_CGCTL3F10(0), IECPUNIT_CLKGATE_DIS)), + XE_RTP_ENTRY_FLAG(FOREACH_ENGINE), + }, + { XE_RTP_NAME("16021867713"), + XE_RTP_RULES(MEDIA_VERSION(3002), + ENGINE_CLASS(VIDEO_DECODE)), + XE_RTP_ACTIONS(SET(VDBOX_CGCTL3F1C(0), MFXPIPE_CLKGATE_DIS)), + XE_RTP_ENTRY_FLAG(FOREACH_ENGINE), + }, { XE_RTP_NAME("14021486841"), XE_RTP_RULES(MEDIA_VERSION(3000), MEDIA_STEP(A0, B0), ENGINE_CLASS(VIDEO_DECODE)), @@ -525,6 +557,11 @@ static const struct xe_rtp_entry_sr engine_was[] = { XE_RTP_RULES(GRAPHICS_VERSION(2004), ENGINE_CLASS(RENDER)), XE_RTP_ACTIONS(SET(HALF_SLICE_CHICKEN7, CLEAR_OPTIMIZATION_DISABLE)) }, + { XE_RTP_NAME("13012615864"), + XE_RTP_RULES(GRAPHICS_VERSION(2004), + FUNC(xe_rtp_match_first_render_or_compute)), + XE_RTP_ACTIONS(SET(TDL_TSL_CHICKEN, RES_CHK_SPR_DIS)) + }, /* Xe2_HPG */ @@ -589,6 +626,18 @@ static const struct xe_rtp_entry_sr engine_was[] = { FUNC(xe_rtp_match_first_render_or_compute)), XE_RTP_ACTIONS(SET(TDL_TSL_CHICKEN, STK_ID_RESTRICT)) }, + { XE_RTP_NAME("13012615864"), + XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, 2002), + FUNC(xe_rtp_match_first_render_or_compute)), + XE_RTP_ACTIONS(SET(TDL_TSL_CHICKEN, RES_CHK_SPR_DIS)) + }, + { XE_RTP_NAME("18041344222"), + XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, 2002), + FUNC(xe_rtp_match_first_render_or_compute), + FUNC(xe_rtp_match_not_sriov_vf), + FUNC(xe_rtp_match_gt_has_discontiguous_dss_groups)), + XE_RTP_ACTIONS(SET(TDL_CHICKEN, EUSTALL_PERF_SAMPLING_DISABLE)) + }, /* Xe2_LPM */ @@ -634,7 +683,8 @@ static const struct xe_rtp_entry_sr engine_was[] = { XE_RTP_ACTIONS(SET(TDL_CHICKEN, QID_WAIT_FOR_THREAD_NOT_RUN_DISABLE)) }, { XE_RTP_NAME("13012615864"), - XE_RTP_RULES(GRAPHICS_VERSION_RANGE(3000, 3001), + XE_RTP_RULES(GRAPHICS_VERSION_RANGE(3000, 3001), OR, + GRAPHICS_VERSION(3003), FUNC(xe_rtp_match_first_render_or_compute)), XE_RTP_ACTIONS(SET(TDL_TSL_CHICKEN, RES_CHK_SPR_DIS)) }, @@ -644,6 +694,17 @@ static const struct xe_rtp_entry_sr engine_was[] = { XE_RTP_ACTIONS(SET(RING_PSMI_CTL(0), RC_SEMA_IDLE_MSG_DISABLE, XE_RTP_ACTION_FLAG(ENGINE_BASE))) }, + { XE_RTP_NAME("14021402888"), + XE_RTP_RULES(GRAPHICS_VERSION(3003), FUNC(xe_rtp_match_first_render_or_compute)), + XE_RTP_ACTIONS(SET(HALF_SLICE_CHICKEN7, CLEAR_OPTIMIZATION_DISABLE)) + }, + { XE_RTP_NAME("18041344222"), + XE_RTP_RULES(GRAPHICS_VERSION_RANGE(3000, 3001), + FUNC(xe_rtp_match_first_render_or_compute), + FUNC(xe_rtp_match_not_sriov_vf), + FUNC(xe_rtp_match_gt_has_discontiguous_dss_groups)), + XE_RTP_ACTIONS(SET(TDL_CHICKEN, EUSTALL_PERF_SAMPLING_DISABLE)) + }, }; static const struct xe_rtp_entry_sr lrc_was[] = { @@ -851,6 +912,10 @@ static const struct xe_rtp_entry_sr lrc_was[] = { DIS_PARTIAL_AUTOSTRIP | DIS_AUTOSTRIP)) }, + { XE_RTP_NAME("22021007897"), + XE_RTP_RULES(GRAPHICS_VERSION_RANGE(3000, 3003), ENGINE_CLASS(RENDER)), + XE_RTP_ACTIONS(SET(COMMON_SLICE_CHICKEN4, SBE_PUSH_CONSTANT_BEHIND_FIX_ENABLE)) + }, }; static __maybe_unused const struct xe_rtp_entry oob_was[] = { @@ -860,16 +925,41 @@ static __maybe_unused const struct xe_rtp_entry oob_was[] = { static_assert(ARRAY_SIZE(oob_was) - 1 == _XE_WA_OOB_COUNT); +static __maybe_unused const struct xe_rtp_entry device_oob_was[] = { +#include <generated/xe_device_wa_oob.c> 
+ {} +}; + +static_assert(ARRAY_SIZE(device_oob_was) - 1 == _XE_DEVICE_WA_OOB_COUNT); + __diag_pop(); /** - * xe_wa_process_oob - process OOB workaround table + * xe_wa_process_device_oob - process OOB workaround table + * @xe: device instance to process workarounds for + * + * process OOB workaround table for this device, marking in @xe the + * workarounds that are active. + */ + +void xe_wa_process_device_oob(struct xe_device *xe) +{ + struct xe_rtp_process_ctx ctx = XE_RTP_PROCESS_CTX_INITIALIZER(xe); + + xe_rtp_process_ctx_enable_active_tracking(&ctx, xe->wa_active.oob, ARRAY_SIZE(device_oob_was)); + + xe->wa_active.oob_initialized = true; + xe_rtp_process(&ctx, device_oob_was); +} + +/** + * xe_wa_process_gt_oob - process GT OOB workaround table * @gt: GT instance to process workarounds for * * Process OOB workaround table for this platform, marking in @gt the * workarounds that are active. */ -void xe_wa_process_oob(struct xe_gt *gt) +void xe_wa_process_gt_oob(struct xe_gt *gt) { struct xe_rtp_process_ctx ctx = XE_RTP_PROCESS_CTX_INITIALIZER(gt); @@ -931,12 +1021,34 @@ void xe_wa_process_lrc(struct xe_hw_engine *hwe) } /** - * xe_wa_init - initialize gt with workaround bookkeeping + * xe_wa_device_init - initialize device with workaround oob bookkeeping + * @xe: Xe device instance to initialize + * + * Returns 0 for success, negative with error code otherwise + */ +int xe_wa_device_init(struct xe_device *xe) +{ + unsigned long *p; + + p = drmm_kzalloc(&xe->drm, + sizeof(*p) * BITS_TO_LONGS(ARRAY_SIZE(device_oob_was)), + GFP_KERNEL); + + if (!p) + return -ENOMEM; + + xe->wa_active.oob = p; + + return 0; +} + +/** + * xe_wa_gt_init - initialize gt with workaround bookkeeping * @gt: GT instance to initialize * * Returns 0 for success, negative error code otherwise. 
*/ -int xe_wa_init(struct xe_gt *gt) +int xe_wa_gt_init(struct xe_gt *gt) { struct xe_device *xe = gt_to_xe(gt); size_t n_oob, n_lrc, n_engine, n_gt, total; @@ -962,7 +1074,17 @@ int xe_wa_init(struct xe_gt *gt) return 0; } -ALLOW_ERROR_INJECTION(xe_wa_init, ERRNO); /* See xe_pci_probe() */ +ALLOW_ERROR_INJECTION(xe_wa_gt_init, ERRNO); /* See xe_pci_probe() */ + +void xe_wa_device_dump(struct xe_device *xe, struct drm_printer *p) +{ + size_t idx; + + drm_printf(p, "Device OOB Workarounds\n"); + for_each_set_bit(idx, xe->wa_active.oob, ARRAY_SIZE(device_oob_was)) + if (device_oob_was[idx].name) + drm_printf_indent(p, 1, "%s\n", device_oob_was[idx].name); +} void xe_wa_dump(struct xe_gt *gt, struct drm_printer *p) { @@ -1005,6 +1127,6 @@ void xe_wa_apply_tile_workarounds(struct xe_tile *tile) if (IS_SRIOV_VF(tile->xe)) return; - if (XE_WA(tile->primary_gt, 22010954014)) + if (XE_GT_WA(tile->primary_gt, 22010954014)) xe_mmio_rmw32(mmio, XEHP_CLOCK_GATE_DIS, 0, SGSI_SIDECLK_DIS); } diff --git a/drivers/gpu/drm/xe/xe_wa.h b/drivers/gpu/drm/xe/xe_wa.h index 52337405b5bc..6a869b2de643 100644 --- a/drivers/gpu/drm/xe/xe_wa.h +++ b/drivers/gpu/drm/xe/xe_wa.h @@ -13,23 +13,41 @@ struct xe_gt; struct xe_hw_engine; struct xe_tile; -int xe_wa_init(struct xe_gt *gt); -void xe_wa_process_oob(struct xe_gt *gt); +int xe_wa_device_init(struct xe_device *xe); +int xe_wa_gt_init(struct xe_gt *gt); +void xe_wa_process_device_oob(struct xe_device *xe); +void xe_wa_process_gt_oob(struct xe_gt *gt); void xe_wa_process_gt(struct xe_gt *gt); void xe_wa_process_engine(struct xe_hw_engine *hwe); void xe_wa_process_lrc(struct xe_hw_engine *hwe); void xe_wa_apply_tile_workarounds(struct xe_tile *tile); +void xe_wa_device_dump(struct xe_device *xe, struct drm_printer *p); void xe_wa_dump(struct xe_gt *gt, struct drm_printer *p); /** - * XE_WA - Out-of-band workarounds, that don't fit the lifecycle any - * other more specific type + * XE_GT_WA - Out-of-band GT workarounds, to be queried and called as needed. * @gt__: gt instance * @id__: XE_OOB_<id__>, as generated by build system in generated/xe_wa_oob.h */ -#define XE_WA(gt__, id__) ({ \ +#define XE_GT_WA(gt__, id__) ({ \ xe_gt_assert(gt__, (gt__)->wa_active.oob_initialized); \ test_bit(XE_WA_OOB_ ## id__, (gt__)->wa_active.oob); \ }) +/** + * XE_DEVICE_WA - Out-of-band Device workarounds, to be queried and called + * as needed. 
+ * @xe__: xe_device + * @id__: XE_DEVICE_WA_OOB_<id__>, as generated by build system in generated/xe_device_wa_oob.h + */ +#define XE_DEVICE_WA(xe__, id__) ({ \ + xe_assert(xe__, (xe__)->wa_active.oob_initialized); \ + test_bit(XE_DEVICE_WA_OOB_ ## id__, (xe__)->wa_active.oob); \ +}) + +#define XE_DEVICE_WA_DISABLE(xe__, id__) ({ \ + xe_assert(xe__, (xe__)->wa_active.oob_initialized); \ + clear_bit(XE_DEVICE_WA_OOB_ ## id__, (xe__)->wa_active.oob); \ +}) + #endif diff --git a/drivers/gpu/drm/xe/xe_wa_oob.rules b/drivers/gpu/drm/xe/xe_wa_oob.rules index 0ee74a5b2407..f3a6d5d239ce 100644 --- a/drivers/gpu/drm/xe/xe_wa_oob.rules +++ b/drivers/gpu/drm/xe/xe_wa_oob.rules @@ -1,4 +1,6 @@ 1607983814 GRAPHICS_VERSION_RANGE(1200, 1210) +16010904313 GRAPHICS_VERSION_RANGE(1200, 1210) +18022495364 GRAPHICS_VERSION_RANGE(1200, 1210) 22012773006 GRAPHICS_VERSION_RANGE(1200, 1250) 14014475959 GRAPHICS_VERSION_RANGE(1270, 1271), GRAPHICS_STEP(A0, B0) PLATFORM(DG2) @@ -30,21 +32,23 @@ 16022287689 GRAPHICS_VERSION(2001) GRAPHICS_VERSION(2004) 13011645652 GRAPHICS_VERSION(2004) - GRAPHICS_VERSION(3001) + GRAPHICS_VERSION_RANGE(3000, 3001) + GRAPHICS_VERSION(3003) 14022293748 GRAPHICS_VERSION_RANGE(2001, 2002) GRAPHICS_VERSION(2004) GRAPHICS_VERSION_RANGE(3000, 3001) + GRAPHICS_VERSION(3003) 22019794406 GRAPHICS_VERSION_RANGE(2001, 2002) GRAPHICS_VERSION(2004) GRAPHICS_VERSION_RANGE(3000, 3001) + GRAPHICS_VERSION(3003) 22019338487 MEDIA_VERSION(2000) - GRAPHICS_VERSION(2001) + GRAPHICS_VERSION(2001), FUNC(xe_rtp_match_not_sriov_vf) MEDIA_VERSION(3000), MEDIA_STEP(A0, B0), FUNC(xe_rtp_match_not_sriov_vf) 22019338487_display PLATFORM(LUNARLAKE) -16023588340 GRAPHICS_VERSION(2001) +16023588340 GRAPHICS_VERSION(2001), FUNC(xe_rtp_match_not_sriov_vf) 14019789679 GRAPHICS_VERSION(1255) GRAPHICS_VERSION_RANGE(1270, 2004) -no_media_l3 MEDIA_VERSION(3000) 14022866841 GRAPHICS_VERSION(3000), GRAPHICS_STEP(A0, B0) MEDIA_VERSION(3000), MEDIA_STEP(A0, B0) 16021333562 GRAPHICS_VERSION_RANGE(1200, 1274) @@ -58,9 +62,22 @@ no_media_l3 MEDIA_VERSION(3000) GRAPHICS_VERSION(1260), GRAPHICS_STEP(A0, B0) 16023105232 GRAPHICS_VERSION_RANGE(2001, 3001) MEDIA_VERSION_RANGE(1301, 3000) + MEDIA_VERSION(3002) + GRAPHICS_VERSION(3003) 16026508708 GRAPHICS_VERSION_RANGE(1200, 3001) MEDIA_VERSION_RANGE(1300, 3000) + MEDIA_VERSION(3002) + GRAPHICS_VERSION(3003) +14020001231 GRAPHICS_VERSION_RANGE(2001,2004), FUNC(xe_rtp_match_psmi_enabled) + MEDIA_VERSION(2000), FUNC(xe_rtp_match_psmi_enabled) + MEDIA_VERSION(3000), FUNC(xe_rtp_match_psmi_enabled) + MEDIA_VERSION(3002), FUNC(xe_rtp_match_psmi_enabled) +16023683509 MEDIA_VERSION(2000), FUNC(xe_rtp_match_psmi_enabled) + MEDIA_VERSION(3000), MEDIA_STEP(A0, B0), FUNC(xe_rtp_match_psmi_enabled) # SoC workaround - currently applies to all platforms with the following # primary GT GMDID 14022085890 GRAPHICS_VERSION(2001) + +15015404425_disable PLATFORM(PANTHERLAKE), MEDIA_STEP(B0, FOREVER) +16026007364 MEDIA_VERSION(3000) diff --git a/drivers/gpu/drm/xen/xen_drm_front_kms.c b/drivers/gpu/drm/xen/xen_drm_front_kms.c index dfa78a49a6d9..806ec66ee7f7 100644 --- a/drivers/gpu/drm/xen/xen_drm_front_kms.c +++ b/drivers/gpu/drm/xen/xen_drm_front_kms.c @@ -54,6 +54,7 @@ static const struct drm_framebuffer_funcs fb_funcs = { static struct drm_framebuffer * fb_create(struct drm_device *dev, struct drm_file *filp, + const struct drm_format_info *info, const struct drm_mode_fb_cmd2 *mode_cmd) { struct xen_drm_front_drm_info *drm_info = dev->dev_private; @@ -61,7 +62,7 @@ fb_create(struct drm_device *dev, 
struct drm_file *filp, struct drm_gem_object *gem_obj; int ret; - fb = drm_gem_fb_create_with_funcs(dev, filp, mode_cmd, &fb_funcs); + fb = drm_gem_fb_create_with_funcs(dev, filp, info, mode_cmd, &fb_funcs); if (IS_ERR(fb)) return fb; diff --git a/drivers/gpu/drm/xlnx/zynqmp_dp.c b/drivers/gpu/drm/xlnx/zynqmp_dp.c index 02e1feaa6115..34ddbf98e81d 100644 --- a/drivers/gpu/drm/xlnx/zynqmp_dp.c +++ b/drivers/gpu/drm/xlnx/zynqmp_dp.c @@ -1720,7 +1720,8 @@ disconnected: return connector_status_disconnected; } -static enum drm_connector_status zynqmp_dp_bridge_detect(struct drm_bridge *bridge) +static enum drm_connector_status +zynqmp_dp_bridge_detect(struct drm_bridge *bridge, struct drm_connector *connector) { struct zynqmp_dp *dp = bridge_to_dp(bridge); @@ -1869,20 +1870,14 @@ static int zynqmp_dp_test_setup(struct zynqmp_dp *dp) static ssize_t zynqmp_dp_pattern_read(struct file *file, char __user *user_buf, size_t count, loff_t *ppos) { - struct dentry *dentry = file->f_path.dentry; struct zynqmp_dp *dp = file->private_data; char buf[16]; ssize_t ret; - ret = debugfs_file_get(dentry); - if (unlikely(ret)) - return ret; - scoped_guard(mutex, &dp->lock) ret = snprintf(buf, sizeof(buf), "%s\n", test_pattern_str[dp->test.pattern]); - debugfs_file_put(dentry); return simple_read_from_buffer(user_buf, count, ppos, buf, ret); } @@ -1890,27 +1885,20 @@ static ssize_t zynqmp_dp_pattern_write(struct file *file, const char __user *user_buf, size_t count, loff_t *ppos) { - struct dentry *dentry = file->f_path.dentry; struct zynqmp_dp *dp = file->private_data; char buf[16]; ssize_t ret; int pattern; - ret = debugfs_file_get(dentry); - if (unlikely(ret)) - return ret; - ret = simple_write_to_buffer(buf, sizeof(buf) - 1, ppos, user_buf, count); if (ret < 0) - goto out; + return ret; buf[ret] = '\0'; pattern = sysfs_match_string(test_pattern_str, buf); - if (pattern < 0) { - ret = -EINVAL; - goto out; - } + if (pattern < 0) + return -EINVAL; mutex_lock(&dp->lock); dp->test.pattern = pattern; @@ -1919,8 +1907,6 @@ static ssize_t zynqmp_dp_pattern_write(struct file *file, dp->test.custom) ?: ret; mutex_unlock(&dp->lock); -out: - debugfs_file_put(dentry); return ret; } @@ -2026,20 +2012,13 @@ DEFINE_DEBUGFS_ATTRIBUTE(fops_zynqmp_dp_active, zynqmp_dp_active_get, static ssize_t zynqmp_dp_custom_read(struct file *file, char __user *user_buf, size_t count, loff_t *ppos) { - struct dentry *dentry = file->f_path.dentry; struct zynqmp_dp *dp = file->private_data; ssize_t ret; - ret = debugfs_file_get(dentry); - if (unlikely(ret)) - return ret; - mutex_lock(&dp->lock); ret = simple_read_from_buffer(user_buf, count, ppos, &dp->test.custom, sizeof(dp->test.custom)); mutex_unlock(&dp->lock); - - debugfs_file_put(dentry); return ret; } @@ -2047,18 +2026,13 @@ static ssize_t zynqmp_dp_custom_write(struct file *file, const char __user *user_buf, size_t count, loff_t *ppos) { - struct dentry *dentry = file->f_path.dentry; struct zynqmp_dp *dp = file->private_data; ssize_t ret; char buf[sizeof(dp->test.custom)]; - ret = debugfs_file_get(dentry); - if (unlikely(ret)) - return ret; - ret = simple_write_to_buffer(buf, sizeof(buf), ppos, user_buf, count); if (ret < 0) - goto out; + return ret; mutex_lock(&dp->lock); memcpy(dp->test.custom, buf, ret); @@ -2066,9 +2040,6 @@ static ssize_t zynqmp_dp_custom_write(struct file *file, ret = zynqmp_dp_set_test_pattern(dp, dp->test.pattern, dp->test.custom) ?: ret; mutex_unlock(&dp->lock); - -out: - debugfs_file_put(dentry); return ret; } diff --git a/drivers/gpu/drm/xlnx/zynqmp_kms.c 
b/drivers/gpu/drm/xlnx/zynqmp_kms.c index b47463473472..2bee0a2275ed 100644 --- a/drivers/gpu/drm/xlnx/zynqmp_kms.c +++ b/drivers/gpu/drm/xlnx/zynqmp_kms.c @@ -373,6 +373,7 @@ static int zynqmp_dpsub_dumb_create(struct drm_file *file_priv, static struct drm_framebuffer * zynqmp_dpsub_fb_create(struct drm_device *drm, struct drm_file *file_priv, + const struct drm_format_info *info, const struct drm_mode_fb_cmd2 *mode_cmd) { struct zynqmp_dpsub *dpsub = to_zynqmp_dpsub(drm); @@ -383,7 +384,7 @@ zynqmp_dpsub_fb_create(struct drm_device *drm, struct drm_file *file_priv, for (i = 0; i < ARRAY_SIZE(cmd.pitches); ++i) cmd.pitches[i] = ALIGN(cmd.pitches[i], dpsub->dma_align); - return drm_gem_fb_create(drm, file_priv, &cmd); + return drm_gem_fb_create(drm, file_priv, info, &cmd); } static const struct drm_mode_config_funcs zynqmp_dpsub_mode_config_funcs = { diff --git a/drivers/gpu/nova-core/Kconfig b/drivers/gpu/nova-core/Kconfig index 8726d80d6ba4..20d3e6d0d796 100644 --- a/drivers/gpu/nova-core/Kconfig +++ b/drivers/gpu/nova-core/Kconfig @@ -1,5 +1,6 @@ config NOVA_CORE tristate "Nova Core GPU driver" + depends on 64BIT depends on PCI depends on RUST depends on RUST_FW_LOADER_ABSTRACTIONS diff --git a/drivers/gpu/nova-core/dma.rs b/drivers/gpu/nova-core/dma.rs new file mode 100644 index 000000000000..94f44bcfd748 --- /dev/null +++ b/drivers/gpu/nova-core/dma.rs @@ -0,0 +1,58 @@ +// SPDX-License-Identifier: GPL-2.0 + +//! Simple DMA object wrapper. + +use core::ops::{Deref, DerefMut}; + +use kernel::device; +use kernel::dma::CoherentAllocation; +use kernel::page::PAGE_SIZE; +use kernel::prelude::*; + +pub(crate) struct DmaObject { + dma: CoherentAllocation<u8>, +} + +impl DmaObject { + pub(crate) fn new(dev: &device::Device<device::Bound>, len: usize) -> Result<Self> { + let len = core::alloc::Layout::from_size_align(len, PAGE_SIZE) + .map_err(|_| EINVAL)? + .pad_to_align() + .size(); + let dma = CoherentAllocation::alloc_coherent(dev, len, GFP_KERNEL | __GFP_ZERO)?; + + Ok(Self { dma }) + } + + pub(crate) fn from_data(dev: &device::Device<device::Bound>, data: &[u8]) -> Result<Self> { + Self::new(dev, data.len()).map(|mut dma_obj| { + // TODO[COHA]: replace with `CoherentAllocation::write()` once available. + // SAFETY: + // - `dma_obj`'s size is at least `data.len()`. + // - We have just created this object and there is no other user at this stage. 
+ unsafe { + core::ptr::copy_nonoverlapping( + data.as_ptr(), + dma_obj.dma.start_ptr_mut(), + data.len(), + ); + } + + dma_obj + }) + } +} + +impl Deref for DmaObject { + type Target = CoherentAllocation<u8>; + + fn deref(&self) -> &Self::Target { + &self.dma + } +} + +impl DerefMut for DmaObject { + fn deref_mut(&mut self) -> &mut Self::Target { + &mut self.dma + } +} diff --git a/drivers/gpu/nova-core/driver.rs b/drivers/gpu/nova-core/driver.rs index 8c86101c26cb..edc72052e27a 100644 --- a/drivers/gpu/nova-core/driver.rs +++ b/drivers/gpu/nova-core/driver.rs @@ -1,6 +1,14 @@ // SPDX-License-Identifier: GPL-2.0 -use kernel::{auxiliary, bindings, c_str, device::Core, pci, prelude::*}; +use kernel::{ + auxiliary, c_str, + device::Core, + pci, + pci::{Class, ClassMask, Vendor}, + prelude::*, + sizes::SZ_16M, + sync::Arc, +}; use crate::gpu::Gpu; @@ -11,17 +19,32 @@ pub(crate) struct NovaCore { _reg: auxiliary::Registration, } -const BAR0_SIZE: usize = 8; +const BAR0_SIZE: usize = SZ_16M; pub(crate) type Bar0 = pci::Bar<BAR0_SIZE>; kernel::pci_device_table!( PCI_TABLE, MODULE_PCI_TABLE, <NovaCore as pci::Driver>::IdInfo, - [( - pci::DeviceId::from_id(bindings::PCI_VENDOR_ID_NVIDIA, bindings::PCI_ANY_ID as _), - () - )] + [ + // Modern NVIDIA GPUs will show up as either VGA or 3D controllers. + ( + pci::DeviceId::from_class_and_vendor( + Class::DISPLAY_VGA, + ClassMask::ClassSubclass, + Vendor::NVIDIA + ), + () + ), + ( + pci::DeviceId::from_class_and_vendor( + Class::DISPLAY_3D, + ClassMask::ClassSubclass, + Vendor::NVIDIA + ), + () + ), + ] ); impl pci::Driver for NovaCore { @@ -34,15 +57,23 @@ impl pci::Driver for NovaCore { pdev.enable_device_mem()?; pdev.set_master(); - let bar = pdev.iomap_region_sized::<BAR0_SIZE>(0, c_str!("nova-core/bar0"))?; + let devres_bar = Arc::pin_init( + pdev.iomap_region_sized::<BAR0_SIZE>(0, c_str!("nova-core/bar0")), + GFP_KERNEL, + )?; + + // Used to provided a `&Bar0` to `Gpu::new` without tying it to the lifetime of + // `devres_bar`. + let bar_clone = Arc::clone(&devres_bar); + let bar = bar_clone.access(pdev.as_ref())?; let this = KBox::pin_init( try_pin_init!(Self { - gpu <- Gpu::new(pdev, bar)?, + gpu <- Gpu::new(pdev, devres_bar, bar), _reg: auxiliary::Registration::new( pdev.as_ref(), c_str!("nova-drm"), - 0, // TODO: Once it lands, use XArray; for now we don't use the ID. + 0, // TODO[XARR]: Once it lands, use XArray; for now we don't use the ID. crate::MODULE_NAME )?, }), @@ -51,4 +82,8 @@ impl pci::Driver for NovaCore { Ok(this) } + + fn unbind(pdev: &pci::Device<Core>, this: Pin<&Self>) { + this.gpu.unbind(pdev.as_ref()); + } } diff --git a/drivers/gpu/nova-core/falcon.rs b/drivers/gpu/nova-core/falcon.rs new file mode 100644 index 000000000000..37e6298195e4 --- /dev/null +++ b/drivers/gpu/nova-core/falcon.rs @@ -0,0 +1,613 @@ +// SPDX-License-Identifier: GPL-2.0 + +//! Falcon microprocessor base support + +use core::ops::Deref; +use hal::FalconHal; +use kernel::device; +use kernel::dma::DmaAddress; +use kernel::prelude::*; +use kernel::sync::aref::ARef; +use kernel::time::Delta; + +use crate::dma::DmaObject; +use crate::driver::Bar0; +use crate::gpu::Chipset; +use crate::regs; +use crate::regs::macros::RegisterBase; +use crate::util; + +pub(crate) mod gsp; +mod hal; +pub(crate) mod sec2; + +// TODO[FPRI]: Replace with `ToPrimitive`. +macro_rules! 
impl_from_enum_to_u32 { + ($enum_type:ty) => { + impl From<$enum_type> for u32 { + fn from(value: $enum_type) -> Self { + value as u32 + } + } + }; +} + +/// Revision number of a falcon core, used in the [`crate::regs::NV_PFALCON_FALCON_HWCFG1`] +/// register. +#[repr(u8)] +#[derive(Debug, Default, Copy, Clone, PartialEq, Eq, PartialOrd, Ord)] +pub(crate) enum FalconCoreRev { + #[default] + Rev1 = 1, + Rev2 = 2, + Rev3 = 3, + Rev4 = 4, + Rev5 = 5, + Rev6 = 6, + Rev7 = 7, +} +impl_from_enum_to_u32!(FalconCoreRev); + +// TODO[FPRI]: replace with `FromPrimitive`. +impl TryFrom<u8> for FalconCoreRev { + type Error = Error; + + fn try_from(value: u8) -> Result<Self> { + use FalconCoreRev::*; + + let rev = match value { + 1 => Rev1, + 2 => Rev2, + 3 => Rev3, + 4 => Rev4, + 5 => Rev5, + 6 => Rev6, + 7 => Rev7, + _ => return Err(EINVAL), + }; + + Ok(rev) + } +} + +/// Revision subversion number of a falcon core, used in the +/// [`crate::regs::NV_PFALCON_FALCON_HWCFG1`] register. +#[repr(u8)] +#[derive(Debug, Default, Copy, Clone, PartialEq, Eq, PartialOrd, Ord)] +pub(crate) enum FalconCoreRevSubversion { + #[default] + Subversion0 = 0, + Subversion1 = 1, + Subversion2 = 2, + Subversion3 = 3, +} +impl_from_enum_to_u32!(FalconCoreRevSubversion); + +// TODO[FPRI]: replace with `FromPrimitive`. +impl TryFrom<u8> for FalconCoreRevSubversion { + type Error = Error; + + fn try_from(value: u8) -> Result<Self> { + use FalconCoreRevSubversion::*; + + let sub_version = match value & 0b11 { + 0 => Subversion0, + 1 => Subversion1, + 2 => Subversion2, + 3 => Subversion3, + _ => return Err(EINVAL), + }; + + Ok(sub_version) + } +} + +/// Security model of a falcon core, used in the [`crate::regs::NV_PFALCON_FALCON_HWCFG1`] +/// register. +#[repr(u8)] +#[derive(Debug, Default, Copy, Clone)] +/// Security mode of the Falcon microprocessor. +/// +/// See `falcon.rst` for more details. +pub(crate) enum FalconSecurityModel { + /// Non-Secure: runs unsigned code without privileges. + #[default] + None = 0, + /// Light-Secured (LS): Runs signed code with some privileges. + /// Entry into this mode is only possible from 'Heavy-secure' mode, which verifies the code's + /// signature. + /// + /// Also known as Low-Secure, Privilege Level 2 or PL2. + Light = 2, + /// Heavy-Secured (HS): Runs signed code with full privileges. + /// The code's signature is verified by the Falcon Boot ROM (BROM). + /// + /// Also known as High-Secure, Privilege Level 3 or PL3. + Heavy = 3, +} +impl_from_enum_to_u32!(FalconSecurityModel); + +// TODO[FPRI]: replace with `FromPrimitive`. +impl TryFrom<u8> for FalconSecurityModel { + type Error = Error; + + fn try_from(value: u8) -> Result<Self> { + use FalconSecurityModel::*; + + let sec_model = match value { + 0 => None, + 2 => Light, + 3 => Heavy, + _ => return Err(EINVAL), + }; + + Ok(sec_model) + } +} + +/// Signing algorithm for a given firmware, used in the [`crate::regs::NV_PFALCON2_FALCON_MOD_SEL`] +/// register. It is passed to the Falcon Boot ROM (BROM) as a parameter. +#[repr(u8)] +#[derive(Debug, Default, Copy, Clone, PartialEq, Eq)] +pub(crate) enum FalconModSelAlgo { + /// AES. + #[expect(dead_code)] + Aes = 0, + /// RSA3K. + #[default] + Rsa3k = 1, +} +impl_from_enum_to_u32!(FalconModSelAlgo); + +// TODO[FPRI]: replace with `FromPrimitive`. 
+impl TryFrom<u8> for FalconModSelAlgo { + type Error = Error; + + fn try_from(value: u8) -> Result<Self> { + match value { + 1 => Ok(FalconModSelAlgo::Rsa3k), + _ => Err(EINVAL), + } + } +} + +/// Valid values for the `size` field of the [`crate::regs::NV_PFALCON_FALCON_DMATRFCMD`] register. +#[repr(u8)] +#[derive(Debug, Default, Copy, Clone, PartialEq, Eq)] +pub(crate) enum DmaTrfCmdSize { + /// 256 bytes transfer. + #[default] + Size256B = 0x6, +} +impl_from_enum_to_u32!(DmaTrfCmdSize); + +// TODO[FPRI]: replace with `FromPrimitive`. +impl TryFrom<u8> for DmaTrfCmdSize { + type Error = Error; + + fn try_from(value: u8) -> Result<Self> { + match value { + 0x6 => Ok(Self::Size256B), + _ => Err(EINVAL), + } + } +} + +/// Currently active core on a dual falcon/riscv (Peregrine) controller. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)] +pub(crate) enum PeregrineCoreSelect { + /// Falcon core is active. + #[default] + Falcon = 0, + /// RISC-V core is active. + Riscv = 1, +} +impl_from_enum_to_u32!(PeregrineCoreSelect); + +impl From<bool> for PeregrineCoreSelect { + fn from(value: bool) -> Self { + match value { + false => PeregrineCoreSelect::Falcon, + true => PeregrineCoreSelect::Riscv, + } + } +} + +/// Different types of memory present in a falcon core. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub(crate) enum FalconMem { + /// Instruction Memory. + Imem, + /// Data Memory. + Dmem, +} + +/// Defines the Framebuffer Interface (FBIF) aperture type. +/// This determines the memory type for external memory access during a DMA transfer, which is +/// performed by the Falcon's Framebuffer DMA (FBDMA) engine. See falcon.rst for more details. +#[derive(Debug, Clone, Default)] +pub(crate) enum FalconFbifTarget { + /// VRAM. + #[default] + /// Local Framebuffer (GPU's VRAM memory). + LocalFb = 0, + /// Coherent system memory (System DRAM). + CoherentSysmem = 1, + /// Non-coherent system memory (System DRAM). + NoncoherentSysmem = 2, +} +impl_from_enum_to_u32!(FalconFbifTarget); + +// TODO[FPRI]: replace with `FromPrimitive`. +impl TryFrom<u8> for FalconFbifTarget { + type Error = Error; + + fn try_from(value: u8) -> Result<Self> { + let res = match value { + 0 => Self::LocalFb, + 1 => Self::CoherentSysmem, + 2 => Self::NoncoherentSysmem, + _ => return Err(EINVAL), + }; + + Ok(res) + } +} + +/// Type of memory addresses to use. +#[derive(Debug, Clone, Default)] +pub(crate) enum FalconFbifMemType { + /// Virtual memory addresses. + #[default] + Virtual = 0, + /// Physical memory addresses. + Physical = 1, +} +impl_from_enum_to_u32!(FalconFbifMemType); + +/// Conversion from a single-bit register field. +impl From<bool> for FalconFbifMemType { + fn from(value: bool) -> Self { + match value { + false => Self::Virtual, + true => Self::Physical, + } + } +} + +/// Type used to represent the `PFALCON` registers address base for a given falcon engine. +pub(crate) struct PFalconBase(()); + +/// Type used to represent the `PFALCON2` registers address base for a given falcon engine. +pub(crate) struct PFalcon2Base(()); + +/// Trait defining the parameters of a given Falcon engine. +/// +/// Each engine provides one base for `PFALCON` and `PFALCON2` registers. The `ID` constant is used +/// to identify a given Falcon instance with register I/O methods. +pub(crate) trait FalconEngine: + Send + Sync + RegisterBase<PFalconBase> + RegisterBase<PFalcon2Base> + Sized +{ + /// Singleton of the engine, used to identify it with register I/O methods. 
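+    ///
+    /// Engines are zero-sized marker types, so this constant carries no data: register I/O
+    /// methods take it by reference only to select the engine's `RegisterBase` impls, i.e. its
+    /// `PFALCON` and `PFALCON2` base addresses.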
+ const ID: Self; +} + +/// Represents a portion of the firmware to be loaded into a particular memory (e.g. IMEM or DMEM). +#[derive(Debug, Clone)] +pub(crate) struct FalconLoadTarget { + /// Offset from the start of the source object to copy from. + pub(crate) src_start: u32, + /// Offset from the start of the destination memory to copy into. + pub(crate) dst_start: u32, + /// Number of bytes to copy. + pub(crate) len: u32, +} + +/// Parameters for the falcon boot ROM. +#[derive(Debug, Clone)] +pub(crate) struct FalconBromParams { + /// Offset in `DMEM`` of the firmware's signature. + pub(crate) pkc_data_offset: u32, + /// Mask of engines valid for this firmware. + pub(crate) engine_id_mask: u16, + /// ID of the ucode used to infer a fuse register to validate the signature. + pub(crate) ucode_id: u8, +} + +/// Trait for providing load parameters of falcon firmwares. +pub(crate) trait FalconLoadParams { + /// Returns the load parameters for `IMEM`. + fn imem_load_params(&self) -> FalconLoadTarget; + + /// Returns the load parameters for `DMEM`. + fn dmem_load_params(&self) -> FalconLoadTarget; + + /// Returns the parameters to write into the BROM registers. + fn brom_params(&self) -> FalconBromParams; + + /// Returns the start address of the firmware. + fn boot_addr(&self) -> u32; +} + +/// Trait for a falcon firmware. +/// +/// A falcon firmware can be loaded on a given engine, and is presented in the form of a DMA +/// object. +pub(crate) trait FalconFirmware: FalconLoadParams + Deref<Target = DmaObject> { + /// Engine on which this firmware is to be loaded. + type Target: FalconEngine; +} + +/// Contains the base parameters common to all Falcon instances. +pub(crate) struct Falcon<E: FalconEngine> { + hal: KBox<dyn FalconHal<E>>, + dev: ARef<device::Device>, +} + +impl<E: FalconEngine + 'static> Falcon<E> { + /// Create a new falcon instance. + /// + /// `need_riscv` is set to `true` if the caller expects the falcon to be a dual falcon/riscv + /// controller. + pub(crate) fn new( + dev: &device::Device, + chipset: Chipset, + bar: &Bar0, + need_riscv: bool, + ) -> Result<Self> { + let hwcfg1 = regs::NV_PFALCON_FALCON_HWCFG1::read(bar, &E::ID); + // Check that the revision and security model contain valid values. + let _ = hwcfg1.core_rev()?; + let _ = hwcfg1.security_model()?; + + if need_riscv { + let hwcfg2 = regs::NV_PFALCON_FALCON_HWCFG2::read(bar, &E::ID); + if !hwcfg2.riscv() { + dev_err!( + dev, + "riscv support requested on a controller that does not support it\n" + ); + return Err(EINVAL); + } + } + + Ok(Self { + hal: hal::falcon_hal(chipset)?, + dev: dev.into(), + }) + } + + /// Wait for memory scrubbing to complete. + fn reset_wait_mem_scrubbing(&self, bar: &Bar0) -> Result { + // TIMEOUT: memory scrubbing should complete in less than 20ms. + util::wait_on(Delta::from_millis(20), || { + if regs::NV_PFALCON_FALCON_HWCFG2::read(bar, &E::ID).mem_scrubbing_done() { + Some(()) + } else { + None + } + }) + } + + /// Reset the falcon engine. + fn reset_eng(&self, bar: &Bar0) -> Result { + let _ = regs::NV_PFALCON_FALCON_HWCFG2::read(bar, &E::ID); + + // According to OpenRM's `kflcnPreResetWait_GA102` documentation, HW sometimes does not set + // RESET_READY so a non-failing timeout is used. 
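+        // The result is deliberately discarded: if RESET_READY never shows up within the window,
+        // the reset sequence below proceeds anyway instead of returning an error.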
+ let _ = util::wait_on(Delta::from_micros(150), || { + let r = regs::NV_PFALCON_FALCON_HWCFG2::read(bar, &E::ID); + if r.reset_ready() { + Some(()) + } else { + None + } + }); + + regs::NV_PFALCON_FALCON_ENGINE::alter(bar, &E::ID, |v| v.set_reset(true)); + + // TODO[DLAY]: replace with udelay() or equivalent once available. + // TIMEOUT: falcon engine should not take more than 10us to reset. + let _: Result = util::wait_on(Delta::from_micros(10), || None); + + regs::NV_PFALCON_FALCON_ENGINE::alter(bar, &E::ID, |v| v.set_reset(false)); + + self.reset_wait_mem_scrubbing(bar)?; + + Ok(()) + } + + /// Reset the controller, select the falcon core, and wait for memory scrubbing to complete. + pub(crate) fn reset(&self, bar: &Bar0) -> Result { + self.reset_eng(bar)?; + self.hal.select_core(self, bar)?; + self.reset_wait_mem_scrubbing(bar)?; + + regs::NV_PFALCON_FALCON_RM::default() + .set_value(regs::NV_PMC_BOOT_0::read(bar).into()) + .write(bar, &E::ID); + + Ok(()) + } + + /// Perform a DMA write according to `load_offsets` from `dma_handle` into the falcon's + /// `target_mem`. + /// + /// `sec` is set if the loaded firmware is expected to run in secure mode. + fn dma_wr<F: FalconFirmware<Target = E>>( + &self, + bar: &Bar0, + fw: &F, + target_mem: FalconMem, + load_offsets: FalconLoadTarget, + sec: bool, + ) -> Result { + const DMA_LEN: u32 = 256; + + // For IMEM, we want to use the start offset as a virtual address tag for each page, since + // code addresses in the firmware (and the boot vector) are virtual. + // + // For DMEM we can fold the start offset into the DMA handle. + let (src_start, dma_start) = match target_mem { + FalconMem::Imem => (load_offsets.src_start, fw.dma_handle()), + FalconMem::Dmem => ( + 0, + fw.dma_handle_with_offset(load_offsets.src_start as usize)?, + ), + }; + if dma_start % DmaAddress::from(DMA_LEN) > 0 { + dev_err!( + self.dev, + "DMA transfer start addresses must be a multiple of {}", + DMA_LEN + ); + return Err(EINVAL); + } + + // DMA transfers can only be done in units of 256 bytes. Compute how many such transfers we + // need to perform. + let num_transfers = load_offsets.len.div_ceil(DMA_LEN); + + // Check that the area we are about to transfer is within the bounds of the DMA object. + // Upper limit of transfer is `(num_transfers * DMA_LEN) + load_offsets.src_start`. + match num_transfers + .checked_mul(DMA_LEN) + .and_then(|size| size.checked_add(load_offsets.src_start)) + { + None => { + dev_err!(self.dev, "DMA transfer length overflow"); + return Err(EOVERFLOW); + } + Some(upper_bound) if upper_bound as usize > fw.size() => { + dev_err!(self.dev, "DMA transfer goes beyond range of DMA object"); + return Err(EINVAL); + } + Some(_) => (), + }; + + // Set up the base source DMA address. + + regs::NV_PFALCON_FALCON_DMATRFBASE::default() + .set_base((dma_start >> 8) as u32) + .write(bar, &E::ID); + regs::NV_PFALCON_FALCON_DMATRFBASE1::default() + .set_base((dma_start >> 40) as u16) + .write(bar, &E::ID); + + let cmd = regs::NV_PFALCON_FALCON_DMATRFCMD::default() + .set_size(DmaTrfCmdSize::Size256B) + .set_imem(target_mem == FalconMem::Imem) + .set_sec(if sec { 1 } else { 0 }); + + for pos in (0..num_transfers).map(|i| i * DMA_LEN) { + // Perform a transfer of size `DMA_LEN`. + regs::NV_PFALCON_FALCON_DMATRFMOFFS::default() + .set_offs(load_offsets.dst_start + pos) + .write(bar, &E::ID); + regs::NV_PFALCON_FALCON_DMATRFFBOFFS::default() + .set_offs(src_start + pos) + .write(bar, &E::ID); + cmd.write(bar, &E::ID); + + // Wait for the transfer to complete. 
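+            // Completion is signaled by the `idle` bit of `DMATRFCMD` reading back as set.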
+ // TIMEOUT: arbitrarily large value, no DMA transfer to the falcon's small memories + // should ever take that long. + util::wait_on(Delta::from_secs(2), || { + let r = regs::NV_PFALCON_FALCON_DMATRFCMD::read(bar, &E::ID); + if r.idle() { + Some(()) + } else { + None + } + })?; + } + + Ok(()) + } + + /// Perform a DMA load into `IMEM` and `DMEM` of `fw`, and prepare the falcon to run it. + pub(crate) fn dma_load<F: FalconFirmware<Target = E>>(&self, bar: &Bar0, fw: &F) -> Result { + regs::NV_PFALCON_FBIF_CTL::alter(bar, &E::ID, |v| v.set_allow_phys_no_ctx(true)); + regs::NV_PFALCON_FALCON_DMACTL::default().write(bar, &E::ID); + regs::NV_PFALCON_FBIF_TRANSCFG::alter(bar, &E::ID, 0, |v| { + v.set_target(FalconFbifTarget::CoherentSysmem) + .set_mem_type(FalconFbifMemType::Physical) + }); + + self.dma_wr(bar, fw, FalconMem::Imem, fw.imem_load_params(), true)?; + self.dma_wr(bar, fw, FalconMem::Dmem, fw.dmem_load_params(), true)?; + + self.hal.program_brom(self, bar, &fw.brom_params())?; + + // Set `BootVec` to start of non-secure code. + regs::NV_PFALCON_FALCON_BOOTVEC::default() + .set_value(fw.boot_addr()) + .write(bar, &E::ID); + + Ok(()) + } + + /// Runs the loaded firmware and waits for its completion. + /// + /// `mbox0` and `mbox1` are optional parameters to write into the `MBOX0` and `MBOX1` registers + /// prior to running. + /// + /// Wait up to two seconds for the firmware to complete, and return its exit status read from + /// the `MBOX0` and `MBOX1` registers. + pub(crate) fn boot( + &self, + bar: &Bar0, + mbox0: Option<u32>, + mbox1: Option<u32>, + ) -> Result<(u32, u32)> { + if let Some(mbox0) = mbox0 { + regs::NV_PFALCON_FALCON_MAILBOX0::default() + .set_value(mbox0) + .write(bar, &E::ID); + } + + if let Some(mbox1) = mbox1 { + regs::NV_PFALCON_FALCON_MAILBOX1::default() + .set_value(mbox1) + .write(bar, &E::ID); + } + + match regs::NV_PFALCON_FALCON_CPUCTL::read(bar, &E::ID).alias_en() { + true => regs::NV_PFALCON_FALCON_CPUCTL_ALIAS::default() + .set_startcpu(true) + .write(bar, &E::ID), + false => regs::NV_PFALCON_FALCON_CPUCTL::default() + .set_startcpu(true) + .write(bar, &E::ID), + } + + // TIMEOUT: arbitrarily large value, firmwares should complete in less than 2 seconds. + util::wait_on(Delta::from_secs(2), || { + let r = regs::NV_PFALCON_FALCON_CPUCTL::read(bar, &E::ID); + if r.halted() { + Some(()) + } else { + None + } + })?; + + let (mbox0, mbox1) = ( + regs::NV_PFALCON_FALCON_MAILBOX0::read(bar, &E::ID).value(), + regs::NV_PFALCON_FALCON_MAILBOX1::read(bar, &E::ID).value(), + ); + + Ok((mbox0, mbox1)) + } + + /// Returns the fused version of the signature to use in order to run a HS firmware on this + /// falcon instance. `engine_id_mask` and `ucode_id` are obtained from the firmware header. + pub(crate) fn signature_reg_fuse_version( + &self, + bar: &Bar0, + engine_id_mask: u16, + ucode_id: u8, + ) -> Result<u32> { + self.hal + .signature_reg_fuse_version(self, bar, engine_id_mask, ucode_id) + } +} diff --git a/drivers/gpu/nova-core/falcon/gsp.rs b/drivers/gpu/nova-core/falcon/gsp.rs new file mode 100644 index 000000000000..f17599cb49fa --- /dev/null +++ b/drivers/gpu/nova-core/falcon/gsp.rs @@ -0,0 +1,32 @@ +// SPDX-License-Identifier: GPL-2.0 + +use crate::{ + driver::Bar0, + falcon::{Falcon, FalconEngine, PFalcon2Base, PFalconBase}, + regs::{self, macros::RegisterBase}, +}; + +/// Type specifying the `Gsp` falcon engine. Cannot be instantiated. 
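+/// The private unit field prevents construction outside this module; the [`FalconEngine`] impl
+/// below provides the single `ID` instance that register I/O methods take by reference.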
+pub(crate) struct Gsp(()); + +impl RegisterBase<PFalconBase> for Gsp { + const BASE: usize = 0x00110000; +} + +impl RegisterBase<PFalcon2Base> for Gsp { + const BASE: usize = 0x00111000; +} + +impl FalconEngine for Gsp { + const ID: Self = Gsp(()); +} + +impl Falcon<Gsp> { + /// Clears the SWGEN0 bit in the Falcon's IRQ status clear register to + /// allow GSP to signal CPU for processing new messages in message queue. + pub(crate) fn clear_swgen0_intr(&self, bar: &Bar0) { + regs::NV_PFALCON_FALCON_IRQSCLR::default() + .set_swgen0(true) + .write(bar, &Gsp::ID); + } +} diff --git a/drivers/gpu/nova-core/falcon/hal.rs b/drivers/gpu/nova-core/falcon/hal.rs new file mode 100644 index 000000000000..bba288455617 --- /dev/null +++ b/drivers/gpu/nova-core/falcon/hal.rs @@ -0,0 +1,54 @@ +// SPDX-License-Identifier: GPL-2.0 + +use kernel::prelude::*; + +use crate::driver::Bar0; +use crate::falcon::{Falcon, FalconBromParams, FalconEngine}; +use crate::gpu::Chipset; + +mod ga102; + +/// Hardware Abstraction Layer for Falcon cores. +/// +/// Implements chipset-specific low-level operations. The trait is generic against [`FalconEngine`] +/// so its `BASE` parameter can be used in order to avoid runtime bound checks when accessing +/// registers. +pub(crate) trait FalconHal<E: FalconEngine>: Send + Sync { + /// Activates the Falcon core if the engine is a risvc/falcon dual engine. + fn select_core(&self, _falcon: &Falcon<E>, _bar: &Bar0) -> Result { + Ok(()) + } + + /// Returns the fused version of the signature to use in order to run a HS firmware on this + /// falcon instance. `engine_id_mask` and `ucode_id` are obtained from the firmware header. + fn signature_reg_fuse_version( + &self, + falcon: &Falcon<E>, + bar: &Bar0, + engine_id_mask: u16, + ucode_id: u8, + ) -> Result<u32>; + + /// Program the boot ROM registers prior to starting a secure firmware. + fn program_brom(&self, falcon: &Falcon<E>, bar: &Bar0, params: &FalconBromParams) -> Result; +} + +/// Returns a boxed falcon HAL adequate for `chipset`. +/// +/// We use a heap-allocated trait object instead of a statically defined one because the +/// generic `FalconEngine` argument makes it difficult to define all the combinations +/// statically. +pub(super) fn falcon_hal<E: FalconEngine + 'static>( + chipset: Chipset, +) -> Result<KBox<dyn FalconHal<E>>> { + use Chipset::*; + + let hal = match chipset { + GA102 | GA103 | GA104 | GA106 | GA107 => { + KBox::new(ga102::Ga102::<E>::new(), GFP_KERNEL)? as KBox<dyn FalconHal<E>> + } + _ => return Err(ENOTSUPP), + }; + + Ok(hal) +} diff --git a/drivers/gpu/nova-core/falcon/hal/ga102.rs b/drivers/gpu/nova-core/falcon/hal/ga102.rs new file mode 100644 index 000000000000..0b1cbe7853b3 --- /dev/null +++ b/drivers/gpu/nova-core/falcon/hal/ga102.rs @@ -0,0 +1,116 @@ +// SPDX-License-Identifier: GPL-2.0 + +use core::marker::PhantomData; + +use kernel::device; +use kernel::prelude::*; +use kernel::time::Delta; + +use crate::driver::Bar0; +use crate::falcon::{ + Falcon, FalconBromParams, FalconEngine, FalconModSelAlgo, PeregrineCoreSelect, +}; +use crate::regs; +use crate::util; + +use super::FalconHal; + +fn select_core_ga102<E: FalconEngine>(bar: &Bar0) -> Result { + let bcr_ctrl = regs::NV_PRISCV_RISCV_BCR_CTRL::read(bar, &E::ID); + if bcr_ctrl.core_select() != PeregrineCoreSelect::Falcon { + regs::NV_PRISCV_RISCV_BCR_CTRL::default() + .set_core_select(PeregrineCoreSelect::Falcon) + .write(bar, &E::ID); + + // TIMEOUT: falcon core should take less than 10ms to report being enabled. 
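+        // Poll until `valid` reports that the switch to the falcon core has taken effect.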
+ util::wait_on(Delta::from_millis(10), || { + let r = regs::NV_PRISCV_RISCV_BCR_CTRL::read(bar, &E::ID); + if r.valid() { + Some(()) + } else { + None + } + })?; + } + + Ok(()) +} + +fn signature_reg_fuse_version_ga102( + dev: &device::Device, + bar: &Bar0, + engine_id_mask: u16, + ucode_id: u8, +) -> Result<u32> { + const NV_FUSE_OPT_FPF_SIZE: u8 = regs::NV_FUSE_OPT_FPF_SIZE as u8; + + // Each engine has 16 ucode version registers numbered from 1 to 16. + let ucode_idx = match ucode_id { + 1..=NV_FUSE_OPT_FPF_SIZE => (ucode_id - 1) as usize, + _ => { + dev_err!(dev, "invalid ucode id {:#x}", ucode_id); + return Err(EINVAL); + } + }; + + // `ucode_idx` is guaranteed to be in the range [0..15], making the `read` calls provable valid + // at build-time. + let reg_fuse_version = if engine_id_mask & 0x0001 != 0 { + regs::NV_FUSE_OPT_FPF_SEC2_UCODE1_VERSION::read(bar, ucode_idx).data() + } else if engine_id_mask & 0x0004 != 0 { + regs::NV_FUSE_OPT_FPF_NVDEC_UCODE1_VERSION::read(bar, ucode_idx).data() + } else if engine_id_mask & 0x0400 != 0 { + regs::NV_FUSE_OPT_FPF_GSP_UCODE1_VERSION::read(bar, ucode_idx).data() + } else { + dev_err!(dev, "unexpected engine_id_mask {:#x}", engine_id_mask); + return Err(EINVAL); + }; + + // TODO[NUMM]: replace with `last_set_bit` once it lands. + Ok(u16::BITS - reg_fuse_version.leading_zeros()) +} + +fn program_brom_ga102<E: FalconEngine>(bar: &Bar0, params: &FalconBromParams) -> Result { + regs::NV_PFALCON2_FALCON_BROM_PARAADDR::default() + .set_value(params.pkc_data_offset) + .write(bar, &E::ID, 0); + regs::NV_PFALCON2_FALCON_BROM_ENGIDMASK::default() + .set_value(u32::from(params.engine_id_mask)) + .write(bar, &E::ID); + regs::NV_PFALCON2_FALCON_BROM_CURR_UCODE_ID::default() + .set_ucode_id(params.ucode_id) + .write(bar, &E::ID); + regs::NV_PFALCON2_FALCON_MOD_SEL::default() + .set_algo(FalconModSelAlgo::Rsa3k) + .write(bar, &E::ID); + + Ok(()) +} + +pub(super) struct Ga102<E: FalconEngine>(PhantomData<E>); + +impl<E: FalconEngine> Ga102<E> { + pub(super) fn new() -> Self { + Self(PhantomData) + } +} + +impl<E: FalconEngine> FalconHal<E> for Ga102<E> { + fn select_core(&self, _falcon: &Falcon<E>, bar: &Bar0) -> Result { + select_core_ga102::<E>(bar) + } + + fn signature_reg_fuse_version( + &self, + falcon: &Falcon<E>, + bar: &Bar0, + engine_id_mask: u16, + ucode_id: u8, + ) -> Result<u32> { + signature_reg_fuse_version_ga102(&falcon.dev, bar, engine_id_mask, ucode_id) + } + + fn program_brom(&self, _falcon: &Falcon<E>, bar: &Bar0, params: &FalconBromParams) -> Result { + program_brom_ga102::<E>(bar, params) + } +} diff --git a/drivers/gpu/nova-core/falcon/sec2.rs b/drivers/gpu/nova-core/falcon/sec2.rs new file mode 100644 index 000000000000..815786c8480d --- /dev/null +++ b/drivers/gpu/nova-core/falcon/sec2.rs @@ -0,0 +1,19 @@ +// SPDX-License-Identifier: GPL-2.0 + +use crate::falcon::{FalconEngine, PFalcon2Base, PFalconBase}; +use crate::regs::macros::RegisterBase; + +/// Type specifying the `Sec2` falcon engine. Cannot be instantiated. 
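+/// This is the engine the Booter firmware runs on; the type only exists to provide the
+/// `PFALCON`/`PFALCON2` register bases and the [`FalconEngine`] identity below.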
+pub(crate) struct Sec2(()); + +impl RegisterBase<PFalconBase> for Sec2 { + const BASE: usize = 0x00840000; +} + +impl RegisterBase<PFalcon2Base> for Sec2 { + const BASE: usize = 0x00841000; +} + +impl FalconEngine for Sec2 { + const ID: Self = Sec2(()); +} diff --git a/drivers/gpu/nova-core/fb.rs b/drivers/gpu/nova-core/fb.rs new file mode 100644 index 000000000000..27d9edab8347 --- /dev/null +++ b/drivers/gpu/nova-core/fb.rs @@ -0,0 +1,147 @@ +// SPDX-License-Identifier: GPL-2.0 + +use core::ops::Range; + +use kernel::prelude::*; +use kernel::ptr::{Alignable, Alignment}; +use kernel::sizes::*; +use kernel::sync::aref::ARef; +use kernel::{dev_warn, device}; + +use crate::dma::DmaObject; +use crate::driver::Bar0; +use crate::gpu::Chipset; +use crate::regs; + +mod hal; + +/// Type holding the sysmem flush memory page, a page of memory to be written into the +/// `NV_PFB_NISO_FLUSH_SYSMEM_ADDR*` registers and used to maintain memory coherency. +/// +/// A system memory page is required for `sysmembar`, which is a GPU-initiated hardware +/// memory-barrier operation that flushes all pending GPU-side memory writes that were done through +/// PCIE to system memory. It is required for falcons to be reset as the reset operation involves a +/// reset handshake. When the falcon acknowledges a reset, it writes into system memory. To ensure +/// this write is visible to the host and prevent driver timeouts, the falcon must perform a +/// sysmembar operation to flush its writes. +/// +/// Because of this, the sysmem flush memory page must be registered as early as possible during +/// driver initialization, and before any falcon is reset. +/// +/// Users are responsible for manually calling [`Self::unregister`] before dropping this object, +/// otherwise the GPU might still use it even after it has been freed. +pub(crate) struct SysmemFlush { + /// Chipset we are operating on. + chipset: Chipset, + device: ARef<device::Device>, + /// Keep the page alive as long as we need it. + page: DmaObject, +} + +impl SysmemFlush { + /// Allocate a memory page and register it as the sysmem flush page. + pub(crate) fn register( + dev: &device::Device<device::Bound>, + bar: &Bar0, + chipset: Chipset, + ) -> Result<Self> { + let page = DmaObject::new(dev, kernel::page::PAGE_SIZE)?; + + hal::fb_hal(chipset).write_sysmem_flush_page(bar, page.dma_handle())?; + + Ok(Self { + chipset, + device: dev.into(), + page, + }) + } + + /// Unregister the managed sysmem flush page. + /// + /// In order to gracefully tear down the GPU, users must make sure to call this method before + /// dropping the object. + pub(crate) fn unregister(&self, bar: &Bar0) { + let hal = hal::fb_hal(self.chipset); + + if hal.read_sysmem_flush_page(bar) == self.page.dma_handle() { + let _ = hal.write_sysmem_flush_page(bar, 0).inspect_err(|e| { + dev_warn!( + &self.device, + "failed to unregister sysmem flush page: {:?}", + e + ) + }); + } else { + // Another page has been registered after us for some reason - warn as this is a bug. + dev_warn!( + &self.device, + "attempt to unregister a sysmem flush page that is not active\n" + ); + } + } +} + +/// Layout of the GPU framebuffer memory. +/// +/// Contains ranges of GPU memory reserved for a given purpose during the GSP boot process. +#[derive(Debug)] +#[expect(dead_code)] +pub(crate) struct FbLayout { + pub(crate) fb: Range<u64>, + pub(crate) vga_workspace: Range<u64>, + pub(crate) frts: Range<u64>, +} + +impl FbLayout { + /// Computes the FB layout. 
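+    ///
+    /// The framebuffer spans `0..vidmem_size`. The VGA workspace sits at the tail of VRAM: the
+    /// last 1 MiB by default, the address reported by the display hardware if it lies within
+    /// that window, or the last 128 KiB otherwise. FRTS is a 1 MiB region carved out directly
+    /// below the 128 KiB-aligned workspace base.
+    ///
+    /// A minimal usage sketch (assuming `chipset` and `bar` are in scope):
+    ///
+    /// ```ignore
+    /// let layout = FbLayout::new(chipset, bar)?;
+    /// // FRTS always ends at or below the VGA workspace base.
+    /// assert!(layout.frts.end <= layout.vga_workspace.start);
+    /// ```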
+ pub(crate) fn new(chipset: Chipset, bar: &Bar0) -> Result<Self> { + let hal = hal::fb_hal(chipset); + + let fb = { + let fb_size = hal.vidmem_size(bar); + + 0..fb_size + }; + + let vga_workspace = { + let vga_base = { + const NV_PRAMIN_SIZE: u64 = SZ_1M as u64; + let base = fb.end - NV_PRAMIN_SIZE; + + if hal.supports_display(bar) { + match regs::NV_PDISP_VGA_WORKSPACE_BASE::read(bar).vga_workspace_addr() { + Some(addr) => { + if addr < base { + const VBIOS_WORKSPACE_SIZE: u64 = SZ_128K as u64; + + // Point workspace address to end of framebuffer. + fb.end - VBIOS_WORKSPACE_SIZE + } else { + addr + } + } + None => base, + } + } else { + base + } + }; + + vga_base..fb.end + }; + + let frts = { + const FRTS_DOWN_ALIGN: Alignment = Alignment::new::<SZ_128K>(); + const FRTS_SIZE: u64 = SZ_1M as u64; + let frts_base = vga_workspace.start.align_down(FRTS_DOWN_ALIGN) - FRTS_SIZE; + + frts_base..frts_base + FRTS_SIZE + }; + + Ok(Self { + fb, + vga_workspace, + frts, + }) + } +} diff --git a/drivers/gpu/nova-core/fb/hal.rs b/drivers/gpu/nova-core/fb/hal.rs new file mode 100644 index 000000000000..2f914948bb9a --- /dev/null +++ b/drivers/gpu/nova-core/fb/hal.rs @@ -0,0 +1,39 @@ +// SPDX-License-Identifier: GPL-2.0 + +use kernel::prelude::*; + +use crate::driver::Bar0; +use crate::gpu::Chipset; + +mod ga100; +mod ga102; +mod tu102; + +pub(crate) trait FbHal { + /// Returns the address of the currently-registered sysmem flush page. + fn read_sysmem_flush_page(&self, bar: &Bar0) -> u64; + + /// Register `addr` as the address of the sysmem flush page. + /// + /// This might fail if the address is too large for the receiving register. + fn write_sysmem_flush_page(&self, bar: &Bar0, addr: u64) -> Result; + + /// Returns `true` is display is supported. + fn supports_display(&self, bar: &Bar0) -> bool; + + /// Returns the VRAM size, in bytes. + fn vidmem_size(&self, bar: &Bar0) -> u64; +} + +/// Returns the HAL corresponding to `chipset`. 
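+///
+/// Turing chipsets share the TU102 HAL, GA100 has a dedicated implementation, and GA102 and
+/// later chipsets (including Ada) use the GA102 HAL.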
+pub(super) fn fb_hal(chipset: Chipset) -> &'static dyn FbHal { + use Chipset::*; + + match chipset { + TU102 | TU104 | TU106 | TU117 | TU116 => tu102::TU102_HAL, + GA100 => ga100::GA100_HAL, + GA102 | GA103 | GA104 | GA106 | GA107 | AD102 | AD103 | AD104 | AD106 | AD107 => { + ga102::GA102_HAL + } + } +} diff --git a/drivers/gpu/nova-core/fb/hal/ga100.rs b/drivers/gpu/nova-core/fb/hal/ga100.rs new file mode 100644 index 000000000000..871c42bf033a --- /dev/null +++ b/drivers/gpu/nova-core/fb/hal/ga100.rs @@ -0,0 +1,57 @@ +// SPDX-License-Identifier: GPL-2.0 + +struct Ga100; + +use kernel::prelude::*; + +use crate::driver::Bar0; +use crate::fb::hal::FbHal; +use crate::regs; + +use super::tu102::FLUSH_SYSMEM_ADDR_SHIFT; + +pub(super) fn read_sysmem_flush_page_ga100(bar: &Bar0) -> u64 { + u64::from(regs::NV_PFB_NISO_FLUSH_SYSMEM_ADDR::read(bar).adr_39_08()) << FLUSH_SYSMEM_ADDR_SHIFT + | u64::from(regs::NV_PFB_NISO_FLUSH_SYSMEM_ADDR_HI::read(bar).adr_63_40()) + << FLUSH_SYSMEM_ADDR_SHIFT_HI +} + +pub(super) fn write_sysmem_flush_page_ga100(bar: &Bar0, addr: u64) { + regs::NV_PFB_NISO_FLUSH_SYSMEM_ADDR_HI::default() + .set_adr_63_40((addr >> FLUSH_SYSMEM_ADDR_SHIFT_HI) as u32) + .write(bar); + regs::NV_PFB_NISO_FLUSH_SYSMEM_ADDR::default() + .set_adr_39_08((addr >> FLUSH_SYSMEM_ADDR_SHIFT) as u32) + .write(bar); +} + +pub(super) fn display_enabled_ga100(bar: &Bar0) -> bool { + !regs::ga100::NV_FUSE_STATUS_OPT_DISPLAY::read(bar).display_disabled() +} + +/// Shift applied to the sysmem address before it is written into +/// `NV_PFB_NISO_FLUSH_SYSMEM_ADDR_HI`, +const FLUSH_SYSMEM_ADDR_SHIFT_HI: u32 = 40; + +impl FbHal for Ga100 { + fn read_sysmem_flush_page(&self, bar: &Bar0) -> u64 { + read_sysmem_flush_page_ga100(bar) + } + + fn write_sysmem_flush_page(&self, bar: &Bar0, addr: u64) -> Result { + write_sysmem_flush_page_ga100(bar, addr); + + Ok(()) + } + + fn supports_display(&self, bar: &Bar0) -> bool { + display_enabled_ga100(bar) + } + + fn vidmem_size(&self, bar: &Bar0) -> u64 { + super::tu102::vidmem_size_gp102(bar) + } +} + +const GA100: Ga100 = Ga100; +pub(super) const GA100_HAL: &dyn FbHal = &GA100; diff --git a/drivers/gpu/nova-core/fb/hal/ga102.rs b/drivers/gpu/nova-core/fb/hal/ga102.rs new file mode 100644 index 000000000000..a73b77e39715 --- /dev/null +++ b/drivers/gpu/nova-core/fb/hal/ga102.rs @@ -0,0 +1,36 @@ +// SPDX-License-Identifier: GPL-2.0 + +use kernel::prelude::*; + +use crate::driver::Bar0; +use crate::fb::hal::FbHal; +use crate::regs; + +fn vidmem_size_ga102(bar: &Bar0) -> u64 { + regs::NV_USABLE_FB_SIZE_IN_MB::read(bar).usable_fb_size() +} + +struct Ga102; + +impl FbHal for Ga102 { + fn read_sysmem_flush_page(&self, bar: &Bar0) -> u64 { + super::ga100::read_sysmem_flush_page_ga100(bar) + } + + fn write_sysmem_flush_page(&self, bar: &Bar0, addr: u64) -> Result { + super::ga100::write_sysmem_flush_page_ga100(bar, addr); + + Ok(()) + } + + fn supports_display(&self, bar: &Bar0) -> bool { + super::ga100::display_enabled_ga100(bar) + } + + fn vidmem_size(&self, bar: &Bar0) -> u64 { + vidmem_size_ga102(bar) + } +} + +const GA102: Ga102 = Ga102; +pub(super) const GA102_HAL: &dyn FbHal = &GA102; diff --git a/drivers/gpu/nova-core/fb/hal/tu102.rs b/drivers/gpu/nova-core/fb/hal/tu102.rs new file mode 100644 index 000000000000..b022c781caf4 --- /dev/null +++ b/drivers/gpu/nova-core/fb/hal/tu102.rs @@ -0,0 +1,58 @@ +// SPDX-License-Identifier: GPL-2.0 + +use crate::driver::Bar0; +use crate::fb::hal::FbHal; +use crate::regs; +use kernel::prelude::*; + +/// Shift applied to the sysmem 
address before it is written into `NV_PFB_NISO_FLUSH_SYSMEM_ADDR`, +/// to be used by HALs. +pub(super) const FLUSH_SYSMEM_ADDR_SHIFT: u32 = 8; + +pub(super) fn read_sysmem_flush_page_gm107(bar: &Bar0) -> u64 { + u64::from(regs::NV_PFB_NISO_FLUSH_SYSMEM_ADDR::read(bar).adr_39_08()) << FLUSH_SYSMEM_ADDR_SHIFT +} + +pub(super) fn write_sysmem_flush_page_gm107(bar: &Bar0, addr: u64) -> Result { + // Check that the address doesn't overflow the receiving 32-bit register. + if addr >> (u32::BITS + FLUSH_SYSMEM_ADDR_SHIFT) == 0 { + regs::NV_PFB_NISO_FLUSH_SYSMEM_ADDR::default() + .set_adr_39_08((addr >> FLUSH_SYSMEM_ADDR_SHIFT) as u32) + .write(bar); + + Ok(()) + } else { + Err(EINVAL) + } +} + +pub(super) fn display_enabled_gm107(bar: &Bar0) -> bool { + !regs::gm107::NV_FUSE_STATUS_OPT_DISPLAY::read(bar).display_disabled() +} + +pub(super) fn vidmem_size_gp102(bar: &Bar0) -> u64 { + regs::NV_PFB_PRI_MMU_LOCAL_MEMORY_RANGE::read(bar).usable_fb_size() +} + +struct Tu102; + +impl FbHal for Tu102 { + fn read_sysmem_flush_page(&self, bar: &Bar0) -> u64 { + read_sysmem_flush_page_gm107(bar) + } + + fn write_sysmem_flush_page(&self, bar: &Bar0, addr: u64) -> Result { + write_sysmem_flush_page_gm107(bar, addr) + } + + fn supports_display(&self, bar: &Bar0) -> bool { + display_enabled_gm107(bar) + } + + fn vidmem_size(&self, bar: &Bar0) -> u64 { + vidmem_size_gp102(bar) + } +} + +const TU102: Tu102 = Tu102; +pub(super) const TU102_HAL: &dyn FbHal = &TU102; diff --git a/drivers/gpu/nova-core/firmware.rs b/drivers/gpu/nova-core/firmware.rs index 4b8a38358a4f..4179a74a2342 100644 --- a/drivers/gpu/nova-core/firmware.rs +++ b/drivers/gpu/nova-core/firmware.rs @@ -3,41 +3,197 @@ //! Contains structures and functions dedicated to the parsing, building and patching of firmwares //! to be loaded into a given execution unit. +use core::marker::PhantomData; +use core::mem::size_of; + use kernel::device; use kernel::firmware; use kernel::prelude::*; use kernel::str::CString; +use kernel::transmute::FromBytes; +use crate::dma::DmaObject; +use crate::falcon::FalconFirmware; use crate::gpu; -use crate::gpu::Chipset; -pub(crate) const FIRMWARE_VERSION: &str = "535.113.01"; +pub(crate) mod booter; +pub(crate) mod fwsec; +pub(crate) mod gsp; +pub(crate) mod riscv; + +pub(crate) const FIRMWARE_VERSION: &str = "570.144"; + +/// Requests the GPU firmware `name` suitable for `chipset`, with version `ver`. +fn request_firmware( + dev: &device::Device, + chipset: gpu::Chipset, + name: &str, + ver: &str, +) -> Result<firmware::Firmware> { + let chip_name = chipset.name(); + + CString::try_from_fmt(fmt!("nvidia/{chip_name}/gsp/{name}-{ver}.bin")) + .and_then(|path| firmware::Firmware::request(&path, dev)) +} -/// Structure encapsulating the firmware blobs required for the GPU to operate. -#[expect(dead_code)] -pub(crate) struct Firmware { - booter_load: firmware::Firmware, - booter_unload: firmware::Firmware, - bootloader: firmware::Firmware, - gsp: firmware::Firmware, +/// Structure used to describe some firmwares, notably FWSEC-FRTS. +#[repr(C)] +#[derive(Debug, Clone)] +pub(crate) struct FalconUCodeDescV3 { + /// Header defined by `NV_BIT_FALCON_UCODE_DESC_HEADER_VDESC*` in OpenRM. + hdr: u32, + /// Stored size of the ucode after the header. + stored_size: u32, + /// Offset in `DMEM` at which the signature is expected to be found. + pub(crate) pkc_data_offset: u32, + /// Offset after the code segment at which the app headers are located. 
+ pub(crate) interface_offset: u32, + /// Base address at which to load the code segment into `IMEM`. + pub(crate) imem_phys_base: u32, + /// Size in bytes of the code to copy into `IMEM`. + pub(crate) imem_load_size: u32, + /// Virtual `IMEM` address (i.e. `tag`) at which the code should start. + pub(crate) imem_virt_base: u32, + /// Base address at which to load the data segment into `DMEM`. + pub(crate) dmem_phys_base: u32, + /// Size in bytes of the data to copy into `DMEM`. + pub(crate) dmem_load_size: u32, + /// Mask of the falcon engines on which this firmware can run. + pub(crate) engine_id_mask: u16, + /// ID of the ucode used to infer a fuse register to validate the signature. + pub(crate) ucode_id: u8, + /// Number of signatures in this firmware. + pub(crate) signature_count: u8, + /// Versions of the signatures, used to infer a valid signature to use. + pub(crate) signature_versions: u16, + _reserved: u16, } -impl Firmware { - pub(crate) fn new(dev: &device::Device, chipset: Chipset, ver: &str) -> Result<Firmware> { - let mut chip_name = CString::try_from_fmt(fmt!("{}", chipset))?; - chip_name.make_ascii_lowercase(); +impl FalconUCodeDescV3 { + /// Returns the size in bytes of the header. + pub(crate) fn size(&self) -> usize { + const HDR_SIZE_SHIFT: u32 = 16; + const HDR_SIZE_MASK: u32 = 0xffff0000; + + ((self.hdr & HDR_SIZE_MASK) >> HDR_SIZE_SHIFT) as usize + } +} + +/// Trait implemented by types defining the signed state of a firmware. +trait SignedState {} + +/// Type indicating that the firmware must be signed before it can be used. +struct Unsigned; +impl SignedState for Unsigned {} + +/// Type indicating that the firmware is signed and ready to be loaded. +struct Signed; +impl SignedState for Signed {} + +/// A [`DmaObject`] containing a specific microcode ready to be loaded into a falcon. +/// +/// This is module-local and meant for sub-modules to use internally. +/// +/// After construction, a firmware is [`Unsigned`], and must generally be patched with a signature +/// before it can be loaded (with an exception for development hardware). The +/// [`Self::patch_signature`] and [`Self::no_patch_signature`] methods are used to transition the +/// firmware to its [`Signed`] state. +struct FirmwareDmaObject<F: FalconFirmware, S: SignedState>(DmaObject, PhantomData<(F, S)>); + +/// Trait for signatures to be patched directly into a given firmware. +/// +/// This is module-local and meant for sub-modules to use internally. +trait FirmwareSignature<F: FalconFirmware>: AsRef<[u8]> {} + +impl<F: FalconFirmware> FirmwareDmaObject<F, Unsigned> { + /// Patches the firmware at offset `sig_base_img` with `signature`. + fn patch_signature<S: FirmwareSignature<F>>( + mut self, + signature: &S, + sig_base_img: usize, + ) -> Result<FirmwareDmaObject<F, Signed>> { + let signature_bytes = signature.as_ref(); + if sig_base_img + signature_bytes.len() > self.0.size() { + return Err(EINVAL); + } + + // SAFETY: We are the only user of this object, so there cannot be any race. + let dst = unsafe { self.0.start_ptr_mut().add(sig_base_img) }; - let request = |name_| { - CString::try_from_fmt(fmt!("nvidia/{}/gsp/{}-{}.bin", &*chip_name, name_, ver)) - .and_then(|path| firmware::Firmware::request(&path, dev)) + // SAFETY: `signature` and `dst` are valid, properly aligned, and do not overlap. 
+ unsafe { + core::ptr::copy_nonoverlapping(signature_bytes.as_ptr(), dst, signature_bytes.len()) }; - Ok(Firmware { - booter_load: request("booter_load")?, - booter_unload: request("booter_unload")?, - bootloader: request("bootloader")?, - gsp: request("gsp")?, - }) + Ok(FirmwareDmaObject(self.0, PhantomData)) + } + + /// Mark the firmware as signed without patching it. + /// + /// This method is used to explicitly confirm that we do not need to sign the firmware, while + /// allowing us to continue as if it was. This is typically only needed for development + /// hardware. + fn no_patch_signature(self) -> FirmwareDmaObject<F, Signed> { + FirmwareDmaObject(self.0, PhantomData) + } +} + +/// Header common to most firmware files. +#[repr(C)] +#[derive(Debug, Clone)] +struct BinHdr { + /// Magic number, must be `0x10de`. + bin_magic: u32, + /// Version of the header. + bin_ver: u32, + /// Size in bytes of the binary (to be ignored). + bin_size: u32, + /// Offset of the start of the application-specific header. + header_offset: u32, + /// Offset of the start of the data payload. + data_offset: u32, + /// Size in bytes of the data payload. + data_size: u32, +} + +// SAFETY: all bit patterns are valid for this type, and it doesn't use interior mutability. +unsafe impl FromBytes for BinHdr {} + +// A firmware blob starting with a `BinHdr`. +struct BinFirmware<'a> { + hdr: BinHdr, + fw: &'a [u8], +} + +impl<'a> BinFirmware<'a> { + /// Interpret `fw` as a firmware image starting with a [`BinHdr`], and returns the + /// corresponding [`BinFirmware`] that can be used to extract its payload. + fn new(fw: &'a firmware::Firmware) -> Result<Self> { + const BIN_MAGIC: u32 = 0x10de; + let fw = fw.data(); + + fw.get(0..size_of::<BinHdr>()) + // Extract header. + .and_then(BinHdr::from_bytes_copy) + // Validate header. + .and_then(|hdr| { + if hdr.bin_magic == BIN_MAGIC { + Some(hdr) + } else { + None + } + }) + .map(|hdr| Self { hdr, fw }) + .ok_or(EINVAL) + } + + /// Returns the data payload of the firmware, or `None` if the data range is out of bounds of + /// the firmware image. + fn data(&self) -> Option<&[u8]> { + let fw_start = self.hdr.data_offset as usize; + let fw_size = self.hdr.data_size as usize; + + self.fw.get(fw_start..fw_start + fw_size) } } @@ -71,8 +227,8 @@ impl<const N: usize> ModInfoBuilder<N> { let mut this = Self(firmware::ModInfoBuilder::new(module_name)); let mut i = 0; - while i < gpu::Chipset::NAMES.len() { - this = this.make_entry_chipset(gpu::Chipset::NAMES[i]); + while i < gpu::Chipset::ALL.len() { + this = this.make_entry_chipset(gpu::Chipset::ALL[i].name()); i += 1; } diff --git a/drivers/gpu/nova-core/firmware/booter.rs b/drivers/gpu/nova-core/firmware/booter.rs new file mode 100644 index 000000000000..b4ff1b17e4a0 --- /dev/null +++ b/drivers/gpu/nova-core/firmware/booter.rs @@ -0,0 +1,375 @@ +// SPDX-License-Identifier: GPL-2.0 + +//! Support for loading and patching the `Booter` firmware. `Booter` is a Heavy Secured firmware +//! running on [`Sec2`], that is used on Turing/Ampere to load the GSP firmware into the GSP falcon +//! (and optionally unload it through a separate firmware image). 
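+//!
+//! The image embeds a Heavy-Secured payload described by [`HsHeaderV2`]: loading it means
+//! selecting the signature matching the fuse version reported by the target falcon, patching it
+//! into the microcode, and taking the `IMEM`/`DMEM` load offsets from the load headers.
+//!
+//! A rough usage sketch; the variable names are assumptions, not actual call sites:
+//!
+//! ```ignore
+//! let booter = BooterFirmware::new(dev, BooterKind::Loader, chipset, ver, &sec2_falcon, bar)?;
+//! sec2_falcon.dma_load(bar, &booter)?;
+//! let (mbox0, _) = sec2_falcon.boot(bar, None, None)?;
+//! ```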
+ +use core::marker::PhantomData; +use core::mem::size_of; +use core::ops::Deref; + +use kernel::device; +use kernel::prelude::*; +use kernel::transmute::FromBytes; + +use crate::dma::DmaObject; +use crate::driver::Bar0; +use crate::falcon::sec2::Sec2; +use crate::falcon::{Falcon, FalconBromParams, FalconFirmware, FalconLoadParams, FalconLoadTarget}; +use crate::firmware::{BinFirmware, FirmwareDmaObject, FirmwareSignature, Signed, Unsigned}; +use crate::gpu::Chipset; + +/// Local convenience function to return a copy of `S` by reinterpreting the bytes starting at +/// `offset` in `slice`. +fn frombytes_at<S: FromBytes + Sized>(slice: &[u8], offset: usize) -> Result<S> { + slice + .get(offset..offset + size_of::<S>()) + .and_then(S::from_bytes_copy) + .ok_or(EINVAL) +} + +/// Heavy-Secured firmware header. +/// +/// Such firmwares have an application-specific payload that needs to be patched with a given +/// signature. +#[repr(C)] +#[derive(Debug, Clone)] +struct HsHeaderV2 { + /// Offset to the start of the signatures. + sig_prod_offset: u32, + /// Size in bytes of the signatures. + sig_prod_size: u32, + /// Offset to a `u32` containing the location at which to patch the signature in the microcode + /// image. + patch_loc_offset: u32, + /// Offset to a `u32` containing the index of the signature to patch. + patch_sig_offset: u32, + /// Start offset to the signature metadata. + meta_data_offset: u32, + /// Size in bytes of the signature metadata. + meta_data_size: u32, + /// Offset to a `u32` containing the number of signatures in the signatures section. + num_sig_offset: u32, + /// Offset of the application-specific header. + header_offset: u32, + /// Size in bytes of the application-specific header. + header_size: u32, +} + +// SAFETY: all bit patterns are valid for this type, and it doesn't use interior mutability. +unsafe impl FromBytes for HsHeaderV2 {} + +/// Heavy-Secured Firmware image container. +/// +/// This provides convenient access to the fields of [`HsHeaderV2`] that are actually indices to +/// read from in the firmware data. +struct HsFirmwareV2<'a> { + hdr: HsHeaderV2, + fw: &'a [u8], +} + +impl<'a> HsFirmwareV2<'a> { + /// Interprets the header of `bin_fw` as a [`HsHeaderV2`] and returns an instance of + /// `HsFirmwareV2` for further parsing. + /// + /// Fails if the header pointed at by `bin_fw` is not within the bounds of the firmware image. + fn new(bin_fw: &BinFirmware<'a>) -> Result<Self> { + frombytes_at::<HsHeaderV2>(bin_fw.fw, bin_fw.hdr.header_offset as usize) + .map(|hdr| Self { hdr, fw: bin_fw.fw }) + } + + /// Returns the location at which the signatures should be patched in the microcode image. + /// + /// Fails if the offset of the patch location is outside the bounds of the firmware + /// image. + fn patch_location(&self) -> Result<u32> { + frombytes_at::<u32>(self.fw, self.hdr.patch_loc_offset as usize) + } + + /// Returns an iterator to the signatures of the firmware. The iterator can be empty if the + /// firmware is unsigned. + /// + /// Fails if the pointed signatures are outside the bounds of the firmware image. + fn signatures_iter(&'a self) -> Result<impl Iterator<Item = BooterSignature<'a>>> { + let num_sig = frombytes_at::<u32>(self.fw, self.hdr.num_sig_offset as usize)?; + let iter = match self.hdr.sig_prod_size.checked_div(num_sig) { + // If there are no signatures, return an iterator that will yield zero elements. 
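+            // (`checked_div` yields `None` when `num_sig` is zero, i.e. the firmware carries no
+            // signatures.)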
+ None => (&[] as &[u8]).chunks_exact(1), + Some(sig_size) => { + let patch_sig = frombytes_at::<u32>(self.fw, self.hdr.patch_sig_offset as usize)?; + let signatures_start = (self.hdr.sig_prod_offset + patch_sig) as usize; + + self.fw + // Get signatures range. + .get(signatures_start..signatures_start + self.hdr.sig_prod_size as usize) + .ok_or(EINVAL)? + .chunks_exact(sig_size as usize) + } + }; + + // Map the byte slices into signatures. + Ok(iter.map(BooterSignature)) + } +} + +/// Signature parameters, as defined in the firmware. +#[repr(C)] +struct HsSignatureParams { + /// Fuse version to use. + fuse_ver: u32, + /// Mask of engine IDs this firmware applies to. + engine_id_mask: u32, + /// ID of the microcode. + ucode_id: u32, +} + +// SAFETY: all bit patterns are valid for this type, and it doesn't use interior mutability. +unsafe impl FromBytes for HsSignatureParams {} + +impl HsSignatureParams { + /// Returns the signature parameters contained in `hs_fw`. + /// + /// Fails if the meta data parameter of `hs_fw` is outside the bounds of the firmware image, or + /// if its size doesn't match that of [`HsSignatureParams`]. + fn new(hs_fw: &HsFirmwareV2<'_>) -> Result<Self> { + let start = hs_fw.hdr.meta_data_offset as usize; + let end = start + .checked_add(hs_fw.hdr.meta_data_size as usize) + .ok_or(EINVAL)?; + + hs_fw + .fw + .get(start..end) + .and_then(Self::from_bytes_copy) + .ok_or(EINVAL) + } +} + +/// Header for code and data load offsets. +#[repr(C)] +#[derive(Debug, Clone)] +struct HsLoadHeaderV2 { + // Offset at which the code starts. + os_code_offset: u32, + // Total size of the code, for all apps. + os_code_size: u32, + // Offset at which the data starts. + os_data_offset: u32, + // Size of the data. + os_data_size: u32, + // Number of apps following this header. Each app is described by a [`HsLoadHeaderV2App`]. + num_apps: u32, +} + +// SAFETY: all bit patterns are valid for this type, and it doesn't use interior mutability. +unsafe impl FromBytes for HsLoadHeaderV2 {} + +impl HsLoadHeaderV2 { + /// Returns the load header contained in `hs_fw`. + /// + /// Fails if the header pointed at by `hs_fw` is not within the bounds of the firmware image. + fn new(hs_fw: &HsFirmwareV2<'_>) -> Result<Self> { + frombytes_at::<Self>(hs_fw.fw, hs_fw.hdr.header_offset as usize) + } +} + +/// Header for app code loader. +#[repr(C)] +#[derive(Debug, Clone)] +struct HsLoadHeaderV2App { + /// Offset at which to load the app code. + offset: u32, + /// Length in bytes of the app code. + len: u32, +} + +// SAFETY: all bit patterns are valid for this type, and it doesn't use interior mutability. +unsafe impl FromBytes for HsLoadHeaderV2App {} + +impl HsLoadHeaderV2App { + /// Returns the [`HsLoadHeaderV2App`] for app `idx` of `hs_fw`. + /// + /// Fails if `idx` is larger than the number of apps declared in `hs_fw`, or if the header is + /// not within the bounds of the firmware image. + fn new(hs_fw: &HsFirmwareV2<'_>, idx: u32) -> Result<Self> { + let load_hdr = HsLoadHeaderV2::new(hs_fw)?; + if idx >= load_hdr.num_apps { + Err(EINVAL) + } else { + frombytes_at::<Self>( + hs_fw.fw, + (hs_fw.hdr.header_offset as usize) + // Skip the load header... + .checked_add(size_of::<HsLoadHeaderV2>()) + // ... and jump to app header `idx`. + .and_then(|offset| { + offset.checked_add((idx as usize).checked_mul(size_of::<Self>())?) + }) + .ok_or(EINVAL)?, + ) + } + } +} + +/// Signature for Booter firmware. 
Their size is encoded into the header and not known a compile +/// time, so we just wrap a byte slices on which we can implement [`FirmwareSignature`]. +struct BooterSignature<'a>(&'a [u8]); + +impl<'a> AsRef<[u8]> for BooterSignature<'a> { + fn as_ref(&self) -> &[u8] { + self.0 + } +} + +impl<'a> FirmwareSignature<BooterFirmware> for BooterSignature<'a> {} + +/// The `Booter` loader firmware, responsible for loading the GSP. +pub(crate) struct BooterFirmware { + // Load parameters for `IMEM` falcon memory. + imem_load_target: FalconLoadTarget, + // Load parameters for `DMEM` falcon memory. + dmem_load_target: FalconLoadTarget, + // BROM falcon parameters. + brom_params: FalconBromParams, + // Device-mapped firmware image. + ucode: FirmwareDmaObject<Self, Signed>, +} + +impl FirmwareDmaObject<BooterFirmware, Unsigned> { + fn new_booter(dev: &device::Device<device::Bound>, data: &[u8]) -> Result<Self> { + DmaObject::from_data(dev, data).map(|ucode| Self(ucode, PhantomData)) + } +} + +#[derive(Copy, Clone, Debug, PartialEq)] +pub(crate) enum BooterKind { + Loader, + #[expect(unused)] + Unloader, +} + +impl BooterFirmware { + /// Parses the Booter firmware contained in `fw`, and patches the correct signature so it is + /// ready to be loaded and run on `falcon`. + pub(crate) fn new( + dev: &device::Device<device::Bound>, + kind: BooterKind, + chipset: Chipset, + ver: &str, + falcon: &Falcon<<Self as FalconFirmware>::Target>, + bar: &Bar0, + ) -> Result<Self> { + let fw_name = match kind { + BooterKind::Loader => "booter_load", + BooterKind::Unloader => "booter_unload", + }; + let fw = super::request_firmware(dev, chipset, fw_name, ver)?; + let bin_fw = BinFirmware::new(&fw)?; + + // The binary firmware embeds a Heavy-Secured firmware. + let hs_fw = HsFirmwareV2::new(&bin_fw)?; + + // The Heavy-Secured firmware embeds a firmware load descriptor. + let load_hdr = HsLoadHeaderV2::new(&hs_fw)?; + + // Offset in `ucode` where to patch the signature. + let patch_loc = hs_fw.patch_location()?; + + let sig_params = HsSignatureParams::new(&hs_fw)?; + let brom_params = FalconBromParams { + // `load_hdr.os_data_offset` is an absolute index, but `pkc_data_offset` is from the + // signature patch location. + pkc_data_offset: patch_loc + .checked_sub(load_hdr.os_data_offset) + .ok_or(EINVAL)?, + engine_id_mask: u16::try_from(sig_params.engine_id_mask).map_err(|_| EINVAL)?, + ucode_id: u8::try_from(sig_params.ucode_id).map_err(|_| EINVAL)?, + }; + let app0 = HsLoadHeaderV2App::new(&hs_fw, 0)?; + + // Object containing the firmware microcode to be signature-patched. + let ucode = bin_fw + .data() + .ok_or(EINVAL) + .and_then(|data| FirmwareDmaObject::<Self, _>::new_booter(dev, data))?; + + let ucode_signed = { + let mut signatures = hs_fw.signatures_iter()?.peekable(); + + if signatures.peek().is_none() { + // If there are no signatures, then the firmware is unsigned. + ucode.no_patch_signature() + } else { + // Obtain the version from the fuse register, and extract the corresponding + // signature. + let reg_fuse_version = falcon.signature_reg_fuse_version( + bar, + brom_params.engine_id_mask, + brom_params.ucode_id, + )?; + + // `0` means the last signature should be used. + const FUSE_VERSION_USE_LAST_SIG: u32 = 0; + let signature = match reg_fuse_version { + FUSE_VERSION_USE_LAST_SIG => signatures.last(), + // Otherwise hardware fuse version needs to be subtracted to obtain the index. 
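+                    // For example, a firmware built with `fuse_ver == 5` running on hardware
+                    // reporting fuse version 3 selects the signature at index 2.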
+ reg_fuse_version => { + let Some(idx) = sig_params.fuse_ver.checked_sub(reg_fuse_version) else { + dev_err!(dev, "invalid fuse version for Booter firmware\n"); + return Err(EINVAL); + }; + signatures.nth(idx as usize) + } + } + .ok_or(EINVAL)?; + + ucode.patch_signature(&signature, patch_loc as usize)? + } + }; + + Ok(Self { + imem_load_target: FalconLoadTarget { + src_start: app0.offset, + dst_start: 0, + len: app0.len, + }, + dmem_load_target: FalconLoadTarget { + src_start: load_hdr.os_data_offset, + dst_start: 0, + len: load_hdr.os_data_size, + }, + brom_params, + ucode: ucode_signed, + }) + } +} + +impl FalconLoadParams for BooterFirmware { + fn imem_load_params(&self) -> FalconLoadTarget { + self.imem_load_target.clone() + } + + fn dmem_load_params(&self) -> FalconLoadTarget { + self.dmem_load_target.clone() + } + + fn brom_params(&self) -> FalconBromParams { + self.brom_params.clone() + } + + fn boot_addr(&self) -> u32 { + self.imem_load_target.src_start + } +} + +impl Deref for BooterFirmware { + type Target = DmaObject; + + fn deref(&self) -> &Self::Target { + &self.ucode.0 + } +} + +impl FalconFirmware for BooterFirmware { + type Target = Sec2; +} diff --git a/drivers/gpu/nova-core/firmware/fwsec.rs b/drivers/gpu/nova-core/firmware/fwsec.rs new file mode 100644 index 000000000000..8edbb5c0572c --- /dev/null +++ b/drivers/gpu/nova-core/firmware/fwsec.rs @@ -0,0 +1,416 @@ +// SPDX-License-Identifier: GPL-2.0 + +//! FWSEC is a High Secure firmware that is extracted from the BIOS and performs the first step of +//! the GSP startup by creating the WPR2 memory region and copying critical areas of the VBIOS into +//! it after authenticating them, ensuring they haven't been tampered with. It runs on the GSP +//! falcon. +//! +//! Before being run, it needs to be patched in two areas: +//! +//! - The command to be run, as this firmware can perform several tasks ; +//! - The ucode signature, so the GSP falcon can run FWSEC in HS mode. + +use core::marker::PhantomData; +use core::mem::{align_of, size_of}; +use core::ops::Deref; + +use kernel::device::{self, Device}; +use kernel::prelude::*; +use kernel::transmute::FromBytes; + +use crate::dma::DmaObject; +use crate::driver::Bar0; +use crate::falcon::gsp::Gsp; +use crate::falcon::{Falcon, FalconBromParams, FalconFirmware, FalconLoadParams, FalconLoadTarget}; +use crate::firmware::{FalconUCodeDescV3, FirmwareDmaObject, FirmwareSignature, Signed, Unsigned}; +use crate::vbios::Vbios; + +const NVFW_FALCON_APPIF_ID_DMEMMAPPER: u32 = 0x4; + +#[repr(C)] +#[derive(Debug)] +struct FalconAppifHdrV1 { + version: u8, + header_size: u8, + entry_size: u8, + entry_count: u8, +} +// SAFETY: any byte sequence is valid for this struct. +unsafe impl FromBytes for FalconAppifHdrV1 {} + +#[repr(C, packed)] +#[derive(Debug)] +struct FalconAppifV1 { + id: u32, + dmem_base: u32, +} +// SAFETY: any byte sequence is valid for this struct. +unsafe impl FromBytes for FalconAppifV1 {} + +#[derive(Debug)] +#[repr(C, packed)] +struct FalconAppifDmemmapperV3 { + signature: u32, + version: u16, + size: u16, + cmd_in_buffer_offset: u32, + cmd_in_buffer_size: u32, + cmd_out_buffer_offset: u32, + cmd_out_buffer_size: u32, + nvf_img_data_buffer_offset: u32, + nvf_img_data_buffer_size: u32, + printf_buffer_hdr: u32, + ucode_build_time_stamp: u32, + ucode_signature: u32, + init_cmd: u32, + ucode_feature: u32, + ucode_cmd_mask0: u32, + ucode_cmd_mask1: u32, + multi_tgt_tbl: u32, +} +// SAFETY: any byte sequence is valid for this struct. 
+unsafe impl FromBytes for FalconAppifDmemmapperV3 {} + +#[derive(Debug)] +#[repr(C, packed)] +struct ReadVbios { + ver: u32, + hdr: u32, + addr: u64, + size: u32, + flags: u32, +} +// SAFETY: any byte sequence is valid for this struct. +unsafe impl FromBytes for ReadVbios {} + +#[derive(Debug)] +#[repr(C, packed)] +struct FrtsRegion { + ver: u32, + hdr: u32, + addr: u32, + size: u32, + ftype: u32, +} +// SAFETY: any byte sequence is valid for this struct. +unsafe impl FromBytes for FrtsRegion {} + +const NVFW_FRTS_CMD_REGION_TYPE_FB: u32 = 2; + +#[repr(C, packed)] +struct FrtsCmd { + read_vbios: ReadVbios, + frts_region: FrtsRegion, +} +// SAFETY: any byte sequence is valid for this struct. +unsafe impl FromBytes for FrtsCmd {} + +const NVFW_FALCON_APPIF_DMEMMAPPER_CMD_FRTS: u32 = 0x15; +const NVFW_FALCON_APPIF_DMEMMAPPER_CMD_SB: u32 = 0x19; + +/// Command for the [`FwsecFirmware`] to execute. +pub(crate) enum FwsecCommand { + /// Asks [`FwsecFirmware`] to carve out the WPR2 area and place a verified copy of the VBIOS + /// image into it. + Frts { frts_addr: u64, frts_size: u64 }, + /// Asks [`FwsecFirmware`] to load pre-OS apps on the PMU. + #[expect(dead_code)] + Sb, +} + +/// Size of the signatures used in FWSEC. +const BCRT30_RSA3K_SIG_SIZE: usize = 384; + +/// A single signature that can be patched into a FWSEC image. +#[repr(transparent)] +pub(crate) struct Bcrt30Rsa3kSignature([u8; BCRT30_RSA3K_SIG_SIZE]); + +/// SAFETY: A signature is just an array of bytes. +unsafe impl FromBytes for Bcrt30Rsa3kSignature {} + +impl From<[u8; BCRT30_RSA3K_SIG_SIZE]> for Bcrt30Rsa3kSignature { + fn from(sig: [u8; BCRT30_RSA3K_SIG_SIZE]) -> Self { + Self(sig) + } +} + +impl AsRef<[u8]> for Bcrt30Rsa3kSignature { + fn as_ref(&self) -> &[u8] { + &self.0 + } +} + +impl FirmwareSignature<FwsecFirmware> for Bcrt30Rsa3kSignature {} + +/// Reinterpret the area starting from `offset` in `fw` as an instance of `T` (which must implement +/// [`FromBytes`]) and return a reference to it. +/// +/// # Safety +/// +/// Callers must ensure that the region of memory returned is not written for as long as the +/// returned reference is alive. +/// +/// TODO[TRSM][COHA]: Remove this and `transmute_mut` once `CoherentAllocation::as_slice` is +/// available and we have a way to transmute objects implementing FromBytes, e.g.: +/// https://lore.kernel.org/lkml/20250330234039.29814-1-christiansantoslima21@gmail.com/ +unsafe fn transmute<'a, 'b, T: Sized + FromBytes>( + fw: &'a DmaObject, + offset: usize, +) -> Result<&'b T> { + if offset + size_of::<T>() > fw.size() { + return Err(EINVAL); + } + if (fw.start_ptr() as usize + offset) % align_of::<T>() != 0 { + return Err(EINVAL); + } + + // SAFETY: we have checked that the pointer is properly aligned that its pointed memory is + // large enough the contains an instance of `T`, which implements `FromBytes`. + Ok(unsafe { &*(fw.start_ptr().add(offset).cast::<T>()) }) +} + +/// Reinterpret the area starting from `offset` in `fw` as a mutable instance of `T` (which must +/// implement [`FromBytes`]) and return a reference to it. +/// +/// # Safety +/// +/// Callers must ensure that the region of memory returned is not read or written for as long as +/// the returned reference is alive. 
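+///
+/// Like `transmute` above, this is a stopgap helper: both are slated for removal once
+/// `CoherentAllocation` gains a safe slice accessor (see the TODO on `transmute`).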
+unsafe fn transmute_mut<'a, 'b, T: Sized + FromBytes>( + fw: &'a mut DmaObject, + offset: usize, +) -> Result<&'b mut T> { + if offset + size_of::<T>() > fw.size() { + return Err(EINVAL); + } + if (fw.start_ptr_mut() as usize + offset) % align_of::<T>() != 0 { + return Err(EINVAL); + } + + // SAFETY: we have checked that the pointer is properly aligned that its pointed memory is + // large enough the contains an instance of `T`, which implements `FromBytes`. + Ok(unsafe { &mut *(fw.start_ptr_mut().add(offset).cast::<T>()) }) +} + +/// The FWSEC microcode, extracted from the BIOS and to be run on the GSP falcon. +/// +/// It is responsible for e.g. carving out the WPR2 region as the first step of the GSP bootflow. +pub(crate) struct FwsecFirmware { + /// Descriptor of the firmware. + desc: FalconUCodeDescV3, + /// GPU-accessible DMA object containing the firmware. + ucode: FirmwareDmaObject<Self, Signed>, +} + +impl FalconLoadParams for FwsecFirmware { + fn imem_load_params(&self) -> FalconLoadTarget { + FalconLoadTarget { + src_start: 0, + dst_start: self.desc.imem_phys_base, + len: self.desc.imem_load_size, + } + } + + fn dmem_load_params(&self) -> FalconLoadTarget { + FalconLoadTarget { + src_start: self.desc.imem_load_size, + dst_start: self.desc.dmem_phys_base, + len: self.desc.dmem_load_size, + } + } + + fn brom_params(&self) -> FalconBromParams { + FalconBromParams { + pkc_data_offset: self.desc.pkc_data_offset, + engine_id_mask: self.desc.engine_id_mask, + ucode_id: self.desc.ucode_id, + } + } + + fn boot_addr(&self) -> u32 { + 0 + } +} + +impl Deref for FwsecFirmware { + type Target = DmaObject; + + fn deref(&self) -> &Self::Target { + &self.ucode.0 + } +} + +impl FalconFirmware for FwsecFirmware { + type Target = Gsp; +} + +impl FirmwareDmaObject<FwsecFirmware, Unsigned> { + fn new_fwsec(dev: &Device<device::Bound>, bios: &Vbios, cmd: FwsecCommand) -> Result<Self> { + let desc = bios.fwsec_image().header()?; + let ucode = bios.fwsec_image().ucode(desc)?; + let mut dma_object = DmaObject::from_data(dev, ucode)?; + + let hdr_offset = (desc.imem_load_size + desc.interface_offset) as usize; + // SAFETY: we have exclusive access to `dma_object`. + let hdr: &FalconAppifHdrV1 = unsafe { transmute(&dma_object, hdr_offset) }?; + + if hdr.version != 1 { + return Err(EINVAL); + } + + // Find the DMEM mapper section in the firmware. + for i in 0..hdr.entry_count as usize { + let app: &FalconAppifV1 = + // SAFETY: we have exclusive access to `dma_object`. + unsafe { + transmute( + &dma_object, + hdr_offset + hdr.header_size as usize + i * hdr.entry_size as usize + ) + }?; + + if app.id != NVFW_FALCON_APPIF_ID_DMEMMAPPER { + continue; + } + + // SAFETY: we have exclusive access to `dma_object`. + let dmem_mapper: &mut FalconAppifDmemmapperV3 = unsafe { + transmute_mut( + &mut dma_object, + (desc.imem_load_size + app.dmem_base) as usize, + ) + }?; + + // SAFETY: we have exclusive access to `dma_object`. 
+ let frts_cmd: &mut FrtsCmd = unsafe { + transmute_mut( + &mut dma_object, + (desc.imem_load_size + dmem_mapper.cmd_in_buffer_offset) as usize, + ) + }?; + + frts_cmd.read_vbios = ReadVbios { + ver: 1, + hdr: size_of::<ReadVbios>() as u32, + addr: 0, + size: 0, + flags: 2, + }; + + dmem_mapper.init_cmd = match cmd { + FwsecCommand::Frts { + frts_addr, + frts_size, + } => { + frts_cmd.frts_region = FrtsRegion { + ver: 1, + hdr: size_of::<FrtsRegion>() as u32, + addr: (frts_addr >> 12) as u32, + size: (frts_size >> 12) as u32, + ftype: NVFW_FRTS_CMD_REGION_TYPE_FB, + }; + + NVFW_FALCON_APPIF_DMEMMAPPER_CMD_FRTS + } + FwsecCommand::Sb => NVFW_FALCON_APPIF_DMEMMAPPER_CMD_SB, + }; + + // Return early as we found and patched the DMEMMAPPER region. + return Ok(Self(dma_object, PhantomData)); + } + + Err(ENOTSUPP) + } +} + +impl FwsecFirmware { + /// Extract the Fwsec firmware from `bios` and patch it to run on `falcon` with the `cmd` + /// command. + pub(crate) fn new( + dev: &Device<device::Bound>, + falcon: &Falcon<Gsp>, + bar: &Bar0, + bios: &Vbios, + cmd: FwsecCommand, + ) -> Result<Self> { + let ucode_dma = FirmwareDmaObject::<Self, _>::new_fwsec(dev, bios, cmd)?; + + // Patch signature if needed. + let desc = bios.fwsec_image().header()?; + let ucode_signed = if desc.signature_count != 0 { + let sig_base_img = (desc.imem_load_size + desc.pkc_data_offset) as usize; + let desc_sig_versions = u32::from(desc.signature_versions); + let reg_fuse_version = + falcon.signature_reg_fuse_version(bar, desc.engine_id_mask, desc.ucode_id)?; + dev_dbg!( + dev, + "desc_sig_versions: {:#x}, reg_fuse_version: {}\n", + desc_sig_versions, + reg_fuse_version + ); + let signature_idx = { + let reg_fuse_version_bit = 1 << reg_fuse_version; + + // Check if the fuse version is supported by the firmware. + if desc_sig_versions & reg_fuse_version_bit == 0 { + dev_err!( + dev, + "no matching signature: {:#x} {:#x}\n", + reg_fuse_version_bit, + desc_sig_versions, + ); + return Err(EINVAL); + } + + // `desc_sig_versions` has one bit set per included signature. Thus, the index of + // the signature to patch is the number of bits in `desc_sig_versions` set to `1` + // before `reg_fuse_version_bit`. + + // Mask of the bits of `desc_sig_versions` to preserve. + let reg_fuse_version_mask = reg_fuse_version_bit.wrapping_sub(1); + + (desc_sig_versions & reg_fuse_version_mask).count_ones() as usize + }; + + dev_dbg!(dev, "patching signature with index {}\n", signature_idx); + let signature = bios + .fwsec_image() + .sigs(desc) + .and_then(|sigs| sigs.get(signature_idx).ok_or(EINVAL))?; + + ucode_dma.patch_signature(signature, sig_base_img)? + } else { + ucode_dma.no_patch_signature() + }; + + Ok(FwsecFirmware { + desc: desc.clone(), + ucode: ucode_signed, + }) + } + + /// Loads the FWSEC firmware into `falcon` and execute it. + pub(crate) fn run( + &self, + dev: &Device<device::Bound>, + falcon: &Falcon<Gsp>, + bar: &Bar0, + ) -> Result<()> { + // Reset falcon, load the firmware, and run it. 
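// Editorial note, not part of this patch: a self-contained sketch of the signature-index
// selection implemented in `FwsecFirmware::new()` above. The function name and the test
// values are made up for illustration.
fn signature_index(desc_sig_versions: u32, reg_fuse_version: u32) -> Option<usize> {
    let reg_fuse_version_bit = 1u32 << reg_fuse_version;

    // The firmware must carry a signature matching the fuse version.
    if desc_sig_versions & reg_fuse_version_bit == 0 {
        return None;
    }

    // Signatures are stored in bit order, one per set bit, so the index of the wanted
    // signature is the number of set bits below `reg_fuse_version_bit`.
    Some((desc_sig_versions & reg_fuse_version_bit.wrapping_sub(1)).count_ones() as usize)
}

#[test]
fn signature_index_example() {
    // Signatures present for fuse versions 0, 2 and 5; fuse version 5 selects the third
    // one, i.e. index 2.
    assert_eq!(signature_index(0b10_0101, 5), Some(2));
    // Fuse version 1 has no matching signature.
    assert_eq!(signature_index(0b10_0101, 1), None);
}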
+ falcon + .reset(bar) + .inspect_err(|e| dev_err!(dev, "Failed to reset GSP falcon: {:?}\n", e))?; + falcon + .dma_load(bar, self) + .inspect_err(|e| dev_err!(dev, "Failed to load FWSEC firmware: {:?}\n", e))?; + let (mbox0, _) = falcon + .boot(bar, Some(0), None) + .inspect_err(|e| dev_err!(dev, "Failed to boot FWSEC firmware: {:?}\n", e))?; + if mbox0 != 0 { + dev_err!(dev, "FWSEC firmware returned error {}\n", mbox0); + Err(EIO) + } else { + Ok(()) + } + } +} diff --git a/drivers/gpu/nova-core/firmware/gsp.rs b/drivers/gpu/nova-core/firmware/gsp.rs new file mode 100644 index 000000000000..9b70095434c6 --- /dev/null +++ b/drivers/gpu/nova-core/firmware/gsp.rs @@ -0,0 +1,243 @@ +// SPDX-License-Identifier: GPL-2.0 + +use core::mem::size_of_val; + +use kernel::device; +use kernel::dma::{DataDirection, DmaAddress}; +use kernel::kvec; +use kernel::prelude::*; +use kernel::scatterlist::{Owned, SGTable}; + +use crate::dma::DmaObject; +use crate::firmware::riscv::RiscvFirmware; +use crate::gpu::{Architecture, Chipset}; +use crate::gsp::GSP_PAGE_SIZE; + +/// Ad-hoc and temporary module to extract sections from ELF images. +/// +/// Some firmware images are currently packaged as ELF files, where sections names are used as keys +/// to specific and related bits of data. Future firmware versions are scheduled to move away from +/// that scheme before nova-core becomes stable, which means this module will eventually be +/// removed. +mod elf { + use core::mem::size_of; + + use kernel::bindings; + use kernel::str::CStr; + use kernel::transmute::FromBytes; + + /// Newtype to provide a [`FromBytes`] implementation. + #[repr(transparent)] + struct Elf64Hdr(bindings::elf64_hdr); + // SAFETY: all bit patterns are valid for this type, and it doesn't use interior mutability. + unsafe impl FromBytes for Elf64Hdr {} + + #[repr(transparent)] + struct Elf64SHdr(bindings::elf64_shdr); + // SAFETY: all bit patterns are valid for this type, and it doesn't use interior mutability. + unsafe impl FromBytes for Elf64SHdr {} + + /// Tries to extract section with name `name` from the ELF64 image `elf`, and returns it. + pub(super) fn elf64_section<'a, 'b>(elf: &'a [u8], name: &'b str) -> Option<&'a [u8]> { + let hdr = &elf + .get(0..size_of::<bindings::elf64_hdr>()) + .and_then(Elf64Hdr::from_bytes)? + .0; + + // Get all the section headers. + let mut shdr = { + let shdr_num = usize::from(hdr.e_shnum); + let shdr_start = usize::try_from(hdr.e_shoff).ok()?; + let shdr_end = shdr_num + .checked_mul(size_of::<Elf64SHdr>()) + .and_then(|v| v.checked_add(shdr_start))?; + + elf.get(shdr_start..shdr_end) + .map(|slice| slice.chunks_exact(size_of::<Elf64SHdr>()))? + }; + + // Get the strings table. + let strhdr = shdr + .clone() + .nth(usize::from(hdr.e_shstrndx)) + .and_then(Elf64SHdr::from_bytes)?; + + // Find the section which name matches `name` and return it. + shdr.find(|&sh| { + let Some(hdr) = Elf64SHdr::from_bytes(sh) else { + return false; + }; + + let Some(name_idx) = strhdr + .0 + .sh_offset + .checked_add(u64::from(hdr.0.sh_name)) + .and_then(|idx| usize::try_from(idx).ok()) + else { + return false; + }; + + // Get the start of the name. + elf.get(name_idx..) + // Stop at the first `0`. + .and_then(|nstr| nstr.get(0..=nstr.iter().position(|b| *b == 0)?)) + // Convert into CStr. This should never fail because of the line above. + .and_then(|nstr| CStr::from_bytes_with_nul(nstr).ok()) + // Convert into str. + .and_then(|c_str| c_str.to_str().ok()) + // Check that the name matches. 
+ .map(|str| str == name) + .unwrap_or(false) + }) + // Return the slice containing the section. + .and_then(|sh| { + let hdr = Elf64SHdr::from_bytes(sh)?; + let start = usize::try_from(hdr.0.sh_offset).ok()?; + let end = usize::try_from(hdr.0.sh_size) + .ok() + .and_then(|sh_size| start.checked_add(sh_size))?; + + elf.get(start..end) + }) + } +} + +/// GSP firmware with 3-level radix page tables for the GSP bootloader. +/// +/// The bootloader expects firmware to be mapped starting at address 0 in GSP's virtual address +/// space: +/// +/// ```text +/// Level 0: 1 page, 1 entry -> points to first level 1 page +/// Level 1: Multiple pages/entries -> each entry points to a level 2 page +/// Level 2: Multiple pages/entries -> each entry points to a firmware page +/// ``` +/// +/// Each page is 4KB, each entry is 8 bytes (64-bit DMA address). +/// Also known as "Radix3" firmware. +#[pin_data] +pub(crate) struct GspFirmware { + /// The GSP firmware inside a [`VVec`], device-mapped via a SG table. + #[pin] + fw: SGTable<Owned<VVec<u8>>>, + /// Level 2 page table whose entries contain DMA addresses of firmware pages. + #[pin] + level2: SGTable<Owned<VVec<u8>>>, + /// Level 1 page table whose entries contain DMA addresses of level 2 pages. + #[pin] + level1: SGTable<Owned<VVec<u8>>>, + /// Level 0 page table (single 4KB page) with one entry: DMA address of first level 1 page. + level0: DmaObject, + /// Size in bytes of the firmware contained in [`Self::fw`]. + size: usize, + /// Device-mapped GSP signatures matching the GPU's [`Chipset`]. + signatures: DmaObject, + /// GSP bootloader, verifies the GSP firmware before loading and running it. + bootloader: RiscvFirmware, +} + +impl GspFirmware { + /// Loads the GSP firmware binaries, map them into `dev`'s address-space, and creates the page + /// tables expected by the GSP bootloader to load it. + pub(crate) fn new<'a, 'b>( + dev: &'a device::Device<device::Bound>, + chipset: Chipset, + ver: &'b str, + ) -> Result<impl PinInit<Self, Error> + 'a> { + let fw = super::request_firmware(dev, chipset, "gsp", ver)?; + + let fw_section = elf::elf64_section(fw.data(), ".fwimage").ok_or(EINVAL)?; + + let sigs_section = match chipset.arch() { + Architecture::Ampere => ".fwsignature_ga10x", + _ => return Err(ENOTSUPP), + }; + let signatures = elf::elf64_section(fw.data(), sigs_section) + .ok_or(EINVAL) + .and_then(|data| DmaObject::from_data(dev, data))?; + + let size = fw_section.len(); + + // Move the firmware into a vmalloc'd vector and map it into the device address + // space. + let fw_vvec = VVec::with_capacity(fw_section.len(), GFP_KERNEL) + .and_then(|mut v| { + v.extend_from_slice(fw_section, GFP_KERNEL)?; + Ok(v) + }) + .map_err(|_| ENOMEM)?; + + let bl = super::request_firmware(dev, chipset, "bootloader", ver)?; + let bootloader = RiscvFirmware::new(dev, &bl)?; + + Ok(try_pin_init!(Self { + fw <- SGTable::new(dev, fw_vvec, DataDirection::ToDevice, GFP_KERNEL), + level2 <- { + // Allocate the level 2 page table, map the firmware onto it, and map it into the + // device address space. + VVec::<u8>::with_capacity( + fw.iter().count() * core::mem::size_of::<u64>(), + GFP_KERNEL, + ) + .map_err(|_| ENOMEM) + .and_then(|level2| map_into_lvl(&fw, level2)) + .map(|level2| SGTable::new(dev, level2, DataDirection::ToDevice, GFP_KERNEL))? + }, + level1 <- { + // Allocate the level 1 page table, map the level 2 page table onto it, and map it + // into the device address space. 
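// Editorial note, not part of this patch: back-of-the-envelope sizing for the radix3
// layout described above, assuming 4 KiB pages and 8-byte entries as in `GspFirmware`.
// The helper below is hypothetical and only illustrates the arithmetic behind the
// level2/level1 allocations in `GspFirmware::new()`.
const PAGE_SIZE: usize = 4096;
const ENTRIES_PER_PAGE: usize = PAGE_SIZE / core::mem::size_of::<u64>(); // 512

/// Returns (level-2 pages, level-1 pages) needed to map `fw_size` bytes of firmware;
/// level 0 is always a single page holding one entry.
fn radix3_page_counts(fw_size: usize) -> (usize, usize) {
    // One level-2 entry per 4 KiB firmware page.
    let level2_entries = fw_size.div_ceil(PAGE_SIZE);
    let level2_pages = level2_entries.div_ceil(ENTRIES_PER_PAGE);
    // One level-1 entry per level-2 page.
    let level1_pages = level2_pages.div_ceil(ENTRIES_PER_PAGE);
    (level2_pages, level1_pages)
}

#[test]
fn radix3_sizing_example() {
    // A 64 MiB firmware image needs 16384 level-2 entries spread over 32 level-2 pages,
    // which in turn need 32 level-1 entries, i.e. a single level-1 page.
    assert_eq!(radix3_page_counts(64 << 20), (32, 1));
}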
+ VVec::<u8>::with_capacity( + level2.iter().count() * core::mem::size_of::<u64>(), + GFP_KERNEL, + ) + .map_err(|_| ENOMEM) + .and_then(|level1| map_into_lvl(&level2, level1)) + .map(|level1| SGTable::new(dev, level1, DataDirection::ToDevice, GFP_KERNEL))? + }, + level0: { + // Allocate the level 0 page table as a device-visible DMA object, and map the + // level 1 page table onto it. + + // Level 0 page table data. + let mut level0_data = kvec![0u8; GSP_PAGE_SIZE]?; + + // Fill level 1 page entry. + #[allow(clippy::useless_conversion)] + let level1_entry = u64::from(level1.iter().next().unwrap().dma_address()); + let dst = &mut level0_data[..size_of_val(&level1_entry)]; + dst.copy_from_slice(&level1_entry.to_le_bytes()); + + // Turn the level0 page table into a [`DmaObject`]. + DmaObject::from_data(dev, &level0_data)? + }, + size, + signatures, + bootloader, + })) + } + + #[expect(unused)] + /// Returns the DMA handle of the radix3 level 0 page table. + pub(crate) fn radix3_dma_handle(&self) -> DmaAddress { + self.level0.dma_handle() + } +} + +/// Build a page table from a scatter-gather list. +/// +/// Takes each DMA-mapped region from `sg_table` and writes page table entries +/// for all 4KB pages within that region. For example, a 16KB SG entry becomes +/// 4 consecutive page table entries. +fn map_into_lvl(sg_table: &SGTable<Owned<VVec<u8>>>, mut dst: VVec<u8>) -> Result<VVec<u8>> { + for sg_entry in sg_table.iter() { + // Number of pages we need to map. + let num_pages = (sg_entry.dma_len() as usize).div_ceil(GSP_PAGE_SIZE); + + for i in 0..num_pages { + let entry = sg_entry.dma_address() + (i as u64 * GSP_PAGE_SIZE as u64); + dst.extend_from_slice(&entry.to_le_bytes(), GFP_KERNEL)?; + } + } + + Ok(dst) +} diff --git a/drivers/gpu/nova-core/firmware/riscv.rs b/drivers/gpu/nova-core/firmware/riscv.rs new file mode 100644 index 000000000000..afb08f5bc4ba --- /dev/null +++ b/drivers/gpu/nova-core/firmware/riscv.rs @@ -0,0 +1,91 @@ +// SPDX-License-Identifier: GPL-2.0 + +//! Support for firmware binaries designed to run on a RISC-V core. Such firmwares files have a +//! dedicated header. + +use core::mem::size_of; + +use kernel::device; +use kernel::firmware::Firmware; +use kernel::prelude::*; +use kernel::transmute::FromBytes; + +use crate::dma::DmaObject; +use crate::firmware::BinFirmware; + +/// Descriptor for microcode running on a RISC-V core. +#[repr(C)] +#[derive(Debug)] +struct RmRiscvUCodeDesc { + version: u32, + bootloader_offset: u32, + bootloader_size: u32, + bootloader_param_offset: u32, + bootloader_param_size: u32, + riscv_elf_offset: u32, + riscv_elf_size: u32, + app_version: u32, + manifest_offset: u32, + manifest_size: u32, + monitor_data_offset: u32, + monitor_data_size: u32, + monitor_code_offset: u32, + monitor_code_size: u32, +} + +// SAFETY: all bit patterns are valid for this type, and it doesn't use interior mutability. +unsafe impl FromBytes for RmRiscvUCodeDesc {} + +impl RmRiscvUCodeDesc { + /// Interprets the header of `bin_fw` as a [`RmRiscvUCodeDesc`] and returns it. + /// + /// Fails if the header pointed at by `bin_fw` is not within the bounds of the firmware image. + fn new(bin_fw: &BinFirmware<'_>) -> Result<Self> { + let offset = bin_fw.hdr.header_offset as usize; + + bin_fw + .fw + .get(offset..offset + size_of::<Self>()) + .and_then(Self::from_bytes_copy) + .ok_or(EINVAL) + } +} + +/// A parsed firmware for a RISC-V core, ready to be loaded and run. 
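// Editorial note, not part of this patch: the bounds-checked slicing pattern used by
// `RmRiscvUCodeDesc::new()` above and `RiscvFirmware::new()` below, written against std.
// `subslice()` is a hypothetical helper; offsets and sizes read from a firmware header
// are untrusted, so out-of-range requests return `None` instead of panicking.
fn subslice(image: &[u8], offset: usize, len: usize) -> Option<&[u8]> {
    image.get(offset..offset.checked_add(len)?)
}

#[test]
fn subslice_example() {
    let image = [0u8; 16];
    assert!(subslice(&image, 8, 8).is_some());
    // A descriptor that claims to extend past the image is rejected.
    assert!(subslice(&image, 8, 9).is_none());
    // Offsets that would overflow `usize` are rejected as well.
    assert!(subslice(&image, usize::MAX, 2).is_none());
}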
+#[expect(unused)] +pub(crate) struct RiscvFirmware { + /// Offset at which the code starts in the firmware image. + code_offset: u32, + /// Offset at which the data starts in the firmware image. + data_offset: u32, + /// Offset at which the manifest starts in the firmware image. + manifest_offset: u32, + /// Application version. + app_version: u32, + /// Device-mapped firmware image. + ucode: DmaObject, +} + +impl RiscvFirmware { + /// Parses the RISC-V firmware image contained in `fw`. + pub(crate) fn new(dev: &device::Device<device::Bound>, fw: &Firmware) -> Result<Self> { + let bin_fw = BinFirmware::new(fw)?; + + let riscv_desc = RmRiscvUCodeDesc::new(&bin_fw)?; + + let ucode = { + let start = bin_fw.hdr.data_offset as usize; + let len = bin_fw.hdr.data_size as usize; + + DmaObject::from_data(dev, fw.data().get(start..start + len).ok_or(EINVAL)?)? + }; + + Ok(Self { + ucode, + code_offset: riscv_desc.monitor_code_offset, + data_offset: riscv_desc.monitor_data_offset, + manifest_offset: riscv_desc.manifest_offset, + app_version: riscv_desc.app_version, + }) + } +} diff --git a/drivers/gpu/nova-core/gfw.rs b/drivers/gpu/nova-core/gfw.rs new file mode 100644 index 000000000000..8ac1ed187199 --- /dev/null +++ b/drivers/gpu/nova-core/gfw.rs @@ -0,0 +1,71 @@ +// SPDX-License-Identifier: GPL-2.0 + +//! GPU Firmware (`GFW`) support, a.k.a `devinit`. +//! +//! Upon reset, the GPU runs some firmware code from the BIOS to setup its core parameters. Most of +//! the GPU is considered unusable until this step is completed, so we must wait on it before +//! performing driver initialization. +//! +//! A clarification about devinit terminology: devinit is a sequence of register read/writes after +//! reset that performs tasks such as: +//! 1. Programming VRAM memory controller timings. +//! 2. Power sequencing. +//! 3. Clock and PLL configuration. +//! 4. Thermal management. +//! +//! devinit itself is a 'script' which is interpreted by an interpreter program typically running +//! on the PMU microcontroller. +//! +//! Note that the devinit sequence also needs to run during suspend/resume. + +use kernel::bindings; +use kernel::prelude::*; +use kernel::time::Delta; + +use crate::driver::Bar0; +use crate::regs; +use crate::util; + +/// Wait for the `GFW` (GPU firmware) boot completion signal (`GFW_BOOT`), or a 4 seconds timeout. +/// +/// Upon GPU reset, several microcontrollers (such as PMU, SEC2, GSP etc) run some firmware code to +/// setup its core parameters. Most of the GPU is considered unusable until this step is completed, +/// so it must be waited on very early during driver initialization. +/// +/// The `GFW` code includes several components that need to execute before the driver loads. These +/// components are located in the VBIOS ROM and executed in a sequence on these different +/// microcontrollers. The devinit sequence typically runs on the PMU, and the FWSEC runs on the +/// GSP. +/// +/// This function waits for a signal indicating that core initialization is complete. Before this +/// signal is received, little can be done with the GPU. This signal is set by the FWSEC running on +/// the GSP in Heavy-secured mode. +pub(crate) fn wait_gfw_boot_completion(bar: &Bar0) -> Result { + // Before accessing the completion status in `NV_PGC6_AON_SECURE_SCRATCH_GROUP_05`, we must + // first check `NV_PGC6_AON_SECURE_SCRATCH_GROUP_05_PRIV_LEVEL_MASK`. 
This is because + // `NV_PGC6_AON_SECURE_SCRATCH_GROUP_05` becomes accessible only after the secure firmware + // (FWSEC) lowers the privilege level to allow CPU (LS/Light-secured) access. We can only + // safely read the status register from CPU (LS/Light-secured) once the mask indicates + // that the privilege level has been lowered. + // + // TIMEOUT: arbitrarily large value. GFW starts running immediately after the GPU is put out of + // reset, and should complete in less time than that. + util::wait_on(Delta::from_secs(4), || { + // Check that FWSEC has lowered its protection level before reading the GFW_BOOT status. + let gfw_booted = regs::NV_PGC6_AON_SECURE_SCRATCH_GROUP_05_PRIV_LEVEL_MASK::read(bar) + .read_protection_level0() + && regs::NV_PGC6_AON_SECURE_SCRATCH_GROUP_05_0_GFW_BOOT::read(bar).completed(); + + if gfw_booted { + Some(()) + } else { + // TODO[DLAY]: replace with [1] once it merges. + // [1] https://lore.kernel.org/rust-for-linux/20250423192857.199712-6-fujita.tomonori@gmail.com/ + // + // SAFETY: `msleep()` is safe to call with any parameter. + unsafe { bindings::msleep(1) }; + + None + } + }) +} diff --git a/drivers/gpu/nova-core/gpu.rs b/drivers/gpu/nova-core/gpu.rs index 60b86f370284..af20e2daea24 100644 --- a/drivers/gpu/nova-core/gpu.rs +++ b/drivers/gpu/nova-core/gpu.rs @@ -1,12 +1,13 @@ // SPDX-License-Identifier: GPL-2.0 -use kernel::{device, devres::Devres, error::code::*, pci, prelude::*}; +use kernel::{device, devres::Devres, error::code::*, fmt, pci, prelude::*, sync::Arc}; use crate::driver::Bar0; -use crate::firmware::{Firmware, FIRMWARE_VERSION}; +use crate::falcon::{gsp::Gsp as GspFalcon, sec2::Sec2 as Sec2Falcon, Falcon}; +use crate::fb::SysmemFlush; +use crate::gfw; +use crate::gsp::Gsp; use crate::regs; -use crate::util; -use core::fmt; macro_rules! define_chipset { ({ $($variant:ident = $value:expr),* $(,)* }) => @@ -22,16 +23,26 @@ macro_rules! define_chipset { $( Chipset::$variant, )* ]; - pub(crate) const NAMES: [&'static str; Self::ALL.len()] = [ - $( util::const_bytes_to_str( - util::to_lowercase_bytes::<{ stringify!($variant).len() }>( - stringify!($variant) - ).as_slice() - ), )* - ]; + ::kernel::macros::paste!( + /// Returns the name of this chipset, in lowercase. + /// + /// # Examples + /// + /// ``` + /// let chipset = Chipset::GA102; + /// assert_eq!(chipset.name(), "ga102"); + /// ``` + pub(crate) const fn name(&self) -> &'static str { + match *self { + $( + Chipset::$variant => stringify!([<$variant:lower>]), + )* + } + } + ); } - // TODO replace with something like derive(FromPrimitive) + // TODO[FPRI]: replace with something like derive(FromPrimitive) impl TryFrom<u32> for Chipset { type Error = kernel::error::Error; @@ -161,31 +172,70 @@ impl Spec { pub(crate) struct Gpu { spec: Spec, /// MMIO mapping of PCI BAR 0 - bar: Devres<Bar0>, - fw: Firmware, + bar: Arc<Devres<Bar0>>, + /// System memory page required for flushing all pending GPU-side memory writes done through + /// PCIE into system memory, via sysmembar (A GPU-initiated HW memory-barrier operation). + sysmem_flush: SysmemFlush, + /// GSP falcon instance, used for GSP boot up and cleanup. + gsp_falcon: Falcon<GspFalcon>, + /// SEC2 falcon instance, used for GSP boot up and cleanup. + sec2_falcon: Falcon<Sec2Falcon>, + /// GSP runtime data. Temporarily an empty placeholder. 
+ #[pin] + gsp: Gsp, } impl Gpu { - pub(crate) fn new( - pdev: &pci::Device<device::Bound>, - devres_bar: Devres<Bar0>, - ) -> Result<impl PinInit<Self>> { - let bar = devres_bar.access(pdev.as_ref())?; - let spec = Spec::new(bar)?; - let fw = Firmware::new(pdev.as_ref(), spec.chipset, FIRMWARE_VERSION)?; - - dev_info!( - pdev.as_ref(), - "NVIDIA (Chipset: {}, Architecture: {:?}, Revision: {})\n", - spec.chipset, - spec.chipset.arch(), - spec.revision - ); - - Ok(pin_init!(Self { - spec, + pub(crate) fn new<'a>( + pdev: &'a pci::Device<device::Bound>, + devres_bar: Arc<Devres<Bar0>>, + bar: &'a Bar0, + ) -> impl PinInit<Self, Error> + 'a { + try_pin_init!(Self { + spec: Spec::new(bar).inspect(|spec| { + dev_info!( + pdev.as_ref(), + "NVIDIA (Chipset: {}, Architecture: {:?}, Revision: {})\n", + spec.chipset, + spec.chipset.arch(), + spec.revision + ); + })?, + + // We must wait for GFW_BOOT completion before doing any significant setup on the GPU. + _: { + gfw::wait_gfw_boot_completion(bar) + .inspect_err(|_| dev_err!(pdev.as_ref(), "GFW boot did not complete"))?; + }, + + sysmem_flush: SysmemFlush::register(pdev.as_ref(), bar, spec.chipset)?, + + gsp_falcon: Falcon::new( + pdev.as_ref(), + spec.chipset, + bar, + spec.chipset > Chipset::GA100, + ) + .inspect(|falcon| falcon.clear_swgen0_intr(bar))?, + + sec2_falcon: Falcon::new(pdev.as_ref(), spec.chipset, bar, true)?, + + gsp <- Gsp::new(), + + _: { gsp.boot(pdev, bar, spec.chipset, gsp_falcon, sec2_falcon)? }, + bar: devres_bar, - fw - })) + }) + } + + /// Called when the corresponding [`Device`](device::Device) is unbound. + /// + /// Note: This method must only be called from `Driver::unbind`. + pub(crate) fn unbind(&self, dev: &device::Device<device::Core>) { + kernel::warn_on!(self + .bar + .access(dev) + .inspect(|bar| self.sysmem_flush.unregister(bar)) + .is_err()); } } diff --git a/drivers/gpu/nova-core/gsp.rs b/drivers/gpu/nova-core/gsp.rs new file mode 100644 index 000000000000..64e472e7a9d3 --- /dev/null +++ b/drivers/gpu/nova-core/gsp.rs @@ -0,0 +1,22 @@ +// SPDX-License-Identifier: GPL-2.0 + +mod boot; + +use kernel::prelude::*; + +mod fw; + +pub(crate) const GSP_PAGE_SHIFT: usize = 12; +pub(crate) const GSP_PAGE_SIZE: usize = 1 << GSP_PAGE_SHIFT; + +/// GSP runtime data. +/// +/// This is an empty pinned placeholder for now. +#[pin_data] +pub(crate) struct Gsp {} + +impl Gsp { + pub(crate) fn new() -> impl PinInit<Self> { + pin_init!(Self {}) + } +} diff --git a/drivers/gpu/nova-core/gsp/boot.rs b/drivers/gpu/nova-core/gsp/boot.rs new file mode 100644 index 000000000000..2800f3aee37d --- /dev/null +++ b/drivers/gpu/nova-core/gsp/boot.rs @@ -0,0 +1,137 @@ +// SPDX-License-Identifier: GPL-2.0 + +use kernel::device; +use kernel::pci; +use kernel::prelude::*; + +use crate::driver::Bar0; +use crate::falcon::{gsp::Gsp, sec2::Sec2, Falcon}; +use crate::fb::FbLayout; +use crate::firmware::{ + booter::{BooterFirmware, BooterKind}, + fwsec::{FwsecCommand, FwsecFirmware}, + gsp::GspFirmware, + FIRMWARE_VERSION, +}; +use crate::gpu::Chipset; +use crate::regs; +use crate::vbios::Vbios; + +impl super::Gsp { + /// Helper function to load and run the FWSEC-FRTS firmware and confirm that it has properly + /// created the WPR2 region. + fn run_fwsec_frts( + dev: &device::Device<device::Bound>, + falcon: &Falcon<Gsp>, + bar: &Bar0, + bios: &Vbios, + fb_layout: &FbLayout, + ) -> Result<()> { + // Check that the WPR2 region does not already exists - if it does, we cannot run + // FWSEC-FRTS until the GPU is reset. 
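// Editorial note, not part of this patch: a condensed sketch of how the WPR2 bounds read
// back from NV_PFB_PRI_MMU_WPR2_ADDR_LO/HI are interpreted by the checks below. The
// register fields hold bits 12..40 of the addresses, so both bounds are 4 KiB aligned,
// and a zero high bound means the region does not exist. `wpr2_created_at()` is a
// hypothetical helper.
fn wpr2_created_at(frts_start: u64, lo_val: u32, hi_val: u32) -> bool {
    let wpr2_lo = u64::from(lo_val) << 12; // lower bound, inclusive
    let wpr2_hi = u64::from(hi_val) << 12; // higher bound, exclusive; 0 means "not set"
    wpr2_hi != 0 && wpr2_lo == frts_start
}

#[test]
fn wpr2_check_example() {
    // FRTS requested at 0xff00_0000; the registers report 0xff000..0xff100 in 4 KiB
    // units, i.e. a 1 MiB region starting at the requested address.
    assert!(wpr2_created_at(0xff00_0000, 0xff000, 0xff100));
    // A zero high bound means FWSEC-FRTS did not create the region.
    assert!(!wpr2_created_at(0xff00_0000, 0, 0));
}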
+ if regs::NV_PFB_PRI_MMU_WPR2_ADDR_HI::read(bar).higher_bound() != 0 { + dev_err!( + dev, + "WPR2 region already exists - GPU needs to be reset to proceed\n" + ); + return Err(EBUSY); + } + + let fwsec_frts = FwsecFirmware::new( + dev, + falcon, + bar, + bios, + FwsecCommand::Frts { + frts_addr: fb_layout.frts.start, + frts_size: fb_layout.frts.end - fb_layout.frts.start, + }, + )?; + + // Run FWSEC-FRTS to create the WPR2 region. + fwsec_frts.run(dev, falcon, bar)?; + + // SCRATCH_E contains the error code for FWSEC-FRTS. + let frts_status = regs::NV_PBUS_SW_SCRATCH_0E_FRTS_ERR::read(bar).frts_err_code(); + if frts_status != 0 { + dev_err!( + dev, + "FWSEC-FRTS returned with error code {:#x}", + frts_status + ); + + return Err(EIO); + } + + // Check that the WPR2 region has been created as we requested. + let (wpr2_lo, wpr2_hi) = ( + regs::NV_PFB_PRI_MMU_WPR2_ADDR_LO::read(bar).lower_bound(), + regs::NV_PFB_PRI_MMU_WPR2_ADDR_HI::read(bar).higher_bound(), + ); + + match (wpr2_lo, wpr2_hi) { + (_, 0) => { + dev_err!(dev, "WPR2 region not created after running FWSEC-FRTS\n"); + + Err(EIO) + } + (wpr2_lo, _) if wpr2_lo != fb_layout.frts.start => { + dev_err!( + dev, + "WPR2 region created at unexpected address {:#x}; expected {:#x}\n", + wpr2_lo, + fb_layout.frts.start, + ); + + Err(EIO) + } + (wpr2_lo, wpr2_hi) => { + dev_dbg!(dev, "WPR2: {:#x}-{:#x}\n", wpr2_lo, wpr2_hi); + dev_dbg!(dev, "GPU instance built\n"); + + Ok(()) + } + } + } + + /// Attempt to boot the GSP. + /// + /// This is a GPU-dependent and complex procedure that involves loading firmware files from + /// user-space, patching them with signatures, and building firmware-specific intricate data + /// structures that the GSP will use at runtime. + /// + /// Upon return, the GSP is up and running, and its runtime object given as return value. + pub(crate) fn boot( + self: Pin<&mut Self>, + pdev: &pci::Device<device::Bound>, + bar: &Bar0, + chipset: Chipset, + gsp_falcon: &Falcon<Gsp>, + sec2_falcon: &Falcon<Sec2>, + ) -> Result { + let dev = pdev.as_ref(); + + let bios = Vbios::new(dev, bar)?; + + let _gsp_fw = KBox::pin_init( + GspFirmware::new(dev, chipset, FIRMWARE_VERSION)?, + GFP_KERNEL, + )?; + + let fb_layout = FbLayout::new(chipset, bar)?; + dev_dbg!(dev, "{:#x?}\n", fb_layout); + + Self::run_fwsec_frts(dev, gsp_falcon, bar, &bios, &fb_layout)?; + + let _booter_loader = BooterFirmware::new( + dev, + BooterKind::Loader, + chipset, + FIRMWARE_VERSION, + sec2_falcon, + bar, + )?; + + Ok(()) + } +} diff --git a/drivers/gpu/nova-core/gsp/fw.rs b/drivers/gpu/nova-core/gsp/fw.rs new file mode 100644 index 000000000000..34226dd00982 --- /dev/null +++ b/drivers/gpu/nova-core/gsp/fw.rs @@ -0,0 +1,7 @@ +// SPDX-License-Identifier: GPL-2.0 + +mod r570_144; + +// Alias to avoid repeating the version number with every use. +#[expect(unused)] +use r570_144 as bindings; diff --git a/drivers/gpu/nova-core/gsp/fw/r570_144.rs b/drivers/gpu/nova-core/gsp/fw/r570_144.rs new file mode 100644 index 000000000000..35cb0370a7c9 --- /dev/null +++ b/drivers/gpu/nova-core/gsp/fw/r570_144.rs @@ -0,0 +1,29 @@ +// SPDX-License-Identifier: GPL-2.0 + +//! Firmware bindings. +//! +//! Imports the generated bindings by `bindgen`. +//! +//! This module may not be directly used. Please abstract or re-export the needed symbols in the +//! parent module instead. 
+ +#![cfg_attr(test, allow(deref_nullptr))] +#![cfg_attr(test, allow(unaligned_references))] +#![cfg_attr(test, allow(unsafe_op_in_unsafe_fn))] +#![allow( + dead_code, + unused_imports, + clippy::all, + clippy::undocumented_unsafe_blocks, + clippy::ptr_as_ptr, + clippy::ref_as_ptr, + missing_docs, + non_camel_case_types, + non_upper_case_globals, + non_snake_case, + improper_ctypes, + unreachable_pub, + unsafe_op_in_unsafe_fn +)] +use kernel::ffi; +include!("r570_144/bindings.rs"); diff --git a/drivers/gpu/nova-core/gsp/fw/r570_144/bindings.rs b/drivers/gpu/nova-core/gsp/fw/r570_144/bindings.rs new file mode 100644 index 000000000000..cec594032515 --- /dev/null +++ b/drivers/gpu/nova-core/gsp/fw/r570_144/bindings.rs @@ -0,0 +1 @@ +// SPDX-License-Identifier: GPL-2.0 diff --git a/drivers/gpu/nova-core/nova_core.rs b/drivers/gpu/nova-core/nova_core.rs index 618632f0abcc..fffcaee2249f 100644 --- a/drivers/gpu/nova-core/nova_core.rs +++ b/drivers/gpu/nova-core/nova_core.rs @@ -2,18 +2,24 @@ //! Nova Core GPU Driver +mod dma; mod driver; +mod falcon; +mod fb; mod firmware; +mod gfw; mod gpu; +mod gsp; mod regs; mod util; +mod vbios; pub(crate) const MODULE_NAME: &kernel::str::CStr = <LocalModule as kernel::ModuleMetadata>::NAME; kernel::module_pci_driver! { type: driver::NovaCore, name: "NovaCore", - author: "Danilo Krummrich", + authors: ["Danilo Krummrich"], description: "Nova Core GPU driver", license: "GPL v2", firmware: [], diff --git a/drivers/gpu/nova-core/regs.rs b/drivers/gpu/nova-core/regs.rs index 5a1273230306..206dab2e1335 100644 --- a/drivers/gpu/nova-core/regs.rs +++ b/drivers/gpu/nova-core/regs.rs @@ -5,12 +5,16 @@ #![allow(non_camel_case_types)] #[macro_use] -mod macros; +pub(crate) mod macros; +use crate::falcon::{ + DmaTrfCmdSize, FalconCoreRev, FalconCoreRevSubversion, FalconFbifMemType, FalconFbifTarget, + FalconModSelAlgo, FalconSecurityModel, PFalcon2Base, PFalconBase, PeregrineCoreSelect, +}; use crate::gpu::{Architecture, Chipset}; use kernel::prelude::*; -/* PMC */ +// PMC register!(NV_PMC_BOOT_0 @ 0x00000000, "Basic revision information about the GPU" { 3:0 minor_revision as u8, "Minor revision of the chip"; @@ -24,7 +28,7 @@ impl NV_PMC_BOOT_0 { /// Combines `architecture_0` and `architecture_1` to obtain the architecture of the chip. pub(crate) fn architecture(self) -> Result<Architecture> { Architecture::try_from( - self.architecture_0() | (self.architecture_1() << Self::ARCHITECTURE_0.len()), + self.architecture_0() | (self.architecture_1() << Self::ARCHITECTURE_0_RANGE.len()), ) } @@ -32,8 +36,311 @@ impl NV_PMC_BOOT_0 { pub(crate) fn chipset(self) -> Result<Chipset> { self.architecture() .map(|arch| { - ((arch as u32) << Self::IMPLEMENTATION.len()) | self.implementation() as u32 + ((arch as u32) << Self::IMPLEMENTATION_RANGE.len()) + | u32::from(self.implementation()) }) .and_then(Chipset::try_from) } } + +// PBUS + +register!(NV_PBUS_SW_SCRATCH @ 0x00001400[64] {}); + +register!(NV_PBUS_SW_SCRATCH_0E_FRTS_ERR => NV_PBUS_SW_SCRATCH[0xe], + "scratch register 0xe used as FRTS firmware error code" { + 31:16 frts_err_code as u16; +}); + +// PFB + +// The following two registers together hold the physical system memory address that is used by the +// GPU to perform sysmembar operations (see `fb::SysmemFlush`). 
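// Editorial note, not part of this patch: how a 64-bit system memory address is split
// across the two flush registers defined just below (bits 39:8 go into
// NV_PFB_NISO_FLUSH_SYSMEM_ADDR, bits 63:40 into NV_PFB_NISO_FLUSH_SYSMEM_ADDR_HI).
// `sysmem_flush_parts()` is a hypothetical helper that only illustrates the encoding.
fn sysmem_flush_parts(addr: u64) -> (u32, u32) {
    let adr_39_08 = ((addr >> 8) & 0xffff_ffff) as u32;
    let adr_63_40 = ((addr >> 40) & 0x00ff_ffff) as u32;
    (adr_39_08, adr_63_40)
}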
+ +register!(NV_PFB_NISO_FLUSH_SYSMEM_ADDR @ 0x00100c10 { + 31:0 adr_39_08 as u32; +}); + +register!(NV_PFB_NISO_FLUSH_SYSMEM_ADDR_HI @ 0x00100c40 { + 23:0 adr_63_40 as u32; +}); + +register!(NV_PFB_PRI_MMU_LOCAL_MEMORY_RANGE @ 0x00100ce0 { + 3:0 lower_scale as u8; + 9:4 lower_mag as u8; + 30:30 ecc_mode_enabled as bool; +}); + +impl NV_PFB_PRI_MMU_LOCAL_MEMORY_RANGE { + /// Returns the usable framebuffer size, in bytes. + pub(crate) fn usable_fb_size(self) -> u64 { + let size = (u64::from(self.lower_mag()) << u64::from(self.lower_scale())) + * kernel::sizes::SZ_1M as u64; + + if self.ecc_mode_enabled() { + // Remove the amount of memory reserved for ECC (one per 16 units). + size / 16 * 15 + } else { + size + } + } +} + +register!(NV_PFB_PRI_MMU_WPR2_ADDR_LO@0x001fa824 { + 31:4 lo_val as u32, "Bits 12..40 of the lower (inclusive) bound of the WPR2 region"; +}); + +impl NV_PFB_PRI_MMU_WPR2_ADDR_LO { + /// Returns the lower (inclusive) bound of the WPR2 region. + pub(crate) fn lower_bound(self) -> u64 { + u64::from(self.lo_val()) << 12 + } +} + +register!(NV_PFB_PRI_MMU_WPR2_ADDR_HI@0x001fa828 { + 31:4 hi_val as u32, "Bits 12..40 of the higher (exclusive) bound of the WPR2 region"; +}); + +impl NV_PFB_PRI_MMU_WPR2_ADDR_HI { + /// Returns the higher (exclusive) bound of the WPR2 region. + /// + /// A value of zero means the WPR2 region is not set. + pub(crate) fn higher_bound(self) -> u64 { + u64::from(self.hi_val()) << 12 + } +} + +// PGC6 register space. +// +// `GC6` is a GPU low-power state where VRAM is in self-refresh and the GPU is powered down (except +// for power rails needed to keep self-refresh working and important registers and hardware +// blocks). +// +// These scratch registers remain powered on even in a low-power state and have a designated group +// number. + +// Privilege level mask register. It dictates whether the host CPU has privilege to access the +// `PGC6_AON_SECURE_SCRATCH_GROUP_05` register (which it needs to read GFW_BOOT). +register!(NV_PGC6_AON_SECURE_SCRATCH_GROUP_05_PRIV_LEVEL_MASK @ 0x00118128, + "Privilege level mask register" { + 0:0 read_protection_level0 as bool, "Set after FWSEC lowers its protection level"; +}); + +// OpenRM defines this as a register array, but doesn't specify its size and only uses its first +// element. Be conservative until we know the actual size or need to use more registers. +register!(NV_PGC6_AON_SECURE_SCRATCH_GROUP_05 @ 0x00118234[1] {}); + +register!( + NV_PGC6_AON_SECURE_SCRATCH_GROUP_05_0_GFW_BOOT => NV_PGC6_AON_SECURE_SCRATCH_GROUP_05[0], + "Scratch group 05 register 0 used as GFW boot progress indicator" { + 7:0 progress as u8, "Progress of GFW boot (0xff means completed)"; + } +); + +impl NV_PGC6_AON_SECURE_SCRATCH_GROUP_05_0_GFW_BOOT { + /// Returns `true` if GFW boot is completed. + pub(crate) fn completed(self) -> bool { + self.progress() == 0xff + } +} + +register!(NV_PGC6_AON_SECURE_SCRATCH_GROUP_42 @ 0x001183a4 { + 31:0 value as u32; +}); + +register!( + NV_USABLE_FB_SIZE_IN_MB => NV_PGC6_AON_SECURE_SCRATCH_GROUP_42, + "Scratch group 42 register used as framebuffer size" { + 31:0 value as u32, "Usable framebuffer size, in megabytes"; + } +); + +impl NV_USABLE_FB_SIZE_IN_MB { + /// Returns the usable framebuffer size, in bytes. 
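// Editorial note, not part of this patch: worked example of the two framebuffer-size
// derivations in this file. `from_local_memory_range()` mirrors
// NV_PFB_PRI_MMU_LOCAL_MEMORY_RANGE::usable_fb_size() above, and `from_scratch_mb()`
// mirrors NV_USABLE_FB_SIZE_IN_MB; both helpers are hypothetical and exist only for
// illustration.
const SZ_1M: u64 = 1 << 20;

fn from_local_memory_range(lower_mag: u64, lower_scale: u64, ecc_mode_enabled: bool) -> u64 {
    let size = (lower_mag << lower_scale) * SZ_1M;
    // One unit in sixteen is reserved when ECC is enabled.
    if ecc_mode_enabled { size / 16 * 15 } else { size }
}

fn from_scratch_mb(mb: u32) -> u64 {
    u64::from(mb) * SZ_1M
}

#[test]
fn fb_size_example() {
    // lower_mag = 8, lower_scale = 10 encodes 8192 MiB; with ECC enabled, 1/16 of that
    // is reserved, leaving 7680 MiB usable.
    assert_eq!(from_local_memory_range(8, 10, true), 7680 * SZ_1M);
    assert_eq!(from_scratch_mb(8192), 8192 * SZ_1M);
}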
+ pub(crate) fn usable_fb_size(self) -> u64 { + u64::from(self.value()) * kernel::sizes::SZ_1M as u64 + } +} + +// PDISP + +register!(NV_PDISP_VGA_WORKSPACE_BASE @ 0x00625f04 { + 3:3 status_valid as bool, "Set if the `addr` field is valid"; + 31:8 addr as u32, "VGA workspace base address divided by 0x10000"; +}); + +impl NV_PDISP_VGA_WORKSPACE_BASE { + /// Returns the base address of the VGA workspace, or `None` if none exists. + pub(crate) fn vga_workspace_addr(self) -> Option<u64> { + if self.status_valid() { + Some(u64::from(self.addr()) << 16) + } else { + None + } + } +} + +// FUSE + +pub(crate) const NV_FUSE_OPT_FPF_SIZE: usize = 16; + +register!(NV_FUSE_OPT_FPF_NVDEC_UCODE1_VERSION @ 0x00824100[NV_FUSE_OPT_FPF_SIZE] { + 15:0 data as u16; +}); + +register!(NV_FUSE_OPT_FPF_SEC2_UCODE1_VERSION @ 0x00824140[NV_FUSE_OPT_FPF_SIZE] { + 15:0 data as u16; +}); + +register!(NV_FUSE_OPT_FPF_GSP_UCODE1_VERSION @ 0x008241c0[NV_FUSE_OPT_FPF_SIZE] { + 15:0 data as u16; +}); + +// PFALCON + +register!(NV_PFALCON_FALCON_IRQSCLR @ PFalconBase[0x00000004] { + 4:4 halt as bool; + 6:6 swgen0 as bool; +}); + +register!(NV_PFALCON_FALCON_MAILBOX0 @ PFalconBase[0x00000040] { + 31:0 value as u32; +}); + +register!(NV_PFALCON_FALCON_MAILBOX1 @ PFalconBase[0x00000044] { + 31:0 value as u32; +}); + +register!(NV_PFALCON_FALCON_RM @ PFalconBase[0x00000084] { + 31:0 value as u32; +}); + +register!(NV_PFALCON_FALCON_HWCFG2 @ PFalconBase[0x000000f4] { + 10:10 riscv as bool; + 12:12 mem_scrubbing as bool, "Set to 0 after memory scrubbing is completed"; + 31:31 reset_ready as bool, "Signal indicating that reset is completed (GA102+)"; +}); + +impl NV_PFALCON_FALCON_HWCFG2 { + /// Returns `true` if memory scrubbing is completed. + pub(crate) fn mem_scrubbing_done(self) -> bool { + !self.mem_scrubbing() + } +} + +register!(NV_PFALCON_FALCON_CPUCTL @ PFalconBase[0x00000100] { + 1:1 startcpu as bool; + 4:4 halted as bool; + 6:6 alias_en as bool; +}); + +register!(NV_PFALCON_FALCON_BOOTVEC @ PFalconBase[0x00000104] { + 31:0 value as u32; +}); + +register!(NV_PFALCON_FALCON_DMACTL @ PFalconBase[0x0000010c] { + 0:0 require_ctx as bool; + 1:1 dmem_scrubbing as bool; + 2:2 imem_scrubbing as bool; + 6:3 dmaq_num as u8; + 7:7 secure_stat as bool; +}); + +register!(NV_PFALCON_FALCON_DMATRFBASE @ PFalconBase[0x00000110] { + 31:0 base as u32; +}); + +register!(NV_PFALCON_FALCON_DMATRFMOFFS @ PFalconBase[0x00000114] { + 23:0 offs as u32; +}); + +register!(NV_PFALCON_FALCON_DMATRFCMD @ PFalconBase[0x00000118] { + 0:0 full as bool; + 1:1 idle as bool; + 3:2 sec as u8; + 4:4 imem as bool; + 5:5 is_write as bool; + 10:8 size as u8 ?=> DmaTrfCmdSize; + 14:12 ctxdma as u8; + 16:16 set_dmtag as u8; +}); + +register!(NV_PFALCON_FALCON_DMATRFFBOFFS @ PFalconBase[0x0000011c] { + 31:0 offs as u32; +}); + +register!(NV_PFALCON_FALCON_DMATRFBASE1 @ PFalconBase[0x00000128] { + 8:0 base as u16; +}); + +register!(NV_PFALCON_FALCON_HWCFG1 @ PFalconBase[0x0000012c] { + 3:0 core_rev as u8 ?=> FalconCoreRev, "Core revision"; + 5:4 security_model as u8 ?=> FalconSecurityModel, "Security model"; + 7:6 core_rev_subversion as u8 ?=> FalconCoreRevSubversion, "Core revision subversion"; +}); + +register!(NV_PFALCON_FALCON_CPUCTL_ALIAS @ PFalconBase[0x00000130] { + 1:1 startcpu as bool; +}); + +// Actually known as `NV_PSEC_FALCON_ENGINE` and `NV_PGSP_FALCON_ENGINE` depending on the falcon +// instance. 
+register!(NV_PFALCON_FALCON_ENGINE @ PFalconBase[0x000003c0] { + 0:0 reset as bool; +}); + +register!(NV_PFALCON_FBIF_TRANSCFG @ PFalconBase[0x00000600[8]] { + 1:0 target as u8 ?=> FalconFbifTarget; + 2:2 mem_type as bool => FalconFbifMemType; +}); + +register!(NV_PFALCON_FBIF_CTL @ PFalconBase[0x00000624] { + 7:7 allow_phys_no_ctx as bool; +}); + +/* PFALCON2 */ + +register!(NV_PFALCON2_FALCON_MOD_SEL @ PFalcon2Base[0x00000180] { + 7:0 algo as u8 ?=> FalconModSelAlgo; +}); + +register!(NV_PFALCON2_FALCON_BROM_CURR_UCODE_ID @ PFalcon2Base[0x00000198] { + 7:0 ucode_id as u8; +}); + +register!(NV_PFALCON2_FALCON_BROM_ENGIDMASK @ PFalcon2Base[0x0000019c] { + 31:0 value as u32; +}); + +// OpenRM defines this as a register array, but doesn't specify its size and only uses its first +// element. Be conservative until we know the actual size or need to use more registers. +register!(NV_PFALCON2_FALCON_BROM_PARAADDR @ PFalcon2Base[0x00000210[1]] { + 31:0 value as u32; +}); + +// PRISCV + +register!(NV_PRISCV_RISCV_BCR_CTRL @ PFalconBase[0x00001668] { + 0:0 valid as bool; + 4:4 core_select as bool => PeregrineCoreSelect; + 8:8 br_fetch as bool; +}); + +// The modules below provide registers that are not identical on all supported chips. They should +// only be used in HAL modules. + +pub(crate) mod gm107 { + // FUSE + + register!(NV_FUSE_STATUS_OPT_DISPLAY @ 0x00021c04 { + 0:0 display_disabled as bool; + }); +} + +pub(crate) mod ga100 { + // FUSE + + register!(NV_FUSE_STATUS_OPT_DISPLAY @ 0x00820c04 { + 0:0 display_disabled as bool; + }); +} diff --git a/drivers/gpu/nova-core/regs/macros.rs b/drivers/gpu/nova-core/regs/macros.rs index 7ecc70efb3cd..8058e1696df9 100644 --- a/drivers/gpu/nova-core/regs/macros.rs +++ b/drivers/gpu/nova-core/regs/macros.rs @@ -1,17 +1,27 @@ // SPDX-License-Identifier: GPL-2.0 -//! Macro to define register layout and accessors. +//! `register!` macro to define register layout and accessors. //! //! A single register typically includes several fields, which are accessed through a combination //! of bit-shift and mask operations that introduce a class of potential mistakes, notably because //! not all possible field values are necessarily valid. //! -//! The macro in this module allow to define, using an intruitive and readable syntax, a dedicated -//! type for each register with its own field accessors that can return an error is a field's value -//! is invalid. +//! The `register!` macro in this module provides an intuitive and readable syntax for defining a +//! dedicated type for each register. Each such type comes with its own field accessors that can +//! return an error if a field's value is invalid. -/// Defines a dedicated type for a register with an absolute offset, alongside with getter and -/// setter methods for its fields and methods to read and write it from an `Io` region. +/// Trait providing a base address to be added to the offset of a relative register to obtain +/// its actual offset. +/// +/// The `T` generic argument is used to distinguish which base to use, in case a type provides +/// several bases. It is given to the `register!` macro to restrict the use of the register to +/// implementors of this particular variant. +pub(crate) trait RegisterBase<T> { + const BASE: usize; +} + +/// Defines a dedicated type for a register with an absolute offset, including getter and setter +/// methods for its fields and methods to read and write it from an `Io` region. 
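// Editorial note, not part of this patch: a rough, hand-expanded sketch of what the macro
// generates for the simple fixed-offset `BOOT_0` register documented just below. Only one
// field and its accessors are shown; the real expansion also emits `Debug`, `Default` and
// `BitOr` impls as well as the `read`/`write`/`alter` I/O helpers.
#[allow(non_camel_case_types, dead_code)]
#[repr(transparent)]
#[derive(Clone, Copy)]
struct BOOT_0(u32);

#[allow(dead_code)]
impl BOOT_0 {
    const OFFSET: usize = 0x0000_0100;
    const MINOR_REVISION_MASK: u32 = 0x0000_000f;
    const MINOR_REVISION_SHIFT: u32 = 0;

    /// Returns the value of the `minor_revision` field (bits 3:0).
    fn minor_revision(self) -> u8 {
        ((self.0 & Self::MINOR_REVISION_MASK) >> Self::MINOR_REVISION_SHIFT) as u8
    }

    /// Sets the `minor_revision` field and returns the updated register value.
    fn set_minor_revision(mut self, value: u8) -> Self {
        let value = (u32::from(value) << Self::MINOR_REVISION_SHIFT) & Self::MINOR_REVISION_MASK;
        self.0 = (self.0 & !Self::MINOR_REVISION_MASK) | value;
        self
    }
}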
/// /// Example: /// @@ -24,7 +34,7 @@ /// ``` /// /// This defines a `BOOT_0` type which can be read or written from offset `0x100` of an `Io` -/// region. It is composed of 3 fields, for instance `minor_revision` is made of the 4 less +/// region. It is composed of 3 fields, for instance `minor_revision` is made of the 4 least /// significant bits of the register. Each field can be accessed and modified using accessor /// methods: /// @@ -33,88 +43,345 @@ /// let boot0 = BOOT_0::read(&bar); /// pr_info!("chip revision: {}.{}", boot0.major_revision(), boot0.minor_revision()); /// -/// // `Chipset::try_from` will be called with the value of the field and returns an error if the -/// // value is invalid. +/// // `Chipset::try_from` is called with the value of the `chipset` field and returns an +/// // error if it is invalid. /// let chipset = boot0.chipset()?; /// /// // Update some fields and write the value back. /// boot0.set_major_revision(3).set_minor_revision(10).write(&bar); /// -/// // Or just read and update the register in a single step: +/// // Or, just read and update the register in a single step: /// BOOT_0::alter(&bar, |r| r.set_major_revision(3).set_minor_revision(10)); /// ``` /// -/// Fields can be defined as follows: +/// Fields are defined as follows: /// -/// - `as <type>` simply returns the field value casted as the requested integer type, typically -/// `u32`, `u16`, `u8` or `bool`. Note that `bool` fields must have a range of 1 bit. +/// - `as <type>` simply returns the field value casted to <type>, typically `u32`, `u16`, `u8` or +/// `bool`. Note that `bool` fields must have a range of 1 bit. /// - `as <type> => <into_type>` calls `<into_type>`'s `From::<<type>>` implementation and returns /// the result. /// - `as <type> ?=> <try_into_type>` calls `<try_into_type>`'s `TryFrom::<<type>>` implementation -/// and returns the result. This is useful on fields for which not all values are value. +/// and returns the result. This is useful with fields for which not all values are valid. /// /// The documentation strings are optional. If present, they will be added to the type's /// definition, or the field getter and setter methods they are attached to. /// -/// Putting a `+` before the address of the register makes it relative to a base: the `read` and -/// `write` methods take a `base` argument that is added to the specified address before access, -/// and `try_read` and `try_write` methods are also created, allowing access with offsets unknown -/// at compile-time: +/// It is also possible to create a alias register by using the `=> ALIAS` syntax. This is useful +/// for cases where a register's interpretation depends on the context: +/// +/// ```no_run +/// register!(SCRATCH @ 0x00000200, "Scratch register" { +/// 31:0 value as u32, "Raw value"; +/// }); +/// +/// register!(SCRATCH_BOOT_STATUS => SCRATCH, "Boot status of the firmware" { +/// 0:0 completed as bool, "Whether the firmware has completed booting"; +/// }); +/// ``` +/// +/// In this example, `SCRATCH_0_BOOT_STATUS` uses the same I/O address as `SCRATCH`, while also +/// providing its own `completed` field. +/// +/// ## Relative registers +/// +/// A register can be defined as being accessible from a fixed offset of a provided base. For +/// instance, imagine the following I/O space: +/// +/// ```text +/// +-----------------------------+ +/// | ... 
| +/// | | +/// 0x100--->+------------CPU0-------------+ +/// | | +/// 0x110--->+-----------------------------+ +/// | CPU_CTL | +/// +-----------------------------+ +/// | ... | +/// | | +/// | | +/// 0x200--->+------------CPU1-------------+ +/// | | +/// 0x210--->+-----------------------------+ +/// | CPU_CTL | +/// +-----------------------------+ +/// | ... | +/// +-----------------------------+ +/// ``` +/// +/// `CPU0` and `CPU1` both have a `CPU_CTL` register that starts at offset `0x10` of their I/O +/// space segment. Since both instances of `CPU_CTL` share the same layout, we don't want to define +/// them twice and would prefer a way to select which one to use from a single definition +/// +/// This can be done using the `Base[Offset]` syntax when specifying the register's address. +/// +/// `Base` is an arbitrary type (typically a ZST) to be used as a generic parameter of the +/// [`RegisterBase`] trait to provide the base as a constant, i.e. each type providing a base for +/// this register needs to implement `RegisterBase<Base>`. Here is the above example translated +/// into code: +/// +/// ```no_run +/// // Type used to identify the base. +/// pub(crate) struct CpuCtlBase; +/// +/// // ZST describing `CPU0`. +/// struct Cpu0; +/// impl RegisterBase<CpuCtlBase> for Cpu0 { +/// const BASE: usize = 0x100; +/// } +/// // Singleton of `CPU0` used to identify it. +/// const CPU0: Cpu0 = Cpu0; +/// +/// // ZST describing `CPU1`. +/// struct Cpu1; +/// impl RegisterBase<CpuCtlBase> for Cpu1 { +/// const BASE: usize = 0x200; +/// } +/// // Singleton of `CPU1` used to identify it. +/// const CPU1: Cpu1 = Cpu1; +/// +/// // This makes `CPU_CTL` accessible from all implementors of `RegisterBase<CpuCtlBase>`. +/// register!(CPU_CTL @ CpuCtlBase[0x10], "CPU core control" { +/// 0:0 start as bool, "Start the CPU core"; +/// }); +/// +/// // The `read`, `write` and `alter` methods of relative registers take an extra `base` argument +/// // that is used to resolve its final address by adding its `BASE` to the offset of the +/// // register. +/// +/// // Start `CPU0`. +/// CPU_CTL::alter(bar, &CPU0, |r| r.set_start(true)); +/// +/// // Start `CPU1`. +/// CPU_CTL::alter(bar, &CPU1, |r| r.set_start(true)); +/// +/// // Aliases can also be defined for relative register. +/// register!(CPU_CTL_ALIAS => CpuCtlBase[CPU_CTL], "Alias to CPU core control" { +/// 1:1 alias_start as bool, "Start the aliased CPU core"; +/// }); +/// +/// // Start the aliased `CPU0`. +/// CPU_CTL_ALIAS::alter(bar, &CPU0, |r| r.set_alias_start(true)); +/// ``` +/// +/// ## Arrays of registers +/// +/// Some I/O areas contain consecutive values that can be interpreted in the same way. These areas +/// can be defined as an array of identical registers, allowing them to be accessed by index with +/// compile-time or runtime bound checking. Simply define their address as `Address[Size]`, and add +/// an `idx` parameter to their `read`, `write` and `alter` methods: /// /// ```no_run -/// register!(CPU_CTL @ +0x0000010, "CPU core control" { -/// 0:0 start as bool, "Start the CPU core"; +/// # fn no_run() -> Result<(), Error> { +/// # fn get_scratch_idx() -> usize { +/// # 0x15 +/// # } +/// // Array of 64 consecutive registers with the same layout starting at offset `0x80`. +/// register!(SCRATCH @ 0x00000080[64], "Scratch registers" { +/// 31:0 value as u32; /// }); /// -/// // Flip the `start` switch for the CPU core which base address is at `CPU_BASE`. 
-/// let cpuctl = CPU_CTL::read(&bar, CPU_BASE); -/// pr_info!("CPU CTL: {:#x}", cpuctl); -/// cpuctl.set_start(true).write(&bar, CPU_BASE); +/// // Read scratch register 0, i.e. I/O address `0x80`. +/// let scratch_0 = SCRATCH::read(bar, 0).value(); +/// // Read scratch register 15, i.e. I/O address `0x80 + (15 * 4)`. +/// let scratch_15 = SCRATCH::read(bar, 15).value(); +/// +/// // This is out of bounds and won't build. +/// // let scratch_128 = SCRATCH::read(bar, 128).value(); +/// +/// // Runtime-obtained array index. +/// let scratch_idx = get_scratch_idx(); +/// // Access on a runtime index returns an error if it is out-of-bounds. +/// let some_scratch = SCRATCH::try_read(bar, scratch_idx)?.value(); +/// +/// // Alias to a particular register in an array. +/// // Here `SCRATCH[8]` is used to convey the firmware exit code. +/// register!(FIRMWARE_STATUS => SCRATCH[8], "Firmware exit status code" { +/// 7:0 status as u8; +/// }); +/// +/// let status = FIRMWARE_STATUS::read(bar).status(); +/// +/// // Non-contiguous register arrays can be defined by adding a stride parameter. +/// // Here, each of the 16 registers of the array are separated by 8 bytes, meaning that the +/// // registers of the two declarations below are interleaved. +/// register!(SCRATCH_INTERLEAVED_0 @ 0x000000c0[16 ; 8], "Scratch registers bank 0" { +/// 31:0 value as u32; +/// }); +/// register!(SCRATCH_INTERLEAVED_1 @ 0x000000c4[16 ; 8], "Scratch registers bank 1" { +/// 31:0 value as u32; +/// }); +/// # Ok(()) +/// # } +/// ``` +/// +/// ## Relative arrays of registers +/// +/// Combining the two features described in the sections above, arrays of registers accessible from +/// a base can also be defined: +/// +/// ```no_run +/// # fn no_run() -> Result<(), Error> { +/// # fn get_scratch_idx() -> usize { +/// # 0x15 +/// # } +/// // Type used as parameter of `RegisterBase` to specify the base. +/// pub(crate) struct CpuCtlBase; +/// +/// // ZST describing `CPU0`. +/// struct Cpu0; +/// impl RegisterBase<CpuCtlBase> for Cpu0 { +/// const BASE: usize = 0x100; +/// } +/// // Singleton of `CPU0` used to identify it. +/// const CPU0: Cpu0 = Cpu0; +/// +/// // ZST describing `CPU1`. +/// struct Cpu1; +/// impl RegisterBase<CpuCtlBase> for Cpu1 { +/// const BASE: usize = 0x200; +/// } +/// // Singleton of `CPU1` used to identify it. +/// const CPU1: Cpu1 = Cpu1; +/// +/// // 64 per-cpu scratch registers, arranged as an contiguous array. +/// register!(CPU_SCRATCH @ CpuCtlBase[0x00000080[64]], "Per-CPU scratch registers" { +/// 31:0 value as u32; +/// }); +/// +/// let cpu0_scratch_0 = CPU_SCRATCH::read(bar, &Cpu0, 0).value(); +/// let cpu1_scratch_15 = CPU_SCRATCH::read(bar, &Cpu1, 15).value(); +/// +/// // This won't build. +/// // let cpu0_scratch_128 = CPU_SCRATCH::read(bar, &Cpu0, 128).value(); +/// +/// // Runtime-obtained array index. +/// let scratch_idx = get_scratch_idx(); +/// // Access on a runtime value returns an error if it is out-of-bounds. +/// let cpu0_some_scratch = CPU_SCRATCH::try_read(bar, &Cpu0, scratch_idx)?.value(); +/// +/// // `SCRATCH[8]` is used to convey the firmware exit code. +/// register!(CPU_FIRMWARE_STATUS => CpuCtlBase[CPU_SCRATCH[8]], +/// "Per-CPU firmware exit status code" { +/// 7:0 status as u8; +/// }); +/// +/// let cpu0_status = CPU_FIRMWARE_STATUS::read(bar, &Cpu0).status(); +/// +/// // Non-contiguous register arrays can be defined by adding a stride parameter. 
+/// // Here, each of the 16 registers of the array are separated by 8 bytes, meaning that the +/// // registers of the two declarations below are interleaved. +/// register!(CPU_SCRATCH_INTERLEAVED_0 @ CpuCtlBase[0x00000d00[16 ; 8]], +/// "Scratch registers bank 0" { +/// 31:0 value as u32; +/// }); +/// register!(CPU_SCRATCH_INTERLEAVED_1 @ CpuCtlBase[0x00000d04[16 ; 8]], +/// "Scratch registers bank 1" { +/// 31:0 value as u32; +/// }); +/// # Ok(()) +/// # } /// ``` macro_rules! register { // Creates a register at a fixed offset of the MMIO space. + ($name:ident @ $offset:literal $(, $comment:literal)? { $($fields:tt)* } ) => { + register!(@core $name $(, $comment)? { $($fields)* } ); + register!(@io_fixed $name @ $offset); + }; + + // Creates an alias register of fixed offset register `alias` with its own fields. + ($name:ident => $alias:ident $(, $comment:literal)? { $($fields:tt)* } ) => { + register!(@core $name $(, $comment)? { $($fields)* } ); + register!(@io_fixed $name @ $alias::OFFSET); + }; + + // Creates a register at a relative offset from a base address provider. + ($name:ident @ $base:ty [ $offset:literal ] $(, $comment:literal)? { $($fields:tt)* } ) => { + register!(@core $name $(, $comment)? { $($fields)* } ); + register!(@io_relative $name @ $base [ $offset ]); + }; + + // Creates an alias register of relative offset register `alias` with its own fields. + ($name:ident => $base:ty [ $alias:ident ] $(, $comment:literal)? { $($fields:tt)* }) => { + register!(@core $name $(, $comment)? { $($fields)* } ); + register!(@io_relative $name @ $base [ $alias::OFFSET ]); + }; + + // Creates an array of registers at a fixed offset of the MMIO space. ( - $name:ident @ $offset:literal $(, $comment:literal)? { + $name:ident @ $offset:literal [ $size:expr ; $stride:expr ] $(, $comment:literal)? { $($fields:tt)* } ) => { - register!(@common $name $(, $comment)?); - register!(@field_accessors $name { $($fields)* }); - register!(@io $name @ $offset); + static_assert!(::core::mem::size_of::<u32>() <= $stride); + register!(@core $name $(, $comment)? { $($fields)* } ); + register!(@io_array $name @ $offset [ $size ; $stride ]); }; - // Creates a register at a relative offset from a base address. + // Shortcut for contiguous array of registers (stride == size of element). ( - $name:ident @ + $offset:literal $(, $comment:literal)? { + $name:ident @ $offset:literal [ $size:expr ] $(, $comment:literal)? { $($fields:tt)* } ) => { - register!(@common $name $(, $comment)?); - register!(@field_accessors $name { $($fields)* }); - register!(@io$name @ + $offset); + register!($name @ $offset [ $size ; ::core::mem::size_of::<u32>() ] $(, $comment)? { + $($fields)* + } ); }; - // Defines the wrapper `$name` type, as well as its relevant implementations (`Debug`, `BitOr`, - // and conversion to regular `u32`). - (@common $name:ident $(, $comment:literal)?) => { + // Creates an array of registers at a relative offset from a base address provider. + ( + $name:ident @ $base:ty [ $offset:literal [ $size:expr ; $stride:expr ] ] + $(, $comment:literal)? { $($fields:tt)* } + ) => { + static_assert!(::core::mem::size_of::<u32>() <= $stride); + register!(@core $name $(, $comment)? { $($fields)* } ); + register!(@io_relative_array $name @ $base [ $offset [ $size ; $stride ] ]); + }; + + // Shortcut for contiguous array of relative registers (stride == size of element). + ( + $name:ident @ $base:ty [ $offset:literal [ $size:expr ] ] $(, $comment:literal)? 
{ + $($fields:tt)* + } + ) => { + register!($name @ $base [ $offset [ $size ; ::core::mem::size_of::<u32>() ] ] + $(, $comment)? { $($fields)* } ); + }; + + // Creates an alias of register `idx` of relative array of registers `alias` with its own + // fields. + ( + $name:ident => $base:ty [ $alias:ident [ $idx:expr ] ] $(, $comment:literal)? { + $($fields:tt)* + } + ) => { + static_assert!($idx < $alias::SIZE); + register!(@core $name $(, $comment)? { $($fields)* } ); + register!(@io_relative $name @ $base [ $alias::OFFSET + $idx * $alias::STRIDE ] ); + }; + + // Creates an alias of register `idx` of array of registers `alias` with its own fields. + // This rule belongs to the (non-relative) register arrays set, but needs to be put last + // to avoid it being interpreted in place of the relative register array alias rule. + ($name:ident => $alias:ident [ $idx:expr ] $(, $comment:literal)? { $($fields:tt)* }) => { + static_assert!($idx < $alias::SIZE); + register!(@core $name $(, $comment)? { $($fields)* } ); + register!(@io_fixed $name @ $alias::OFFSET + $idx * $alias::STRIDE ); + }; + + // All rules below are helpers. + + // Defines the wrapper `$name` type, as well as its relevant implementations (`Debug`, + // `Default`, `BitOr`, and conversion to the value type) and field accessor methods. + (@core $name:ident $(, $comment:literal)? { $($fields:tt)* }) => { $( #[doc=$comment] )? #[repr(transparent)] - #[derive(Clone, Copy, Default)] + #[derive(Clone, Copy)] pub(crate) struct $name(u32); - // TODO: display the raw hex value, then the value of all the fields. This requires - // matching the fields, which will complexify the syntax considerably... - impl ::core::fmt::Debug for $name { - fn fmt(&self, f: &mut ::core::fmt::Formatter<'_>) -> ::core::fmt::Result { - f.debug_tuple(stringify!($name)) - .field(&format_args!("0x{0:x}", &self.0)) - .finish() - } - } - - impl core::ops::BitOr for $name { + impl ::core::ops::BitOr for $name { type Output = Self; fn bitor(self, rhs: Self) -> Self::Output { @@ -127,6 +394,34 @@ macro_rules! register { reg.0 } } + + register!(@fields_dispatcher $name { $($fields)* }); + }; + + // Captures the fields and passes them to all the implementers that require field information. + // + // Used to simplify the matching rules for implementers, so they don't need to match the entire + // complex fields rule even though they only make use of part of it. + (@fields_dispatcher $name:ident { + $($hi:tt:$lo:tt $field:ident as $type:tt + $(?=> $try_into_type:ty)? + $(=> $into_type:ty)? + $(, $comment:literal)? + ; + )* + } + ) => { + register!(@field_accessors $name { + $( + $hi:$lo $field as $type + $(?=> $try_into_type)? + $(=> $into_type)? + $(, $comment)? + ; + )* + }); + register!(@debug $name { $($field;)* }); + register!(@default $name { $($field;)* }); }; // Defines all the field getter/methods methods for `$name`. @@ -161,7 +456,7 @@ macro_rules! register { (@check_field_bounds $hi:tt:$lo:tt $field:ident as bool) => { #[allow(clippy::eq_op)] const _: () = { - kernel::build_assert!( + ::kernel::build_assert!( $hi == $lo, concat!("boolean field `", stringify!($field), "` covers more than one bit") ); @@ -172,7 +467,7 @@ macro_rules! register { (@check_field_bounds $hi:tt:$lo:tt $field:ident as $type:tt) => { #[allow(clippy::eq_op)] const _: () = { - kernel::build_assert!( + ::kernel::build_assert!( $hi >= $lo, concat!("field `", stringify!($field), "`'s MSB is smaller than its LSB") ); @@ -185,7 +480,7 @@ macro_rules! 
register { $(, $comment:literal)?; ) => { register!( - @leaf_accessor $name $hi:$lo $field as bool + @leaf_accessor $name $hi:$lo $field { |f| <$into_type>::from(if f != 0 { true } else { false }) } $into_type => $into_type $(, $comment)?; ); @@ -203,7 +498,7 @@ macro_rules! register { @field_accessor $name:ident $hi:tt:$lo:tt $field:ident as $type:tt ?=> $try_into_type:ty $(, $comment:literal)?; ) => { - register!(@leaf_accessor $name $hi:$lo $field as $type + register!(@leaf_accessor $name $hi:$lo $field { |f| <$try_into_type>::try_from(f as $type) } $try_into_type => ::core::result::Result< $try_into_type, @@ -217,11 +512,11 @@ macro_rules! register { @field_accessor $name:ident $hi:tt:$lo:tt $field:ident as $type:tt => $into_type:ty $(, $comment:literal)?; ) => { - register!(@leaf_accessor $name $hi:$lo $field as $type + register!(@leaf_accessor $name $hi:$lo $field { |f| <$into_type>::from(f as $type) } $into_type => $into_type $(, $comment)?;); }; - // Shortcut for fields defined as non-`bool` without the `=>` or `?=>` syntax. + // Shortcut for non-boolean fields defined without the `=>` or `?=>` syntax. ( @field_accessor $name:ident $hi:tt:$lo:tt $field:ident as $type:tt $(, $comment:literal)?; @@ -231,11 +526,11 @@ macro_rules! register { // Generates the accessor methods for a single field. ( - @leaf_accessor $name:ident $hi:tt:$lo:tt $field:ident as $type:ty + @leaf_accessor $name:ident $hi:tt:$lo:tt $field:ident { $process:expr } $to_type:ty => $res_type:ty $(, $comment:literal)?; ) => { - kernel::macros::paste!( - const [<$field:upper>]: ::core::ops::RangeInclusive<u8> = $lo..=$hi; + ::kernel::macros::paste!( + const [<$field:upper _RANGE>]: ::core::ops::RangeInclusive<u8> = $lo..=$hi; const [<$field:upper _MASK>]: u32 = ((((1 << $hi) - 1) << 1) + 1) - ((1 << $lo) - 1); const [<$field:upper _SHIFT>]: u32 = Self::[<$field:upper _MASK>].trailing_zeros(); ); @@ -244,9 +539,9 @@ macro_rules! register { #[doc="Returns the value of this field:"] #[doc=$comment] )? - #[inline] + #[inline(always)] pub(crate) fn $field(self) -> $res_type { - kernel::macros::paste!( + ::kernel::macros::paste!( const MASK: u32 = $name::[<$field:upper _MASK>]; const SHIFT: u32 = $name::[<$field:upper _SHIFT>]; ); @@ -255,16 +550,16 @@ macro_rules! register { $process(field) } - kernel::macros::paste!( + ::kernel::macros::paste!( $( #[doc="Sets the value of this field:"] #[doc=$comment] )? - #[inline] + #[inline(always)] pub(crate) fn [<set_ $field>](mut self, value: $to_type) -> Self { const MASK: u32 = $name::[<$field:upper _MASK>]; const SHIFT: u32 = $name::[<$field:upper _SHIFT>]; - let value = ((value as u32) << SHIFT) & MASK; + let value = (u32::from(value) << SHIFT) & MASK; self.0 = (self.0 & !MASK) | value; self @@ -272,25 +567,64 @@ macro_rules! register { ); }; - // Creates the IO accessors for a fixed offset register. - (@io $name:ident @ $offset:literal) => { + // Generates the `Debug` implementation for `$name`. + (@debug $name:ident { $($field:ident;)* }) => { + impl ::kernel::fmt::Debug for $name { + fn fmt(&self, f: &mut ::kernel::fmt::Formatter<'_>) -> ::kernel::fmt::Result { + f.debug_struct(stringify!($name)) + .field("<raw>", &::kernel::prelude::fmt!("{:#x}", &self.0)) + $( + .field(stringify!($field), &self.$field()) + )* + .finish() + } + } + }; + + // Generates the `Default` implementation for `$name`. + (@default $name:ident { $($field:ident;)* }) => { + /// Returns a value for the register where all fields are set to their default value. 
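The `@leaf_accessor` rule above derives a per-field mask and shift from the `hi:lo` bit range and uses them in the generated getter and setter. A small worked example for a hypothetical field covering bits 7:4, using the same expressions as the macro:

// Verifies the mask/shift arithmetic for a field spanning bits 7:4.
fn main() {
    const HI: u32 = 7;
    const LO: u32 = 4;

    // Same expression as the macro: all bits up to HI set, minus all bits below LO.
    const MASK: u32 = ((((1 << HI) - 1) << 1) + 1) - ((1 << LO) - 1);
    const SHIFT: u32 = MASK.trailing_zeros();

    assert_eq!(MASK, 0xf0);
    assert_eq!(SHIFT, 4);

    // Setter: clear the field, then OR in the shifted and masked new value.
    let mut reg = 0xdead_beef_u32;
    let value = 0xa_u32;
    reg = (reg & !MASK) | ((value << SHIFT) & MASK);
    assert_eq!(reg & MASK, 0xa0);

    // Getter: mask and shift back down.
    assert_eq!((reg & MASK) >> SHIFT, 0xa);
    println!("mask={MASK:#x} shift={SHIFT}");
}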
+ impl ::core::default::Default for $name { + fn default() -> Self { + #[allow(unused_mut)] + let mut value = Self(Default::default()); + + ::kernel::macros::paste!( + $( + value.[<set_ $field>](Default::default()); + )* + ); + + value + } + } + }; + + // Generates the IO accessors for a fixed offset register. + (@io_fixed $name:ident @ $offset:expr) => { #[allow(dead_code)] impl $name { - #[inline] + pub(crate) const OFFSET: usize = $offset; + + /// Read the register from its address in `io`. + #[inline(always)] pub(crate) fn read<const SIZE: usize, T>(io: &T) -> Self where T: ::core::ops::Deref<Target = ::kernel::io::Io<SIZE>>, { Self(io.read32($offset)) } - #[inline] + /// Write the value contained in `self` to the register address in `io`. + #[inline(always)] pub(crate) fn write<const SIZE: usize, T>(self, io: &T) where T: ::core::ops::Deref<Target = ::kernel::io::Io<SIZE>>, { io.write32(self.0, $offset) } - #[inline] + /// Read the register from its address in `io` and run `f` on its value to obtain a new + /// value to write back. + #[inline(always)] pub(crate) fn alter<const SIZE: usize, T, F>( io: &T, f: F, @@ -304,76 +638,322 @@ macro_rules! register { } }; - // Create the IO accessors for a relative offset register. - (@io $name:ident @ + $offset:literal) => { + // Generates the IO accessors for a relative offset register. + (@io_relative $name:ident @ $base:ty [ $offset:expr ]) => { #[allow(dead_code)] impl $name { - #[inline] + pub(crate) const OFFSET: usize = $offset; + + /// Read the register from `io`, using the base address provided by `base` and adding + /// the register's offset to it. + #[inline(always)] + pub(crate) fn read<const SIZE: usize, T, B>( + io: &T, + #[allow(unused_variables)] + base: &B, + ) -> Self where + T: ::core::ops::Deref<Target = ::kernel::io::Io<SIZE>>, + B: crate::regs::macros::RegisterBase<$base>, + { + const OFFSET: usize = $name::OFFSET; + + let value = io.read32( + <B as crate::regs::macros::RegisterBase<$base>>::BASE + OFFSET + ); + + Self(value) + } + + /// Write the value contained in `self` to `io`, using the base address provided by + /// `base` and adding the register's offset to it. + #[inline(always)] + pub(crate) fn write<const SIZE: usize, T, B>( + self, + io: &T, + #[allow(unused_variables)] + base: &B, + ) where + T: ::core::ops::Deref<Target = ::kernel::io::Io<SIZE>>, + B: crate::regs::macros::RegisterBase<$base>, + { + const OFFSET: usize = $name::OFFSET; + + io.write32( + self.0, + <B as crate::regs::macros::RegisterBase<$base>>::BASE + OFFSET + ); + } + + /// Read the register from `io`, using the base address provided by `base` and adding + /// the register's offset to it, then run `f` on its value to obtain a new value to + /// write back. + #[inline(always)] + pub(crate) fn alter<const SIZE: usize, T, B, F>( + io: &T, + base: &B, + f: F, + ) where + T: ::core::ops::Deref<Target = ::kernel::io::Io<SIZE>>, + B: crate::regs::macros::RegisterBase<$base>, + F: ::core::ops::FnOnce(Self) -> Self, + { + let reg = f(Self::read(io, base)); + reg.write(io, base); + } + } + }; + + // Generates the IO accessors for an array of registers. + (@io_array $name:ident @ $offset:literal [ $size:expr ; $stride:expr ]) => { + #[allow(dead_code)] + impl $name { + pub(crate) const OFFSET: usize = $offset; + pub(crate) const SIZE: usize = $size; + pub(crate) const STRIDE: usize = $stride; + + /// Read the array register at index `idx` from its address in `io`. 
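The fixed-offset accessors generated here boil down to `read32`/`write32` at a constant `OFFSET`, with `alter` performing a read-modify-write. Below is a simplified userspace model of that pattern; `FakeIo` stands in for `kernel::io::Io` (and takes `&mut self` only because it is backed by a plain `Vec`), and `Scratch` for a macro-generated register type.

// Minimal mock of the fixed-offset register accessors.
struct FakeIo {
    regs: Vec<u32>, // backing store, indexed by byte offset / 4
}

impl FakeIo {
    fn read32(&self, offset: usize) -> u32 {
        self.regs[offset / 4]
    }

    fn write32(&mut self, value: u32, offset: usize) {
        self.regs[offset / 4] = value;
    }
}

#[derive(Clone, Copy, Debug)]
struct Scratch(u32);

impl Scratch {
    const OFFSET: usize = 0x10;

    fn read(io: &FakeIo) -> Self {
        Self(io.read32(Self::OFFSET))
    }

    fn write(self, io: &mut FakeIo) {
        io.write32(self.0, Self::OFFSET)
    }

    // Read-modify-write helper, mirroring the generated `alter`.
    fn alter<F: FnOnce(Self) -> Self>(io: &mut FakeIo, f: F) {
        let reg = f(Self::read(io));
        reg.write(io);
    }
}

fn main() {
    let mut io = FakeIo { regs: vec![0; 16] };
    Scratch(0x1234).write(&mut io);
    Scratch::alter(&mut io, |r| Scratch(r.0 | 0x1));
    assert_eq!(Scratch::read(&io).0, 0x1235);
    println!("alter performed a read-modify-write");
}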
+ #[inline(always)] pub(crate) fn read<const SIZE: usize, T>( io: &T, - base: usize, + idx: usize, ) -> Self where T: ::core::ops::Deref<Target = ::kernel::io::Io<SIZE>>, { - Self(io.read32(base + $offset)) + build_assert!(idx < Self::SIZE); + + let offset = Self::OFFSET + (idx * Self::STRIDE); + let value = io.read32(offset); + + Self(value) } - #[inline] + /// Write the value contained in `self` to the array register with index `idx` in `io`. + #[inline(always)] pub(crate) fn write<const SIZE: usize, T>( self, io: &T, - base: usize, + idx: usize ) where T: ::core::ops::Deref<Target = ::kernel::io::Io<SIZE>>, { - io.write32(self.0, base + $offset) + build_assert!(idx < Self::SIZE); + + let offset = Self::OFFSET + (idx * Self::STRIDE); + + io.write32(self.0, offset); } - #[inline] + /// Read the array register at index `idx` in `io` and run `f` on its value to obtain a + /// new value to write back. + #[inline(always)] pub(crate) fn alter<const SIZE: usize, T, F>( io: &T, - base: usize, + idx: usize, f: F, ) where T: ::core::ops::Deref<Target = ::kernel::io::Io<SIZE>>, F: ::core::ops::FnOnce(Self) -> Self, { - let reg = f(Self::read(io, base)); - reg.write(io, base); + let reg = f(Self::read(io, idx)); + reg.write(io, idx); } - #[inline] + /// Read the array register at index `idx` from its address in `io`. + /// + /// The validity of `idx` is checked at run-time, and `EINVAL` is returned is the + /// access was out-of-bounds. + #[inline(always)] pub(crate) fn try_read<const SIZE: usize, T>( io: &T, - base: usize, + idx: usize, ) -> ::kernel::error::Result<Self> where T: ::core::ops::Deref<Target = ::kernel::io::Io<SIZE>>, { - io.try_read32(base + $offset).map(Self) + if idx < Self::SIZE { + Ok(Self::read(io, idx)) + } else { + Err(EINVAL) + } } - #[inline] + /// Write the value contained in `self` to the array register with index `idx` in `io`. + /// + /// The validity of `idx` is checked at run-time, and `EINVAL` is returned is the + /// access was out-of-bounds. + #[inline(always)] pub(crate) fn try_write<const SIZE: usize, T>( self, io: &T, - base: usize, - ) -> ::kernel::error::Result<()> where + idx: usize, + ) -> ::kernel::error::Result where T: ::core::ops::Deref<Target = ::kernel::io::Io<SIZE>>, { - io.try_write32(self.0, base + $offset) + if idx < Self::SIZE { + Ok(self.write(io, idx)) + } else { + Err(EINVAL) + } } - #[inline] + /// Read the array register at index `idx` in `io` and run `f` on its value to obtain a + /// new value to write back. + /// + /// The validity of `idx` is checked at run-time, and `EINVAL` is returned is the + /// access was out-of-bounds. + #[inline(always)] pub(crate) fn try_alter<const SIZE: usize, T, F>( io: &T, - base: usize, + idx: usize, + f: F, + ) -> ::kernel::error::Result where + T: ::core::ops::Deref<Target = ::kernel::io::Io<SIZE>>, + F: ::core::ops::FnOnce(Self) -> Self, + { + if idx < Self::SIZE { + Ok(Self::alter(io, idx, f)) + } else { + Err(EINVAL) + } + } + } + }; + + // Generates the IO accessors for an array of relative registers. + ( + @io_relative_array $name:ident @ $base:ty + [ $offset:literal [ $size:expr ; $stride:expr ] ] + ) => { + #[allow(dead_code)] + impl $name { + pub(crate) const OFFSET: usize = $offset; + pub(crate) const SIZE: usize = $size; + pub(crate) const STRIDE: usize = $stride; + + /// Read the array register at index `idx` from `io`, using the base address provided + /// by `base` and adding the register's offset to it. 
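The array accessors address entry `idx` at `OFFSET + idx * STRIDE`, and the `try_*` variants reject out-of-range indices at run time with `EINVAL`. A standalone sketch of that addressing and check, with a plain string error standing in for `EINVAL` and the constants borrowed from the scratch-register example:

// Array-register addressing with the run-time bounds check used by `try_read` et al.
const OFFSET: usize = 0x0d00;
const SIZE: usize = 16;
const STRIDE: usize = 8;

fn reg_offset(idx: usize) -> Result<usize, &'static str> {
    if idx < SIZE {
        Ok(OFFSET + idx * STRIDE)
    } else {
        Err("EINVAL: index out of bounds")
    }
}

fn main() {
    assert_eq!(reg_offset(0), Ok(0x0d00));
    assert_eq!(reg_offset(15), Ok(0x0d78));
    assert!(reg_offset(16).is_err()); // rejected at run time, like `try_read`
    println!("array register offsets check out");
}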
+ #[inline(always)] + pub(crate) fn read<const SIZE: usize, T, B>( + io: &T, + #[allow(unused_variables)] + base: &B, + idx: usize, + ) -> Self where + T: ::core::ops::Deref<Target = ::kernel::io::Io<SIZE>>, + B: crate::regs::macros::RegisterBase<$base>, + { + build_assert!(idx < Self::SIZE); + + let offset = <B as crate::regs::macros::RegisterBase<$base>>::BASE + + Self::OFFSET + (idx * Self::STRIDE); + let value = io.read32(offset); + + Self(value) + } + + /// Write the value contained in `self` to `io`, using the base address provided by + /// `base` and adding the offset of array register `idx` to it. + #[inline(always)] + pub(crate) fn write<const SIZE: usize, T, B>( + self, + io: &T, + #[allow(unused_variables)] + base: &B, + idx: usize + ) where + T: ::core::ops::Deref<Target = ::kernel::io::Io<SIZE>>, + B: crate::regs::macros::RegisterBase<$base>, + { + build_assert!(idx < Self::SIZE); + + let offset = <B as crate::regs::macros::RegisterBase<$base>>::BASE + + Self::OFFSET + (idx * Self::STRIDE); + + io.write32(self.0, offset); + } + + /// Read the array register at index `idx` from `io`, using the base address provided + /// by `base` and adding the register's offset to it, then run `f` on its value to + /// obtain a new value to write back. + #[inline(always)] + pub(crate) fn alter<const SIZE: usize, T, B, F>( + io: &T, + base: &B, + idx: usize, + f: F, + ) where + T: ::core::ops::Deref<Target = ::kernel::io::Io<SIZE>>, + B: crate::regs::macros::RegisterBase<$base>, + F: ::core::ops::FnOnce(Self) -> Self, + { + let reg = f(Self::read(io, base, idx)); + reg.write(io, base, idx); + } + + /// Read the array register at index `idx` from `io`, using the base address provided + /// by `base` and adding the register's offset to it. + /// + /// The validity of `idx` is checked at run-time, and `EINVAL` is returned is the + /// access was out-of-bounds. + #[inline(always)] + pub(crate) fn try_read<const SIZE: usize, T, B>( + io: &T, + base: &B, + idx: usize, + ) -> ::kernel::error::Result<Self> where + T: ::core::ops::Deref<Target = ::kernel::io::Io<SIZE>>, + B: crate::regs::macros::RegisterBase<$base>, + { + if idx < Self::SIZE { + Ok(Self::read(io, base, idx)) + } else { + Err(EINVAL) + } + } + + /// Write the value contained in `self` to `io`, using the base address provided by + /// `base` and adding the offset of array register `idx` to it. + /// + /// The validity of `idx` is checked at run-time, and `EINVAL` is returned is the + /// access was out-of-bounds. + #[inline(always)] + pub(crate) fn try_write<const SIZE: usize, T, B>( + self, + io: &T, + base: &B, + idx: usize, + ) -> ::kernel::error::Result where + T: ::core::ops::Deref<Target = ::kernel::io::Io<SIZE>>, + B: crate::regs::macros::RegisterBase<$base>, + { + if idx < Self::SIZE { + Ok(self.write(io, base, idx)) + } else { + Err(EINVAL) + } + } + + /// Read the array register at index `idx` from `io`, using the base address provided + /// by `base` and adding the register's offset to it, then run `f` on its value to + /// obtain a new value to write back. + /// + /// The validity of `idx` is checked at run-time, and `EINVAL` is returned is the + /// access was out-of-bounds. 
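The relative accessors resolve the final address as `B::BASE + OFFSET (+ idx * STRIDE)`, where `B` is any provider implementing `RegisterBase` for the register group. The sketch below shows that trait-with-associated-const idea in isolation; `GenA`/`GenB` and their base addresses are hypothetical, only the `RegisterBase`/`CpuCtlBase` names come from the code above.

// One register definition, different base address per provider.
struct CpuCtlBase; // marker type naming the register group

trait RegisterBase<T> {
    const BASE: usize;
}

struct GenA;
impl RegisterBase<CpuCtlBase> for GenA {
    const BASE: usize = 0x40000; // hypothetical base for one GPU generation
}

struct GenB;
impl RegisterBase<CpuCtlBase> for GenB {
    const BASE: usize = 0x50000; // same registers, different base
}

const SCRATCH_OFFSET: usize = 0x0d00;

fn scratch_address<B: RegisterBase<CpuCtlBase>>(_base: &B, idx: usize) -> usize {
    B::BASE + SCRATCH_OFFSET + idx * 8
}

fn main() {
    assert_eq!(scratch_address(&GenA, 2), 0x40d10);
    assert_eq!(scratch_address(&GenB, 2), 0x50d10);
    println!("same register definition, different base per provider");
}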
+ #[inline(always)] + pub(crate) fn try_alter<const SIZE: usize, T, B, F>( + io: &T, + base: &B, + idx: usize, f: F, - ) -> ::kernel::error::Result<()> where + ) -> ::kernel::error::Result where T: ::core::ops::Deref<Target = ::kernel::io::Io<SIZE>>, + B: crate::regs::macros::RegisterBase<$base>, F: ::core::ops::FnOnce(Self) -> Self, { - let reg = f(Self::try_read(io, base)?); - reg.try_write(io, base) + if idx < Self::SIZE { + Ok(Self::alter(io, base, idx, f)) + } else { + Err(EINVAL) + } } } }; diff --git a/drivers/gpu/nova-core/util.rs b/drivers/gpu/nova-core/util.rs index 332a64cfc6a9..bf35f00cb732 100644 --- a/drivers/gpu/nova-core/util.rs +++ b/drivers/gpu/nova-core/util.rs @@ -1,21 +1,27 @@ // SPDX-License-Identifier: GPL-2.0 -pub(crate) const fn to_lowercase_bytes<const N: usize>(s: &str) -> [u8; N] { - let src = s.as_bytes(); - let mut dst = [0; N]; - let mut i = 0; +use kernel::prelude::*; +use kernel::time::{Delta, Instant, Monotonic}; - while i < src.len() && i < N { - dst[i] = (src[i] as char).to_ascii_lowercase() as u8; - i += 1; - } +/// Wait until `cond` is true or `timeout` elapsed. +/// +/// When `cond` evaluates to `Some`, its return value is returned. +/// +/// `Err(ETIMEDOUT)` is returned if `timeout` has been reached without `cond` evaluating to +/// `Some`. +/// +/// TODO[DLAY]: replace with `read_poll_timeout` once it is available. +/// (https://lore.kernel.org/lkml/20250220070611.214262-8-fujita.tomonori@gmail.com/) +pub(crate) fn wait_on<R, F: Fn() -> Option<R>>(timeout: Delta, cond: F) -> Result<R> { + let start_time = Instant::<Monotonic>::now(); - dst -} + loop { + if let Some(ret) = cond() { + return Ok(ret); + } -pub(crate) const fn const_bytes_to_str(bytes: &[u8]) -> &str { - match core::str::from_utf8(bytes) { - Ok(string) => string, - Err(_) => kernel::build_error!("Bytes are not valid UTF-8."), + if start_time.elapsed().as_nanos() > timeout.as_nanos() { + return Err(ETIMEDOUT); + } } } diff --git a/drivers/gpu/nova-core/vbios.rs b/drivers/gpu/nova-core/vbios.rs new file mode 100644 index 000000000000..71fbe71b84db --- /dev/null +++ b/drivers/gpu/nova-core/vbios.rs @@ -0,0 +1,1158 @@ +// SPDX-License-Identifier: GPL-2.0 + +//! VBIOS extraction and parsing. + +use crate::driver::Bar0; +use crate::firmware::fwsec::Bcrt30Rsa3kSignature; +use crate::firmware::FalconUCodeDescV3; +use core::convert::TryFrom; +use kernel::device; +use kernel::error::Result; +use kernel::prelude::*; +use kernel::ptr::{Alignable, Alignment}; +use kernel::types::ARef; + +/// The offset of the VBIOS ROM in the BAR0 space. +const ROM_OFFSET: usize = 0x300000; +/// The maximum length of the VBIOS ROM to scan into. +const BIOS_MAX_SCAN_LEN: usize = 0x100000; +/// The size to read ahead when parsing initial BIOS image headers. +const BIOS_READ_AHEAD_SIZE: usize = 1024; +/// The bit in the last image indicator byte for the PCI Data Structure that +/// indicates the last image. Bit 0-6 are reserved, bit 7 is last image bit. +const LAST_IMAGE_BIT_MASK: u8 = 0x80; + +// PMU lookup table entry types. Used to locate PMU table entries +// in the Fwsec image, corresponding to falcon ucodes. +#[expect(dead_code)] +const FALCON_UCODE_ENTRY_APPID_FIRMWARE_SEC_LIC: u8 = 0x05; +#[expect(dead_code)] +const FALCON_UCODE_ENTRY_APPID_FWSEC_DBG: u8 = 0x45; +const FALCON_UCODE_ENTRY_APPID_FWSEC_PROD: u8 = 0x85; + +/// Vbios Reader for constructing the VBIOS data. 
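The `wait_on` helper added to util.rs polls a closure until it yields `Some`, or fails once the timeout elapses. Here is a userspace sketch of the same loop; `std::time` and a string error stand in for the kernel's `Delta`/`Instant` and `ETIMEDOUT`.

// Userspace model of the wait_on polling helper.
use std::time::{Duration, Instant};

fn wait_on<R, F: Fn() -> Option<R>>(timeout: Duration, cond: F) -> Result<R, &'static str> {
    let start = Instant::now();

    loop {
        if let Some(ret) = cond() {
            return Ok(ret);
        }

        if start.elapsed() > timeout {
            return Err("ETIMEDOUT");
        }
    }
}

fn main() {
    let start = Instant::now();

    // A condition that becomes true after ~5ms, e.g. a device flag going high.
    let ret = wait_on(Duration::from_millis(100), || {
        (start.elapsed() >= Duration::from_millis(5)).then_some(42)
    });
    assert_eq!(ret, Ok(42));

    // A condition that never becomes true times out.
    assert!(wait_on(Duration::from_millis(5), || None::<u32>).is_err());
    println!("polling helper behaves like wait_on");
}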
+struct VbiosIterator<'a> { + dev: &'a device::Device, + bar0: &'a Bar0, + /// VBIOS data vector: As BIOS images are scanned, they are added to this vector for reference + /// or copying into other data structures. It is the entire scanned contents of the VBIOS which + /// progressively extends. It is used so that we do not re-read any contents that are already + /// read as we use the cumulative length read so far, and re-read any gaps as we extend the + /// length. + data: KVec<u8>, + /// Current offset of the [`Iterator`]. + current_offset: usize, + /// Indicate whether the last image has been found. + last_found: bool, +} + +impl<'a> VbiosIterator<'a> { + fn new(dev: &'a device::Device, bar0: &'a Bar0) -> Result<Self> { + Ok(Self { + dev, + bar0, + data: KVec::new(), + current_offset: 0, + last_found: false, + }) + } + + /// Read bytes from the ROM at the current end of the data vector. + fn read_more(&mut self, len: usize) -> Result { + let current_len = self.data.len(); + let start = ROM_OFFSET + current_len; + + // Ensure length is a multiple of 4 for 32-bit reads + if len % core::mem::size_of::<u32>() != 0 { + dev_err!( + self.dev, + "VBIOS read length {} is not a multiple of 4\n", + len + ); + return Err(EINVAL); + } + + self.data.reserve(len, GFP_KERNEL)?; + // Read ROM data bytes and push directly to `data`. + for addr in (start..start + len).step_by(core::mem::size_of::<u32>()) { + // Read 32-bit word from the VBIOS ROM + let word = self.bar0.try_read32(addr)?; + + // Convert the `u32` to a 4 byte array and push each byte. + word.to_ne_bytes() + .iter() + .try_for_each(|&b| self.data.push(b, GFP_KERNEL))?; + } + + Ok(()) + } + + /// Read bytes at a specific offset, filling any gap. + fn read_more_at_offset(&mut self, offset: usize, len: usize) -> Result { + if offset > BIOS_MAX_SCAN_LEN { + dev_err!(self.dev, "Error: exceeded BIOS scan limit.\n"); + return Err(EINVAL); + } + + // If `offset` is beyond current data size, fill the gap first. + let current_len = self.data.len(); + let gap_bytes = offset.saturating_sub(current_len); + + // Now read the requested bytes at the offset. + self.read_more(gap_bytes + len) + } + + /// Read a BIOS image at a specific offset and create a [`BiosImage`] from it. + /// + /// `self.data` is extended as needed and a new [`BiosImage`] is returned. + /// `context` is a string describing the operation for error reporting. + fn read_bios_image_at_offset( + &mut self, + offset: usize, + len: usize, + context: &str, + ) -> Result<BiosImage> { + let data_len = self.data.len(); + if offset + len > data_len { + self.read_more_at_offset(offset, len).inspect_err(|e| { + dev_err!( + self.dev, + "Failed to read more at offset {:#x}: {:?}\n", + offset, + e + ) + })?; + } + + BiosImage::new(self.dev, &self.data[offset..offset + len]).inspect_err(|err| { + dev_err!( + self.dev, + "Failed to {} at offset {:#x}: {:?}\n", + context, + offset, + err + ) + }) + } +} + +impl<'a> Iterator for VbiosIterator<'a> { + type Item = Result<BiosImage>; + + /// Iterate over all VBIOS images until the last image is detected or offset + /// exceeds scan limit. + fn next(&mut self) -> Option<Self::Item> { + if self.last_found { + return None; + } + + if self.current_offset > BIOS_MAX_SCAN_LEN { + dev_err!(self.dev, "Error: exceeded BIOS scan limit, stopping scan\n"); + return None; + } + + // Parse image headers first to get image size. 
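`read_more` above grows the cumulative VBIOS byte vector by issuing 32-bit reads and appending the native-endian bytes of each word, which is why the requested length must be a multiple of 4. A small standalone sketch of that conversion, with a fixed array replacing the `bar0.try_read32` calls:

// Turning 32-bit ROM reads into a growing byte vector.
fn main() {
    let rom: [u32; 4] = [0x5644_4e01, 0x0000_aa55, 0x1234_5678, 0x9abc_def0];
    let len = 8; // must be a multiple of 4, as read_more enforces

    assert_eq!(len % core::mem::size_of::<u32>(), 0);

    let mut data: Vec<u8> = Vec::new();
    for addr in (0..len).step_by(core::mem::size_of::<u32>()) {
        let word = rom[addr / 4]; // stand-in for bar0.try_read32(ROM_OFFSET + addr)?
        data.extend_from_slice(&word.to_ne_bytes());
    }

    assert_eq!(data.len(), len);
    println!("accumulated {} VBIOS bytes: {:02x?}", data.len(), data);
}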
+ let image_size = match self.read_bios_image_at_offset( + self.current_offset, + BIOS_READ_AHEAD_SIZE, + "parse initial BIOS image headers", + ) { + Ok(image) => image.image_size_bytes(), + Err(e) => return Some(Err(e)), + }; + + // Now create a new `BiosImage` with the full image data. + let full_image = match self.read_bios_image_at_offset( + self.current_offset, + image_size, + "parse full BIOS image", + ) { + Ok(image) => image, + Err(e) => return Some(Err(e)), + }; + + self.last_found = full_image.is_last(); + + // Advance to next image (aligned to 512 bytes). + self.current_offset += image_size; + self.current_offset = self.current_offset.align_up(Alignment::new::<512>())?; + + Some(Ok(full_image)) + } +} + +pub(crate) struct Vbios { + fwsec_image: FwSecBiosImage, +} + +impl Vbios { + /// Probe for VBIOS extraction. + /// + /// Once the VBIOS object is built, `bar0` is not read for [`Vbios`] purposes anymore. + pub(crate) fn new(dev: &device::Device, bar0: &Bar0) -> Result<Vbios> { + // Images to extract from iteration + let mut pci_at_image: Option<PciAtBiosImage> = None; + let mut first_fwsec_image: Option<FwSecBiosBuilder> = None; + let mut second_fwsec_image: Option<FwSecBiosBuilder> = None; + + // Parse all VBIOS images in the ROM + for image_result in VbiosIterator::new(dev, bar0)? { + let full_image = image_result?; + + dev_dbg!( + dev, + "Found BIOS image: size: {:#x}, type: {}, last: {}\n", + full_image.image_size_bytes(), + full_image.image_type_str(), + full_image.is_last() + ); + + // Get references to images we will need after the loop, in order to + // setup the falcon data offset. + match full_image { + BiosImage::PciAt(image) => { + pci_at_image = Some(image); + } + BiosImage::FwSec(image) => { + if first_fwsec_image.is_none() { + first_fwsec_image = Some(image); + } else { + second_fwsec_image = Some(image); + } + } + // For now we don't need to handle these + BiosImage::Efi(_image) => {} + BiosImage::Nbsi(_image) => {} + } + } + + // Using all the images, setup the falcon data pointer in Fwsec. 
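After consuming an image, the iterator rounds `current_offset` up to the next 512-byte boundary. For a power-of-two alignment like this, `align_up` reduces to the usual mask trick, shown here as a standalone worked example:

// Rounding the next image offset up to a 512-byte boundary.
fn align_up_512(offset: usize) -> usize {
    (offset + 511) & !511
}

fn main() {
    // An image of 0x7e00 bytes starting at 0 ends exactly on a boundary...
    assert_eq!(align_up_512(0x7e00), 0x7e00);
    // ...while one ending mid-block pushes the next image to the next boundary.
    assert_eq!(align_up_512(0x7e01), 0x8000);
    assert_eq!(align_up_512(1), 512);
    println!("next image offsets are 512-byte aligned");
}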
+ if let (Some(mut second), Some(first), Some(pci_at)) = + (second_fwsec_image, first_fwsec_image, pci_at_image) + { + second + .setup_falcon_data(&pci_at, &first) + .inspect_err(|e| dev_err!(dev, "Falcon data setup failed: {:?}\n", e))?; + Ok(Vbios { + fwsec_image: second.build()?, + }) + } else { + dev_err!( + dev, + "Missing required images for falcon data setup, skipping\n" + ); + Err(EINVAL) + } + } + + pub(crate) fn fwsec_image(&self) -> &FwSecBiosImage { + &self.fwsec_image + } +} + +/// PCI Data Structure as defined in PCI Firmware Specification +#[derive(Debug, Clone)] +#[repr(C)] +struct PcirStruct { + /// PCI Data Structure signature ("PCIR" or "NPDS") + signature: [u8; 4], + /// PCI Vendor ID (e.g., 0x10DE for NVIDIA) + vendor_id: u16, + /// PCI Device ID + device_id: u16, + /// Device List Pointer + device_list_ptr: u16, + /// PCI Data Structure Length + pci_data_struct_len: u16, + /// PCI Data Structure Revision + pci_data_struct_rev: u8, + /// Class code (3 bytes, 0x03 for display controller) + class_code: [u8; 3], + /// Size of this image in 512-byte blocks + image_len: u16, + /// Revision Level of the Vendor's ROM + vendor_rom_rev: u16, + /// ROM image type (0x00 = PC-AT compatible, 0x03 = EFI, 0x70 = NBSI) + code_type: u8, + /// Last image indicator (0x00 = Not last image, 0x80 = Last image) + last_image: u8, + /// Maximum Run-time Image Length (units of 512 bytes) + max_runtime_image_len: u16, +} + +impl PcirStruct { + fn new(dev: &device::Device, data: &[u8]) -> Result<Self> { + if data.len() < core::mem::size_of::<PcirStruct>() { + dev_err!(dev, "Not enough data for PcirStruct\n"); + return Err(EINVAL); + } + + let mut signature = [0u8; 4]; + signature.copy_from_slice(&data[0..4]); + + // Signature should be "PCIR" (0x52494350) or "NPDS" (0x5344504e). + if &signature != b"PCIR" && &signature != b"NPDS" { + dev_err!(dev, "Invalid signature for PcirStruct: {:?}\n", signature); + return Err(EINVAL); + } + + let mut class_code = [0u8; 3]; + class_code.copy_from_slice(&data[13..16]); + + let image_len = u16::from_le_bytes([data[16], data[17]]); + if image_len == 0 { + dev_err!(dev, "Invalid image length: 0\n"); + return Err(EINVAL); + } + + Ok(PcirStruct { + signature, + vendor_id: u16::from_le_bytes([data[4], data[5]]), + device_id: u16::from_le_bytes([data[6], data[7]]), + device_list_ptr: u16::from_le_bytes([data[8], data[9]]), + pci_data_struct_len: u16::from_le_bytes([data[10], data[11]]), + pci_data_struct_rev: data[12], + class_code, + image_len, + vendor_rom_rev: u16::from_le_bytes([data[18], data[19]]), + code_type: data[20], + last_image: data[21], + max_runtime_image_len: u16::from_le_bytes([data[22], data[23]]), + }) + } + + /// Check if this is the last image in the ROM. + fn is_last(&self) -> bool { + self.last_image & LAST_IMAGE_BIT_MASK != 0 + } + + /// Calculate image size in bytes from 512-byte blocks. + fn image_size_bytes(&self) -> usize { + self.image_len as usize * 512 + } +} + +/// BIOS Information Table (BIT) Header. +/// +/// This is the head of the BIT table, that is used to locate the Falcon data. The BIT table (with +/// its header) is in the [`PciAtBiosImage`] and the falcon data it is pointing to is in the +/// [`FwSecBiosImage`]. +#[derive(Debug, Clone, Copy)] +#[repr(C)] +struct BitHeader { + /// 0h: BIT Header Identifier (BMP=0x7FFF/BIT=0xB8FF) + id: u16, + /// 2h: BIT Header Signature ("BIT\0") + signature: [u8; 4], + /// 6h: Binary Coded Decimal Version, ex: 0x0100 is 1.00. 
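`PcirStruct::new` below reads fixed little-endian fields at known byte offsets and derives the image size from the 512-byte block count. This standalone sketch parses a hand-crafted 24-byte buffer the same way; the vendor/device/length values are invented for illustration.

// Parsing the fixed-offset, little-endian PCIR fields from a synthetic buffer.
fn main() {
    let mut data = [0u8; 24];
    data[0..4].copy_from_slice(b"PCIR"); // signature
    data[4..6].copy_from_slice(&0x10deu16.to_le_bytes()); // vendor ID (NVIDIA)
    data[6..8].copy_from_slice(&0x2684u16.to_le_bytes()); // device ID (made up)
    data[16..18].copy_from_slice(&0x003fu16.to_le_bytes()); // image_len in 512-byte blocks
    data[20] = 0x00; // code_type: PC-AT compatible
    data[21] = 0x80; // last_image flag set

    let signature = &data[0..4];
    assert!(signature == b"PCIR" || signature == b"NPDS");

    let vendor_id = u16::from_le_bytes([data[4], data[5]]);
    let image_len = u16::from_le_bytes([data[16], data[17]]);
    let is_last = data[21] & 0x80 != 0; // LAST_IMAGE_BIT_MASK

    assert_eq!(vendor_id, 0x10de);
    assert_eq!(image_len as usize * 512, 0x7e00); // image_size_bytes()
    assert!(is_last);
    println!("PCIR: vendor {vendor_id:#x}, image {} bytes, last image", image_len as usize * 512);
}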
+ bcd_version: u16, + /// 8h: Size of BIT Header (in bytes) + header_size: u8, + /// 9h: Size of BIT Tokens (in bytes) + token_size: u8, + /// 10h: Number of token entries that follow + token_entries: u8, + /// 11h: BIT Header Checksum + checksum: u8, +} + +impl BitHeader { + fn new(data: &[u8]) -> Result<Self> { + if data.len() < core::mem::size_of::<Self>() { + return Err(EINVAL); + } + + let mut signature = [0u8; 4]; + signature.copy_from_slice(&data[2..6]); + + // Check header ID and signature + let id = u16::from_le_bytes([data[0], data[1]]); + if id != 0xB8FF || &signature != b"BIT\0" { + return Err(EINVAL); + } + + Ok(BitHeader { + id, + signature, + bcd_version: u16::from_le_bytes([data[6], data[7]]), + header_size: data[8], + token_size: data[9], + token_entries: data[10], + checksum: data[11], + }) + } +} + +/// BIT Token Entry: Records in the BIT table followed by the BIT header. +#[derive(Debug, Clone, Copy)] +#[expect(dead_code)] +struct BitToken { + /// 00h: Token identifier + id: u8, + /// 01h: Version of the token data + data_version: u8, + /// 02h: Size of token data in bytes + data_size: u16, + /// 04h: Offset to the token data + data_offset: u16, +} + +// Define the token ID for the Falcon data +const BIT_TOKEN_ID_FALCON_DATA: u8 = 0x70; + +impl BitToken { + /// Find a BIT token entry by BIT ID in a PciAtBiosImage + fn from_id(image: &PciAtBiosImage, token_id: u8) -> Result<Self> { + let header = &image.bit_header; + + // Offset to the first token entry + let tokens_start = image.bit_offset + header.header_size as usize; + + for i in 0..header.token_entries as usize { + let entry_offset = tokens_start + (i * header.token_size as usize); + + // Make sure we don't go out of bounds + if entry_offset + header.token_size as usize > image.base.data.len() { + return Err(EINVAL); + } + + // Check if this token has the requested ID + if image.base.data[entry_offset] == token_id { + return Ok(BitToken { + id: image.base.data[entry_offset], + data_version: image.base.data[entry_offset + 1], + data_size: u16::from_le_bytes([ + image.base.data[entry_offset + 2], + image.base.data[entry_offset + 3], + ]), + data_offset: u16::from_le_bytes([ + image.base.data[entry_offset + 4], + image.base.data[entry_offset + 5], + ]), + }); + } + } + + // Token not found + Err(ENOENT) + } +} + +/// PCI ROM Expansion Header as defined in PCI Firmware Specification. +/// +/// This is header is at the beginning of every image in the set of images in the ROM. It contains +/// a pointer to the PCI Data Structure which describes the image. For "NBSI" images (NoteBook +/// System Information), the ROM header deviates from the standard and contains an offset to the +/// NBSI image however we do not yet parse that in this module and keep it for future reference. +#[derive(Debug, Clone, Copy)] +#[expect(dead_code)] +struct PciRomHeader { + /// 00h: Signature (0xAA55) + signature: u16, + /// 02h: Reserved bytes for processor architecture unique data (20 bytes) + reserved: [u8; 20], + /// 16h: NBSI Data Offset (NBSI-specific, offset from header to NBSI image) + nbsi_data_offset: Option<u16>, + /// 18h: Pointer to PCI Data Structure (offset from start of ROM image) + pci_data_struct_offset: u16, + /// 1Ah: Size of block (this is NBSI-specific) + size_of_block: Option<u32>, +} + +impl PciRomHeader { + fn new(dev: &device::Device, data: &[u8]) -> Result<Self> { + if data.len() < 26 { + // Need at least 26 bytes to read pciDataStrucPtr and sizeOfBlock. 
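`BitToken::from_id` below walks the token table that follows the BIT header: entries start at `bit_offset + header_size`, are `token_size` bytes apart, carry the ID in byte 0 and the little-endian data offset in bytes 4..6. A standalone sketch over a synthetic two-entry table:

// Scanning a synthetic BIT token table for the falcon data token (ID 0x70).
fn main() {
    let bit_offset = 0usize;
    let header_size = 12usize; // BitHeader::header_size
    let token_size = 6usize; // BitHeader::token_size
    let token_entries = 2usize;

    // 12-byte header (contents irrelevant here) followed by two 6-byte tokens.
    let mut data = vec![0u8; header_size + token_entries * token_size];
    // Token 0: id 0x42, data_offset 0x0100.
    data[12] = 0x42;
    data[16..18].copy_from_slice(&0x0100u16.to_le_bytes());
    // Token 1: id 0x70 (BIT_TOKEN_ID_FALCON_DATA), data_offset 0x0234.
    data[18] = 0x70;
    data[22..24].copy_from_slice(&0x0234u16.to_le_bytes());

    let wanted = 0x70u8;
    let tokens_start = bit_offset + header_size;
    let mut found = None;
    for i in 0..token_entries {
        let entry = tokens_start + i * token_size;
        if data[entry] == wanted {
            found = Some(u16::from_le_bytes([data[entry + 4], data[entry + 5]]));
            break;
        }
    }

    assert_eq!(found, Some(0x0234)); // offset of the falcon data pointer
    println!("falcon data token points at {:#x}", found.unwrap());
}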
+ return Err(EINVAL); + } + + let signature = u16::from_le_bytes([data[0], data[1]]); + + // Check for valid ROM signatures. + match signature { + 0xAA55 | 0xBB77 | 0x4E56 => {} + _ => { + dev_err!(dev, "ROM signature unknown {:#x}\n", signature); + return Err(EINVAL); + } + } + + // Read the pointer to the PCI Data Structure at offset 0x18. + let pci_data_struct_ptr = u16::from_le_bytes([data[24], data[25]]); + + // Try to read optional fields if enough data. + let mut size_of_block = None; + let mut nbsi_data_offset = None; + + if data.len() >= 30 { + // Read size_of_block at offset 0x1A. + size_of_block = Some( + u32::from(data[29]) << 24 + | u32::from(data[28]) << 16 + | u32::from(data[27]) << 8 + | u32::from(data[26]), + ); + } + + // For NBSI images, try to read the nbsiDataOffset at offset 0x16. + if data.len() >= 24 { + nbsi_data_offset = Some(u16::from_le_bytes([data[22], data[23]])); + } + + Ok(PciRomHeader { + signature, + reserved: [0u8; 20], + pci_data_struct_offset: pci_data_struct_ptr, + size_of_block, + nbsi_data_offset, + }) + } +} + +/// NVIDIA PCI Data Extension Structure. +/// +/// This is similar to the PCI Data Structure, but is Nvidia-specific and is placed right after the +/// PCI Data Structure. It contains some fields that are redundant with the PCI Data Structure, but +/// are needed for traversing the BIOS images. It is expected to be present in all BIOS images +/// except for NBSI images. +#[derive(Debug, Clone)] +#[repr(C)] +struct NpdeStruct { + /// 00h: Signature ("NPDE") + signature: [u8; 4], + /// 04h: NVIDIA PCI Data Extension Revision + npci_data_ext_rev: u16, + /// 06h: NVIDIA PCI Data Extension Length + npci_data_ext_len: u16, + /// 08h: Sub-image Length (in 512-byte units) + subimage_len: u16, + /// 0Ah: Last image indicator flag + last_image: u8, +} + +impl NpdeStruct { + fn new(dev: &device::Device, data: &[u8]) -> Option<Self> { + if data.len() < core::mem::size_of::<Self>() { + dev_dbg!(dev, "Not enough data for NpdeStruct\n"); + return None; + } + + let mut signature = [0u8; 4]; + signature.copy_from_slice(&data[0..4]); + + // Signature should be "NPDE" (0x4544504E). + if &signature != b"NPDE" { + dev_dbg!(dev, "Invalid signature for NpdeStruct: {:?}\n", signature); + return None; + } + + let subimage_len = u16::from_le_bytes([data[8], data[9]]); + if subimage_len == 0 { + dev_dbg!(dev, "Invalid subimage length: 0\n"); + return None; + } + + Some(NpdeStruct { + signature, + npci_data_ext_rev: u16::from_le_bytes([data[4], data[5]]), + npci_data_ext_len: u16::from_le_bytes([data[6], data[7]]), + subimage_len, + last_image: data[10], + }) + } + + /// Check if this is the last image in the ROM. + fn is_last(&self) -> bool { + self.last_image & LAST_IMAGE_BIT_MASK != 0 + } + + /// Calculate image size in bytes from 512-byte blocks. + fn image_size_bytes(&self) -> usize { + self.subimage_len as usize * 512 + } + + /// Try to find NPDE in the data, the NPDE is right after the PCIR. 
+ fn find_in_data( + dev: &device::Device, + data: &[u8], + rom_header: &PciRomHeader, + pcir: &PcirStruct, + ) -> Option<Self> { + // Calculate the offset where NPDE might be located + // NPDE should be right after the PCIR structure, aligned to 16 bytes + let pcir_offset = rom_header.pci_data_struct_offset as usize; + let npde_start = (pcir_offset + pcir.pci_data_struct_len as usize + 0x0F) & !0x0F; + + // Check if we have enough data + if npde_start + core::mem::size_of::<Self>() > data.len() { + dev_dbg!(dev, "Not enough data for NPDE\n"); + return None; + } + + // Try to create NPDE from the data + NpdeStruct::new(dev, &data[npde_start..]) + } +} + +// Use a macro to implement BiosImage enum and methods. This avoids having to +// repeat each enum type when implementing functions like base() in BiosImage. +macro_rules! bios_image { + ( + $($variant:ident: $class:ident),* $(,)? + ) => { + // BiosImage enum with variants for each image type + enum BiosImage { + $($variant($class)),* + } + + impl BiosImage { + /// Get a reference to the common BIOS image data regardless of type + fn base(&self) -> &BiosImageBase { + match self { + $(Self::$variant(img) => &img.base),* + } + } + + /// Returns a string representing the type of BIOS image + fn image_type_str(&self) -> &'static str { + match self { + $(Self::$variant(_) => stringify!($variant)),* + } + } + } + } +} + +impl BiosImage { + /// Check if this is the last image. + fn is_last(&self) -> bool { + let base = self.base(); + + // For NBSI images (type == 0x70), return true as they're + // considered the last image + if matches!(self, Self::Nbsi(_)) { + return true; + } + + // For other image types, check the NPDE first if available + if let Some(ref npde) = base.npde { + return npde.is_last(); + } + + // Otherwise, fall back to checking the PCIR last_image flag + base.pcir.is_last() + } + + /// Get the image size in bytes. + fn image_size_bytes(&self) -> usize { + let base = self.base(); + + // Prefer NPDE image size if available + if let Some(ref npde) = base.npde { + return npde.image_size_bytes(); + } + + // Otherwise, fall back to the PCIR image size + base.pcir.image_size_bytes() + } + + /// Create a [`BiosImageBase`] from a byte slice and convert it to a [`BiosImage`] which + /// triggers the constructor of the specific BiosImage enum variant. + fn new(dev: &device::Device, data: &[u8]) -> Result<Self> { + let base = BiosImageBase::new(dev, data)?; + let image = base.into_image().inspect_err(|e| { + dev_err!(dev, "Failed to create BiosImage: {:?}\n", e); + })?; + + Ok(image) + } +} + +bios_image! { + PciAt: PciAtBiosImage, // PCI-AT compatible BIOS image + Efi: EfiBiosImage, // EFI (Extensible Firmware Interface) + Nbsi: NbsiBiosImage, // NBSI (Nvidia Bios System Interface) + FwSec: FwSecBiosBuilder, // FWSEC (Firmware Security) +} + +/// The PciAt BIOS image is typically the first BIOS image type found in the BIOS image chain. +/// +/// It contains the BIT header and the BIT tokens. +struct PciAtBiosImage { + base: BiosImageBase, + bit_header: BitHeader, + bit_offset: usize, +} + +struct EfiBiosImage { + base: BiosImageBase, + // EFI-specific fields can be added here in the future. +} + +struct NbsiBiosImage { + base: BiosImageBase, + // NBSI-specific fields can be added here in the future. +} + +struct FwSecBiosBuilder { + base: BiosImageBase, + /// These are temporary fields that are used during the construction of the + /// [`FwSecBiosBuilder`]. 
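`find_in_data` above expects the NPDE immediately after the PCIR structure, rounded up to a 16-byte boundary. A quick worked example of that offset calculation, with example input values:

// Where the NPDE is expected, for an example PCIR location and length.
fn main() {
    let pcir_offset = 0x1a4usize; // example PCIR offset within the image
    let pci_data_struct_len = 0x18usize; // example PCIR length

    let npde_start = (pcir_offset + pci_data_struct_len + 0x0f) & !0x0f;

    assert_eq!(pcir_offset + pci_data_struct_len, 0x1bc);
    assert_eq!(npde_start, 0x1c0); // rounded up from 0x1bc to the next 16-byte boundary
    println!("NPDE expected at {npde_start:#x}");
}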
+ /// + /// Once FwSecBiosBuilder is constructed, the `falcon_ucode_offset` will be copied into a new + /// [`FwSecBiosImage`]. + /// + /// The offset of the Falcon data from the start of Fwsec image. + falcon_data_offset: Option<usize>, + /// The [`PmuLookupTable`] starts at the offset of the falcon data pointer. + pmu_lookup_table: Option<PmuLookupTable>, + /// The offset of the Falcon ucode. + falcon_ucode_offset: Option<usize>, +} + +/// The [`FwSecBiosImage`] structure contains the PMU table and the Falcon Ucode. +/// +/// The PMU table contains voltage/frequency tables as well as a pointer to the Falcon Ucode. +pub(crate) struct FwSecBiosImage { + base: BiosImageBase, + /// The offset of the Falcon ucode. + falcon_ucode_offset: usize, +} + +// Convert from BiosImageBase to BiosImage +impl TryFrom<BiosImageBase> for BiosImage { + type Error = Error; + + fn try_from(base: BiosImageBase) -> Result<Self> { + match base.pcir.code_type { + 0x00 => Ok(BiosImage::PciAt(base.try_into()?)), + 0x03 => Ok(BiosImage::Efi(EfiBiosImage { base })), + 0x70 => Ok(BiosImage::Nbsi(NbsiBiosImage { base })), + 0xE0 => Ok(BiosImage::FwSec(FwSecBiosBuilder { + base, + falcon_data_offset: None, + pmu_lookup_table: None, + falcon_ucode_offset: None, + })), + _ => Err(EINVAL), + } + } +} + +/// BIOS Image structure containing various headers and reference fields to all BIOS images. +/// +/// Each BiosImage type has a BiosImageBase type along with other image-specific fields. Note that +/// Rust favors composition of types over inheritance. +#[expect(dead_code)] +struct BiosImageBase { + /// Used for logging. + dev: ARef<device::Device>, + /// PCI ROM Expansion Header + rom_header: PciRomHeader, + /// PCI Data Structure + pcir: PcirStruct, + /// NVIDIA PCI Data Extension (optional) + npde: Option<NpdeStruct>, + /// Image data (includes ROM header and PCIR) + data: KVec<u8>, +} + +impl BiosImageBase { + fn into_image(self) -> Result<BiosImage> { + BiosImage::try_from(self) + } + + /// Creates a new BiosImageBase from raw byte data. + fn new(dev: &device::Device, data: &[u8]) -> Result<Self> { + // Ensure we have enough data for the ROM header. + if data.len() < 26 { + dev_err!(dev, "Not enough data for ROM header\n"); + return Err(EINVAL); + } + + // Parse the ROM header. + let rom_header = PciRomHeader::new(dev, &data[0..26]) + .inspect_err(|e| dev_err!(dev, "Failed to create PciRomHeader: {:?}\n", e))?; + + // Get the PCI Data Structure using the pointer from the ROM header. + let pcir_offset = rom_header.pci_data_struct_offset as usize; + let pcir_data = data + .get(pcir_offset..pcir_offset + core::mem::size_of::<PcirStruct>()) + .ok_or(EINVAL) + .inspect_err(|_| { + dev_err!( + dev, + "PCIR offset {:#x} out of bounds (data length: {})\n", + pcir_offset, + data.len() + ); + dev_err!( + dev, + "Consider reading more data for construction of BiosImage\n" + ); + })?; + + let pcir = PcirStruct::new(dev, pcir_data) + .inspect_err(|e| dev_err!(dev, "Failed to create PcirStruct: {:?}\n", e))?; + + // Look for NPDE structure if this is not an NBSI image (type != 0x70). + let npde = NpdeStruct::find_in_data(dev, data, &rom_header, &pcir); + + // Create a copy of the data. + let mut data_copy = KVec::new(); + data_copy.extend_from_slice(data, GFP_KERNEL)?; + + Ok(BiosImageBase { + dev: dev.into(), + rom_header, + pcir, + npde, + data: data_copy, + }) + } +} + +impl PciAtBiosImage { + /// Find a byte pattern in a slice. 
+ fn find_byte_pattern(haystack: &[u8], needle: &[u8]) -> Result<usize> { + haystack + .windows(needle.len()) + .position(|window| window == needle) + .ok_or(EINVAL) + } + + /// Find the BIT header in the [`PciAtBiosImage`]. + fn find_bit_header(data: &[u8]) -> Result<(BitHeader, usize)> { + let bit_pattern = [0xff, 0xb8, b'B', b'I', b'T', 0x00]; + let bit_offset = Self::find_byte_pattern(data, &bit_pattern)?; + let bit_header = BitHeader::new(&data[bit_offset..])?; + + Ok((bit_header, bit_offset)) + } + + /// Get a BIT token entry from the BIT table in the [`PciAtBiosImage`] + fn get_bit_token(&self, token_id: u8) -> Result<BitToken> { + BitToken::from_id(self, token_id) + } + + /// Find the Falcon data pointer structure in the [`PciAtBiosImage`]. + /// + /// This is just a 4 byte structure that contains a pointer to the Falcon data in the FWSEC + /// image. + fn falcon_data_ptr(&self) -> Result<u32> { + let token = self.get_bit_token(BIT_TOKEN_ID_FALCON_DATA)?; + + // Make sure we don't go out of bounds + if token.data_offset as usize + 4 > self.base.data.len() { + return Err(EINVAL); + } + + // read the 4 bytes at the offset specified in the token + let offset = token.data_offset as usize; + let bytes: [u8; 4] = self.base.data[offset..offset + 4].try_into().map_err(|_| { + dev_err!(self.base.dev, "Failed to convert data slice to array"); + EINVAL + })?; + + let data_ptr = u32::from_le_bytes(bytes); + + if (data_ptr as usize) < self.base.data.len() { + dev_err!(self.base.dev, "Falcon data pointer out of bounds\n"); + return Err(EINVAL); + } + + Ok(data_ptr) + } +} + +impl TryFrom<BiosImageBase> for PciAtBiosImage { + type Error = Error; + + fn try_from(base: BiosImageBase) -> Result<Self> { + let data_slice = &base.data; + let (bit_header, bit_offset) = PciAtBiosImage::find_bit_header(data_slice)?; + + Ok(PciAtBiosImage { + base, + bit_header, + bit_offset, + }) + } +} + +/// The [`PmuLookupTableEntry`] structure is a single entry in the [`PmuLookupTable`]. +/// +/// See the [`PmuLookupTable`] description for more information. +#[repr(C, packed)] +struct PmuLookupTableEntry { + application_id: u8, + target_id: u8, + data: u32, +} + +impl PmuLookupTableEntry { + fn new(data: &[u8]) -> Result<Self> { + if data.len() < core::mem::size_of::<Self>() { + return Err(EINVAL); + } + + Ok(PmuLookupTableEntry { + application_id: data[0], + target_id: data[1], + data: u32::from_le_bytes(data[2..6].try_into().map_err(|_| EINVAL)?), + }) + } +} + +/// The [`PmuLookupTableEntry`] structure is used to find the [`PmuLookupTableEntry`] for a given +/// application ID. +/// +/// The table of entries is pointed to by the falcon data pointer in the BIT table, and is used to +/// locate the Falcon Ucode. 
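`find_byte_pattern` above is a sliding-window search, used by `find_bit_header` to locate the BIT identifier bytes inside the PciAt image. A runnable standalone version over a synthetic buffer:

// Sliding-window search for the BIT header pattern.
fn find_byte_pattern(haystack: &[u8], needle: &[u8]) -> Option<usize> {
    haystack
        .windows(needle.len())
        .position(|window| window == needle)
}

fn main() {
    // BIT header pattern: little-endian id 0xB8FF followed by "BIT\0".
    let bit_pattern = [0xff, 0xb8, b'B', b'I', b'T', 0x00];

    let mut image = vec![0u8; 64];
    image[40..46].copy_from_slice(&bit_pattern);

    assert_eq!(find_byte_pattern(&image, &bit_pattern), Some(40));
    assert_eq!(find_byte_pattern(&image, b"NPDE"), None);
    println!("BIT header found at offset 40");
}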
+#[expect(dead_code)] +struct PmuLookupTable { + version: u8, + header_len: u8, + entry_len: u8, + entry_count: u8, + table_data: KVec<u8>, +} + +impl PmuLookupTable { + fn new(dev: &device::Device, data: &[u8]) -> Result<Self> { + if data.len() < 4 { + return Err(EINVAL); + } + + let header_len = data[1] as usize; + let entry_len = data[2] as usize; + let entry_count = data[3] as usize; + + let required_bytes = header_len + (entry_count * entry_len); + + if data.len() < required_bytes { + dev_err!(dev, "PmuLookupTable data length less than required\n"); + return Err(EINVAL); + } + + // Create a copy of only the table data + let table_data = { + let mut ret = KVec::new(); + ret.extend_from_slice(&data[header_len..required_bytes], GFP_KERNEL)?; + ret + }; + + // Debug logging of entries (dumps the table data to dmesg) + for i in (header_len..required_bytes).step_by(entry_len) { + dev_dbg!(dev, "PMU entry: {:02x?}\n", &data[i..][..entry_len]); + } + + Ok(PmuLookupTable { + version: data[0], + header_len: header_len as u8, + entry_len: entry_len as u8, + entry_count: entry_count as u8, + table_data, + }) + } + + fn lookup_index(&self, idx: u8) -> Result<PmuLookupTableEntry> { + if idx >= self.entry_count { + return Err(EINVAL); + } + + let index = (idx as usize) * self.entry_len as usize; + PmuLookupTableEntry::new(&self.table_data[index..]) + } + + // find entry by type value + fn find_entry_by_type(&self, entry_type: u8) -> Result<PmuLookupTableEntry> { + for i in 0..self.entry_count { + let entry = self.lookup_index(i)?; + if entry.application_id == entry_type { + return Ok(entry); + } + } + + Err(EINVAL) + } +} + +impl FwSecBiosBuilder { + fn setup_falcon_data( + &mut self, + pci_at_image: &PciAtBiosImage, + first_fwsec: &FwSecBiosBuilder, + ) -> Result { + let mut offset = pci_at_image.falcon_data_ptr()? as usize; + let mut pmu_in_first_fwsec = false; + + // The falcon data pointer assumes that the PciAt and FWSEC images + // are contiguous in memory. However, testing shows the EFI image sits in + // between them. So calculate the offset from the end of the PciAt image + // rather than the start of it. Compensate. + offset -= pci_at_image.base.data.len(); + + // The offset is now from the start of the first Fwsec image, however + // the offset points to a location in the second Fwsec image. Since + // the fwsec images are contiguous, subtract the length of the first Fwsec + // image from the offset to get the offset to the start of the second + // Fwsec image. + if offset < first_fwsec.base.data.len() { + pmu_in_first_fwsec = true; + } else { + offset -= first_fwsec.base.data.len(); + } + + self.falcon_data_offset = Some(offset); + + if pmu_in_first_fwsec { + self.pmu_lookup_table = Some(PmuLookupTable::new( + &self.base.dev, + &first_fwsec.base.data[offset..], + )?); + } else { + self.pmu_lookup_table = Some(PmuLookupTable::new( + &self.base.dev, + &self.base.data[offset..], + )?); + } + + match self + .pmu_lookup_table + .as_ref() + .ok_or(EINVAL)? 
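The PMU lookup table parsed below starts with a 4-byte header (version, header length, entry length, entry count) followed by fixed-size entries whose first byte is the application ID and whose bytes 2..6 are a little-endian data pointer; `find_entry_by_type` scans for the FWSEC_PROD ID. A standalone sketch over a synthetic table (the pointer values are invented):

// Building and scanning a synthetic PMU lookup table.
fn main() {
    let header_len = 4usize;
    let entry_len = 6usize;
    let mut table = vec![0x01, header_len as u8, entry_len as u8, 2u8];

    // Entry 0: application 0x45 (FWSEC_DBG), data 0x1000.
    table.extend_from_slice(&[0x45, 0x00]);
    table.extend_from_slice(&0x1000u32.to_le_bytes());
    // Entry 1: application 0x85 (FWSEC_PROD), data 0x2000.
    table.extend_from_slice(&[0x85, 0x00]);
    table.extend_from_slice(&0x2000u32.to_le_bytes());

    let entry_count = table[3] as usize;
    assert!(table.len() >= header_len + entry_count * entry_len);

    // Equivalent of find_entry_by_type(FALCON_UCODE_ENTRY_APPID_FWSEC_PROD).
    let wanted = 0x85u8;
    let data = (0..entry_count)
        .map(|i| &table[header_len + i * entry_len..][..entry_len])
        .find(|e| e[0] == wanted)
        .map(|e| u32::from_le_bytes([e[2], e[3], e[4], e[5]]));

    assert_eq!(data, Some(0x2000)); // pointer later turned into the falcon ucode offset
    println!("FWSEC_PROD entry data = {:#x}", data.unwrap());
}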
+ .find_entry_by_type(FALCON_UCODE_ENTRY_APPID_FWSEC_PROD) + { + Ok(entry) => { + let mut ucode_offset = entry.data as usize; + ucode_offset -= pci_at_image.base.data.len(); + if ucode_offset < first_fwsec.base.data.len() { + dev_err!(self.base.dev, "Falcon Ucode offset not in second Fwsec.\n"); + return Err(EINVAL); + } + ucode_offset -= first_fwsec.base.data.len(); + self.falcon_ucode_offset = Some(ucode_offset); + } + Err(e) => { + dev_err!( + self.base.dev, + "PmuLookupTableEntry not found, error: {:?}\n", + e + ); + return Err(EINVAL); + } + } + Ok(()) + } + + /// Build the final FwSecBiosImage from this builder + fn build(self) -> Result<FwSecBiosImage> { + let ret = FwSecBiosImage { + base: self.base, + falcon_ucode_offset: self.falcon_ucode_offset.ok_or(EINVAL)?, + }; + + if cfg!(debug_assertions) { + // Print the desc header for debugging + let desc = ret.header()?; + dev_dbg!(ret.base.dev, "PmuLookupTableEntry desc: {:#?}\n", desc); + } + + Ok(ret) + } +} + +impl FwSecBiosImage { + /// Get the FwSec header ([`FalconUCodeDescV3`]). + pub(crate) fn header(&self) -> Result<&FalconUCodeDescV3> { + // Get the falcon ucode offset that was found in setup_falcon_data. + let falcon_ucode_offset = self.falcon_ucode_offset; + + // Make sure the offset is within the data bounds. + if falcon_ucode_offset + core::mem::size_of::<FalconUCodeDescV3>() > self.base.data.len() { + dev_err!( + self.base.dev, + "fwsec-frts header not contained within BIOS bounds\n" + ); + return Err(ERANGE); + } + + // Read the first 4 bytes to get the version. + let hdr_bytes: [u8; 4] = self.base.data[falcon_ucode_offset..falcon_ucode_offset + 4] + .try_into() + .map_err(|_| EINVAL)?; + let hdr = u32::from_le_bytes(hdr_bytes); + let ver = (hdr & 0xff00) >> 8; + + if ver != 3 { + dev_err!(self.base.dev, "invalid fwsec firmware version: {:?}\n", ver); + return Err(EINVAL); + } + + // Return a reference to the FalconUCodeDescV3 structure. + // + // SAFETY: We have checked that `falcon_ucode_offset + size_of::<FalconUCodeDescV3>` is + // within the bounds of `data`. Also, this data vector is from ROM, and the `data` field + // in `BiosImageBase` is immutable after construction. + Ok(unsafe { + &*(self + .base + .data + .as_ptr() + .add(falcon_ucode_offset) + .cast::<FalconUCodeDescV3>()) + }) + } + + /// Get the ucode data as a byte slice + pub(crate) fn ucode(&self, desc: &FalconUCodeDescV3) -> Result<&[u8]> { + let falcon_ucode_offset = self.falcon_ucode_offset; + + // The ucode data follows the descriptor. + let ucode_data_offset = falcon_ucode_offset + desc.size(); + let size = (desc.imem_load_size + desc.dmem_load_size) as usize; + + // Get the data slice, checking bounds in a single operation. + self.base + .data + .get(ucode_data_offset..ucode_data_offset + size) + .ok_or(ERANGE) + .inspect_err(|_| { + dev_err!( + self.base.dev, + "fwsec ucode data not contained within BIOS bounds\n" + ) + }) + } + + /// Get the signatures as a byte slice + pub(crate) fn sigs(&self, desc: &FalconUCodeDescV3) -> Result<&[Bcrt30Rsa3kSignature]> { + // The signatures data follows the descriptor. + let sigs_data_offset = self.falcon_ucode_offset + core::mem::size_of::<FalconUCodeDescV3>(); + let sigs_size = + desc.signature_count as usize * core::mem::size_of::<Bcrt30Rsa3kSignature>(); + + // Make sure the data is within bounds. 
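`FwSecBiosImage::header()` below validates the descriptor by reading the first little-endian dword and checking that bits 15:8 hold version 3. A tiny worked example of that extraction, with hypothetical header bytes:

// Extracting the descriptor version from the first dword.
fn main() {
    // Hypothetical first four bytes of a v3 falcon ucode descriptor.
    let hdr_bytes = [0x4c, 0x03, 0x00, 0x00];
    let hdr = u32::from_le_bytes(hdr_bytes);

    let ver = (hdr & 0xff00) >> 8;
    assert_eq!(ver, 3); // any other version is rejected with EINVAL
    println!("fwsec descriptor version {ver}");
}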
+ if sigs_data_offset + sigs_size > self.base.data.len() { + dev_err!( + self.base.dev, + "fwsec signatures data not contained within BIOS bounds\n" + ); + return Err(ERANGE); + } + + // SAFETY: we checked that `data + sigs_data_offset + (signature_count * + // sizeof::<Bcrt30Rsa3kSignature>()` is within the bounds of `data`. + Ok(unsafe { + core::slice::from_raw_parts( + self.base + .data + .as_ptr() + .add(sigs_data_offset) + .cast::<Bcrt30Rsa3kSignature>(), + desc.signature_count as usize, + ) + }) + } +} diff --git a/drivers/gpu/trace/Kconfig b/drivers/gpu/trace/Kconfig index c24e9edd022e..cd3d19c4a201 100644 --- a/drivers/gpu/trace/Kconfig +++ b/drivers/gpu/trace/Kconfig @@ -1,4 +1,13 @@ # SPDX-License-Identifier: GPL-2.0-only config TRACE_GPU_MEM - bool + bool "Enable GPU memory usage tracepoints" + default n + help + Choose this option to enable tracepoints for tracking + global and per-process GPU memory usage. Intended for + performance profiling and required for Android. + + Tracepoint availability varies by GPU driver. + + If in doubt, say "N". diff --git a/drivers/gpu/vga/vga_switcheroo.c b/drivers/gpu/vga/vga_switcheroo.c index 18f2c92beff8..68e45a26e85f 100644 --- a/drivers/gpu/vga/vga_switcheroo.c +++ b/drivers/gpu/vga/vga_switcheroo.c @@ -437,7 +437,7 @@ find_active_client(struct list_head *head) */ bool vga_switcheroo_client_probe_defer(struct pci_dev *pdev) { - if ((pdev->class >> 16) == PCI_BASE_CLASS_DISPLAY) { + if (pci_is_display(pdev)) { /* * apple-gmux is needed on pre-retina MacBook Pro * to probe the panel if pdev is the inactive GPU. |
