Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu')
119 files changed, 3146 insertions, 2650 deletions
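The bulk of this change reworks the command-submission path in amdgpu_cs.c for gang submit: the parser now carries up to AMDGPU_CS_GANG_SIZE scheduler entities and one amdgpu_job per entity, with the last job acting as gang leader. As a rough orientation before the per-file hunks, here is a minimal, self-contained C sketch of the slot-bucketing idea behind amdgpu_cs_job_idx()/amdgpu_cs_pass1(); the types and helpers below are illustrative stand-ins, not the driver's code.

/* Simplified stand-in for the gang-slot bookkeeping: each distinct
 * scheduler entity gets one slot (one job), up to GANG_SIZE slots per
 * submission, and pass 1 just counts IBs per slot. */
#include <stdio.h>

#define GANG_SIZE 4                     /* mirrors AMDGPU_CS_GANG_SIZE */

struct entity { int id; };              /* stand-in for drm_sched_entity */

static struct entity *slots[GANG_SIZE];
static unsigned int gang_size;
static unsigned int num_ibs[GANG_SIZE];

/* Return the gang slot for @e, growing the gang if needed; -1 if full. */
static int gang_slot(struct entity *e)
{
        unsigned int i;

        for (i = 0; i < gang_size; ++i)
                if (slots[i] == e)
                        return i;

        if (gang_size == GANG_SIZE)
                return -1;

        slots[gang_size] = e;
        return gang_size++;
}

int main(void)
{
        struct entity gfx = { 0 }, compute = { 1 };
        struct entity *submit[] = { &gfx, &gfx, &compute };
        unsigned int i;

        for (i = 0; i < 3; ++i) {
                int slot = gang_slot(submit[i]);

                if (slot < 0)
                        return 1;
                num_ibs[slot]++;        /* pass 1: count IBs per job */
        }

        for (i = 0; i < gang_size; ++i)
                printf("job %u: %u IB(s)\n", i, num_ibs[i]);
        return 0;
}

In the actual pass 1 below, num_ibs[] sizes the amdgpu_job_alloc() call for each gang slot, and p->jobs[p->gang_size - 1] becomes the gang leader.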
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 79bb6fd83094..9999c18e7d8e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -219,10 +219,12 @@ extern int amdgpu_use_xgmi_p2p; extern int sched_policy; extern bool debug_evictions; extern bool no_system_mem_limit; +extern int halt_if_hws_hang; #else static const int __maybe_unused sched_policy = KFD_SCHED_POLICY_HWS; static const bool __maybe_unused debug_evictions; /* = false */ static const bool __maybe_unused no_system_mem_limit; +static const int __maybe_unused halt_if_hws_hang; #endif #ifdef CONFIG_HSA_AMD_P2P extern bool pcie_p2p; @@ -274,9 +276,6 @@ extern int amdgpu_vcnfw_log; #define AMDGPU_RESET_VCE (1 << 13) #define AMDGPU_RESET_VCE1 (1 << 14) -#define AMDGPU_RESET_LEVEL_SOFT_RECOVERY (1 << 0) -#define AMDGPU_RESET_LEVEL_MODE2 (1 << 1) - /* max cursor sizes (in pixels) */ #define CIK_CURSOR_WIDTH 128 #define CIK_CURSOR_HEIGHT 128 @@ -678,7 +677,7 @@ enum amd_hw_ip_block_type { MAX_HWIP }; -#define HWIP_MAX_INSTANCE 11 +#define HWIP_MAX_INSTANCE 28 #define HW_ID_MAX 300 #define IP_VERSION(mj, mn, rv) (((mj) << 16) | ((mn) << 8) | (rv)) @@ -885,6 +884,7 @@ struct amdgpu_device { u64 fence_context; unsigned num_rings; struct amdgpu_ring *rings[AMDGPU_MAX_RINGS]; + struct dma_fence __rcu *gang_submit; bool ib_pool_ready; struct amdgpu_sa_manager ib_pools[AMDGPU_IB_POOL_MAX]; struct amdgpu_sched gpu_sched[AMDGPU_HW_IP_NUM][AMDGPU_RING_PRIO_MAX]; @@ -1064,7 +1064,6 @@ struct amdgpu_device { struct work_struct reset_work; - uint32_t amdgpu_reset_level_mask; bool job_hang; }; @@ -1294,6 +1293,8 @@ u32 amdgpu_device_pcie_port_rreg(struct amdgpu_device *adev, u32 reg); void amdgpu_device_pcie_port_wreg(struct amdgpu_device *adev, u32 reg, u32 v); +struct dma_fence *amdgpu_device_switch_gang(struct amdgpu_device *adev, + struct dma_fence *gang); /* atpx handler */ #if defined(CONFIG_VGA_SWITCHEROO) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c index 55402d238919..b14800ac179e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: MIT /* * Copyright 2012 Advanced Micro Devices, Inc. 
* @@ -849,6 +850,7 @@ int amdgpu_acpi_init(struct amdgpu_device *adev) if (amdgpu_device_has_dc_support(adev)) { #if defined(CONFIG_DRM_AMD_DC) struct amdgpu_display_manager *dm = &adev->dm; + if (dm->backlight_dev[0]) atif->bd = dm->backlight_dev[0]; #endif @@ -863,6 +865,7 @@ int amdgpu_acpi_init(struct amdgpu_device *adev) if ((enc->devices & (ATOM_DEVICE_LCD_SUPPORT)) && enc->enc_priv) { struct amdgpu_encoder_atom_dig *dig = enc->enc_priv; + if (dig->bl_dev) { atif->bd = dig->bl_dev; break; @@ -919,9 +922,9 @@ static bool amdgpu_atif_pci_probe_handle(struct pci_dev *pdev) return false; status = acpi_get_handle(dhandle, "ATIF", &atif_handle); - if (ACPI_FAILURE(status)) { + if (ACPI_FAILURE(status)) return false; - } + amdgpu_acpi_priv.atif.handle = atif_handle; acpi_get_name(amdgpu_acpi_priv.atif.handle, ACPI_FULL_PATHNAME, &buffer); DRM_DEBUG_DRIVER("Found ATIF handle %s\n", acpi_method_name); @@ -954,9 +957,9 @@ static bool amdgpu_atcs_pci_probe_handle(struct pci_dev *pdev) return false; status = acpi_get_handle(dhandle, "ATCS", &atcs_handle); - if (ACPI_FAILURE(status)) { + if (ACPI_FAILURE(status)) return false; - } + amdgpu_acpi_priv.atcs.handle = atcs_handle; acpi_get_name(amdgpu_acpi_priv.atcs.handle, ACPI_FULL_PATHNAME, &buffer); DRM_DEBUG_DRIVER("Found ATCS handle %s\n", acpi_method_name); @@ -1050,6 +1053,10 @@ bool amdgpu_acpi_should_gpu_reset(struct amdgpu_device *adev) { if (adev->flags & AMD_IS_APU) return false; + + if (amdgpu_sriov_vf(adev)) + return false; + return pm_suspend_target_state != PM_SUSPEND_TO_IDLE; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c index 091415a4abf0..f99d4873bf22 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: MIT /* * Copyright 2014 Advanced Micro Devices, Inc. * @@ -74,9 +75,6 @@ void amdgpu_amdkfd_device_probe(struct amdgpu_device *adev) return; adev->kfd.dev = kgd2kfd_probe(adev, vf); - - if (adev->kfd.dev) - amdgpu_amdkfd_total_mem_size += adev->gmc.real_vram_size; } /** @@ -130,12 +128,12 @@ static void amdgpu_amdkfd_reset_work(struct work_struct *work) kfd.reset_work); struct amdgpu_reset_context reset_context; + memset(&reset_context, 0, sizeof(reset_context)); reset_context.method = AMD_RESET_METHOD_NONE; reset_context.reset_req_dev = adev; clear_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags); - clear_bit(AMDGPU_SKIP_MODE2_RESET, &reset_context.flags); amdgpu_device_gpu_recover(adev, NULL, &reset_context); } @@ -197,7 +195,9 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev) } adev->kfd.init_complete = kgd2kfd_device_init(adev->kfd.dev, - adev_to_drm(adev), &gpu_resources); + &gpu_resources); + + amdgpu_amdkfd_total_mem_size += adev->gmc.real_vram_size; INIT_WORK(&adev->kfd.reset_work, amdgpu_amdkfd_reset_work); } @@ -208,6 +208,7 @@ void amdgpu_amdkfd_device_fini_sw(struct amdgpu_device *adev) if (adev->kfd.dev) { kgd2kfd_device_exit(adev->kfd.dev); adev->kfd.dev = NULL; + amdgpu_amdkfd_total_mem_size -= adev->gmc.real_vram_size; } } @@ -672,7 +673,7 @@ int amdgpu_amdkfd_submit_ib(struct amdgpu_device *adev, goto err; } - ret = amdgpu_job_alloc(adev, 1, &job, NULL); + ret = amdgpu_job_alloc(adev, NULL, NULL, NULL, 1, &job); if (ret) goto err; @@ -684,6 +685,7 @@ int amdgpu_amdkfd_submit_ib(struct amdgpu_device *adev, ib->length_dw = ib_len; /* This works for NO_HWS. 
TODO: need to handle without knowing VMID */ job->vmid = vmid; + job->num_ibs = 1; ret = amdgpu_ib_schedule(ring, 1, ib, job, &f); @@ -704,6 +706,13 @@ err: void amdgpu_amdkfd_set_compute_idle(struct amdgpu_device *adev, bool idle) { + /* Temporary workaround to fix issues observed in some + * compute applications when GFXOFF is enabled on GFX11. + */ + if (IP_VERSION_MAJ(adev->ip_versions[GC_HWIP][0]) == 11) { + pr_debug("GFXOFF is %s\n", idle ? "enabled" : "disabled"); + amdgpu_gfx_off_ctrl(adev, idle); + } amdgpu_dpm_switch_power_profile(adev, PP_SMC_POWER_PROFILE_COMPUTE, !idle); @@ -751,13 +760,7 @@ bool amdgpu_amdkfd_have_atomics_support(struct amdgpu_device *adev) void amdgpu_amdkfd_ras_poison_consumption_handler(struct amdgpu_device *adev, bool reset) { - struct ras_err_data err_data = {0, 0, 0, NULL}; - - /* CPU MCA will handle page retirement if connected_to_cpu is 1 */ - if (!adev->gmc.xgmi.connected_to_cpu) - amdgpu_umc_poison_handler(adev, &err_data, reset); - else if (reset) - amdgpu_amdkfd_gpu_reset(adev); + amdgpu_umc_poison_handler(adev, reset); } bool amdgpu_amdkfd_ras_query_utcl2_poison_status(struct amdgpu_device *adev) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h index 647220a8762d..f50e3ba4d7a5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h @@ -353,7 +353,6 @@ int kgd2kfd_init(void); void kgd2kfd_exit(void); struct kfd_dev *kgd2kfd_probe(struct amdgpu_device *adev, bool vf); bool kgd2kfd_device_init(struct kfd_dev *kfd, - struct drm_device *ddev, const struct kgd2kfd_shared_resources *gpu_resources); void kgd2kfd_device_exit(struct kfd_dev *kfd); void kgd2kfd_suspend(struct kfd_dev *kfd, bool run_pm); @@ -381,7 +380,7 @@ struct kfd_dev *kgd2kfd_probe(struct amdgpu_device *adev, bool vf) } static inline -bool kgd2kfd_device_init(struct kfd_dev *kfd, struct drm_device *ddev, +bool kgd2kfd_device_init(struct kfd_dev *kfd, const struct kgd2kfd_shared_resources *gpu_resources) { return false; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v11.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v11.c index 0b0a72ca5695..7e80caa05060 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v11.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v11.c @@ -111,7 +111,7 @@ static int init_interrupts_v11(struct amdgpu_device *adev, uint32_t pipe_id) lock_srbm(adev, mec, pipe, 0, 0); - WREG32(SOC15_REG_OFFSET(GC, 0, regCPC_INT_CNTL), + WREG32_SOC15(GC, 0, regCPC_INT_CNTL, CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK | CP_INT_CNTL_RING0__OPCODE_ERROR_INT_ENABLE_MASK); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c index 81e3b528bbc9..e92b93557c13 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c @@ -787,7 +787,7 @@ void kgd_gfx_v9_get_cu_occupancy(struct amdgpu_device *adev, int pasid, for (se_idx = 0; se_idx < se_cnt; se_idx++) { for (sh_idx = 0; sh_idx < sh_cnt; sh_idx++) { - gfx_v9_0_select_se_sh(adev, se_idx, sh_idx, 0xffffffff); + amdgpu_gfx_select_se_sh(adev, se_idx, sh_idx, 0xffffffff); queue_map = RREG32_SOC15(GC, 0, mmSPI_CSQ_WF_ACTIVE_STATUS); /* @@ -820,7 +820,7 @@ void kgd_gfx_v9_get_cu_occupancy(struct amdgpu_device *adev, int pasid, } } - gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); + amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); soc15_grbm_select(adev, 0, 0, 0, 0); 
unlock_spi_csq_mutexes(adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index 2170db83e41d..ba72a910d0d5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: MIT /* * Copyright 2014-2018 Advanced Micro Devices, Inc. * @@ -297,7 +298,7 @@ static int amdgpu_amdkfd_remove_eviction_fence(struct amdgpu_bo *bo, */ replacement = dma_fence_get_stub(); dma_resv_replace_fences(bo->tbo.base.resv, ef->base.context, - replacement, DMA_RESV_USAGE_READ); + replacement, DMA_RESV_USAGE_BOOKKEEP); dma_fence_put(replacement); return 0; } @@ -417,9 +418,9 @@ static uint64_t get_pte_flags(struct amdgpu_device *adev, struct kgd_mem *mem) if (mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_EXECUTABLE) mapping_flags |= AMDGPU_VM_PAGE_EXECUTABLE; - switch (adev->asic_type) { - case CHIP_ARCTURUS: - case CHIP_ALDEBARAN: + switch (adev->ip_versions[GC_HWIP][0]) { + case IP_VERSION(9, 4, 1): + case IP_VERSION(9, 4, 2): if (mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) { if (bo_adev == adev) { if (uncached) @@ -428,7 +429,7 @@ static uint64_t get_pte_flags(struct amdgpu_device *adev, struct kgd_mem *mem) mapping_flags |= AMDGPU_VM_MTYPE_CC; else mapping_flags |= AMDGPU_VM_MTYPE_RW; - if (adev->asic_type == CHIP_ALDEBARAN && + if ((adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 2)) && adev->gmc.xgmi.connected_to_cpu) snoop = true; } else { @@ -509,13 +510,13 @@ kfd_mem_dmamap_userptr(struct kgd_mem *mem, struct ttm_tt *ttm = bo->tbo.ttm; int ret; + if (WARN_ON(ttm->num_pages != src_ttm->num_pages)) + return -EINVAL; + ttm->sg = kmalloc(sizeof(*ttm->sg), GFP_KERNEL); if (unlikely(!ttm->sg)) return -ENOMEM; - if (WARN_ON(ttm->num_pages != src_ttm->num_pages)) - return -EINVAL; - /* Same sequence as in amdgpu_ttm_tt_pin_userptr */ ret = sg_alloc_table_from_pages(ttm->sg, src_ttm->pages, ttm->num_pages, 0, @@ -1390,8 +1391,9 @@ static int init_kfd_vm(struct amdgpu_vm *vm, void **process_info, ret = dma_resv_reserve_fences(vm->root.bo->tbo.base.resv, 1); if (ret) goto reserve_shared_fail; - amdgpu_bo_fence(vm->root.bo, - &vm->process_info->eviction_fence->base, true); + dma_resv_add_fence(vm->root.bo->tbo.base.resv, + &vm->process_info->eviction_fence->base, + DMA_RESV_USAGE_BOOKKEEP); amdgpu_bo_unreserve(vm->root.bo); /* Update process info */ @@ -1612,6 +1614,7 @@ size_t amdgpu_amdkfd_get_available_memory(struct amdgpu_device *adev) uint64_t reserved_for_pt = ESTIMATE_PT_SIZE(amdgpu_amdkfd_total_mem_size); size_t available; + spin_lock(&kfd_mem_limit.mem_limit_lock); available = adev->gmc.real_vram_size - adev->kfd.vram_used_aligned @@ -1904,16 +1907,6 @@ int amdgpu_amdkfd_gpuvm_map_memory_to_gpu( */ mutex_lock(&mem->process_info->lock); - /* Lock mmap-sem. 
If we find an invalid userptr BO, we can be - * sure that the MMU notifier is no longer running - * concurrently and the queues are actually stopped - */ - if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm)) { - mmap_write_lock(current->mm); - is_invalid_userptr = atomic_read(&mem->invalid); - mmap_write_unlock(current->mm); - } - mutex_lock(&mem->lock); domain = mem->domain; @@ -1987,9 +1980,9 @@ int amdgpu_amdkfd_gpuvm_map_memory_to_gpu( } if (!amdgpu_ttm_tt_get_usermm(bo->tbo.ttm) && !bo->tbo.pin_count) - amdgpu_bo_fence(bo, - &avm->process_info->eviction_fence->base, - true); + dma_resv_add_fence(bo->tbo.base.resv, + &avm->process_info->eviction_fence->base, + DMA_RESV_USAGE_BOOKKEEP); ret = unreserve_bo_and_vms(&ctx, false, false); goto out; @@ -2216,7 +2209,7 @@ int amdgpu_amdkfd_gpuvm_get_vm_fault_info(struct amdgpu_device *adev, { if (atomic_read(&adev->gmc.vm_fault_info_updated) == 1) { *mem = *adev->gmc.vm_fault_info; - mb(); + mb(); /* make sure read happened */ atomic_set(&adev->gmc.vm_fault_info_updated, 0); } return 0; @@ -2758,15 +2751,18 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef) if (mem->bo->tbo.pin_count) continue; - amdgpu_bo_fence(mem->bo, - &process_info->eviction_fence->base, true); + dma_resv_add_fence(mem->bo->tbo.base.resv, + &process_info->eviction_fence->base, + DMA_RESV_USAGE_BOOKKEEP); } /* Attach eviction fence to PD / PT BOs */ list_for_each_entry(peer_vm, &process_info->vm_list_head, vm_list_node) { struct amdgpu_bo *bo = peer_vm->root.bo; - amdgpu_bo_fence(bo, &process_info->eviction_fence->base, true); + dma_resv_add_fence(bo->tbo.base.resv, + &process_info->eviction_fence->base, + DMA_RESV_USAGE_BOOKKEEP); } validate_map_fail: @@ -2820,7 +2816,9 @@ int amdgpu_amdkfd_add_gws_to_process(void *info, void *gws, struct kgd_mem **mem ret = dma_resv_reserve_fences(gws_bo->tbo.base.resv, 1); if (ret) goto reserve_shared_fail; - amdgpu_bo_fence(gws_bo, &process_info->eviction_fence->base, true); + dma_resv_add_fence(gws_bo->tbo.base.resv, + &process_info->eviction_fence->base, + DMA_RESV_USAGE_BOOKKEEP); amdgpu_bo_unreserve(gws_bo); mutex_unlock(&(*mem)->process_info->lock); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c index b7933c2ce765..e1320edfc527 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c @@ -26,7 +26,6 @@ #include <drm/display/drm_dp_helper.h> #include <drm/drm_edid.h> -#include <drm/drm_fb_helper.h> #include <drm/drm_probe_helper.h> #include <drm/amdgpu_drm.h> #include "amdgpu.h" @@ -1674,10 +1673,12 @@ amdgpu_connector_add(struct amdgpu_device *adev, adev->mode_info.dither_property, AMDGPU_FMT_DITHER_DISABLE); - if (amdgpu_audio != 0) + if (amdgpu_audio != 0) { drm_object_attach_property(&amdgpu_connector->base.base, adev->mode_info.audio_property, AMDGPU_AUDIO_AUTO); + amdgpu_connector->audio = AMDGPU_AUDIO_AUTO; + } subpixel_order = SubPixelHorizontalRGB; connector->interlace_allowed = true; @@ -1799,6 +1800,7 @@ amdgpu_connector_add(struct amdgpu_device *adev, drm_object_attach_property(&amdgpu_connector->base.base, adev->mode_info.audio_property, AMDGPU_AUDIO_AUTO); + amdgpu_connector->audio = AMDGPU_AUDIO_AUTO; } drm_object_attach_property(&amdgpu_connector->base.base, adev->mode_info.dither_property, @@ -1852,6 +1854,7 @@ amdgpu_connector_add(struct amdgpu_device *adev, drm_object_attach_property(&amdgpu_connector->base.base, adev->mode_info.audio_property, AMDGPU_AUDIO_AUTO); + 
amdgpu_connector->audio = AMDGPU_AUDIO_AUTO; } drm_object_attach_property(&amdgpu_connector->base.base, adev->mode_info.dither_property, @@ -1902,6 +1905,7 @@ amdgpu_connector_add(struct amdgpu_device *adev, drm_object_attach_property(&amdgpu_connector->base.base, adev->mode_info.audio_property, AMDGPU_AUDIO_AUTO); + amdgpu_connector->audio = AMDGPU_AUDIO_AUTO; } drm_object_attach_property(&amdgpu_connector->base.base, adev->mode_info.dither_property, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index b7bae833c804..0528c2b1db6e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -39,9 +39,82 @@ #include "amdgpu_gem.h" #include "amdgpu_ras.h" -static int amdgpu_cs_user_fence_chunk(struct amdgpu_cs_parser *p, - struct drm_amdgpu_cs_chunk_fence *data, - uint32_t *offset) +static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, + struct amdgpu_device *adev, + struct drm_file *filp, + union drm_amdgpu_cs *cs) +{ + struct amdgpu_fpriv *fpriv = filp->driver_priv; + + if (cs->in.num_chunks == 0) + return -EINVAL; + + memset(p, 0, sizeof(*p)); + p->adev = adev; + p->filp = filp; + + p->ctx = amdgpu_ctx_get(fpriv, cs->in.ctx_id); + if (!p->ctx) + return -EINVAL; + + if (atomic_read(&p->ctx->guilty)) { + amdgpu_ctx_put(p->ctx); + return -ECANCELED; + } + return 0; +} + +static int amdgpu_cs_job_idx(struct amdgpu_cs_parser *p, + struct drm_amdgpu_cs_chunk_ib *chunk_ib) +{ + struct drm_sched_entity *entity; + unsigned int i; + int r; + + r = amdgpu_ctx_get_entity(p->ctx, chunk_ib->ip_type, + chunk_ib->ip_instance, + chunk_ib->ring, &entity); + if (r) + return r; + + /* + * Abort if there is no run queue associated with this entity. + * Possibly because of disabled HW IP. 
+ */ + if (entity->rq == NULL) + return -EINVAL; + + /* Check if we can add this IB to some existing job */ + for (i = 0; i < p->gang_size; ++i) + if (p->entities[i] == entity) + return i; + + /* If not increase the gang size if possible */ + if (i == AMDGPU_CS_GANG_SIZE) + return -EINVAL; + + p->entities[i] = entity; + p->gang_size = i + 1; + return i; +} + +static int amdgpu_cs_p1_ib(struct amdgpu_cs_parser *p, + struct drm_amdgpu_cs_chunk_ib *chunk_ib, + unsigned int *num_ibs) +{ + int r; + + r = amdgpu_cs_job_idx(p, chunk_ib); + if (r < 0) + return r; + + ++(num_ibs[r]); + return 0; +} + +static int amdgpu_cs_p1_user_fence(struct amdgpu_cs_parser *p, + struct drm_amdgpu_cs_chunk_fence *data, + uint32_t *offset) { struct drm_gem_object *gobj; struct amdgpu_bo *bo; @@ -80,11 +153,11 @@ error_unref: return r; } -static int amdgpu_cs_bo_handles_chunk(struct amdgpu_cs_parser *p, - struct drm_amdgpu_bo_list_in *data) +static int amdgpu_cs_p1_bo_handles(struct amdgpu_cs_parser *p, + struct drm_amdgpu_bo_list_in *data) { + struct drm_amdgpu_bo_list_entry *info; int r; - struct drm_amdgpu_bo_list_entry *info = NULL; r = amdgpu_bo_create_list_entry_array(data, &info); if (r) @@ -104,38 +177,25 @@ error_free: return r; } -static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, union drm_amdgpu_cs *cs) +/* Copy the data from userspace and go over it the first time */ +static int amdgpu_cs_pass1(struct amdgpu_cs_parser *p, + union drm_amdgpu_cs *cs) { struct amdgpu_fpriv *fpriv = p->filp->driver_priv; + unsigned int num_ibs[AMDGPU_CS_GANG_SIZE] = { }; struct amdgpu_vm *vm = &fpriv->vm; uint64_t *chunk_array_user; uint64_t *chunk_array; - unsigned size, num_ibs = 0; uint32_t uf_offset = 0; - int i; + unsigned int size; int ret; + int i; - if (cs->in.num_chunks == 0) - return -EINVAL; - - chunk_array = kvmalloc_array(cs->in.num_chunks, sizeof(uint64_t), GFP_KERNEL); + chunk_array = kvmalloc_array(cs->in.num_chunks, sizeof(uint64_t), + GFP_KERNEL); if (!chunk_array) return -ENOMEM; - p->ctx = amdgpu_ctx_get(fpriv, cs->in.ctx_id); - if (!p->ctx) { - ret = -EINVAL; - goto free_chunk; - } - - mutex_lock(&p->ctx->lock); - - /* skip guilty context job */ - if (atomic_read(&p->ctx->guilty) == 1) { - ret = -ECANCELED; - goto free_chunk; - } - /* get chunks */ chunk_array_user = u64_to_user_ptr(cs->in.chunks); if (copy_from_user(chunk_array, chunk_array_user, @@ -170,7 +230,8 @@ static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, union drm_amdgpu_cs size = p->chunks[i].length_dw; cdata = u64_to_user_ptr(user_chunk.chunk_data); - p->chunks[i].kdata = kvmalloc_array(size, sizeof(uint32_t), GFP_KERNEL); + p->chunks[i].kdata = kvmalloc_array(size, sizeof(uint32_t), + GFP_KERNEL); if (p->chunks[i].kdata == NULL) { ret = -ENOMEM; i--; @@ -182,36 +243,35 @@ static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, union drm_amdgpu_cs goto free_partial_kdata; } + /* Assume the worst on the following checks */ + ret = -EINVAL; switch (p->chunks[i].chunk_id) { case AMDGPU_CHUNK_ID_IB: - ++num_ibs; + if (size < sizeof(struct drm_amdgpu_cs_chunk_ib)) + goto free_partial_kdata; + + ret = amdgpu_cs_p1_ib(p, p->chunks[i].kdata, num_ibs); + if (ret) + goto free_partial_kdata; break; case AMDGPU_CHUNK_ID_FENCE: - size = sizeof(struct drm_amdgpu_cs_chunk_fence); - if (p->chunks[i].length_dw * sizeof(uint32_t) < size) { - ret = -EINVAL; + if (size < sizeof(struct drm_amdgpu_cs_chunk_fence)) goto free_partial_kdata; - } - ret = amdgpu_cs_user_fence_chunk(p, p->chunks[i].kdata, - &uf_offset); + ret = 
amdgpu_cs_p1_user_fence(p, p->chunks[i].kdata, + &uf_offset); if (ret) goto free_partial_kdata; - break; case AMDGPU_CHUNK_ID_BO_HANDLES: - size = sizeof(struct drm_amdgpu_bo_list_in); - if (p->chunks[i].length_dw * sizeof(uint32_t) < size) { - ret = -EINVAL; + if (size < sizeof(struct drm_amdgpu_bo_list_in)) goto free_partial_kdata; - } - ret = amdgpu_cs_bo_handles_chunk(p, p->chunks[i].kdata); + ret = amdgpu_cs_p1_bo_handles(p, p->chunks[i].kdata); if (ret) goto free_partial_kdata; - break; case AMDGPU_CHUNK_ID_DEPENDENCIES: @@ -223,22 +283,28 @@ static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, union drm_amdgpu_cs break; default: - ret = -EINVAL; goto free_partial_kdata; } } - ret = amdgpu_job_alloc(p->adev, num_ibs, &p->job, vm); - if (ret) - goto free_all_kdata; + if (!p->gang_size) + return -EINVAL; - if (p->ctx->vram_lost_counter != p->job->vram_lost_counter) { + for (i = 0; i < p->gang_size; ++i) { + ret = amdgpu_job_alloc(p->adev, vm, p->entities[i], vm, + num_ibs[i], &p->jobs[i]); + if (ret) + goto free_all_kdata; + } + p->gang_leader = p->jobs[p->gang_size - 1]; + + if (p->ctx->vram_lost_counter != p->gang_leader->vram_lost_counter) { ret = -ECANCELED; goto free_all_kdata; } if (p->uf_entry.tv.bo) - p->job->uf_addr = uf_offset; + p->gang_leader->uf_addr = uf_offset; kvfree(chunk_array); /* Use this opportunity to fill in task info for the vm */ @@ -260,6 +326,308 @@ free_chunk: return ret; } +static int amdgpu_cs_p2_ib(struct amdgpu_cs_parser *p, + struct amdgpu_cs_chunk *chunk, + unsigned int *ce_preempt, + unsigned int *de_preempt) +{ + struct drm_amdgpu_cs_chunk_ib *chunk_ib = chunk->kdata; + struct amdgpu_fpriv *fpriv = p->filp->driver_priv; + struct amdgpu_vm *vm = &fpriv->vm; + struct amdgpu_ring *ring; + struct amdgpu_job *job; + struct amdgpu_ib *ib; + int r; + + r = amdgpu_cs_job_idx(p, chunk_ib); + if (r < 0) + return r; + + job = p->jobs[r]; + ring = amdgpu_job_ring(job); + ib = &job->ibs[job->num_ibs++]; + + /* MM engine doesn't support user fences */ + if (p->uf_entry.tv.bo && ring->funcs->no_user_fence) + return -EINVAL; + + if (chunk_ib->ip_type == AMDGPU_HW_IP_GFX && + chunk_ib->flags & AMDGPU_IB_FLAG_PREEMPT) { + if (chunk_ib->flags & AMDGPU_IB_FLAG_CE) + (*ce_preempt)++; + else + (*de_preempt)++; + + /* Each GFX command submit allows only 1 IB max + * preemptible for CE & DE */ + if (*ce_preempt > 1 || *de_preempt > 1) + return -EINVAL; + } + + if (chunk_ib->flags & AMDGPU_IB_FLAG_PREAMBLE) + job->preamble_status |= AMDGPU_PREAMBLE_IB_PRESENT; + + r = amdgpu_ib_get(p->adev, vm, ring->funcs->parse_cs ? 
+ chunk_ib->ib_bytes : 0, + AMDGPU_IB_POOL_DELAYED, ib); + if (r) { + DRM_ERROR("Failed to get ib !\n"); + return r; + } + + ib->gpu_addr = chunk_ib->va_start; + ib->length_dw = chunk_ib->ib_bytes / 4; + ib->flags = chunk_ib->flags; + return 0; +} + +static int amdgpu_cs_p2_dependencies(struct amdgpu_cs_parser *p, + struct amdgpu_cs_chunk *chunk) +{ + struct drm_amdgpu_cs_chunk_dep *deps = chunk->kdata; + struct amdgpu_fpriv *fpriv = p->filp->driver_priv; + unsigned num_deps; + int i, r; + + num_deps = chunk->length_dw * 4 / + sizeof(struct drm_amdgpu_cs_chunk_dep); + + for (i = 0; i < num_deps; ++i) { + struct amdgpu_ctx *ctx; + struct drm_sched_entity *entity; + struct dma_fence *fence; + + ctx = amdgpu_ctx_get(fpriv, deps[i].ctx_id); + if (ctx == NULL) + return -EINVAL; + + r = amdgpu_ctx_get_entity(ctx, deps[i].ip_type, + deps[i].ip_instance, + deps[i].ring, &entity); + if (r) { + amdgpu_ctx_put(ctx); + return r; + } + + fence = amdgpu_ctx_get_fence(ctx, entity, deps[i].handle); + amdgpu_ctx_put(ctx); + + if (IS_ERR(fence)) + return PTR_ERR(fence); + else if (!fence) + continue; + + if (chunk->chunk_id == AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES) { + struct drm_sched_fence *s_fence; + struct dma_fence *old = fence; + + s_fence = to_drm_sched_fence(fence); + fence = dma_fence_get(&s_fence->scheduled); + dma_fence_put(old); + } + + r = amdgpu_sync_fence(&p->sync, fence); + dma_fence_put(fence); + if (r) + return r; + } + return 0; +} + +static int amdgpu_syncobj_lookup_and_add(struct amdgpu_cs_parser *p, + uint32_t handle, u64 point, + u64 flags) +{ + struct dma_fence *fence; + int r; + + r = drm_syncobj_find_fence(p->filp, handle, point, flags, &fence); + if (r) { + DRM_ERROR("syncobj %u failed to find fence @ %llu (%d)!\n", + handle, point, r); + return r; + } + + r = amdgpu_sync_fence(&p->sync, fence); + if (r) + goto error; + + /* + * When we have an explicit dependency it might be necessary to insert a + * pipeline sync to make sure that all caches etc are flushed and the + * next job actually sees the results from the previous one. 
+ */ + if (fence->context == p->gang_leader->base.entity->fence_context) + r = amdgpu_sync_fence(&p->gang_leader->explicit_sync, fence); + +error: + dma_fence_put(fence); + return r; +} + +static int amdgpu_cs_p2_syncobj_in(struct amdgpu_cs_parser *p, + struct amdgpu_cs_chunk *chunk) +{ + struct drm_amdgpu_cs_chunk_sem *deps = chunk->kdata; + unsigned num_deps; + int i, r; + + num_deps = chunk->length_dw * 4 / + sizeof(struct drm_amdgpu_cs_chunk_sem); + for (i = 0; i < num_deps; ++i) { + r = amdgpu_syncobj_lookup_and_add(p, deps[i].handle, 0, 0); + if (r) + return r; + } + + return 0; +} + +static int amdgpu_cs_p2_syncobj_timeline_wait(struct amdgpu_cs_parser *p, + struct amdgpu_cs_chunk *chunk) +{ + struct drm_amdgpu_cs_chunk_syncobj *syncobj_deps = chunk->kdata; + unsigned num_deps; + int i, r; + + num_deps = chunk->length_dw * 4 / + sizeof(struct drm_amdgpu_cs_chunk_syncobj); + for (i = 0; i < num_deps; ++i) { + r = amdgpu_syncobj_lookup_and_add(p, syncobj_deps[i].handle, + syncobj_deps[i].point, + syncobj_deps[i].flags); + if (r) + return r; + } + + return 0; +} + +static int amdgpu_cs_p2_syncobj_out(struct amdgpu_cs_parser *p, + struct amdgpu_cs_chunk *chunk) +{ + struct drm_amdgpu_cs_chunk_sem *deps = chunk->kdata; + unsigned num_deps; + int i; + + num_deps = chunk->length_dw * 4 / + sizeof(struct drm_amdgpu_cs_chunk_sem); + + if (p->post_deps) + return -EINVAL; + + p->post_deps = kmalloc_array(num_deps, sizeof(*p->post_deps), + GFP_KERNEL); + p->num_post_deps = 0; + + if (!p->post_deps) + return -ENOMEM; + + + for (i = 0; i < num_deps; ++i) { + p->post_deps[i].syncobj = + drm_syncobj_find(p->filp, deps[i].handle); + if (!p->post_deps[i].syncobj) + return -EINVAL; + p->post_deps[i].chain = NULL; + p->post_deps[i].point = 0; + p->num_post_deps++; + } + + return 0; +} + +static int amdgpu_cs_p2_syncobj_timeline_signal(struct amdgpu_cs_parser *p, + struct amdgpu_cs_chunk *chunk) +{ + struct drm_amdgpu_cs_chunk_syncobj *syncobj_deps = chunk->kdata; + unsigned num_deps; + int i; + + num_deps = chunk->length_dw * 4 / + sizeof(struct drm_amdgpu_cs_chunk_syncobj); + + if (p->post_deps) + return -EINVAL; + + p->post_deps = kmalloc_array(num_deps, sizeof(*p->post_deps), + GFP_KERNEL); + p->num_post_deps = 0; + + if (!p->post_deps) + return -ENOMEM; + + for (i = 0; i < num_deps; ++i) { + struct amdgpu_cs_post_dep *dep = &p->post_deps[i]; + + dep->chain = NULL; + if (syncobj_deps[i].point) { + dep->chain = dma_fence_chain_alloc(); + if (!dep->chain) + return -ENOMEM; + } + + dep->syncobj = drm_syncobj_find(p->filp, + syncobj_deps[i].handle); + if (!dep->syncobj) { + dma_fence_chain_free(dep->chain); + return -EINVAL; + } + dep->point = syncobj_deps[i].point; + p->num_post_deps++; + } + + return 0; +} + +static int amdgpu_cs_pass2(struct amdgpu_cs_parser *p) +{ + unsigned int ce_preempt = 0, de_preempt = 0; + int i, r; + + for (i = 0; i < p->nchunks; ++i) { + struct amdgpu_cs_chunk *chunk; + + chunk = &p->chunks[i]; + + switch (chunk->chunk_id) { + case AMDGPU_CHUNK_ID_IB: + r = amdgpu_cs_p2_ib(p, chunk, &ce_preempt, &de_preempt); + if (r) + return r; + break; + case AMDGPU_CHUNK_ID_DEPENDENCIES: + case AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES: + r = amdgpu_cs_p2_dependencies(p, chunk); + if (r) + return r; + break; + case AMDGPU_CHUNK_ID_SYNCOBJ_IN: + r = amdgpu_cs_p2_syncobj_in(p, chunk); + if (r) + return r; + break; + case AMDGPU_CHUNK_ID_SYNCOBJ_OUT: + r = amdgpu_cs_p2_syncobj_out(p, chunk); + if (r) + return r; + break; + case AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_WAIT: + r = 
amdgpu_cs_p2_syncobj_timeline_wait(p, chunk); + if (r) + return r; + break; + case AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_SIGNAL: + r = amdgpu_cs_p2_syncobj_timeline_signal(p, chunk); + if (r) + return r; + break; + } + } + + return 0; +} + /* Convert microseconds to bytes. */ static u64 us_to_bytes(struct amdgpu_device *adev, s64 us) { @@ -495,9 +863,7 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p, struct amdgpu_vm *vm = &fpriv->vm; struct amdgpu_bo_list_entry *e; struct list_head duplicates; - struct amdgpu_bo *gds; - struct amdgpu_bo *gws; - struct amdgpu_bo *oa; + unsigned int i; int r; INIT_LIST_HEAD(&p->validated); @@ -581,16 +947,6 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p, e->bo_va = amdgpu_vm_bo_find(vm, bo); } - /* Move fence waiting after getting reservation lock of - * PD root. Then there is no need on a ctx mutex lock. - */ - r = amdgpu_ctx_wait_prev_fence(p->ctx, p->entity); - if (unlikely(r != 0)) { - if (r != -ERESTARTSYS) - DRM_ERROR("amdgpu_ctx_wait_prev_fence failed.\n"); - goto error_validate; - } - amdgpu_cs_get_threshold_for_moves(p->adev, &p->bytes_moved_threshold, &p->bytes_moved_vis_threshold); p->bytes_moved = 0; @@ -611,197 +967,139 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p, if (r) goto error_validate; - amdgpu_cs_report_moved_bytes(p->adev, p->bytes_moved, - p->bytes_moved_vis); + if (p->uf_entry.tv.bo) { + struct amdgpu_bo *uf = ttm_to_amdgpu_bo(p->uf_entry.tv.bo); - gds = p->bo_list->gds_obj; - gws = p->bo_list->gws_obj; - oa = p->bo_list->oa_obj; + r = amdgpu_ttm_alloc_gart(&uf->tbo); + if (r) + goto error_validate; - if (gds) { - p->job->gds_base = amdgpu_bo_gpu_offset(gds) >> PAGE_SHIFT; - p->job->gds_size = amdgpu_bo_size(gds) >> PAGE_SHIFT; - } - if (gws) { - p->job->gws_base = amdgpu_bo_gpu_offset(gws) >> PAGE_SHIFT; - p->job->gws_size = amdgpu_bo_size(gws) >> PAGE_SHIFT; - } - if (oa) { - p->job->oa_base = amdgpu_bo_gpu_offset(oa) >> PAGE_SHIFT; - p->job->oa_size = amdgpu_bo_size(oa) >> PAGE_SHIFT; + p->gang_leader->uf_addr += amdgpu_bo_gpu_offset(uf); } - if (!r && p->uf_entry.tv.bo) { - struct amdgpu_bo *uf = ttm_to_amdgpu_bo(p->uf_entry.tv.bo); + amdgpu_cs_report_moved_bytes(p->adev, p->bytes_moved, + p->bytes_moved_vis); - r = amdgpu_ttm_alloc_gart(&uf->tbo); - p->job->uf_addr += amdgpu_bo_gpu_offset(uf); - } + for (i = 0; i < p->gang_size; ++i) + amdgpu_job_set_resources(p->jobs[i], p->bo_list->gds_obj, + p->bo_list->gws_obj, + p->bo_list->oa_obj); + return 0; error_validate: - if (r) - ttm_eu_backoff_reservation(&p->ticket, &p->validated); + ttm_eu_backoff_reservation(&p->ticket, &p->validated); out_free_user_pages: - if (r) { - amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) { - struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo); + amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) { + struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo); - if (!e->user_pages) - continue; - amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm); - kvfree(e->user_pages); - e->user_pages = NULL; - } - mutex_unlock(&p->bo_list->bo_list_mutex); + if (!e->user_pages) + continue; + amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm); + kvfree(e->user_pages); + e->user_pages = NULL; } return r; } -static int amdgpu_cs_sync_rings(struct amdgpu_cs_parser *p) +static void trace_amdgpu_cs_ibs(struct amdgpu_cs_parser *p) { - struct amdgpu_fpriv *fpriv = p->filp->driver_priv; - struct amdgpu_bo_list_entry *e; - int r; + int i, j; - list_for_each_entry(e, &p->validated, tv.head) { - struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo); - 
struct dma_resv *resv = bo->tbo.base.resv; - enum amdgpu_sync_mode sync_mode; + if (!trace_amdgpu_cs_enabled()) + return; - sync_mode = amdgpu_bo_explicit_sync(bo) ? - AMDGPU_SYNC_EXPLICIT : AMDGPU_SYNC_NE_OWNER; - r = amdgpu_sync_resv(p->adev, &p->job->sync, resv, sync_mode, - &fpriv->vm); - if (r) - return r; + for (i = 0; i < p->gang_size; ++i) { + struct amdgpu_job *job = p->jobs[i]; + + for (j = 0; j < job->num_ibs; ++j) + trace_amdgpu_cs(p, job, &job->ibs[j]); } - return 0; } -/** - * amdgpu_cs_parser_fini() - clean parser states - * @parser: parser structure holding parsing context. - * @error: error number - * @backoff: indicator to backoff the reservation - * - * If error is set then unvalidate buffer, otherwise just free memory - * used by parsing context. - **/ -static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, int error, - bool backoff) +static int amdgpu_cs_patch_ibs(struct amdgpu_cs_parser *p, + struct amdgpu_job *job) { - unsigned i; + struct amdgpu_ring *ring = amdgpu_job_ring(job); + unsigned int i; + int r; - if (error && backoff) { - ttm_eu_backoff_reservation(&parser->ticket, - &parser->validated); - mutex_unlock(&parser->bo_list->bo_list_mutex); - } + /* Only for UVD/VCE VM emulation */ + if (!ring->funcs->parse_cs && !ring->funcs->patch_cs_in_place) + return 0; - for (i = 0; i < parser->num_post_deps; i++) { - drm_syncobj_put(parser->post_deps[i].syncobj); - kfree(parser->post_deps[i].chain); - } - kfree(parser->post_deps); + for (i = 0; i < job->num_ibs; ++i) { + struct amdgpu_ib *ib = &job->ibs[i]; + struct amdgpu_bo_va_mapping *m; + struct amdgpu_bo *aobj; + uint64_t va_start; + uint8_t *kptr; - dma_fence_put(parser->fence); + va_start = ib->gpu_addr & AMDGPU_GMC_HOLE_MASK; + r = amdgpu_cs_find_mapping(p, va_start, &aobj, &m); + if (r) { + DRM_ERROR("IB va_start is invalid\n"); + return r; + } - if (parser->ctx) { - mutex_unlock(&parser->ctx->lock); - amdgpu_ctx_put(parser->ctx); + if ((va_start + ib->length_dw * 4) > + (m->last + 1) * AMDGPU_GPU_PAGE_SIZE) { + DRM_ERROR("IB va_start+ib_bytes is invalid\n"); + return -EINVAL; + } + + /* the IB should be reserved at this point */ + r = amdgpu_bo_kmap(aobj, (void **)&kptr); + if (r) { + return r; + } + + kptr += va_start - (m->start * AMDGPU_GPU_PAGE_SIZE); + + if (ring->funcs->parse_cs) { + memcpy(ib->ptr, kptr, ib->length_dw * 4); + amdgpu_bo_kunmap(aobj); + + r = amdgpu_ring_parse_cs(ring, p, job, ib); + if (r) + return r; + } else { + ib->ptr = (uint32_t *)kptr; + r = amdgpu_ring_patch_cs_in_place(ring, p, job, ib); + amdgpu_bo_kunmap(aobj); + if (r) + return r; + } } - if (parser->bo_list) - amdgpu_bo_list_put(parser->bo_list); - for (i = 0; i < parser->nchunks; i++) - kvfree(parser->chunks[i].kdata); - kvfree(parser->chunks); - if (parser->job) - amdgpu_job_free(parser->job); - if (parser->uf_entry.tv.bo) { - struct amdgpu_bo *uf = ttm_to_amdgpu_bo(parser->uf_entry.tv.bo); + return 0; +} - amdgpu_bo_unref(&uf); +static int amdgpu_cs_patch_jobs(struct amdgpu_cs_parser *p) +{ + unsigned int i; + int r; + + for (i = 0; i < p->gang_size; ++i) { + r = amdgpu_cs_patch_ibs(p, p->jobs[i]); + if (r) + return r; } + return 0; } static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p) { - struct amdgpu_ring *ring = to_amdgpu_ring(p->entity->rq->sched); struct amdgpu_fpriv *fpriv = p->filp->driver_priv; + struct amdgpu_job *job = p->gang_leader; struct amdgpu_device *adev = p->adev; struct amdgpu_vm *vm = &fpriv->vm; struct amdgpu_bo_list_entry *e; struct amdgpu_bo_va *bo_va; struct amdgpu_bo *bo; + 
unsigned int i; int r; - /* Only for UVD/VCE VM emulation */ - if (ring->funcs->parse_cs || ring->funcs->patch_cs_in_place) { - unsigned i, j; - - for (i = 0, j = 0; i < p->nchunks && j < p->job->num_ibs; i++) { - struct drm_amdgpu_cs_chunk_ib *chunk_ib; - struct amdgpu_bo_va_mapping *m; - struct amdgpu_bo *aobj = NULL; - struct amdgpu_cs_chunk *chunk; - uint64_t offset, va_start; - struct amdgpu_ib *ib; - uint8_t *kptr; - - chunk = &p->chunks[i]; - ib = &p->job->ibs[j]; - chunk_ib = chunk->kdata; - - if (chunk->chunk_id != AMDGPU_CHUNK_ID_IB) - continue; - - va_start = chunk_ib->va_start & AMDGPU_GMC_HOLE_MASK; - r = amdgpu_cs_find_mapping(p, va_start, &aobj, &m); - if (r) { - DRM_ERROR("IB va_start is invalid\n"); - return r; - } - - if ((va_start + chunk_ib->ib_bytes) > - (m->last + 1) * AMDGPU_GPU_PAGE_SIZE) { - DRM_ERROR("IB va_start+ib_bytes is invalid\n"); - return -EINVAL; - } - - /* the IB should be reserved at this point */ - r = amdgpu_bo_kmap(aobj, (void **)&kptr); - if (r) { - return r; - } - - offset = m->start * AMDGPU_GPU_PAGE_SIZE; - kptr += va_start - offset; - - if (ring->funcs->parse_cs) { - memcpy(ib->ptr, kptr, chunk_ib->ib_bytes); - amdgpu_bo_kunmap(aobj); - - r = amdgpu_ring_parse_cs(ring, p, p->job, ib); - if (r) - return r; - } else { - ib->ptr = (uint32_t *)kptr; - r = amdgpu_ring_patch_cs_in_place(ring, p, p->job, ib); - amdgpu_bo_kunmap(aobj); - if (r) - return r; - } - - j++; - } - } - - if (!p->job->vm) - return amdgpu_cs_sync_rings(p); - - r = amdgpu_vm_clear_freed(adev, vm, NULL); if (r) return r; @@ -810,18 +1108,18 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p) if (r) return r; - r = amdgpu_sync_fence(&p->job->sync, fpriv->prt_va->last_pt_update); + r = amdgpu_sync_fence(&p->sync, fpriv->prt_va->last_pt_update); if (r) return r; - if (amdgpu_mcbp || amdgpu_sriov_vf(adev)) { + if (fpriv->csa_va) { bo_va = fpriv->csa_va; BUG_ON(!bo_va); r = amdgpu_vm_bo_update(adev, bo_va, false); if (r) return r; - r = amdgpu_sync_fence(&p->job->sync, bo_va->last_pt_update); + r = amdgpu_sync_fence(&p->sync, bo_va->last_pt_update); if (r) return r; } @@ -840,7 +1138,7 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p) if (r) return r; - r = amdgpu_sync_fence(&p->job->sync, bo_va->last_pt_update); + r = amdgpu_sync_fence(&p->sync, bo_va->last_pt_update); if (r) return r; } @@ -853,11 +1151,18 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p) if (r) return r; - r = amdgpu_sync_fence(&p->job->sync, vm->last_update); + r = amdgpu_sync_fence(&p->sync, vm->last_update); if (r) return r; - p->job->vm_pd_addr = amdgpu_gmc_pd_addr(vm->root.bo); + for (i = 0; i < p->gang_size; ++i) { + job = p->jobs[i]; + + if (!job->vm) + continue; + + job->vm_pd_addr = amdgpu_gmc_pd_addr(vm->root.bo); + } if (amdgpu_vm_debug) { /* Invalidate all BOs to test for userspace bugs */ @@ -872,331 +1177,39 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p) } } - return amdgpu_cs_sync_rings(p); -} - -static int amdgpu_cs_ib_fill(struct amdgpu_device *adev, - struct amdgpu_cs_parser *parser) -{ - struct amdgpu_fpriv *fpriv = parser->filp->driver_priv; - struct amdgpu_vm *vm = &fpriv->vm; - int r, ce_preempt = 0, de_preempt = 0; - struct amdgpu_ring *ring; - int i, j; - - for (i = 0, j = 0; i < parser->nchunks && j < parser->job->num_ibs; i++) { - struct amdgpu_cs_chunk *chunk; - struct amdgpu_ib *ib; - struct drm_amdgpu_cs_chunk_ib *chunk_ib; - struct drm_sched_entity *entity; - - chunk = &parser->chunks[i]; - ib = &parser->job->ibs[j]; - chunk_ib = 
(struct drm_amdgpu_cs_chunk_ib *)chunk->kdata; - - if (chunk->chunk_id != AMDGPU_CHUNK_ID_IB) - continue; - - if (chunk_ib->ip_type == AMDGPU_HW_IP_GFX && - (amdgpu_mcbp || amdgpu_sriov_vf(adev))) { - if (chunk_ib->flags & AMDGPU_IB_FLAG_PREEMPT) { - if (chunk_ib->flags & AMDGPU_IB_FLAG_CE) - ce_preempt++; - else - de_preempt++; - } - - /* each GFX command submit allows 0 or 1 IB preemptible for CE & DE */ - if (ce_preempt > 1 || de_preempt > 1) - return -EINVAL; - } - - r = amdgpu_ctx_get_entity(parser->ctx, chunk_ib->ip_type, - chunk_ib->ip_instance, chunk_ib->ring, - &entity); - if (r) - return r; - - if (chunk_ib->flags & AMDGPU_IB_FLAG_PREAMBLE) - parser->job->preamble_status |= - AMDGPU_PREAMBLE_IB_PRESENT; - - if (parser->entity && parser->entity != entity) - return -EINVAL; - - /* Return if there is no run queue associated with this entity. - * Possibly because of disabled HW IP*/ - if (entity->rq == NULL) - return -EINVAL; - - parser->entity = entity; - - ring = to_amdgpu_ring(entity->rq->sched); - r = amdgpu_ib_get(adev, vm, ring->funcs->parse_cs ? - chunk_ib->ib_bytes : 0, - AMDGPU_IB_POOL_DELAYED, ib); - if (r) { - DRM_ERROR("Failed to get ib !\n"); - return r; - } - - ib->gpu_addr = chunk_ib->va_start; - ib->length_dw = chunk_ib->ib_bytes / 4; - ib->flags = chunk_ib->flags; - - j++; - } - - /* MM engine doesn't support user fences */ - ring = to_amdgpu_ring(parser->entity->rq->sched); - if (parser->job->uf_addr && ring->funcs->no_user_fence) - return -EINVAL; - return 0; } -static int amdgpu_cs_process_fence_dep(struct amdgpu_cs_parser *p, - struct amdgpu_cs_chunk *chunk) +static int amdgpu_cs_sync_rings(struct amdgpu_cs_parser *p) { struct amdgpu_fpriv *fpriv = p->filp->driver_priv; - unsigned num_deps; - int i, r; - struct drm_amdgpu_cs_chunk_dep *deps; - - deps = (struct drm_amdgpu_cs_chunk_dep *)chunk->kdata; - num_deps = chunk->length_dw * 4 / - sizeof(struct drm_amdgpu_cs_chunk_dep); - - for (i = 0; i < num_deps; ++i) { - struct amdgpu_ctx *ctx; - struct drm_sched_entity *entity; - struct dma_fence *fence; - - ctx = amdgpu_ctx_get(fpriv, deps[i].ctx_id); - if (ctx == NULL) - return -EINVAL; - - r = amdgpu_ctx_get_entity(ctx, deps[i].ip_type, - deps[i].ip_instance, - deps[i].ring, &entity); - if (r) { - amdgpu_ctx_put(ctx); - return r; - } - - fence = amdgpu_ctx_get_fence(ctx, entity, deps[i].handle); - amdgpu_ctx_put(ctx); - - if (IS_ERR(fence)) - return PTR_ERR(fence); - else if (!fence) - continue; - - if (chunk->chunk_id == AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES) { - struct drm_sched_fence *s_fence; - struct dma_fence *old = fence; - - s_fence = to_drm_sched_fence(fence); - fence = dma_fence_get(&s_fence->scheduled); - dma_fence_put(old); - } - - r = amdgpu_sync_fence(&p->job->sync, fence); - dma_fence_put(fence); - if (r) - return r; - } - return 0; -} - -static int amdgpu_syncobj_lookup_and_add_to_sync(struct amdgpu_cs_parser *p, - uint32_t handle, u64 point, - u64 flags) -{ - struct dma_fence *fence; + struct amdgpu_bo_list_entry *e; + unsigned int i; int r; - r = drm_syncobj_find_fence(p->filp, handle, point, flags, &fence); - if (r) { - DRM_ERROR("syncobj %u failed to find fence @ %llu (%d)!\n", - handle, point, r); - return r; - } - - r = amdgpu_sync_fence(&p->job->sync, fence); - dma_fence_put(fence); - - return r; -} - -static int amdgpu_cs_process_syncobj_in_dep(struct amdgpu_cs_parser *p, - struct amdgpu_cs_chunk *chunk) -{ - struct drm_amdgpu_cs_chunk_sem *deps; - unsigned num_deps; - int i, r; + list_for_each_entry(e, &p->validated, tv.head) { + struct 
amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo); + struct dma_resv *resv = bo->tbo.base.resv; + enum amdgpu_sync_mode sync_mode; - deps = (struct drm_amdgpu_cs_chunk_sem *)chunk->kdata; - num_deps = chunk->length_dw * 4 / - sizeof(struct drm_amdgpu_cs_chunk_sem); - for (i = 0; i < num_deps; ++i) { - r = amdgpu_syncobj_lookup_and_add_to_sync(p, deps[i].handle, - 0, 0); + sync_mode = amdgpu_bo_explicit_sync(bo) ? + AMDGPU_SYNC_EXPLICIT : AMDGPU_SYNC_NE_OWNER; + r = amdgpu_sync_resv(p->adev, &p->sync, resv, sync_mode, + &fpriv->vm); if (r) return r; } - return 0; -} - - -static int amdgpu_cs_process_syncobj_timeline_in_dep(struct amdgpu_cs_parser *p, - struct amdgpu_cs_chunk *chunk) -{ - struct drm_amdgpu_cs_chunk_syncobj *syncobj_deps; - unsigned num_deps; - int i, r; - - syncobj_deps = (struct drm_amdgpu_cs_chunk_syncobj *)chunk->kdata; - num_deps = chunk->length_dw * 4 / - sizeof(struct drm_amdgpu_cs_chunk_syncobj); - for (i = 0; i < num_deps; ++i) { - r = amdgpu_syncobj_lookup_and_add_to_sync(p, - syncobj_deps[i].handle, - syncobj_deps[i].point, - syncobj_deps[i].flags); + for (i = 0; i < p->gang_size; ++i) { + r = amdgpu_sync_push_to_job(&p->sync, p->jobs[i]); if (r) return r; } - return 0; -} - -static int amdgpu_cs_process_syncobj_out_dep(struct amdgpu_cs_parser *p, - struct amdgpu_cs_chunk *chunk) -{ - struct drm_amdgpu_cs_chunk_sem *deps; - unsigned num_deps; - int i; - - deps = (struct drm_amdgpu_cs_chunk_sem *)chunk->kdata; - num_deps = chunk->length_dw * 4 / - sizeof(struct drm_amdgpu_cs_chunk_sem); - - if (p->post_deps) - return -EINVAL; - - p->post_deps = kmalloc_array(num_deps, sizeof(*p->post_deps), - GFP_KERNEL); - p->num_post_deps = 0; - - if (!p->post_deps) - return -ENOMEM; - - - for (i = 0; i < num_deps; ++i) { - p->post_deps[i].syncobj = - drm_syncobj_find(p->filp, deps[i].handle); - if (!p->post_deps[i].syncobj) - return -EINVAL; - p->post_deps[i].chain = NULL; - p->post_deps[i].point = 0; - p->num_post_deps++; - } - - return 0; -} - - -static int amdgpu_cs_process_syncobj_timeline_out_dep(struct amdgpu_cs_parser *p, - struct amdgpu_cs_chunk *chunk) -{ - struct drm_amdgpu_cs_chunk_syncobj *syncobj_deps; - unsigned num_deps; - int i; - - syncobj_deps = (struct drm_amdgpu_cs_chunk_syncobj *)chunk->kdata; - num_deps = chunk->length_dw * 4 / - sizeof(struct drm_amdgpu_cs_chunk_syncobj); - - if (p->post_deps) - return -EINVAL; - - p->post_deps = kmalloc_array(num_deps, sizeof(*p->post_deps), - GFP_KERNEL); - p->num_post_deps = 0; - - if (!p->post_deps) - return -ENOMEM; - - for (i = 0; i < num_deps; ++i) { - struct amdgpu_cs_post_dep *dep = &p->post_deps[i]; - - dep->chain = NULL; - if (syncobj_deps[i].point) { - dep->chain = dma_fence_chain_alloc(); - if (!dep->chain) - return -ENOMEM; - } - - dep->syncobj = drm_syncobj_find(p->filp, - syncobj_deps[i].handle); - if (!dep->syncobj) { - dma_fence_chain_free(dep->chain); - return -EINVAL; - } - dep->point = syncobj_deps[i].point; - p->num_post_deps++; - } - - return 0; -} - -static int amdgpu_cs_dependencies(struct amdgpu_device *adev, - struct amdgpu_cs_parser *p) -{ - int i, r; - - /* TODO: Investigate why we still need the context lock */ - mutex_unlock(&p->ctx->lock); - - for (i = 0; i < p->nchunks; ++i) { - struct amdgpu_cs_chunk *chunk; - - chunk = &p->chunks[i]; + r = amdgpu_ctx_wait_prev_fence(p->ctx, p->entities[p->gang_size - 1]); + if (r && r != -ERESTARTSYS) + DRM_ERROR("amdgpu_ctx_wait_prev_fence failed.\n"); - switch (chunk->chunk_id) { - case AMDGPU_CHUNK_ID_DEPENDENCIES: - case 
AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES: - r = amdgpu_cs_process_fence_dep(p, chunk); - if (r) - goto out; - break; - case AMDGPU_CHUNK_ID_SYNCOBJ_IN: - r = amdgpu_cs_process_syncobj_in_dep(p, chunk); - if (r) - goto out; - break; - case AMDGPU_CHUNK_ID_SYNCOBJ_OUT: - r = amdgpu_cs_process_syncobj_out_dep(p, chunk); - if (r) - goto out; - break; - case AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_WAIT: - r = amdgpu_cs_process_syncobj_timeline_in_dep(p, chunk); - if (r) - goto out; - break; - case AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_SIGNAL: - r = amdgpu_cs_process_syncobj_timeline_out_dep(p, chunk); - if (r) - goto out; - break; - } - } - -out: - mutex_lock(&p->ctx->lock); return r; } @@ -1221,20 +1234,28 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p, union drm_amdgpu_cs *cs) { struct amdgpu_fpriv *fpriv = p->filp->driver_priv; - struct drm_sched_entity *entity = p->entity; + struct amdgpu_job *leader = p->gang_leader; struct amdgpu_bo_list_entry *e; - struct amdgpu_job *job; + unsigned int i; uint64_t seq; int r; - job = p->job; - p->job = NULL; + for (i = 0; i < p->gang_size; ++i) + drm_sched_job_arm(&p->jobs[i]->base); - r = drm_sched_job_init(&job->base, entity, &fpriv->vm); - if (r) - goto error_unlock; + for (i = 0; i < (p->gang_size - 1); ++i) { + struct dma_fence *fence; - drm_sched_job_arm(&job->base); + fence = &p->jobs[i]->base.s_fence->scheduled; + r = drm_sched_job_add_dependency(&leader->base, fence); + if (r) + goto error_cleanup; + } + + if (p->gang_size > 1) { + for (i = 0; i < p->gang_size; ++i) + amdgpu_job_set_gang_leader(p->jobs[i], leader); + } /* No memory allocation is allowed while holding the notifier lock. * The lock is held until amdgpu_cs_submit is finished and fence is @@ -1245,6 +1266,7 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p, /* If userptr are invalidated after amdgpu_cs_parser_bos(), return * -EAGAIN, drmIoctl in libdrm will restart the amdgpu_cs_ioctl. 
*/ + r = 0; amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) { struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo); @@ -1252,67 +1274,96 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p, } if (r) { r = -EAGAIN; - goto error_abort; + goto error_unlock; } - p->fence = dma_fence_get(&job->base.s_fence->finished); + p->fence = dma_fence_get(&leader->base.s_fence->finished); + list_for_each_entry(e, &p->validated, tv.head) { + + /* Everybody except for the gang leader uses READ */ + for (i = 0; i < (p->gang_size - 1); ++i) { + dma_resv_add_fence(e->tv.bo->base.resv, + &p->jobs[i]->base.s_fence->finished, + DMA_RESV_USAGE_READ); + } - seq = amdgpu_ctx_add_fence(p->ctx, entity, p->fence); + /* The gang leader is remembered as writer */ + e->tv.num_shared = 0; + } + + seq = amdgpu_ctx_add_fence(p->ctx, p->entities[p->gang_size - 1], + p->fence); amdgpu_cs_post_dependencies(p); - if ((job->preamble_status & AMDGPU_PREAMBLE_IB_PRESENT) && + if ((leader->preamble_status & AMDGPU_PREAMBLE_IB_PRESENT) && !p->ctx->preamble_presented) { - job->preamble_status |= AMDGPU_PREAMBLE_IB_PRESENT_FIRST; + leader->preamble_status |= AMDGPU_PREAMBLE_IB_PRESENT_FIRST; p->ctx->preamble_presented = true; } cs->out.handle = seq; - job->uf_sequence = seq; - - amdgpu_job_free_resources(job); + leader->uf_sequence = seq; - trace_amdgpu_cs_ioctl(job); amdgpu_vm_bo_trace_cs(&fpriv->vm, &p->ticket); - drm_sched_entity_push_job(&job->base); + for (i = 0; i < p->gang_size; ++i) { + amdgpu_job_free_resources(p->jobs[i]); + trace_amdgpu_cs_ioctl(p->jobs[i]); + drm_sched_entity_push_job(&p->jobs[i]->base); + p->jobs[i] = NULL; + } amdgpu_vm_move_to_lru_tail(p->adev, &fpriv->vm); - - /* Make sure all BOs are remembered as writers */ - amdgpu_bo_list_for_each_entry(e, p->bo_list) - e->tv.num_shared = 0; - ttm_eu_fence_buffer_objects(&p->ticket, &p->validated, p->fence); + mutex_unlock(&p->adev->notifier_lock); mutex_unlock(&p->bo_list->bo_list_mutex); - return 0; -error_abort: - drm_sched_job_cleanup(&job->base); +error_unlock: mutex_unlock(&p->adev->notifier_lock); -error_unlock: - amdgpu_job_free(job); +error_cleanup: + for (i = 0; i < p->gang_size; ++i) + drm_sched_job_cleanup(&p->jobs[i]->base); return r; } -static void trace_amdgpu_cs_ibs(struct amdgpu_cs_parser *parser) +/* Cleanup the parser structure */ +static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser) { - int i; + unsigned i; - if (!trace_amdgpu_cs_enabled()) - return; + for (i = 0; i < parser->num_post_deps; i++) { + drm_syncobj_put(parser->post_deps[i].syncobj); + kfree(parser->post_deps[i].chain); + } + kfree(parser->post_deps); + + dma_fence_put(parser->fence); + + if (parser->ctx) + amdgpu_ctx_put(parser->ctx); + if (parser->bo_list) + amdgpu_bo_list_put(parser->bo_list); + + for (i = 0; i < parser->nchunks; i++) + kvfree(parser->chunks[i].kdata); + kvfree(parser->chunks); + for (i = 0; i < parser->gang_size; ++i) { + if (parser->jobs[i]) + amdgpu_job_free(parser->jobs[i]); + } + if (parser->uf_entry.tv.bo) { + struct amdgpu_bo *uf = ttm_to_amdgpu_bo(parser->uf_entry.tv.bo); - for (i = 0; i < parser->job->num_ibs; i++) - trace_amdgpu_cs(parser, i); + amdgpu_bo_unref(&uf); + } } int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) { struct amdgpu_device *adev = drm_to_adev(dev); - union drm_amdgpu_cs *cs = data; - struct amdgpu_cs_parser parser = {}; - bool reserved_buffers = false; + struct amdgpu_cs_parser parser; int r; if (amdgpu_ras_intr_triggered()) @@ -1321,25 +1372,20 @@ int amdgpu_cs_ioctl(struct 
drm_device *dev, void *data, struct drm_file *filp) if (!adev->accel_working) return -EBUSY; - parser.adev = adev; - parser.filp = filp; - - r = amdgpu_cs_parser_init(&parser, data); + r = amdgpu_cs_parser_init(&parser, adev, filp, data); if (r) { if (printk_ratelimit()) DRM_ERROR("Failed to initialize parser %d!\n", r); - goto out; + return r; } - r = amdgpu_cs_ib_fill(adev, &parser); + r = amdgpu_cs_pass1(&parser, data); if (r) - goto out; + goto error_fini; - r = amdgpu_cs_dependencies(adev, &parser); - if (r) { - DRM_ERROR("Failed in the dependencies handling %d!\n", r); - goto out; - } + r = amdgpu_cs_pass2(&parser); + if (r) + goto error_fini; r = amdgpu_cs_parser_bos(&parser, data); if (r) { @@ -1347,22 +1393,36 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) DRM_ERROR("Not enough memory for command submission!\n"); else if (r != -ERESTARTSYS && r != -EAGAIN) DRM_ERROR("Failed to process the buffer list %d!\n", r); - goto out; + goto error_fini; } - reserved_buffers = true; + r = amdgpu_cs_patch_jobs(&parser); + if (r) + goto error_backoff; + + r = amdgpu_cs_vm_handling(&parser); + if (r) + goto error_backoff; + + r = amdgpu_cs_sync_rings(&parser); + if (r) + goto error_backoff; trace_amdgpu_cs_ibs(&parser); - r = amdgpu_cs_vm_handling(&parser); + r = amdgpu_cs_submit(&parser, data); if (r) - goto out; + goto error_backoff; - r = amdgpu_cs_submit(&parser, cs); + amdgpu_cs_parser_fini(&parser); + return 0; -out: - amdgpu_cs_parser_fini(&parser, r, reserved_buffers); +error_backoff: + ttm_eu_backoff_reservation(&parser.ticket, &parser.validated); + mutex_unlock(&parser.bo_list->bo_list_mutex); +error_fini: + amdgpu_cs_parser_fini(&parser); return r; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.h index 30ecc4917f81..207e801c24ed 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.h @@ -27,6 +27,8 @@ #include "amdgpu_bo_list.h" #include "amdgpu_ring.h" +#define AMDGPU_CS_GANG_SIZE 4 + struct amdgpu_bo_va_mapping; struct amdgpu_cs_chunk { @@ -50,9 +52,11 @@ struct amdgpu_cs_parser { unsigned nchunks; struct amdgpu_cs_chunk *chunks; - /* scheduler job object */ - struct amdgpu_job *job; - struct drm_sched_entity *entity; + /* scheduler job objects */ + unsigned int gang_size; + struct drm_sched_entity *entities[AMDGPU_CS_GANG_SIZE]; + struct amdgpu_job *jobs[AMDGPU_CS_GANG_SIZE]; + struct amdgpu_job *gang_leader; /* buffer objects */ struct ww_acquire_ctx ticket; @@ -71,6 +75,8 @@ struct amdgpu_cs_parser { unsigned num_post_deps; struct amdgpu_cs_post_dep *post_deps; + + struct amdgpu_sync sync; }; int amdgpu_cs_find_mapping(struct amdgpu_cs_parser *parser, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c index 3ea48385fab3..d2139ac12159 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c @@ -315,7 +315,6 @@ static int amdgpu_ctx_init(struct amdgpu_ctx_mgr *mgr, int32_t priority, kref_init(&ctx->refcount); ctx->mgr = mgr; spin_lock_init(&ctx->ring_lock); - mutex_init(&ctx->lock); ctx->reset_counter = atomic_read(&mgr->adev->gpu_reset_counter); ctx->reset_counter_query = ctx->reset_counter; @@ -327,7 +326,10 @@ static int amdgpu_ctx_init(struct amdgpu_ctx_mgr *mgr, int32_t priority, if (r) return r; - ctx->stable_pstate = current_stable_pstate; + if (mgr->adev->pm.stable_pstate_ctx) + ctx->stable_pstate = mgr->adev->pm.stable_pstate_ctx->stable_pstate; + else + 
ctx->stable_pstate = current_stable_pstate; return 0; } @@ -407,7 +409,6 @@ static void amdgpu_ctx_fini(struct kref *ref) drm_dev_exit(idx); } - mutex_destroy(&ctx->lock); kfree(ctx); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h index cc7c8afff414..0fa0e56daf67 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h @@ -53,7 +53,6 @@ struct amdgpu_ctx { bool preamble_presented; int32_t init_priority; int32_t override_priority; - struct mutex lock; atomic_t guilty; unsigned long ras_counter_ce; unsigned long ras_counter_ue; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c index 6066aebf491c..de61a85c4b02 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c @@ -1954,8 +1954,6 @@ int amdgpu_debugfs_init(struct amdgpu_device *adev) return PTR_ERR(ent); } - debugfs_create_u32("amdgpu_reset_level", 0600, root, &adev->amdgpu_reset_level_mask); - /* Register debugfs entries for amdgpu_ttm */ amdgpu_ttm_debugfs_init(adev); amdgpu_debugfs_pm_init(adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 62b26f0e37b0..ad4e78728733 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -37,6 +37,7 @@ #include <linux/pci-p2pdma.h> #include <drm/drm_atomic_helper.h> +#include <drm/drm_fb_helper.h> #include <drm/drm_probe_helper.h> #include <drm/amdgpu_drm.h> #include <linux/vgaarb.h> @@ -1568,7 +1569,7 @@ static int amdgpu_device_check_arguments(struct amdgpu_device *adev) * @pdev: pci dev pointer * @state: vga_switcheroo state * - * Callback for the switcheroo driver. Suspends or resumes the + * Callback for the switcheroo driver. Suspends or resumes * the asics before or after it is powered up using ACPI methods. 
*/ static void amdgpu_switcheroo_set_state(struct pci_dev *pdev, @@ -2365,8 +2366,16 @@ static int amdgpu_device_ip_init(struct amdgpu_device *adev) } adev->ip_blocks[i].status.sw = true; - /* need to do gmc hw init early so we can allocate gpu mem */ - if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) { + if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON) { + /* need to do common hw init early so everything is set up for gmc */ + r = adev->ip_blocks[i].version->funcs->hw_init((void *)adev); + if (r) { + DRM_ERROR("hw_init %d failed %d\n", i, r); + goto init_failed; + } + adev->ip_blocks[i].status.hw = true; + } else if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) { + /* need to do gmc hw init early so we can allocate gpu mem */ /* Try to reserve bad pages early */ if (amdgpu_sriov_vf(adev)) amdgpu_virt_exchange_data(adev); @@ -2389,7 +2398,7 @@ static int amdgpu_device_ip_init(struct amdgpu_device *adev) adev->ip_blocks[i].status.hw = true; /* right after GMC hw init, we create CSA */ - if (amdgpu_mcbp || amdgpu_sriov_vf(adev)) { + if (amdgpu_mcbp) { r = amdgpu_allocate_static_csa(adev, &adev->virt.csa_obj, AMDGPU_GEM_DOMAIN_VRAM, AMDGPU_CSA_SIZE); @@ -2451,19 +2460,21 @@ static int amdgpu_device_ip_init(struct amdgpu_device *adev) */ if (adev->gmc.xgmi.num_physical_nodes > 1) { if (amdgpu_xgmi_add_device(adev) == 0) { - struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev); + if (!amdgpu_sriov_vf(adev)) { + struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev); + + if (!hive->reset_domain || + !amdgpu_reset_get_reset_domain(hive->reset_domain)) { + r = -ENOENT; + amdgpu_put_xgmi_hive(hive); + goto init_failed; + } - if (!hive->reset_domain || - !amdgpu_reset_get_reset_domain(hive->reset_domain)) { - r = -ENOENT; + /* Drop the early temporary reset domain we created for device */ + amdgpu_reset_put_reset_domain(adev->reset_domain); + adev->reset_domain = hive->reset_domain; amdgpu_put_xgmi_hive(hive); - goto init_failed; } - - /* Drop the early temporary reset domain we created for device */ - amdgpu_reset_put_reset_domain(adev->reset_domain); - adev->reset_domain = hive->reset_domain; - amdgpu_put_xgmi_hive(hive); } } @@ -2918,6 +2929,14 @@ static int amdgpu_device_ip_suspend_phase1(struct amdgpu_device *adev) amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE); amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE); + /* + * Per PMFW team's suggestion, driver needs to handle gfxoff + * and df cstate features disablement for gpu reset(e.g. Mode1Reset) + * scenario. Add the missing df cstate disablement here. 
+ */ + if (amdgpu_dpm_set_df_cstate(adev, DF_CSTATE_DISALLOW)) + dev_warn(adev->dev, "Failed to disallow df cstate"); + for (i = adev->num_ip_blocks - 1; i >= 0; i--) { if (!adev->ip_blocks[i].status.valid) continue; @@ -3052,8 +3071,8 @@ static int amdgpu_device_ip_reinit_early_sriov(struct amdgpu_device *adev) int i, r; static enum amd_ip_block_type ip_order[] = { - AMD_IP_BLOCK_TYPE_GMC, AMD_IP_BLOCK_TYPE_COMMON, + AMD_IP_BLOCK_TYPE_GMC, AMD_IP_BLOCK_TYPE_PSP, AMD_IP_BLOCK_TYPE_IH, }; @@ -3144,7 +3163,8 @@ static int amdgpu_device_ip_resume_phase1(struct amdgpu_device *adev) continue; if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON || adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC || - adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH) { + adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH || + (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP && amdgpu_sriov_vf(adev))) { r = adev->ip_blocks[i].version->funcs->resume(adev); if (r) { @@ -3191,6 +3211,15 @@ static int amdgpu_device_ip_resume_phase2(struct amdgpu_device *adev) return r; } adev->ip_blocks[i].status.hw = true; + + if (adev->in_s0ix && adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) { + /* disable gfxoff for IP resume. The gfxoff will be re-enabled in + * amdgpu_device_resume() after IP resume. + */ + amdgpu_gfx_off_ctrl(adev, false); + DRM_DEBUG("will disable gfxoff for re-initializing other blocks\n"); + } + } return 0; @@ -3501,6 +3530,7 @@ int amdgpu_device_init(struct amdgpu_device *adev, adev->gmc.gart_size = 512 * 1024 * 1024; adev->accel_working = false; adev->num_rings = 0; + RCU_INIT_POINTER(adev->gang_submit, dma_fence_get_stub()); adev->mman.buffer_funcs = NULL; adev->mman.buffer_funcs_ring = NULL; adev->vm_manager.vm_pte_funcs = NULL; @@ -3982,6 +4012,7 @@ void amdgpu_device_fini_sw(struct amdgpu_device *adev) release_firmware(adev->firmware.gpu_info_fw); adev->firmware.gpu_info_fw = NULL; adev->accel_working = false; + dma_fence_put(rcu_dereference_protected(adev->gang_submit, true)); amdgpu_reset_fini(adev); @@ -4030,15 +4061,18 @@ void amdgpu_device_fini_sw(struct amdgpu_device *adev) * at suspend time. 
* */ -static void amdgpu_device_evict_resources(struct amdgpu_device *adev) +static int amdgpu_device_evict_resources(struct amdgpu_device *adev) { + int ret; + /* No need to evict vram on APUs for suspend to ram or s2idle */ if ((adev->in_s3 || adev->in_s0ix) && (adev->flags & AMD_IS_APU)) - return; + return 0; - if (amdgpu_ttm_evict_resources(adev, TTM_PL_VRAM)) + ret = amdgpu_ttm_evict_resources(adev, TTM_PL_VRAM); + if (ret) DRM_WARN("evicting device resources failed\n"); - + return ret; } /* @@ -4057,12 +4091,20 @@ static void amdgpu_device_evict_resources(struct amdgpu_device *adev) int amdgpu_device_suspend(struct drm_device *dev, bool fbcon) { struct amdgpu_device *adev = drm_to_adev(dev); + int r = 0; if (dev->switch_power_state == DRM_SWITCH_POWER_OFF) return 0; adev->in_suspend = true; + if (amdgpu_sriov_vf(adev)) { + amdgpu_virt_fini_data_exchange(adev); + r = amdgpu_virt_request_full_gpu(adev, false); + if (r) + return r; + } + if (amdgpu_acpi_smart_shift_update(dev, AMDGPU_SS_DEV_D3)) DRM_WARN("smart shift update failed\n"); @@ -4080,12 +4122,17 @@ int amdgpu_device_suspend(struct drm_device *dev, bool fbcon) if (!adev->in_s0ix) amdgpu_amdkfd_suspend(adev, adev->in_runpm); - amdgpu_device_evict_resources(adev); + r = amdgpu_device_evict_resources(adev); + if (r) + return r; amdgpu_fence_driver_hw_fini(adev); amdgpu_device_ip_suspend_phase2(adev); + if (amdgpu_sriov_vf(adev)) + amdgpu_virt_release_full_gpu(adev, false); + return 0; } @@ -4104,6 +4151,12 @@ int amdgpu_device_resume(struct drm_device *dev, bool fbcon) struct amdgpu_device *adev = drm_to_adev(dev); int r = 0; + if (amdgpu_sriov_vf(adev)) { + r = amdgpu_virt_request_full_gpu(adev, true); + if (r) + return r; + } + if (dev->switch_power_state == DRM_SWITCH_POWER_OFF) return 0; @@ -4118,6 +4171,13 @@ int amdgpu_device_resume(struct drm_device *dev, bool fbcon) } r = amdgpu_device_ip_resume(adev); + + /* no matter what r is, always need to properly release full GPU */ + if (amdgpu_sriov_vf(adev)) { + amdgpu_virt_init_data_exchange(adev); + amdgpu_virt_release_full_gpu(adev, true); + } + if (r) { dev_err(adev->dev, "amdgpu_device_ip_resume failed (%d).\n", r); return r; @@ -4138,8 +4198,17 @@ int amdgpu_device_resume(struct drm_device *dev, bool fbcon) } /* Make sure IB tests flushed */ + if (amdgpu_sriov_vf(adev)) + amdgpu_irq_gpu_reset_resume_helper(adev); flush_delayed_work(&adev->delayed_init_work); + if (adev->in_s0ix) { + /* re-enable gfxoff after IP resume. This re-enables gfxoff after + * it was disabled for IP resume in amdgpu_device_ip_resume_phase2(). 
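Both halves of this gfxoff comment describe one request/release pairing: gfxoff is switched off while the other IP blocks are brought back up, then switched on again once resume has finished. A minimal, purely illustrative sketch of that pairing follows; the helper name and the work in the middle are placeholders, only amdgpu_gfx_off_ctrl() is the real (reference-counted) interface used by the hunks here, so every disable must be balanced by an enable.

#include "amdgpu.h"

/* Illustrative only: keep the GFX block out of gfxoff while running work
 * that cannot tolerate it, mirroring the S0ix resume pairing above. */
static void run_with_gfxoff_disabled(struct amdgpu_device *adev)
{
	amdgpu_gfx_off_ctrl(adev, false);	/* take a "no gfxoff" request */

	/* ... re-initialize blocks / touch GFX registers here ... */

	amdgpu_gfx_off_ctrl(adev, true);	/* drop the request again */
}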
+ */ + amdgpu_gfx_off_ctrl(adev, true); + DRM_DEBUG("will enable gfxoff for the mission mode\n"); + } if (fbcon) drm_fb_helper_set_suspend_unlocked(adev_to_drm(adev)->fb_helper, false); @@ -4739,6 +4808,7 @@ int amdgpu_do_asic_reset(struct list_head *device_list_handle, struct amdgpu_device *tmp_adev = NULL; bool need_full_reset, skip_hw_reset, vram_lost = false; int r = 0; + bool gpu_reset_for_dev_remove = 0; /* Try reset handler method first */ tmp_adev = list_first_entry(device_list_handle, struct amdgpu_device, @@ -4758,6 +4828,10 @@ int amdgpu_do_asic_reset(struct list_head *device_list_handle, test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags); skip_hw_reset = test_bit(AMDGPU_SKIP_HW_RESET, &reset_context->flags); + gpu_reset_for_dev_remove = + test_bit(AMDGPU_RESET_FOR_DEVICE_REMOVE, &reset_context->flags) && + test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags); + /* * ASIC reset has to be done on all XGMI hive nodes ASAP * to allow proper links negotiation in FW (within 1 sec) @@ -4802,6 +4876,18 @@ int amdgpu_do_asic_reset(struct list_head *device_list_handle, amdgpu_ras_intr_cleared(); } + /* Since the mode1 reset affects base ip blocks, the + * phase1 ip blocks need to be resumed. Otherwise there + * will be a BIOS signature error and the psp bootloader + * can't load kdb on the next amdgpu install. + */ + if (gpu_reset_for_dev_remove) { + list_for_each_entry(tmp_adev, device_list_handle, reset_list) + amdgpu_device_ip_resume_phase1(tmp_adev); + + goto end; + } + list_for_each_entry(tmp_adev, device_list_handle, reset_list) { if (need_full_reset) { /* post card */ @@ -5124,6 +5210,11 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev, bool need_emergency_restart = false; bool audio_suspended = false; int tmp_vram_lost_counter; + bool gpu_reset_for_dev_remove = false; + + gpu_reset_for_dev_remove = + test_bit(AMDGPU_RESET_FOR_DEVICE_REMOVE, &reset_context->flags) && + test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags); /* * Special case: RAS triggered and full reset isn't supported @@ -5151,7 +5242,6 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev, reset_context->job = job; reset_context->hive = hive; - /* * Build list of devices to reset. * In case we are in XGMI hive mode, resort the device list @@ -5159,8 +5249,11 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev, */ INIT_LIST_HEAD(&device_list); if (!amdgpu_sriov_vf(adev) && (adev->gmc.xgmi.num_physical_nodes > 1)) { - list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) + list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) { list_add_tail(&tmp_adev->reset_list, &device_list); + if (gpu_reset_for_dev_remove && adev->shutdown) + tmp_adev->shutdown = true; + } if (!list_is_first(&adev->reset_list, &device_list)) list_rotate_to_front(&adev->reset_list, &device_list); device_list_handle = &device_list; @@ -5243,6 +5336,10 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev, retry: /* Rest of adevs pre asic reset from XGMI hive. */ list_for_each_entry(tmp_adev, device_list_handle, reset_list) { + if (gpu_reset_for_dev_remove) { + /* Workaroud for ASICs need to disable SMC first */ + amdgpu_device_smu_fini_early(tmp_adev); + } r = amdgpu_device_pre_asic_reset(tmp_adev, reset_context); /*TODO Should we stop ?*/ if (r) { @@ -5271,11 +5368,11 @@ retry: /* Rest of adevs pre asic reset from XGMI hive. 
*/ amdgpu_ras_resume(adev); } else { r = amdgpu_do_asic_reset(device_list_handle, reset_context); - if (r && r == -EAGAIN) { - set_bit(AMDGPU_SKIP_MODE2_RESET, &reset_context->flags); - adev->asic_reset_res = 0; + if (r && r == -EAGAIN) goto retry; - } + + if (!r && gpu_reset_for_dev_remove) + goto recover_end; } skip_hw_reset: @@ -5308,7 +5405,7 @@ skip_hw_reset: drm_sched_start(&ring->sched, !tmp_adev->asic_reset_res); } - if (adev->enable_mes) + if (adev->enable_mes && adev->ip_versions[GC_HWIP][0] != IP_VERSION(11, 0, 3)) amdgpu_mes_self_test(tmp_adev); if (!drm_drv_uses_atomic_modeset(adev_to_drm(tmp_adev)) && !job_signaled) { @@ -5349,6 +5446,7 @@ skip_sched_resume: amdgpu_device_unset_mp1_state(tmp_adev); } +recover_end: tmp_adev = list_first_entry(device_list_handle, struct amdgpu_device, reset_list); amdgpu_device_unlock_reset_domain(tmp_adev->reset_domain); @@ -5531,9 +5629,9 @@ bool amdgpu_device_is_peer_accessible(struct amdgpu_device *adev, ~*peer_adev->dev->dma_mask : ~((1ULL << 32) - 1); resource_size_t aper_limit = adev->gmc.aper_base + adev->gmc.aper_size - 1; - bool p2p_access = !adev->gmc.xgmi.connected_to_cpu && - !(pci_p2pdma_distance_many(adev->pdev, - &peer_adev->dev, 1, true) < 0); + bool p2p_access = + !adev->gmc.xgmi.connected_to_cpu && + !(pci_p2pdma_distance(adev->pdev, peer_adev->dev, false) < 0); return pcie_p2p && p2p_access && (adev->gmc.visible_vram_size && adev->gmc.real_vram_size == adev->gmc.visible_vram_size && @@ -5707,7 +5805,6 @@ pci_ers_result_t amdgpu_pci_slot_reset(struct pci_dev *pdev) reset_context.reset_req_dev = adev; set_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags); set_bit(AMDGPU_SKIP_HW_RESET, &reset_context.flags); - set_bit(AMDGPU_SKIP_MODE2_RESET, &reset_context.flags); adev->no_hw_access = true; r = amdgpu_device_pre_asic_reset(adev, &reset_context); @@ -5917,3 +6014,36 @@ void amdgpu_device_pcie_port_wreg(struct amdgpu_device *adev, (void)RREG32(data); spin_unlock_irqrestore(&adev->pcie_idx_lock, flags); } + +/** + * amdgpu_device_switch_gang - switch to a new gang + * @adev: amdgpu_device pointer + * @gang: the gang to switch to + * + * Try to switch to a new gang. + * Returns: NULL if we switched to the new gang or a reference to the current + * gang leader. 
+ */ +struct dma_fence *amdgpu_device_switch_gang(struct amdgpu_device *adev, + struct dma_fence *gang) +{ + struct dma_fence *old = NULL; + + do { + dma_fence_put(old); + rcu_read_lock(); + old = dma_fence_get_rcu_safe(&adev->gang_submit); + rcu_read_unlock(); + + if (old == gang) + break; + + if (!dma_fence_is_signaled(old)) + return old; + + } while (cmpxchg((struct dma_fence __force **)&adev->gang_submit, + old, gang) != old); + + dma_fence_put(old); + return NULL; +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c index 9fa2a5ceb77d..3993e6134914 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c @@ -229,7 +229,7 @@ static int amdgpu_discovery_read_binary_from_file(struct amdgpu_device *adev, ui return r; } - memcpy((u8 *)binary, (u8 *)fw->data, adev->mman.discovery_tmr_size); + memcpy((u8 *)binary, (u8 *)fw->data, fw->size); release_firmware(fw); return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c index c20922a5af9f..311a8ea6f065 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c @@ -39,8 +39,8 @@ #include <linux/pm_runtime.h> #include <drm/drm_crtc_helper.h> #include <drm/drm_edid.h> -#include <drm/drm_gem_framebuffer_helper.h> #include <drm/drm_fb_helper.h> +#include <drm/drm_gem_framebuffer_helper.h> #include <drm/drm_fourcc.h> #include <drm/drm_vblank.h> @@ -1101,6 +1101,7 @@ static int amdgpu_display_gem_fb_verify_and_init(struct drm_device *dev, goto err; ret = drm_framebuffer_init(dev, &rfb->base, &amdgpu_fb_funcs); + if (ret) goto err; @@ -1213,7 +1214,6 @@ amdgpu_display_user_framebuffer_create(struct drm_device *dev, const struct drm_mode_config_funcs amdgpu_mode_funcs = { .fb_create = amdgpu_display_user_framebuffer_create, - .output_poll_changed = drm_fb_helper_output_poll_changed, }; static const struct drm_prop_enum_list amdgpu_underscan_enum_list[] = diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c index 782cbca37538..7bd8e33b14be 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c @@ -58,7 +58,7 @@ static int amdgpu_dma_buf_attach(struct dma_buf *dmabuf, struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); int r; - if (pci_p2pdma_distance_many(adev->pdev, &attach->dev, 1, true) < 0) + if (pci_p2pdma_distance(adev->pdev, attach->dev, false) < 0) attach->peer2peer = false; r = pm_runtime_get_sync(adev_to_drm(adev)->dev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 728a0933ea6f..6b35bb948f96 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -25,6 +25,7 @@ #include <drm/amdgpu_drm.h> #include <drm/drm_aperture.h> #include <drm/drm_drv.h> +#include <drm/drm_fbdev_generic.h> #include <drm/drm_gem.h> #include <drm/drm_vblank.h> #include <drm/drm_managed.h> @@ -38,6 +39,8 @@ #include <linux/mmu_notifier.h> #include <linux/suspend.h> #include <linux/cc_platform.h> +#include <linux/fb.h> +#include <linux/dynamic_debug.h> #include "amdgpu.h" #include "amdgpu_irq.h" @@ -102,9 +105,10 @@ * - 3.46.0 - To enable hot plug amdgpu tests in libdrm * - 3.47.0 - Add AMDGPU_GEM_CREATE_DISCARDABLE and AMDGPU_VM_NOALLOC flags * - 3.48.0 - Add IP discovery version info to HW INFO + * 3.49.0 - Add gang submit into CS IOCTL */ #define 
KMS_DRIVER_MAJOR 3 -#define KMS_DRIVER_MINOR 48 +#define KMS_DRIVER_MINOR 49 #define KMS_DRIVER_PATCHLEVEL 0 int amdgpu_vram_limit; @@ -185,6 +189,18 @@ int amdgpu_vcnfw_log; static void amdgpu_drv_delayed_reset_work_handler(struct work_struct *work); +DECLARE_DYNDBG_CLASSMAP(drm_debug_classes, DD_CLASS_TYPE_DISJOINT_BITS, 0, + "DRM_UT_CORE", + "DRM_UT_DRIVER", + "DRM_UT_KMS", + "DRM_UT_PRIME", + "DRM_UT_ATOMIC", + "DRM_UT_VBL", + "DRM_UT_STATE", + "DRM_UT_LEASE", + "DRM_UT_DP", + "DRM_UT_DRMRES"); + struct amdgpu_mgpu_info mgpu_info = { .mutex = __MUTEX_INITIALIZER(mgpu_info.mutex), .delayed_reset_work = __DELAYED_WORK_INITIALIZER( @@ -2186,6 +2202,38 @@ amdgpu_pci_remove(struct pci_dev *pdev) pm_runtime_forbid(dev->dev); } + if (adev->ip_versions[MP1_HWIP][0] == IP_VERSION(13, 0, 2) && + !amdgpu_sriov_vf(adev)) { + bool need_to_reset_gpu = false; + + if (adev->gmc.xgmi.num_physical_nodes > 1) { + struct amdgpu_hive_info *hive; + + hive = amdgpu_get_xgmi_hive(adev); + if (hive->device_remove_count == 0) + need_to_reset_gpu = true; + hive->device_remove_count++; + amdgpu_put_xgmi_hive(hive); + } else { + need_to_reset_gpu = true; + } + + /* Workaround for ASICs need to reset SMU. + * Called only when the first device is removed. + */ + if (need_to_reset_gpu) { + struct amdgpu_reset_context reset_context; + + adev->shutdown = true; + memset(&reset_context, 0, sizeof(reset_context)); + reset_context.method = AMD_RESET_METHOD_NONE; + reset_context.reset_req_dev = adev; + set_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags); + set_bit(AMDGPU_RESET_FOR_DEVICE_REMOVE, &reset_context.flags); + amdgpu_device_gpu_recover(adev, NULL, &reset_context); + } + } + amdgpu_driver_unload_kms(dev); drm_dev_unplug(dev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c index 8adeb7469f1e..d0d99ed607dd 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c @@ -400,7 +400,6 @@ unsigned amdgpu_fence_count_emitted(struct amdgpu_ring *ring) /* We are not protected by ring lock when reading the last sequence * but it's ok to report slightly wrong fence count here. 
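The amdgpu_fence_count_emitted() hunk just below drops the extra amdgpu_fence_process() call but keeps the wrap-safe sequence arithmetic. A standalone illustration of that arithmetic with made-up sequence numbers (plain C, not driver code; the function name is hypothetical):

#include <stdint.h>
#include <stdio.h>

/* Mirrors the "0x100000000ull - last_seq + sync_seq" computation: truncated
 * to 32 bits the result equals sync_seq - last_seq even across wrap-around. */
static uint32_t count_emitted(uint32_t sync_seq, uint32_t last_seq)
{
	uint64_t emitted = 0x100000000ull;

	emitted -= last_seq;	/* last sequence seen as signaled */
	emitted += sync_seq;	/* last sequence handed to the ring */
	return (uint32_t)emitted;
}

int main(void)
{
	printf("%u\n", count_emitted(110, 100));	/* prints 10 */
	printf("%u\n", count_emitted(0x5, 0xfffffffb));	/* prints 10, across the wrap */
	return 0;
}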
*/ - amdgpu_fence_process(ring); emitted = 0x100000000ull; emitted -= atomic_read(&ring->fence_drv.last_seq); emitted += READ_ONCE(ring->fence_drv.sync_seq); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c index ceb91469958a..9546adc8a76f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c @@ -23,6 +23,7 @@ * */ +#include <linux/firmware.h> #include "amdgpu.h" #include "amdgpu_gfx.h" #include "amdgpu_rlc.h" @@ -865,3 +866,142 @@ int amdgpu_gfx_get_num_kcq(struct amdgpu_device *adev) } return amdgpu_num_kcq; } + +void amdgpu_gfx_cp_init_microcode(struct amdgpu_device *adev, + uint32_t ucode_id) +{ + const struct gfx_firmware_header_v1_0 *cp_hdr; + const struct gfx_firmware_header_v2_0 *cp_hdr_v2_0; + struct amdgpu_firmware_info *info = NULL; + const struct firmware *ucode_fw; + unsigned int fw_size; + + switch (ucode_id) { + case AMDGPU_UCODE_ID_CP_PFP: + cp_hdr = (const struct gfx_firmware_header_v1_0 *) + adev->gfx.pfp_fw->data; + adev->gfx.pfp_fw_version = + le32_to_cpu(cp_hdr->header.ucode_version); + adev->gfx.pfp_feature_version = + le32_to_cpu(cp_hdr->ucode_feature_version); + ucode_fw = adev->gfx.pfp_fw; + fw_size = le32_to_cpu(cp_hdr->header.ucode_size_bytes); + break; + case AMDGPU_UCODE_ID_CP_RS64_PFP: + cp_hdr_v2_0 = (const struct gfx_firmware_header_v2_0 *) + adev->gfx.pfp_fw->data; + adev->gfx.pfp_fw_version = + le32_to_cpu(cp_hdr_v2_0->header.ucode_version); + adev->gfx.pfp_feature_version = + le32_to_cpu(cp_hdr_v2_0->ucode_feature_version); + ucode_fw = adev->gfx.pfp_fw; + fw_size = le32_to_cpu(cp_hdr_v2_0->ucode_size_bytes); + break; + case AMDGPU_UCODE_ID_CP_RS64_PFP_P0_STACK: + case AMDGPU_UCODE_ID_CP_RS64_PFP_P1_STACK: + cp_hdr_v2_0 = (const struct gfx_firmware_header_v2_0 *) + adev->gfx.pfp_fw->data; + ucode_fw = adev->gfx.pfp_fw; + fw_size = le32_to_cpu(cp_hdr_v2_0->data_size_bytes); + break; + case AMDGPU_UCODE_ID_CP_ME: + cp_hdr = (const struct gfx_firmware_header_v1_0 *) + adev->gfx.me_fw->data; + adev->gfx.me_fw_version = + le32_to_cpu(cp_hdr->header.ucode_version); + adev->gfx.me_feature_version = + le32_to_cpu(cp_hdr->ucode_feature_version); + ucode_fw = adev->gfx.me_fw; + fw_size = le32_to_cpu(cp_hdr->header.ucode_size_bytes); + break; + case AMDGPU_UCODE_ID_CP_RS64_ME: + cp_hdr_v2_0 = (const struct gfx_firmware_header_v2_0 *) + adev->gfx.me_fw->data; + adev->gfx.me_fw_version = + le32_to_cpu(cp_hdr_v2_0->header.ucode_version); + adev->gfx.me_feature_version = + le32_to_cpu(cp_hdr_v2_0->ucode_feature_version); + ucode_fw = adev->gfx.me_fw; + fw_size = le32_to_cpu(cp_hdr_v2_0->ucode_size_bytes); + break; + case AMDGPU_UCODE_ID_CP_RS64_ME_P0_STACK: + case AMDGPU_UCODE_ID_CP_RS64_ME_P1_STACK: + cp_hdr_v2_0 = (const struct gfx_firmware_header_v2_0 *) + adev->gfx.me_fw->data; + ucode_fw = adev->gfx.me_fw; + fw_size = le32_to_cpu(cp_hdr_v2_0->data_size_bytes); + break; + case AMDGPU_UCODE_ID_CP_CE: + cp_hdr = (const struct gfx_firmware_header_v1_0 *) + adev->gfx.ce_fw->data; + adev->gfx.ce_fw_version = + le32_to_cpu(cp_hdr->header.ucode_version); + adev->gfx.ce_feature_version = + le32_to_cpu(cp_hdr->ucode_feature_version); + ucode_fw = adev->gfx.ce_fw; + fw_size = le32_to_cpu(cp_hdr->header.ucode_size_bytes); + break; + case AMDGPU_UCODE_ID_CP_MEC1: + cp_hdr = (const struct gfx_firmware_header_v1_0 *) + adev->gfx.mec_fw->data; + adev->gfx.mec_fw_version = + le32_to_cpu(cp_hdr->header.ucode_version); + adev->gfx.mec_feature_version = + 
le32_to_cpu(cp_hdr->ucode_feature_version); + ucode_fw = adev->gfx.mec_fw; + fw_size = le32_to_cpu(cp_hdr->header.ucode_size_bytes) - + le32_to_cpu(cp_hdr->jt_size) * 4; + break; + case AMDGPU_UCODE_ID_CP_MEC1_JT: + cp_hdr = (const struct gfx_firmware_header_v1_0 *) + adev->gfx.mec_fw->data; + ucode_fw = adev->gfx.mec_fw; + fw_size = le32_to_cpu(cp_hdr->jt_size) * 4; + break; + case AMDGPU_UCODE_ID_CP_MEC2: + cp_hdr = (const struct gfx_firmware_header_v1_0 *) + adev->gfx.mec2_fw->data; + adev->gfx.mec2_fw_version = + le32_to_cpu(cp_hdr->header.ucode_version); + adev->gfx.mec2_feature_version = + le32_to_cpu(cp_hdr->ucode_feature_version); + ucode_fw = adev->gfx.mec2_fw; + fw_size = le32_to_cpu(cp_hdr->header.ucode_size_bytes) - + le32_to_cpu(cp_hdr->jt_size) * 4; + break; + case AMDGPU_UCODE_ID_CP_MEC2_JT: + cp_hdr = (const struct gfx_firmware_header_v1_0 *) + adev->gfx.mec2_fw->data; + ucode_fw = adev->gfx.mec2_fw; + fw_size = le32_to_cpu(cp_hdr->jt_size) * 4; + break; + case AMDGPU_UCODE_ID_CP_RS64_MEC: + cp_hdr_v2_0 = (const struct gfx_firmware_header_v2_0 *) + adev->gfx.mec_fw->data; + adev->gfx.mec_fw_version = + le32_to_cpu(cp_hdr_v2_0->header.ucode_version); + adev->gfx.mec_feature_version = + le32_to_cpu(cp_hdr_v2_0->ucode_feature_version); + ucode_fw = adev->gfx.mec_fw; + fw_size = le32_to_cpu(cp_hdr_v2_0->ucode_size_bytes); + break; + case AMDGPU_UCODE_ID_CP_RS64_MEC_P0_STACK: + case AMDGPU_UCODE_ID_CP_RS64_MEC_P1_STACK: + case AMDGPU_UCODE_ID_CP_RS64_MEC_P2_STACK: + case AMDGPU_UCODE_ID_CP_RS64_MEC_P3_STACK: + cp_hdr_v2_0 = (const struct gfx_firmware_header_v2_0 *) + adev->gfx.mec_fw->data; + ucode_fw = adev->gfx.mec_fw; + fw_size = le32_to_cpu(cp_hdr_v2_0->data_size_bytes); + break; + default: + break; + } + + if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { + info = &adev->firmware.ucode[ucode_id]; + info->ucode_id = ucode_id; + info->fw = ucode_fw; + adev->firmware.fw_size += ALIGN(fw_size, PAGE_SIZE); + } +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h index 8abdf41d0f83..832b3807f1d6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h @@ -304,6 +304,10 @@ struct amdgpu_gfx { uint32_t rlc_srlg_feature_version; uint32_t rlc_srls_fw_version; uint32_t rlc_srls_feature_version; + uint32_t rlcp_ucode_version; + uint32_t rlcp_ucode_feature_version; + uint32_t rlcv_ucode_version; + uint32_t rlcv_ucode_feature_version; uint32_t mec_feature_version; uint32_t mec2_feature_version; bool mec_fw_write_wait; @@ -422,4 +426,6 @@ int amdgpu_gfx_cp_ecc_error_irq(struct amdgpu_device *adev, uint32_t amdgpu_kiq_rreg(struct amdgpu_device *adev, uint32_t reg); void amdgpu_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v); int amdgpu_gfx_get_num_kcq(struct amdgpu_device *adev); +void amdgpu_gfx_cp_init_microcode(struct amdgpu_device *adev, uint32_t ucode_id); + #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c index aebc384531ac..9c0d9baab4e2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c @@ -542,6 +542,7 @@ void amdgpu_gmc_tmz_set(struct amdgpu_device *adev) case IP_VERSION(10, 3, 1): /* YELLOW_CARP*/ case IP_VERSION(10, 3, 3): + case IP_VERSION(11, 0, 1): /* Don't enable it by default yet. 
*/ if (amdgpu_tmz < 1) { @@ -572,45 +573,15 @@ void amdgpu_gmc_tmz_set(struct amdgpu_device *adev) void amdgpu_gmc_noretry_set(struct amdgpu_device *adev) { struct amdgpu_gmc *gmc = &adev->gmc; - - switch (adev->ip_versions[GC_HWIP][0]) { - case IP_VERSION(9, 0, 1): - case IP_VERSION(9, 3, 0): - case IP_VERSION(9, 4, 0): - case IP_VERSION(9, 4, 1): - case IP_VERSION(9, 4, 2): - case IP_VERSION(10, 3, 3): - case IP_VERSION(10, 3, 4): - case IP_VERSION(10, 3, 5): - case IP_VERSION(10, 3, 6): - case IP_VERSION(10, 3, 7): - /* - * noretry = 0 will cause kfd page fault tests fail - * for some ASICs, so set default to 1 for these ASICs. - */ - if (amdgpu_noretry == -1) - gmc->noretry = 1; - else - gmc->noretry = amdgpu_noretry; - break; - default: - /* Raven currently has issues with noretry - * regardless of what we decide for other - * asics, we should leave raven with - * noretry = 0 until we root cause the - * issues. - * - * default this to 0 for now, but we may want - * to change this in the future for certain - * GPUs as it can increase performance in - * certain cases. - */ - if (amdgpu_noretry == -1) - gmc->noretry = 0; - else - gmc->noretry = amdgpu_noretry; - break; - } + uint32_t gc_ver = adev->ip_versions[GC_HWIP][0]; + bool noretry_default = (gc_ver == IP_VERSION(9, 0, 1) || + gc_ver == IP_VERSION(9, 3, 0) || + gc_ver == IP_VERSION(9, 4, 0) || + gc_ver == IP_VERSION(9, 4, 1) || + gc_ver == IP_VERSION(9, 4, 2) || + gc_ver >= IP_VERSION(10, 3, 0)); + + gmc->noretry = (amdgpu_noretry == -1) ? noretry_default : amdgpu_noretry; } void amdgpu_gmc_set_vm_fault_masks(struct amdgpu_device *adev, int hub_type, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c index 1f3302aebeff..44367f03316f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c @@ -144,7 +144,7 @@ static int amdgpu_gtt_mgr_new(struct ttm_resource_manager *man, node->base.start = node->mm_nodes[0].start; } else { node->mm_nodes[0].start = 0; - node->mm_nodes[0].size = node->base.num_pages; + node->mm_nodes[0].size = PFN_UP(node->base.size); node->base.start = AMDGPU_BO_INVALID_OFFSET; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c index 258cffe3c06a..774c77bb8f4e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c @@ -182,7 +182,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs, need_ctx_switch = ring->current_ctx != fence_ctx; if (ring->funcs->emit_pipeline_sync && job && - ((tmp = amdgpu_sync_get_fence(&job->sched_sync)) || + ((tmp = amdgpu_sync_get_fence(&job->explicit_sync)) || (amdgpu_sriov_vf(adev) && need_ctx_switch) || amdgpu_vm_need_pipeline_sync(ring, job))) { need_pipe_sync = true; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c index 03d115d2b5ed..2a9a2593dc18 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c @@ -170,26 +170,27 @@ bool amdgpu_vmid_had_gpu_reset(struct amdgpu_device *adev, * * @vm: vm to allocate id for * @ring: ring we want to submit job to - * @sync: sync object where we add dependencies * @idle: resulting idle VMID + * @fence: fence to wait for if no id could be grabbed * * Try to find an idle VMID, if none is idle add a fence to wait to the sync * object. Returns -ENOMEM when we are out of memory. 
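The VMID-grab hunks that follow replace the amdgpu_sync out-parameter with a plain fence pointer: when no VMID can be assigned yet, the caller gets back a fence to wait on and retries later. A hedged sketch of the resulting caller loop, under the assumption that the caller holds a job without a VMID; the helper name is made up, the real user is amdgpu_job_prepare_job() further down in this patch.

#include <linux/err.h>
#include "amdgpu.h"

/* Sketch only: loop until the job owns a VMID or a blocking fence is
 * returned that the scheduler should wait on before trying again. */
static struct dma_fence *grab_vmid_or_block(struct amdgpu_vm *vm,
					    struct amdgpu_ring *ring,
					    struct amdgpu_job *job)
{
	struct dma_fence *fence = NULL;
	int r;

	while (!fence && !job->vmid) {
		r = amdgpu_vmid_grab(vm, ring, job, &fence);
		if (r)
			return ERR_PTR(r);
	}
	return fence;	/* NULL once job->vmid is valid */
}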
*/ static int amdgpu_vmid_grab_idle(struct amdgpu_vm *vm, struct amdgpu_ring *ring, - struct amdgpu_sync *sync, - struct amdgpu_vmid **idle) + struct amdgpu_vmid **idle, + struct dma_fence **fence) { struct amdgpu_device *adev = ring->adev; unsigned vmhub = ring->funcs->vmhub; struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub]; struct dma_fence **fences; unsigned i; - int r; - if (!dma_fence_is_signaled(ring->vmid_wait)) - return amdgpu_sync_fence(sync, ring->vmid_wait); + if (!dma_fence_is_signaled(ring->vmid_wait)) { + *fence = dma_fence_get(ring->vmid_wait); + return 0; + } fences = kmalloc_array(id_mgr->num_ids, sizeof(void *), GFP_KERNEL); if (!fences) @@ -228,10 +229,10 @@ static int amdgpu_vmid_grab_idle(struct amdgpu_vm *vm, return -ENOMEM; } - r = amdgpu_sync_fence(sync, &array->base); + *fence = dma_fence_get(&array->base); dma_fence_put(ring->vmid_wait); ring->vmid_wait = &array->base; - return r; + return 0; } kfree(fences); @@ -243,19 +244,17 @@ static int amdgpu_vmid_grab_idle(struct amdgpu_vm *vm, * * @vm: vm to allocate id for * @ring: ring we want to submit job to - * @sync: sync object where we add dependencies - * @fence: fence protecting ID from reuse * @job: job who wants to use the VMID * @id: resulting VMID + * @fence: fence to wait for if no id could be grabbed * * Try to assign a reserved VMID. */ static int amdgpu_vmid_grab_reserved(struct amdgpu_vm *vm, struct amdgpu_ring *ring, - struct amdgpu_sync *sync, - struct dma_fence *fence, struct amdgpu_job *job, - struct amdgpu_vmid **id) + struct amdgpu_vmid **id, + struct dma_fence **fence) { struct amdgpu_device *adev = ring->adev; unsigned vmhub = ring->funcs->vmhub; @@ -282,7 +281,8 @@ static int amdgpu_vmid_grab_reserved(struct amdgpu_vm *vm, tmp = amdgpu_sync_peek_fence(&(*id)->active, ring); if (tmp) { *id = NULL; - return amdgpu_sync_fence(sync, tmp); + *fence = dma_fence_get(tmp); + return 0; } needs_flush = true; } @@ -290,7 +290,7 @@ static int amdgpu_vmid_grab_reserved(struct amdgpu_vm *vm, /* Good we can use this VMID. Remember this submission as * user of the VMID. */ - r = amdgpu_sync_fence(&(*id)->active, fence); + r = amdgpu_sync_fence(&(*id)->active, &job->base.s_fence->finished); if (r) return r; @@ -304,19 +304,17 @@ static int amdgpu_vmid_grab_reserved(struct amdgpu_vm *vm, * * @vm: vm to allocate id for * @ring: ring we want to submit job to - * @sync: sync object where we add dependencies - * @fence: fence protecting ID from reuse * @job: job who wants to use the VMID * @id: resulting VMID + * @fence: fence to wait for if no id could be grabbed * * Try to reuse a VMID for this submission. */ static int amdgpu_vmid_grab_used(struct amdgpu_vm *vm, struct amdgpu_ring *ring, - struct amdgpu_sync *sync, - struct dma_fence *fence, struct amdgpu_job *job, - struct amdgpu_vmid **id) + struct amdgpu_vmid **id, + struct dma_fence **fence) { struct amdgpu_device *adev = ring->adev; unsigned vmhub = ring->funcs->vmhub; @@ -352,7 +350,8 @@ static int amdgpu_vmid_grab_used(struct amdgpu_vm *vm, /* Good, we can use this VMID. Remember this submission as * user of the VMID. 
*/ - r = amdgpu_sync_fence(&(*id)->active, fence); + r = amdgpu_sync_fence(&(*id)->active, + &job->base.s_fence->finished); if (r) return r; @@ -370,15 +369,13 @@ static int amdgpu_vmid_grab_used(struct amdgpu_vm *vm, * * @vm: vm to allocate id for * @ring: ring we want to submit job to - * @sync: sync object where we add dependencies - * @fence: fence protecting ID from reuse * @job: job who wants to use the VMID + * @fence: fence to wait for if no id could be grabbed * * Allocate an id for the vm, adding fences to the sync obj as necessary. */ int amdgpu_vmid_grab(struct amdgpu_vm *vm, struct amdgpu_ring *ring, - struct amdgpu_sync *sync, struct dma_fence *fence, - struct amdgpu_job *job) + struct amdgpu_job *job, struct dma_fence **fence) { struct amdgpu_device *adev = ring->adev; unsigned vmhub = ring->funcs->vmhub; @@ -388,16 +385,16 @@ int amdgpu_vmid_grab(struct amdgpu_vm *vm, struct amdgpu_ring *ring, int r = 0; mutex_lock(&id_mgr->lock); - r = amdgpu_vmid_grab_idle(vm, ring, sync, &idle); + r = amdgpu_vmid_grab_idle(vm, ring, &idle, fence); if (r || !idle) goto error; if (vm->reserved_vmid[vmhub]) { - r = amdgpu_vmid_grab_reserved(vm, ring, sync, fence, job, &id); + r = amdgpu_vmid_grab_reserved(vm, ring, job, &id, fence); if (r || !id) goto error; } else { - r = amdgpu_vmid_grab_used(vm, ring, sync, fence, job, &id); + r = amdgpu_vmid_grab_used(vm, ring, job, &id, fence); if (r) goto error; @@ -406,7 +403,8 @@ int amdgpu_vmid_grab(struct amdgpu_vm *vm, struct amdgpu_ring *ring, id = idle; /* Remember this submission as user of the VMID */ - r = amdgpu_sync_fence(&id->active, fence); + r = amdgpu_sync_fence(&id->active, + &job->base.s_fence->finished); if (r) goto error; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h index 06c8a0034fa5..57efe61dceed 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h @@ -84,8 +84,7 @@ void amdgpu_vmid_free_reserved(struct amdgpu_device *adev, struct amdgpu_vm *vm, unsigned vmhub); int amdgpu_vmid_grab(struct amdgpu_vm *vm, struct amdgpu_ring *ring, - struct amdgpu_sync *sync, struct dma_fence *fence, - struct amdgpu_job *job); + struct amdgpu_job *job, struct dma_fence **fence); void amdgpu_vmid_reset(struct amdgpu_device *adev, unsigned vmhub, unsigned vmid); void amdgpu_vmid_reset_all(struct amdgpu_device *adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c index 1062b7ed74ec..032651a655f0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c @@ -72,7 +72,6 @@ static enum drm_gpu_sched_stat amdgpu_job_timedout(struct drm_sched_job *s_job) reset_context.method = AMD_RESET_METHOD_NONE; reset_context.reset_req_dev = adev; clear_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags); - clear_bit(AMDGPU_SKIP_MODE2_RESET, &reset_context.flags); r = amdgpu_device_gpu_recover(ring->adev, job, &reset_context); if (r) @@ -89,8 +88,9 @@ exit: return DRM_GPU_SCHED_STAT_NOMINAL; } -int amdgpu_job_alloc(struct amdgpu_device *adev, unsigned num_ibs, - struct amdgpu_job **job, struct amdgpu_vm *vm) +int amdgpu_job_alloc(struct amdgpu_device *adev, struct amdgpu_vm *vm, + struct drm_sched_entity *entity, void *owner, + unsigned int num_ibs, struct amdgpu_job **job) { if (num_ibs == 0) return -EINVAL; @@ -105,33 +105,56 @@ int amdgpu_job_alloc(struct amdgpu_device *adev, unsigned num_ibs, */ (*job)->base.sched = &adev->rings[0]->sched; (*job)->vm = vm; - (*job)->num_ibs = num_ibs; 
- amdgpu_sync_create(&(*job)->sync); - amdgpu_sync_create(&(*job)->sched_sync); + amdgpu_sync_create(&(*job)->explicit_sync); (*job)->vram_lost_counter = atomic_read(&adev->vram_lost_counter); (*job)->vm_pd_addr = AMDGPU_BO_INVALID_OFFSET; - return 0; + if (!entity) + return 0; + + return drm_sched_job_init(&(*job)->base, entity, owner); } -int amdgpu_job_alloc_with_ib(struct amdgpu_device *adev, unsigned size, - enum amdgpu_ib_pool_type pool_type, - struct amdgpu_job **job) +int amdgpu_job_alloc_with_ib(struct amdgpu_device *adev, + struct drm_sched_entity *entity, void *owner, + size_t size, enum amdgpu_ib_pool_type pool_type, + struct amdgpu_job **job) { int r; - r = amdgpu_job_alloc(adev, 1, job, NULL); + r = amdgpu_job_alloc(adev, NULL, entity, owner, 1, job); if (r) return r; + (*job)->num_ibs = 1; r = amdgpu_ib_get(adev, NULL, size, pool_type, &(*job)->ibs[0]); - if (r) + if (r) { + if (entity) + drm_sched_job_cleanup(&(*job)->base); kfree(*job); + } return r; } +void amdgpu_job_set_resources(struct amdgpu_job *job, struct amdgpu_bo *gds, + struct amdgpu_bo *gws, struct amdgpu_bo *oa) +{ + if (gds) { + job->gds_base = amdgpu_bo_gpu_offset(gds) >> PAGE_SHIFT; + job->gds_size = amdgpu_bo_size(gds) >> PAGE_SHIFT; + } + if (gws) { + job->gws_base = amdgpu_bo_gpu_offset(gws) >> PAGE_SHIFT; + job->gws_size = amdgpu_bo_size(gws) >> PAGE_SHIFT; + } + if (oa) { + job->oa_base = amdgpu_bo_gpu_offset(oa) >> PAGE_SHIFT; + job->oa_size = amdgpu_bo_size(oa) >> PAGE_SHIFT; + } +} + void amdgpu_job_free_resources(struct amdgpu_job *job) { struct amdgpu_ring *ring = to_amdgpu_ring(job->base.sched); @@ -150,17 +173,35 @@ static void amdgpu_job_free_cb(struct drm_sched_job *s_job) drm_sched_job_cleanup(s_job); - amdgpu_sync_free(&job->sync); - amdgpu_sync_free(&job->sched_sync); - + amdgpu_sync_free(&job->explicit_sync); dma_fence_put(&job->hw_fence); } +void amdgpu_job_set_gang_leader(struct amdgpu_job *job, + struct amdgpu_job *leader) +{ + struct dma_fence *fence = &leader->base.s_fence->scheduled; + + WARN_ON(job->gang_submit); + + /* + * Don't add a reference when we are the gang leader to avoid circle + * dependency. 
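Taken together with the amdgpu_job_alloc()/amdgpu_job_submit() changes in this file, gang submission boils down to: allocate one job per entity, point every member (including the leader) at the leader's scheduled fence, and push the leader last. A hedged sketch of that flow with a two-job gang; the function name, entities, owner and vm arguments are placeholders and IB setup is omitted, the real user is the reworked CS IOCTL earlier in this patch.

#include "amdgpu.h"

/* Illustrative two-job gang; error handling trimmed to the essentials. */
static int submit_two_job_gang(struct amdgpu_device *adev, struct amdgpu_vm *vm,
			       struct drm_sched_entity *gfx_entity,
			       struct drm_sched_entity *compute_entity,
			       void *owner)
{
	struct amdgpu_job *leader, *member;
	int r;

	r = amdgpu_job_alloc(adev, vm, gfx_entity, owner, 1, &leader);
	if (r)
		return r;
	r = amdgpu_job_alloc(adev, vm, compute_entity, owner, 1, &member);
	if (r) {
		amdgpu_job_free(leader);
		return r;
	}

	/* Every gang member references the leader's scheduled fence; the
	 * leader itself takes no extra reference (see the comment above). */
	amdgpu_job_set_gang_leader(member, leader);
	amdgpu_job_set_gang_leader(leader, leader);

	/* IB setup omitted. Push members first, the leader last; drop the
	 * returned finished fences since this sketch does not track them. */
	dma_fence_put(amdgpu_job_submit(member));
	dma_fence_put(amdgpu_job_submit(leader));
	return 0;
}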
+ */ + if (job != leader) + dma_fence_get(fence); + job->gang_submit = fence; +} + void amdgpu_job_free(struct amdgpu_job *job) { + if (job->base.entity) + drm_sched_job_cleanup(&job->base); + amdgpu_job_free_resources(job); - amdgpu_sync_free(&job->sync); - amdgpu_sync_free(&job->sched_sync); + amdgpu_sync_free(&job->explicit_sync); + if (job->gang_submit != &job->base.s_fence->scheduled) + dma_fence_put(job->gang_submit); if (!job->hw_fence.ops) kfree(job); @@ -168,25 +209,16 @@ void amdgpu_job_free(struct amdgpu_job *job) dma_fence_put(&job->hw_fence); } -int amdgpu_job_submit(struct amdgpu_job *job, struct drm_sched_entity *entity, - void *owner, struct dma_fence **f) +struct dma_fence *amdgpu_job_submit(struct amdgpu_job *job) { - int r; - - if (!f) - return -EINVAL; - - r = drm_sched_job_init(&job->base, entity, owner); - if (r) - return r; + struct dma_fence *f; drm_sched_job_arm(&job->base); - - *f = dma_fence_get(&job->base.s_fence->finished); + f = dma_fence_get(&job->base.s_fence->finished); amdgpu_job_free_resources(job); drm_sched_entity_push_job(&job->base); - return 0; + return f; } int amdgpu_job_submit_direct(struct amdgpu_job *job, struct amdgpu_ring *ring, @@ -204,38 +236,31 @@ int amdgpu_job_submit_direct(struct amdgpu_job *job, struct amdgpu_ring *ring, return 0; } -static struct dma_fence *amdgpu_job_dependency(struct drm_sched_job *sched_job, - struct drm_sched_entity *s_entity) +static struct dma_fence * +amdgpu_job_prepare_job(struct drm_sched_job *sched_job, + struct drm_sched_entity *s_entity) { struct amdgpu_ring *ring = to_amdgpu_ring(s_entity->rq->sched); struct amdgpu_job *job = to_amdgpu_job(sched_job); - struct amdgpu_vm *vm = job->vm; - struct dma_fence *fence; + struct dma_fence *fence = NULL; int r; - fence = amdgpu_sync_get_fence(&job->sync); - if (fence && drm_sched_dependency_optimized(fence, s_entity)) { - r = amdgpu_sync_fence(&job->sched_sync, fence); - if (r) - DRM_ERROR("Error adding fence (%d)\n", r); - } - - while (fence == NULL && vm && !job->vmid) { - r = amdgpu_vmid_grab(vm, ring, &job->sync, - &job->base.s_fence->finished, - job); + while (!fence && job->vm && !job->vmid) { + r = amdgpu_vmid_grab(job->vm, ring, job, &fence); if (r) DRM_ERROR("Error getting VM ID (%d)\n", r); - - fence = amdgpu_sync_get_fence(&job->sync); } + if (!fence && job->gang_submit) + fence = amdgpu_device_switch_gang(ring->adev, job->gang_submit); + return fence; } static struct dma_fence *amdgpu_job_run(struct drm_sched_job *sched_job) { struct amdgpu_ring *ring = to_amdgpu_ring(sched_job->sched); + struct amdgpu_device *adev = ring->adev; struct dma_fence *fence = NULL, *finished; struct amdgpu_job *job; int r = 0; @@ -243,12 +268,12 @@ static struct dma_fence *amdgpu_job_run(struct drm_sched_job *sched_job) job = to_amdgpu_job(sched_job); finished = &job->base.s_fence->finished; - BUG_ON(amdgpu_sync_peek_fence(&job->sync, NULL)); - trace_amdgpu_sched_run_job(job); - if (job->vram_lost_counter != atomic_read(&ring->adev->vram_lost_counter)) - dma_fence_set_error(finished, -ECANCELED);/* skip IB as well if VRAM lost */ + /* Skip job if VRAM is lost and never resubmit gangs */ + if (job->vram_lost_counter != atomic_read(&adev->vram_lost_counter) || + (job->job_run_counter && job->gang_submit)) + dma_fence_set_error(finished, -ECANCELED); if (finished->error < 0) { DRM_INFO("Skip scheduling IBs!\n"); @@ -301,7 +326,7 @@ void amdgpu_job_stop_all_jobs_on_sched(struct drm_gpu_scheduler *sched) } const struct drm_sched_backend_ops amdgpu_sched_ops = { - .dependency = 
amdgpu_job_dependency, + .prepare_job = amdgpu_job_prepare_job, .run_job = amdgpu_job_run, .timedout_job = amdgpu_job_timedout, .free_job = amdgpu_job_free_cb diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h index babc0af751c2..a372802ea4e0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h @@ -47,9 +47,9 @@ enum amdgpu_ib_pool_type; struct amdgpu_job { struct drm_sched_job base; struct amdgpu_vm *vm; - struct amdgpu_sync sync; - struct amdgpu_sync sched_sync; + struct amdgpu_sync explicit_sync; struct dma_fence hw_fence; + struct dma_fence *gang_submit; uint32_t preamble_status; uint32_t preemption_status; bool vm_needs_flush; @@ -72,14 +72,25 @@ struct amdgpu_job { struct amdgpu_ib ibs[]; }; -int amdgpu_job_alloc(struct amdgpu_device *adev, unsigned num_ibs, - struct amdgpu_job **job, struct amdgpu_vm *vm); -int amdgpu_job_alloc_with_ib(struct amdgpu_device *adev, unsigned size, - enum amdgpu_ib_pool_type pool, struct amdgpu_job **job); +static inline struct amdgpu_ring *amdgpu_job_ring(struct amdgpu_job *job) +{ + return to_amdgpu_ring(job->base.entity->rq->sched); +} + +int amdgpu_job_alloc(struct amdgpu_device *adev, struct amdgpu_vm *vm, + struct drm_sched_entity *entity, void *owner, + unsigned int num_ibs, struct amdgpu_job **job); +int amdgpu_job_alloc_with_ib(struct amdgpu_device *adev, + struct drm_sched_entity *entity, void *owner, + size_t size, enum amdgpu_ib_pool_type pool_type, + struct amdgpu_job **job); +void amdgpu_job_set_resources(struct amdgpu_job *job, struct amdgpu_bo *gds, + struct amdgpu_bo *gws, struct amdgpu_bo *oa); void amdgpu_job_free_resources(struct amdgpu_job *job); +void amdgpu_job_set_gang_leader(struct amdgpu_job *job, + struct amdgpu_job *leader); void amdgpu_job_free(struct amdgpu_job *job); -int amdgpu_job_submit(struct amdgpu_job *job, struct drm_sched_entity *entity, - void *owner, struct dma_fence **f); +struct dma_fence *amdgpu_job_submit(struct amdgpu_job *job); int amdgpu_job_submit_direct(struct amdgpu_job *job, struct amdgpu_ring *ring, struct dma_fence **fence); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.c index 518eb0e40d32..de182bfcf85f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.c @@ -150,14 +150,15 @@ static int amdgpu_jpeg_dec_set_reg(struct amdgpu_ring *ring, uint32_t handle, const unsigned ib_size_dw = 16; int i, r; - r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4, - AMDGPU_IB_POOL_DIRECT, &job); + r = amdgpu_job_alloc_with_ib(ring->adev, NULL, NULL, ib_size_dw * 4, + AMDGPU_IB_POOL_DIRECT, &job); if (r) return r; ib = &job->ibs[0]; - ib->ptr[0] = PACKETJ(adev->jpeg.internal.jpeg_pitch, 0, 0, PACKETJ_TYPE0); + ib->ptr[0] = PACKETJ(adev->jpeg.internal.jpeg_pitch, 0, 0, + PACKETJ_TYPE0); ib->ptr[1] = 0xDEADBEEF; for (i = 2; i < 16; i += 2) { ib->ptr[i] = PACKETJ(0, 0, 0, PACKETJ_TYPE6); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c index 77668c3dae5b..ba348046fcec 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c @@ -29,6 +29,7 @@ #include "amdgpu.h" #include <drm/amdgpu_drm.h> #include <drm/drm_drv.h> +#include <drm/drm_fb_helper.h> #include "amdgpu_uvd.h" #include "amdgpu_vce.h" #include "atom.h" @@ -247,6 +248,14 @@ static int amdgpu_firmware_info(struct drm_amdgpu_info_firmware *fw_info, fw_info->ver = 
adev->gfx.rlc_srls_fw_version; fw_info->feature = adev->gfx.rlc_srls_feature_version; break; + case AMDGPU_INFO_FW_GFX_RLCP: + fw_info->ver = adev->gfx.rlcp_ucode_version; + fw_info->feature = adev->gfx.rlcp_ucode_feature_version; + break; + case AMDGPU_INFO_FW_GFX_RLCV: + fw_info->ver = adev->gfx.rlcv_ucode_version; + fw_info->feature = adev->gfx.rlcv_ucode_feature_version; + break; case AMDGPU_INFO_FW_GFX_MEC: if (query_fw->index == 0) { fw_info->ver = adev->gfx.mec_fw_version; @@ -328,6 +337,20 @@ static int amdgpu_firmware_info(struct drm_amdgpu_info_firmware *fw_info, fw_info->ver = adev->psp.cap_fw_version; fw_info->feature = adev->psp.cap_feature_version; break; + case AMDGPU_INFO_FW_MES_KIQ: + fw_info->ver = adev->mes.kiq_version & AMDGPU_MES_VERSION_MASK; + fw_info->feature = (adev->mes.kiq_version & AMDGPU_MES_FEAT_VERSION_MASK) + >> AMDGPU_MES_FEAT_VERSION_SHIFT; + break; + case AMDGPU_INFO_FW_MES: + fw_info->ver = adev->mes.sched_version & AMDGPU_MES_VERSION_MASK; + fw_info->feature = (adev->mes.sched_version & AMDGPU_MES_FEAT_VERSION_MASK) + >> AMDGPU_MES_FEAT_VERSION_SHIFT; + break; + case AMDGPU_INFO_FW_IMU: + fw_info->ver = adev->gfx.imu_fw_version; + fw_info->feature = 0; + break; default: return -EINVAL; } @@ -774,7 +797,7 @@ int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) dev_info->ids_flags = 0; if (adev->flags & AMD_IS_APU) dev_info->ids_flags |= AMDGPU_IDS_FLAGS_FUSION; - if (amdgpu_mcbp || amdgpu_sriov_vf(adev)) + if (amdgpu_mcbp) dev_info->ids_flags |= AMDGPU_IDS_FLAGS_PREEMPTION; if (amdgpu_is_tmz(adev)) dev_info->ids_flags |= AMDGPU_IDS_FLAGS_TMZ; @@ -1150,7 +1173,7 @@ int amdgpu_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv) goto error_vm; } - if (amdgpu_mcbp || amdgpu_sriov_vf(adev)) { + if (amdgpu_mcbp) { uint64_t csa_addr = amdgpu_csa_vaddr(adev) & AMDGPU_GMC_HOLE_MASK; r = amdgpu_map_static_csa(adev, &fpriv->vm, adev->virt.csa_obj, @@ -1214,7 +1237,7 @@ void amdgpu_driver_postclose_kms(struct drm_device *dev, if (amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_VCE) != NULL) amdgpu_vce_free_handles(adev, file_priv); - if (amdgpu_mcbp || amdgpu_sriov_vf(adev)) { + if (amdgpu_mcbp) { /* TODO: how to handle reserve failure */ BUG_ON(amdgpu_bo_reserve(adev->virt.csa_obj, true)); amdgpu_vm_bo_del(adev, fpriv->csa_va); @@ -1469,6 +1492,22 @@ static int amdgpu_debugfs_firmware_info_show(struct seq_file *m, void *unused) seq_printf(m, "RLC SRLS feature version: %u, firmware version: 0x%08x\n", fw_info.feature, fw_info.ver); + /* RLCP */ + query_fw.fw_type = AMDGPU_INFO_FW_GFX_RLCP; + ret = amdgpu_firmware_info(&fw_info, &query_fw, adev); + if (ret) + return ret; + seq_printf(m, "RLCP feature version: %u, firmware version: 0x%08x\n", + fw_info.feature, fw_info.ver); + + /* RLCV */ + query_fw.fw_type = AMDGPU_INFO_FW_GFX_RLCV; + ret = amdgpu_firmware_info(&fw_info, &query_fw, adev); + if (ret) + return ret; + seq_printf(m, "RLCV feature version: %u, firmware version: 0x%08x\n", + fw_info.feature, fw_info.ver); + /* MEC */ query_fw.fw_type = AMDGPU_INFO_FW_GFX_MEC; query_fw.index = 0; @@ -1488,6 +1527,15 @@ static int amdgpu_debugfs_firmware_info_show(struct seq_file *m, void *unused) fw_info.feature, fw_info.ver); } + /* IMU */ + query_fw.fw_type = AMDGPU_INFO_FW_IMU; + query_fw.index = 0; + ret = amdgpu_firmware_info(&fw_info, &query_fw, adev); + if (ret) + return ret; + seq_printf(m, "IMU feature version: %u, firmware version: 0x%08x\n", + fw_info.feature, fw_info.ver); + /* PSP SOS */ query_fw.fw_type = 
AMDGPU_INFO_FW_SOS; ret = amdgpu_firmware_info(&fw_info, &query_fw, adev); @@ -1581,6 +1629,22 @@ static int amdgpu_debugfs_firmware_info_show(struct seq_file *m, void *unused) fw_info.feature, fw_info.ver); } + /* MES_KIQ */ + query_fw.fw_type = AMDGPU_INFO_FW_MES_KIQ; + ret = amdgpu_firmware_info(&fw_info, &query_fw, adev); + if (ret) + return ret; + seq_printf(m, "MES_KIQ feature version: %u, firmware version: 0x%08x\n", + fw_info.feature, fw_info.ver); + + /* MES */ + query_fw.fw_type = AMDGPU_INFO_FW_MES; + ret = amdgpu_firmware_info(&fw_info, &query_fw, adev); + if (ret) + return ret; + seq_printf(m, "MES feature version: %u, firmware version: 0x%08x\n", + fw_info.feature, fw_info.ver); + seq_printf(m, "VBIOS version: %s\n", ctx->vbios_version); return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h index 7b46f6bf4187..97c05d08a551 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h @@ -91,14 +91,12 @@ struct amdgpu_mes { struct amdgpu_bo *ucode_fw_obj[AMDGPU_MAX_MES_PIPES]; uint64_t ucode_fw_gpu_addr[AMDGPU_MAX_MES_PIPES]; uint32_t *ucode_fw_ptr[AMDGPU_MAX_MES_PIPES]; - uint32_t ucode_fw_version[AMDGPU_MAX_MES_PIPES]; uint64_t uc_start_addr[AMDGPU_MAX_MES_PIPES]; /* mes ucode data */ struct amdgpu_bo *data_fw_obj[AMDGPU_MAX_MES_PIPES]; uint64_t data_fw_gpu_addr[AMDGPU_MAX_MES_PIPES]; uint32_t *data_fw_ptr[AMDGPU_MAX_MES_PIPES]; - uint32_t data_fw_version[AMDGPU_MAX_MES_PIPES]; uint64_t data_start_addr[AMDGPU_MAX_MES_PIPES]; /* eop gpu obj */ @@ -222,6 +220,8 @@ struct mes_add_queue_input { uint64_t tba_addr; uint64_t tma_addr; uint32_t is_kfd_process; + uint32_t is_aql_queue; + uint32_t queue_size; }; struct mes_remove_queue_input { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h index 37322550d750..8a39300b1a84 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h @@ -36,7 +36,6 @@ #include <drm/drm_encoder.h> #include <drm/drm_fixed.h> #include <drm/drm_crtc_helper.h> -#include <drm/drm_fb_helper.h> #include <drm/drm_framebuffer.h> #include <drm/drm_probe_helper.h> #include <linux/i2c.h> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c index e6a9b9fc9e0b..974e85d8b6cc 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c @@ -542,6 +542,7 @@ int amdgpu_bo_create(struct amdgpu_device *adev, /* GWS and OA don't need any alignment. */ page_align = bp->byte_align; size <<= PAGE_SHIFT; + } else if (bp->domain & AMDGPU_GEM_DOMAIN_GDS) { /* Both size and alignment must be a multiple of 4. */ page_align = ALIGN(bp->byte_align, 4); @@ -688,13 +689,16 @@ int amdgpu_bo_create_vm(struct amdgpu_device *adev, * num of amdgpu_vm_pt entries. */ BUG_ON(bp->bo_ptr_size < sizeof(struct amdgpu_bo_vm)); - bp->destroy = &amdgpu_bo_vm_destroy; r = amdgpu_bo_create(adev, bp, &bo_ptr); if (r) return r; *vmbo_ptr = to_amdgpu_bo_vm(bo_ptr); INIT_LIST_HEAD(&(*vmbo_ptr)->shadow_list); + /* Set destroy callback to amdgpu_bo_vm_destroy after vmbo->shadow_list + * is initialized. 
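The amdgpu_bo_create_vm() hunk above is an instance of a small but easy-to-miss ordering rule: finish initializing everything a teardown callback may touch before publishing that callback. A generic, self-contained sketch of the pattern; the widget type and names are illustrative, not driver API.

#include <linux/list.h>

struct widget {
	struct list_head entries;
	void (*destroy)(struct widget *w);
};

/* Initialize every field destroy() may dereference, and only then install
 * the callback that makes the object reachable for teardown. */
static void widget_setup(struct widget *w, void (*destroy)(struct widget *w))
{
	INIT_LIST_HEAD(&w->entries);
	w->destroy = destroy;
}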
+ */ + bo_ptr->tbo.destroy = &amdgpu_bo_vm_destroy; return r; } @@ -773,7 +777,7 @@ int amdgpu_bo_kmap(struct amdgpu_bo *bo, void **ptr) return 0; } - r = ttm_bo_kmap(&bo->tbo, 0, bo->tbo.resource->num_pages, &bo->kmap); + r = ttm_bo_kmap(&bo->tbo, 0, PFN_UP(bo->tbo.base.size), &bo->kmap); if (r) return r; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c index cfcaf890a6a1..2fcb5bfbef89 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c @@ -511,6 +511,11 @@ static int psp_sw_fini(void *handle) kfree(cmd); cmd = NULL; + if (psp->km_ring.ring_mem) + amdgpu_bo_free_kernel(&adev->firmware.rbuf, + &psp->km_ring.ring_mem_mc_addr, + (void **)&psp->km_ring.ring_mem); + amdgpu_bo_free_kernel(&psp->fw_pri_bo, &psp->fw_pri_mc_addr, &psp->fw_pri_buf); amdgpu_bo_free_kernel(&psp->fence_buf_bo, @@ -766,7 +771,7 @@ static int psp_tmr_init(struct psp_context *psp) } pptr = amdgpu_sriov_vf(psp->adev) ? &tmr_buf : NULL; - ret = amdgpu_bo_create_kernel(psp->adev, tmr_size, PSP_TMR_SIZE(psp->adev), + ret = amdgpu_bo_create_kernel(psp->adev, tmr_size, PSP_TMR_ALIGNMENT, AMDGPU_GEM_DOMAIN_VRAM, &psp->tmr_bo, &psp->tmr_mc_addr, pptr); @@ -983,6 +988,8 @@ int psp_ta_unload(struct psp_context *psp, struct ta_context *context) ret = psp_cmd_submit_buf(psp, NULL, cmd, psp->fence_buf_mc_addr); + context->resp_status = cmd->resp.status; + release_psp_cmd_buf(psp); return ret; @@ -1064,42 +1071,6 @@ int psp_ta_init_shared_buf(struct psp_context *psp, &mem_ctx->shared_buf); } -static void psp_prep_ta_invoke_indirect_cmd_buf(struct psp_gfx_cmd_resp *cmd, - uint32_t ta_cmd_id, - struct ta_context *context) -{ - cmd->cmd_id = GFX_CMD_ID_INVOKE_CMD; - cmd->cmd.cmd_invoke_cmd.session_id = context->session_id; - cmd->cmd.cmd_invoke_cmd.ta_cmd_id = ta_cmd_id; - - cmd->cmd.cmd_invoke_cmd.buf.num_desc = 1; - cmd->cmd.cmd_invoke_cmd.buf.total_size = context->mem_context.shared_mem_size; - cmd->cmd.cmd_invoke_cmd.buf.buf_desc[0].buf_size = context->mem_context.shared_mem_size; - cmd->cmd.cmd_invoke_cmd.buf.buf_desc[0].buf_phy_addr_lo = - lower_32_bits(context->mem_context.shared_mc_addr); - cmd->cmd.cmd_invoke_cmd.buf.buf_desc[0].buf_phy_addr_hi = - upper_32_bits(context->mem_context.shared_mc_addr); -} - -int psp_ta_invoke_indirect(struct psp_context *psp, - uint32_t ta_cmd_id, - struct ta_context *context) -{ - int ret; - struct psp_gfx_cmd_resp *cmd = acquire_psp_cmd_buf(psp); - - psp_prep_ta_invoke_indirect_cmd_buf(cmd, ta_cmd_id, context); - - ret = psp_cmd_submit_buf(psp, NULL, cmd, - psp->fence_buf_mc_addr); - - context->resp_status = cmd->resp.status; - - release_psp_cmd_buf(psp); - - return ret; -} - static void psp_prep_ta_invoke_cmd_buf(struct psp_gfx_cmd_resp *cmd, uint32_t ta_cmd_id, uint32_t session_id) @@ -1542,7 +1513,7 @@ int psp_ras_terminate(struct psp_context *psp) return ret; } -static int psp_ras_initialize(struct psp_context *psp) +int psp_ras_initialize(struct psp_context *psp) { int ret; uint32_t boot_cfg = 0xFF; @@ -1555,6 +1526,11 @@ static int psp_ras_initialize(struct psp_context *psp) if (amdgpu_sriov_vf(adev)) return 0; + if (psp->ras_context.context.initialized) { + dev_warn(adev->dev, "RAS WARN: TA has already been loaded\n"); + return 0; + } + if (!adev->psp.ras_context.context.bin_desc.size_bytes || !adev->psp.ras_context.context.bin_desc.start_addr) { dev_info(adev->dev, "RAS: optional ras ta ucode is not available\n"); @@ -1605,7 +1581,7 @@ static int psp_ras_initialize(struct psp_context *psp) 
psp->ras_context.context.mem_context.shared_mem_size = PSP_RAS_SHARED_MEM_SIZE; psp->ras_context.context.ta_load_type = GFX_CMD_ID_LOAD_TA; - if (!psp->ras_context.context.initialized) { + if (!psp->ras_context.context.mem_context.shared_buf) { ret = psp_ta_init_shared_buf(psp, &psp->ras_context.context.mem_context); if (ret) return ret; @@ -1626,7 +1602,6 @@ static int psp_ras_initialize(struct psp_context *psp) else { if (ras_cmd->ras_status) dev_warn(psp->adev->dev, "RAS Init Status: 0x%X\n", ras_cmd->ras_status); - amdgpu_ras_fini(psp->adev); } return ret; @@ -1933,10 +1908,15 @@ static int psp_securedisplay_initialize(struct psp_context *psp) } else return ret; + mutex_lock(&psp->securedisplay_context.mutex); + psp_prep_securedisplay_cmd_buf(psp, &securedisplay_cmd, TA_SECUREDISPLAY_COMMAND__QUERY_TA); ret = psp_securedisplay_invoke(psp, TA_SECUREDISPLAY_COMMAND__QUERY_TA); + + mutex_unlock(&psp->securedisplay_context.mutex); + if (ret) { psp_securedisplay_terminate(psp); /* free securedisplay shared memory */ @@ -1985,12 +1965,8 @@ int psp_securedisplay_invoke(struct psp_context *psp, uint32_t ta_cmd_id) ta_cmd_id != TA_SECUREDISPLAY_COMMAND__SEND_ROI_CRC) return -EINVAL; - mutex_lock(&psp->securedisplay_context.mutex); - ret = psp_ta_invoke(psp, ta_cmd_id, &psp->securedisplay_context.context); - mutex_unlock(&psp->securedisplay_context.mutex); - return ret; } /* SECUREDISPLAY end */ @@ -2055,6 +2031,15 @@ static int psp_hw_start(struct psp_context *psp) } } + if ((is_psp_fw_valid(psp->ras_drv)) && + (psp->funcs->bootloader_load_ras_drv != NULL)) { + ret = psp_bootloader_load_ras_drv(psp); + if (ret) { + DRM_ERROR("PSP load ras_drv failed!\n"); + return ret; + } + } + if ((is_psp_fw_valid(psp->sos)) && (psp->funcs->bootloader_load_sos != NULL)) { ret = psp_bootloader_load_sos(psp); @@ -3040,6 +3025,12 @@ static int parse_sos_bin_descriptor(struct psp_context *psp, psp->dbg_drv.size_bytes = le32_to_cpu(desc->size_bytes); psp->dbg_drv.start_addr = ucode_start_addr; break; + case PSP_FW_TYPE_PSP_RAS_DRV: + psp->ras_drv.fw_version = le32_to_cpu(desc->fw_version); + psp->ras_drv.feature_version = le32_to_cpu(desc->fw_version); + psp->ras_drv.size_bytes = le32_to_cpu(desc->size_bytes); + psp->ras_drv.start_addr = ucode_start_addr; + break; default: dev_warn(psp->adev->dev, "Unsupported PSP FW type: %d\n", desc->fw_type); break; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h index c32b74bd970f..cbd4194a2883 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h @@ -36,6 +36,7 @@ #define PSP_CMD_BUFFER_SIZE 0x1000 #define PSP_1_MEG 0x100000 #define PSP_TMR_SIZE(adev) ((adev)->asic_type == CHIP_ALDEBARAN ? 
0x800000 : 0x400000) +#define PSP_TMR_ALIGNMENT 0x100000 #define PSP_FW_NAME_LEN 0x24 enum psp_shared_mem_size { @@ -71,6 +72,7 @@ enum psp_bootloader_cmd { PSP_BL__LOAD_SOCDRV = 0xB0000, PSP_BL__LOAD_DBGDRV = 0xC0000, PSP_BL__LOAD_INTFDRV = 0xD0000, + PSP_BL__LOAD_RASDRV = 0xE0000, PSP_BL__DRAM_LONG_TRAIN = 0x100000, PSP_BL__DRAM_SHORT_TRAIN = 0x200000, PSP_BL__LOAD_TOS_SPL_TABLE = 0x10000000, @@ -114,6 +116,7 @@ struct psp_funcs int (*bootloader_load_soc_drv)(struct psp_context *psp); int (*bootloader_load_intf_drv)(struct psp_context *psp); int (*bootloader_load_dbg_drv)(struct psp_context *psp); + int (*bootloader_load_ras_drv)(struct psp_context *psp); int (*bootloader_load_sos)(struct psp_context *psp); int (*ring_init)(struct psp_context *psp, enum psp_ring_type ring_type); int (*ring_create)(struct psp_context *psp, @@ -133,6 +136,12 @@ struct psp_funcs int (*vbflash_stat)(struct psp_context *psp); }; +struct ta_funcs { + int (*fn_ta_initialize)(struct psp_context *psp); + int (*fn_ta_invoke)(struct psp_context *psp, uint32_t ta_cmd_id); + int (*fn_ta_terminate)(struct psp_context *psp); +}; + #define AMDGPU_XGMI_MAX_CONNECTED_NODES 64 struct psp_xgmi_node_info { uint64_t node_id; @@ -306,6 +315,7 @@ struct psp_context struct psp_gfx_cmd_resp *cmd; const struct psp_funcs *funcs; + const struct ta_funcs *ta_funcs; /* firmware buffer */ struct amdgpu_bo *fw_pri_bo; @@ -323,6 +333,7 @@ struct psp_context struct psp_bin_desc soc_drv; struct psp_bin_desc intf_drv; struct psp_bin_desc dbg_drv; + struct psp_bin_desc ras_drv; /* tmr buffer */ struct amdgpu_bo *tmr_bo; @@ -403,6 +414,9 @@ struct amdgpu_psp_funcs { ((psp)->funcs->bootloader_load_intf_drv ? (psp)->funcs->bootloader_load_intf_drv((psp)) : 0) #define psp_bootloader_load_dbg_drv(psp) \ ((psp)->funcs->bootloader_load_dbg_drv ? (psp)->funcs->bootloader_load_dbg_drv((psp)) : 0) +#define psp_bootloader_load_ras_drv(psp) \ + ((psp)->funcs->bootloader_load_ras_drv ? \ + (psp)->funcs->bootloader_load_ras_drv((psp)) : 0) #define psp_bootloader_load_sos(psp) \ ((psp)->funcs->bootloader_load_sos ? 
(psp)->funcs->bootloader_load_sos((psp)) : 0) #define psp_smu_reload_quirk(psp) \ @@ -456,9 +470,6 @@ int psp_ta_load(struct psp_context *psp, struct ta_context *context); int psp_ta_invoke(struct psp_context *psp, uint32_t ta_cmd_id, struct ta_context *context); -int psp_ta_invoke_indirect(struct psp_context *psp, - uint32_t ta_cmd_id, - struct ta_context *context); int psp_xgmi_initialize(struct psp_context *psp, bool set_extended_data, bool load_ta); int psp_xgmi_terminate(struct psp_context *psp); @@ -472,7 +483,7 @@ int psp_xgmi_get_topology_info(struct psp_context *psp, int psp_xgmi_set_topology_info(struct psp_context *psp, int number_devices, struct psp_xgmi_topology_info *topology); - +int psp_ras_initialize(struct psp_context *psp); int psp_ras_invoke(struct psp_context *psp, uint32_t ta_cmd_id); int psp_ras_enable_features(struct psp_context *psp, union ta_ras_cmd_input *info, bool enable); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp_ta.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp_ta.c index 0988e00612e5..468a67b302d4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp_ta.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp_ta.c @@ -41,30 +41,46 @@ static uint32_t get_bin_version(const uint8_t *bin) return hdr->ucode_version; } -static void prep_ta_mem_context(struct psp_context *psp, - struct ta_context *context, +static int prep_ta_mem_context(struct ta_mem_context *mem_context, uint8_t *shared_buf, uint32_t shared_buf_len) { - context->mem_context.shared_mem_size = PAGE_ALIGN(shared_buf_len); - psp_ta_init_shared_buf(psp, &context->mem_context); + if (mem_context->shared_mem_size < shared_buf_len) + return -EINVAL; + memset(mem_context->shared_buf, 0, mem_context->shared_mem_size); + memcpy((void *)mem_context->shared_buf, shared_buf, shared_buf_len); - memcpy((void *)context->mem_context.shared_buf, shared_buf, shared_buf_len); + return 0; } static bool is_ta_type_valid(enum ta_type_id ta_type) { - bool ret = false; + switch (ta_type) { + case TA_TYPE_RAS: + return true; + default: + return false; + } +} + +static const struct ta_funcs ras_ta_funcs = { + .fn_ta_initialize = psp_ras_initialize, + .fn_ta_invoke = psp_ras_invoke, + .fn_ta_terminate = psp_ras_terminate +}; +static void set_ta_context_funcs(struct psp_context *psp, + enum ta_type_id ta_type, + struct ta_context **pcontext) +{ switch (ta_type) { case TA_TYPE_RAS: - ret = true; + *pcontext = &psp->ras_context.context; + psp->ta_funcs = &ras_ta_funcs; break; default: break; } - - return ret; } static const struct file_operations ta_load_debugfs_fops = { @@ -85,8 +101,7 @@ static const struct file_operations ta_invoke_debugfs_fops = { .owner = THIS_MODULE }; - -/** +/* * DOC: AMDGPU TA debugfs interfaces * * Three debugfs interfaces can be opened by a program to @@ -111,15 +126,18 @@ static const struct file_operations ta_invoke_debugfs_fops = { * * - For TA invoke debugfs interface: * Transmit buffer: + * - TA type (4bytes) * - TA ID (4bytes) * - TA CMD ID (4bytes) - * - TA shard buf length (4bytes) + * - TA shard buf length + * (4bytes, value not beyond TA shared memory size) * - TA shared buf * Receive buffer: * - TA shared buf * * - For TA unload debugfs interface: * Transmit buffer: + * - TA type (4bytes) * - TA ID (4bytes) */ @@ -131,59 +149,92 @@ static ssize_t ta_if_load_debugfs_write(struct file *fp, const char *buf, size_t uint32_t copy_pos = 0; int ret = 0; - struct amdgpu_device *adev = (struct amdgpu_device *)file_inode(fp)->i_private; - struct psp_context *psp = &adev->psp; - struct ta_context context = 
{0}; + struct amdgpu_device *adev = (struct amdgpu_device *)file_inode(fp)->i_private; + struct psp_context *psp = &adev->psp; + struct ta_context *context = NULL; if (!buf) return -EINVAL; ret = copy_from_user((void *)&ta_type, &buf[copy_pos], sizeof(uint32_t)); if (ret || (!is_ta_type_valid(ta_type))) - return -EINVAL; + return -EFAULT; copy_pos += sizeof(uint32_t); ret = copy_from_user((void *)&ta_bin_len, &buf[copy_pos], sizeof(uint32_t)); if (ret) - return -EINVAL; + return -EFAULT; copy_pos += sizeof(uint32_t); ta_bin = kzalloc(ta_bin_len, GFP_KERNEL); if (!ta_bin) - ret = -ENOMEM; + return -ENOMEM; if (copy_from_user((void *)ta_bin, &buf[copy_pos], ta_bin_len)) { ret = -EFAULT; goto err_free_bin; } - ret = psp_ras_terminate(psp); - if (ret) { - dev_err(adev->dev, "Failed to unload embedded RAS TA\n"); + /* Set TA context and functions */ + set_ta_context_funcs(psp, ta_type, &context); + + if (!psp->ta_funcs || !psp->ta_funcs->fn_ta_terminate) { + dev_err(adev->dev, "Unsupported function to terminate TA\n"); + ret = -EOPNOTSUPP; goto err_free_bin; } - context.ta_type = ta_type; - context.ta_load_type = GFX_CMD_ID_LOAD_TA; - context.bin_desc.fw_version = get_bin_version(ta_bin); - context.bin_desc.size_bytes = ta_bin_len; - context.bin_desc.start_addr = ta_bin; + /* + * Allocate TA shared buf in case shared buf was freed + * due to loading TA failed before. + */ + if (!context->mem_context.shared_buf) { + ret = psp_ta_init_shared_buf(psp, &context->mem_context); + if (ret) { + ret = -ENOMEM; + goto err_free_bin; + } + } + + ret = psp_fn_ta_terminate(psp); + if (ret || context->resp_status) { + dev_err(adev->dev, + "Failed to unload embedded TA (%d) and status (0x%X)\n", + ret, context->resp_status); + if (!ret) + ret = -EINVAL; + goto err_free_ta_shared_buf; + } + + /* Prepare TA context for TA initialization */ + context->ta_type = ta_type; + context->bin_desc.fw_version = get_bin_version(ta_bin); + context->bin_desc.size_bytes = ta_bin_len; + context->bin_desc.start_addr = ta_bin; - ret = psp_ta_load(psp, &context); + if (!psp->ta_funcs->fn_ta_initialize) { + dev_err(adev->dev, "Unsupported function to initialize TA\n"); + ret = -EOPNOTSUPP; + goto err_free_ta_shared_buf; + } - if (ret || context.resp_status) { - dev_err(adev->dev, "TA load via debugfs failed (%d) status %d\n", - ret, context.resp_status); + ret = psp_fn_ta_initialize(psp); + if (ret || context->resp_status) { + dev_err(adev->dev, "Failed to load TA via debugfs (%d) and status (0x%X)\n", + ret, context->resp_status); if (!ret) ret = -EINVAL; - goto err_free_bin; + goto err_free_ta_shared_buf; } - context.initialized = true; - if (copy_to_user((char *)buf, (void *)&context.session_id, sizeof(uint32_t))) + if (copy_to_user((char *)buf, (void *)&context->session_id, sizeof(uint32_t))) ret = -EFAULT; +err_free_ta_shared_buf: + /* Only free TA shared buf when returns error code */ + if (ret && context->mem_context.shared_buf) + psp_ta_free_shared_buf(&context->mem_context); err_free_bin: kfree(ta_bin); @@ -192,58 +243,85 @@ err_free_bin: static ssize_t ta_if_unload_debugfs_write(struct file *fp, const char *buf, size_t len, loff_t *off) { - uint32_t ta_id = 0; - int ret = 0; + uint32_t ta_type = 0; + uint32_t ta_id = 0; + uint32_t copy_pos = 0; + int ret = 0; - struct amdgpu_device *adev = (struct amdgpu_device *)file_inode(fp)->i_private; - struct psp_context *psp = &adev->psp; - struct ta_context context = {0}; + struct amdgpu_device *adev = (struct amdgpu_device *)file_inode(fp)->i_private; + struct psp_context *psp = 
&adev->psp; + struct ta_context *context = NULL; if (!buf) return -EINVAL; - ret = copy_from_user((void *)&ta_id, buf, sizeof(uint32_t)); + ret = copy_from_user((void *)&ta_type, &buf[copy_pos], sizeof(uint32_t)); + if (ret || (!is_ta_type_valid(ta_type))) + return -EFAULT; + + copy_pos += sizeof(uint32_t); + + ret = copy_from_user((void *)&ta_id, &buf[copy_pos], sizeof(uint32_t)); if (ret) - return -EINVAL; + return -EFAULT; - context.session_id = ta_id; + set_ta_context_funcs(psp, ta_type, &context); + context->session_id = ta_id; - ret = psp_ta_unload(psp, &context); - if (!ret) - context.initialized = false; + if (!psp->ta_funcs || !psp->ta_funcs->fn_ta_terminate) { + dev_err(adev->dev, "Unsupported function to terminate TA\n"); + return -EOPNOTSUPP; + } + + ret = psp_fn_ta_terminate(psp); + if (ret || context->resp_status) { + dev_err(adev->dev, "Failed to unload TA via debugfs (%d) and status (0x%X)\n", + ret, context->resp_status); + if (!ret) + ret = -EINVAL; + } + + if (context->mem_context.shared_buf) + psp_ta_free_shared_buf(&context->mem_context); return ret; } static ssize_t ta_if_invoke_debugfs_write(struct file *fp, const char *buf, size_t len, loff_t *off) { + uint32_t ta_type = 0; uint32_t ta_id = 0; uint32_t cmd_id = 0; uint32_t shared_buf_len = 0; - uint8_t *shared_buf = NULL; + uint8_t *shared_buf = NULL; uint32_t copy_pos = 0; int ret = 0; - struct amdgpu_device *adev = (struct amdgpu_device *)file_inode(fp)->i_private; - struct psp_context *psp = &adev->psp; - struct ta_context context = {0}; + struct amdgpu_device *adev = (struct amdgpu_device *)file_inode(fp)->i_private; + struct psp_context *psp = &adev->psp; + struct ta_context *context = NULL; if (!buf) return -EINVAL; + ret = copy_from_user((void *)&ta_type, &buf[copy_pos], sizeof(uint32_t)); + if (ret) + return -EFAULT; + copy_pos += sizeof(uint32_t); + ret = copy_from_user((void *)&ta_id, &buf[copy_pos], sizeof(uint32_t)); if (ret) - return -EINVAL; + return -EFAULT; copy_pos += sizeof(uint32_t); ret = copy_from_user((void *)&cmd_id, &buf[copy_pos], sizeof(uint32_t)); if (ret) - return -EINVAL; + return -EFAULT; copy_pos += sizeof(uint32_t); ret = copy_from_user((void *)&shared_buf_len, &buf[copy_pos], sizeof(uint32_t)); if (ret) - return -EINVAL; + return -EFAULT; copy_pos += sizeof(uint32_t); shared_buf = kzalloc(shared_buf_len, GFP_KERNEL); @@ -254,26 +332,39 @@ static ssize_t ta_if_invoke_debugfs_write(struct file *fp, const char *buf, size goto err_free_shared_buf; } - context.session_id = ta_id; + set_ta_context_funcs(psp, ta_type, &context); + + if (!context->initialized) { + dev_err(adev->dev, "TA is not initialized\n"); + ret = -EINVAL; + goto err_free_shared_buf; + } + + if (!psp->ta_funcs || !psp->ta_funcs->fn_ta_invoke) { + dev_err(adev->dev, "Unsupported function to invoke TA\n"); + ret = -EOPNOTSUPP; + goto err_free_shared_buf; + } - prep_ta_mem_context(psp, &context, shared_buf, shared_buf_len); + context->session_id = ta_id; - ret = psp_ta_invoke_indirect(psp, cmd_id, &context); + ret = prep_ta_mem_context(&context->mem_context, shared_buf, shared_buf_len); + if (ret) + goto err_free_shared_buf; - if (ret || context.resp_status) { - dev_err(adev->dev, "TA invoke via debugfs failed (%d) status %d\n", - ret, context.resp_status); - if (!ret) + ret = psp_fn_ta_invoke(psp, cmd_id); + if (ret || context->resp_status) { + dev_err(adev->dev, "Failed to invoke TA via debugfs (%d) and status (0x%X)\n", + ret, context->resp_status); + if (!ret) { ret = -EINVAL; - goto err_free_ta_shared_buf; + goto 
err_free_shared_buf; + } } - if (copy_to_user((char *)buf, context.mem_context.shared_buf, shared_buf_len)) + if (copy_to_user((char *)buf, context->mem_context.shared_buf, shared_buf_len)) ret = -EFAULT; -err_free_ta_shared_buf: - psp_ta_free_shared_buf(&context.mem_context); - err_free_shared_buf: kfree(shared_buf); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp_ta.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp_ta.h index cfc1542f63ef..14cd1c81c3e6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp_ta.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp_ta.h @@ -24,6 +24,11 @@ #ifndef __AMDGPU_PSP_TA_H__ #define __AMDGPU_PSP_TA_H__ +/* Calling set_ta_context_funcs is required before using the following macros */ +#define psp_fn_ta_initialize(psp) ((psp)->ta_funcs->fn_ta_initialize((psp))) +#define psp_fn_ta_invoke(psp, ta_cmd_id) ((psp)->ta_funcs->fn_ta_invoke((psp), (ta_cmd_id))) +#define psp_fn_ta_terminate(psp) ((psp)->ta_funcs->fn_ta_terminate((psp))) + void amdgpu_ta_if_debugfs_init(struct amdgpu_device *adev); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index ab9ba5a9c33d..693bce07eb46 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -1561,7 +1561,6 @@ static void amdgpu_ras_interrupt_poison_consumption_handler(struct ras_manager * { bool poison_stat = false; struct amdgpu_device *adev = obj->adev; - struct ras_err_data err_data = {0, 0, 0, NULL}; struct amdgpu_ras_block_object *block_obj = amdgpu_ras_get_ras_block(adev, obj->head.block, 0); @@ -1584,7 +1583,7 @@ static void amdgpu_ras_interrupt_poison_consumption_handler(struct ras_manager * } if (!adev->gmc.xgmi.connected_to_cpu) - amdgpu_umc_poison_handler(adev, &err_data, false); + amdgpu_umc_poison_handler(adev, false); if (block_obj->hw_ops->handle_poison_consumption) poison_stat = block_obj->hw_ops->handle_poison_consumption(adev); @@ -1811,7 +1810,8 @@ static void amdgpu_ras_log_on_err_counter(struct amdgpu_device *adev) amdgpu_ras_query_error_status(adev, &info); if (adev->ip_versions[MP0_HWIP][0] != IP_VERSION(11, 0, 2) && - adev->ip_versions[MP0_HWIP][0] != IP_VERSION(11, 0, 4)) { + adev->ip_versions[MP0_HWIP][0] != IP_VERSION(11, 0, 4) && + adev->ip_versions[MP0_HWIP][0] != IP_VERSION(13, 0, 0)) { if (amdgpu_ras_reset_error_status(adev, info.head.block)) dev_warn(adev->dev, "Failed to reset error counter and error status"); } @@ -1949,7 +1949,6 @@ static void amdgpu_ras_do_recovery(struct work_struct *work) reset_context.method = AMD_RESET_METHOD_NONE; reset_context.reset_req_dev = adev; clear_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags); - clear_bit(AMDGPU_SKIP_MODE2_RESET, &reset_context.flags); amdgpu_device_gpu_recover(ras->adev, NULL, &reset_context); } @@ -2267,6 +2266,25 @@ static int amdgpu_ras_recovery_fini(struct amdgpu_device *adev) static bool amdgpu_ras_asic_supported(struct amdgpu_device *adev) { + if (amdgpu_sriov_vf(adev)) { + switch (adev->ip_versions[MP0_HWIP][0]) { + case IP_VERSION(13, 0, 2): + return true; + default: + return false; + } + } + + if (adev->asic_type == CHIP_IP_DISCOVERY) { + switch (adev->ip_versions[MP0_HWIP][0]) { + case IP_VERSION(13, 0, 0): + case IP_VERSION(13, 0, 10): + return true; + default: + return false; + } + } + return adev->asic_type == CHIP_VEGA10 || adev->asic_type == CHIP_VEGA20 || adev->asic_type == CHIP_ARCTURUS || @@ -2310,11 +2328,6 @@ static void amdgpu_ras_check_supported(struct amdgpu_device *adev) !amdgpu_ras_asic_supported(adev)) return; - /* If driver 
run on sriov guest side, only enable ras for aldebaran */ - if (amdgpu_sriov_vf(adev) && - adev->ip_versions[MP1_HWIP][0] != IP_VERSION(13, 0, 2)) - return; - if (!adev->gmc.xgmi.connected_to_cpu) { if (amdgpu_atomfirmware_mem_ecc_supported(adev)) { dev_info(adev->dev, "MEM ECC is active.\n"); @@ -2719,7 +2732,8 @@ int amdgpu_ras_pre_fini(struct amdgpu_device *adev) /* Need disable ras on all IPs here before ip [hw/sw]fini */ - amdgpu_ras_disable_all_features(adev, 0); + if (con->features) + amdgpu_ras_disable_all_features(adev, 0); amdgpu_ras_recovery_fini(adev); return 0; } @@ -2832,11 +2846,7 @@ static int amdgpu_bad_page_notifier(struct notifier_block *nb, struct mce *m = (struct mce *)data; struct amdgpu_device *adev = NULL; uint32_t gpu_id = 0; - uint32_t umc_inst = 0; - uint32_t ch_inst, channel_index = 0; - struct ras_err_data err_data = {0, 0, 0, NULL}; - struct eeprom_table_record err_rec; - uint64_t retired_page; + uint32_t umc_inst = 0, ch_inst = 0; /* * If the error was generated in UMC_V2, which belongs to GPU UMCs, @@ -2875,29 +2885,10 @@ static int amdgpu_bad_page_notifier(struct notifier_block *nb, dev_info(adev->dev, "Uncorrectable error detected in UMC inst: %d, chan_idx: %d", umc_inst, ch_inst); - /* - * Translate UMC channel address to Physical address - */ - channel_index = - adev->umc.channel_idx_tbl[umc_inst * adev->umc.channel_inst_num - + ch_inst]; - - retired_page = ADDR_OF_8KB_BLOCK(m->addr) | - ADDR_OF_256B_BLOCK(channel_index) | - OFFSET_IN_256B_BLOCK(m->addr); - - memset(&err_rec, 0x0, sizeof(struct eeprom_table_record)); - err_data.err_addr = &err_rec; - amdgpu_umc_fill_error_record(&err_data, m->addr, - retired_page, channel_index, umc_inst); - - if (amdgpu_bad_page_threshold != 0) { - amdgpu_ras_add_bad_pages(adev, err_data.err_addr, - err_data.err_addr_cnt); - amdgpu_ras_save_bad_pages(adev); - } - - return NOTIFY_OK; + if (!amdgpu_umc_page_retirement_mca(adev, m->addr, ch_inst, umc_inst)) + return NOTIFY_OK; + else + return NOTIFY_DONE; } static struct notifier_block amdgpu_bad_page_nb = { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c index c4283987bb1e..7268ae65c140 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c @@ -38,6 +38,7 @@ #define EEPROM_I2C_MADDR_ARCTURUS_D342 0x0 #define EEPROM_I2C_MADDR_SIENNA_CICHLID 0x0 #define EEPROM_I2C_MADDR_ALDEBARAN 0x0 +#define EEPROM_I2C_MADDR_54H (0x54UL << 16) /* * The 2 macros bellow represent the actual size in bytes that @@ -89,6 +90,16 @@ static bool __is_ras_eeprom_supported(struct amdgpu_device *adev) { + if (adev->asic_type == CHIP_IP_DISCOVERY) { + switch (adev->ip_versions[MP1_HWIP][0]) { + case IP_VERSION(13, 0, 0): + case IP_VERSION(13, 0, 10): + return true; + default: + return false; + } + } + return adev->asic_type == CHIP_VEGA20 || adev->asic_type == CHIP_ARCTURUS || adev->asic_type == CHIP_SIENNA_CICHLID || @@ -113,6 +124,19 @@ static bool __get_eeprom_i2c_addr_arct(struct amdgpu_device *adev, return true; } +static bool __get_eeprom_i2c_addr_ip_discovery(struct amdgpu_device *adev, + struct amdgpu_ras_eeprom_control *control) +{ + switch (adev->ip_versions[MP1_HWIP][0]) { + case IP_VERSION(13, 0, 0): + case IP_VERSION(13, 0, 10): + control->i2c_address = EEPROM_I2C_MADDR_54H; + return true; + default: + return false; + } +} + static bool __get_eeprom_i2c_addr(struct amdgpu_device *adev, struct amdgpu_ras_eeprom_control *control) { @@ -152,10 +176,22 @@ static bool 
__get_eeprom_i2c_addr(struct amdgpu_device *adev, control->i2c_address = EEPROM_I2C_MADDR_ALDEBARAN; break; + case CHIP_IP_DISCOVERY: + return __get_eeprom_i2c_addr_ip_discovery(adev, control); + default: return false; } + switch (adev->ip_versions[MP1_HWIP][0]) { + case IP_VERSION(13, 0, 0): + control->i2c_address = EEPROM_I2C_MADDR_54H; + break; + + default: + break; + } + return true; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h index 6546552e596c..5c4f93ee0c57 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h @@ -62,7 +62,7 @@ static inline void amdgpu_res_first(struct ttm_resource *res, if (!res) goto fallback; - BUG_ON(start + size > res->num_pages << PAGE_SHIFT); + BUG_ON(start + size > res->size); cur->mem_type = res->mem_type; @@ -110,7 +110,7 @@ fallback: cur->size = size; cur->remaining = size; cur->node = NULL; - WARN_ON(res && start + size > res->num_pages << PAGE_SHIFT); + WARN_ON(res && start + size > res->size); return; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c index 9da5ead50c90..f778466bb9db 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c @@ -37,8 +37,6 @@ int amdgpu_reset_init(struct amdgpu_device *adev) { int ret = 0; - adev->amdgpu_reset_level_mask = 0x1; - switch (adev->ip_versions[MP1_HWIP][0]) { case IP_VERSION(13, 0, 2): ret = aldebaran_reset_init(adev); @@ -76,12 +74,6 @@ int amdgpu_reset_prepare_hwcontext(struct amdgpu_device *adev, { struct amdgpu_reset_handler *reset_handler = NULL; - if (!(adev->amdgpu_reset_level_mask & AMDGPU_RESET_LEVEL_MODE2)) - return -ENOSYS; - - if (test_bit(AMDGPU_SKIP_MODE2_RESET, &reset_context->flags)) - return -ENOSYS; - if (adev->reset_cntl && adev->reset_cntl->get_reset_handler) reset_handler = adev->reset_cntl->get_reset_handler( adev->reset_cntl, reset_context); @@ -98,12 +90,6 @@ int amdgpu_reset_perform_reset(struct amdgpu_device *adev, int ret; struct amdgpu_reset_handler *reset_handler = NULL; - if (!(adev->amdgpu_reset_level_mask & AMDGPU_RESET_LEVEL_MODE2)) - return -ENOSYS; - - if (test_bit(AMDGPU_SKIP_MODE2_RESET, &reset_context->flags)) - return -ENOSYS; - if (adev->reset_cntl) reset_handler = adev->reset_cntl->get_reset_handler( adev->reset_cntl, reset_context); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h index f71b83c42590..f4a501ff87d9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h @@ -30,7 +30,7 @@ enum AMDGPU_RESET_FLAGS { AMDGPU_NEED_FULL_RESET = 0, AMDGPU_SKIP_HW_RESET = 1, - AMDGPU_SKIP_MODE2_RESET = 2, + AMDGPU_RESET_FOR_DEVICE_REMOVE = 2, }; struct amdgpu_reset_context { @@ -112,7 +112,8 @@ static inline bool amdgpu_reset_get_reset_domain(struct amdgpu_reset_domain *dom static inline void amdgpu_reset_put_reset_domain(struct amdgpu_reset_domain *domain) { - kref_put(&domain->refcount, amdgpu_reset_destroy_reset_domain); + if (domain) + kref_put(&domain->refcount, amdgpu_reset_destroy_reset_domain); } static inline bool amdgpu_reset_domain_schedule(struct amdgpu_reset_domain *domain, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c index 3e316b013fd9..d3558c34d406 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c @@ -405,9 +405,6 @@ bool 
amdgpu_ring_soft_recovery(struct amdgpu_ring *ring, unsigned int vmid, { ktime_t deadline = ktime_add_us(ktime_get(), 10000); - if (!(ring->adev->amdgpu_reset_level_mask & AMDGPU_RESET_LEVEL_SOFT_RECOVERY)) - return false; - if (amdgpu_sriov_vf(ring->adev) || !ring->funcs->soft_recovery || !fence) return false; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.c index 6373bfb47d55..012b72d00e04 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.c @@ -272,3 +272,275 @@ void amdgpu_gfx_rlc_fini(struct amdgpu_device *adev) &adev->gfx.rlc.cp_table_gpu_addr, (void **)&adev->gfx.rlc.cp_table_ptr); } + +static int amdgpu_gfx_rlc_init_microcode_v2_0(struct amdgpu_device *adev) +{ + const struct common_firmware_header *common_hdr; + const struct rlc_firmware_header_v2_0 *rlc_hdr; + struct amdgpu_firmware_info *info; + unsigned int *tmp; + unsigned int i; + + rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data; + + adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version); + adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version); + adev->gfx.rlc.save_and_restore_offset = + le32_to_cpu(rlc_hdr->save_and_restore_offset); + adev->gfx.rlc.clear_state_descriptor_offset = + le32_to_cpu(rlc_hdr->clear_state_descriptor_offset); + adev->gfx.rlc.avail_scratch_ram_locations = + le32_to_cpu(rlc_hdr->avail_scratch_ram_locations); + adev->gfx.rlc.reg_restore_list_size = + le32_to_cpu(rlc_hdr->reg_restore_list_size); + adev->gfx.rlc.reg_list_format_start = + le32_to_cpu(rlc_hdr->reg_list_format_start); + adev->gfx.rlc.reg_list_format_separate_start = + le32_to_cpu(rlc_hdr->reg_list_format_separate_start); + adev->gfx.rlc.starting_offsets_start = + le32_to_cpu(rlc_hdr->starting_offsets_start); + adev->gfx.rlc.reg_list_format_size_bytes = + le32_to_cpu(rlc_hdr->reg_list_format_size_bytes); + adev->gfx.rlc.reg_list_size_bytes = + le32_to_cpu(rlc_hdr->reg_list_size_bytes); + adev->gfx.rlc.register_list_format = + kmalloc(adev->gfx.rlc.reg_list_format_size_bytes + + adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL); + if (!adev->gfx.rlc.register_list_format) { + dev_err(adev->dev, "failed to allocate memory for rlc register_list_format\n"); + return -ENOMEM; + } + + tmp = (unsigned int *)((uintptr_t)rlc_hdr + + le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes)); + for (i = 0 ; i < (rlc_hdr->reg_list_format_size_bytes >> 2); i++) + adev->gfx.rlc.register_list_format[i] = le32_to_cpu(tmp[i]); + + adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i; + + tmp = (unsigned int *)((uintptr_t)rlc_hdr + + le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes)); + for (i = 0 ; i < (rlc_hdr->reg_list_size_bytes >> 2); i++) + adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]); + + if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { + info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G]; + info->ucode_id = AMDGPU_UCODE_ID_RLC_G; + info->fw = adev->gfx.rlc_fw; + if (info->fw) { + common_hdr = (const struct common_firmware_header *)info->fw->data; + adev->firmware.fw_size += + ALIGN(le32_to_cpu(common_hdr->ucode_size_bytes), PAGE_SIZE); + } + } + + return 0; +} + +static void amdgpu_gfx_rlc_init_microcode_v2_1(struct amdgpu_device *adev) +{ + const struct rlc_firmware_header_v2_1 *rlc_hdr; + struct amdgpu_firmware_info *info; + + rlc_hdr = (const struct rlc_firmware_header_v2_1 *)adev->gfx.rlc_fw->data; + adev->gfx.rlc_srlc_fw_version = 
le32_to_cpu(rlc_hdr->save_restore_list_cntl_ucode_ver); + adev->gfx.rlc_srlc_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_cntl_feature_ver); + adev->gfx.rlc.save_restore_list_cntl_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_cntl_size_bytes); + adev->gfx.rlc.save_restore_list_cntl = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_cntl_offset_bytes); + adev->gfx.rlc_srlg_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_gpm_ucode_ver); + adev->gfx.rlc_srlg_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_gpm_feature_ver); + adev->gfx.rlc.save_restore_list_gpm_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_gpm_size_bytes); + adev->gfx.rlc.save_restore_list_gpm = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_gpm_offset_bytes); + adev->gfx.rlc_srls_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_srm_ucode_ver); + adev->gfx.rlc_srls_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_srm_feature_ver); + adev->gfx.rlc.save_restore_list_srm_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_srm_size_bytes); + adev->gfx.rlc.save_restore_list_srm = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_srm_offset_bytes); + adev->gfx.rlc.reg_list_format_direct_reg_list_length = + le32_to_cpu(rlc_hdr->reg_list_format_direct_reg_list_length); + + if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { + if (adev->gfx.rlc.save_restore_list_cntl_size_bytes) { + info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL]; + info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL; + info->fw = adev->gfx.rlc_fw; + adev->firmware.fw_size += + ALIGN(adev->gfx.rlc.save_restore_list_cntl_size_bytes, PAGE_SIZE); + } + + if (adev->gfx.rlc.save_restore_list_gpm_size_bytes) { + info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM]; + info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM; + info->fw = adev->gfx.rlc_fw; + adev->firmware.fw_size += + ALIGN(adev->gfx.rlc.save_restore_list_gpm_size_bytes, PAGE_SIZE); + } + + if (adev->gfx.rlc.save_restore_list_srm_size_bytes) { + info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM]; + info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM; + info->fw = adev->gfx.rlc_fw; + adev->firmware.fw_size += + ALIGN(adev->gfx.rlc.save_restore_list_srm_size_bytes, PAGE_SIZE); + } + } +} + +static void amdgpu_gfx_rlc_init_microcode_v2_2(struct amdgpu_device *adev) +{ + const struct rlc_firmware_header_v2_2 *rlc_hdr; + struct amdgpu_firmware_info *info; + + rlc_hdr = (const struct rlc_firmware_header_v2_2 *)adev->gfx.rlc_fw->data; + adev->gfx.rlc.rlc_iram_ucode_size_bytes = le32_to_cpu(rlc_hdr->rlc_iram_ucode_size_bytes); + adev->gfx.rlc.rlc_iram_ucode = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->rlc_iram_ucode_offset_bytes); + adev->gfx.rlc.rlc_dram_ucode_size_bytes = le32_to_cpu(rlc_hdr->rlc_dram_ucode_size_bytes); + adev->gfx.rlc.rlc_dram_ucode = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->rlc_dram_ucode_offset_bytes); + + if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { + if (adev->gfx.rlc.rlc_iram_ucode_size_bytes) { + info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_IRAM]; + info->ucode_id = AMDGPU_UCODE_ID_RLC_IRAM; + info->fw = adev->gfx.rlc_fw; + adev->firmware.fw_size += + ALIGN(adev->gfx.rlc.rlc_iram_ucode_size_bytes, PAGE_SIZE); + } + + if (adev->gfx.rlc.rlc_dram_ucode_size_bytes) { + info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_DRAM]; + info->ucode_id = AMDGPU_UCODE_ID_RLC_DRAM; + info->fw = adev->gfx.rlc_fw; + adev->firmware.fw_size += + 
ALIGN(adev->gfx.rlc.rlc_dram_ucode_size_bytes, PAGE_SIZE); + } + } +} + +static void amdgpu_gfx_rlc_init_microcode_v2_3(struct amdgpu_device *adev) +{ + const struct rlc_firmware_header_v2_3 *rlc_hdr; + struct amdgpu_firmware_info *info; + + rlc_hdr = (const struct rlc_firmware_header_v2_3 *)adev->gfx.rlc_fw->data; + adev->gfx.rlcp_ucode_version = le32_to_cpu(rlc_hdr->rlcp_ucode_version); + adev->gfx.rlcp_ucode_feature_version = le32_to_cpu(rlc_hdr->rlcp_ucode_feature_version); + adev->gfx.rlc.rlcp_ucode_size_bytes = le32_to_cpu(rlc_hdr->rlcp_ucode_size_bytes); + adev->gfx.rlc.rlcp_ucode = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->rlcp_ucode_offset_bytes); + + adev->gfx.rlcv_ucode_version = le32_to_cpu(rlc_hdr->rlcv_ucode_version); + adev->gfx.rlcv_ucode_feature_version = le32_to_cpu(rlc_hdr->rlcv_ucode_feature_version); + adev->gfx.rlc.rlcv_ucode_size_bytes = le32_to_cpu(rlc_hdr->rlcv_ucode_size_bytes); + adev->gfx.rlc.rlcv_ucode = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->rlcv_ucode_offset_bytes); + + if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { + if (adev->gfx.rlc.rlcp_ucode_size_bytes) { + info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_P]; + info->ucode_id = AMDGPU_UCODE_ID_RLC_P; + info->fw = adev->gfx.rlc_fw; + adev->firmware.fw_size += + ALIGN(adev->gfx.rlc.rlcp_ucode_size_bytes, PAGE_SIZE); + } + + if (adev->gfx.rlc.rlcv_ucode_size_bytes) { + info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_V]; + info->ucode_id = AMDGPU_UCODE_ID_RLC_V; + info->fw = adev->gfx.rlc_fw; + adev->firmware.fw_size += + ALIGN(adev->gfx.rlc.rlcv_ucode_size_bytes, PAGE_SIZE); + } + } +} + +static void amdgpu_gfx_rlc_init_microcode_v2_4(struct amdgpu_device *adev) +{ + const struct rlc_firmware_header_v2_4 *rlc_hdr; + struct amdgpu_firmware_info *info; + + rlc_hdr = (const struct rlc_firmware_header_v2_4 *)adev->gfx.rlc_fw->data; + adev->gfx.rlc.global_tap_delays_ucode_size_bytes = le32_to_cpu(rlc_hdr->global_tap_delays_ucode_size_bytes); + adev->gfx.rlc.global_tap_delays_ucode = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->global_tap_delays_ucode_offset_bytes); + adev->gfx.rlc.se0_tap_delays_ucode_size_bytes = le32_to_cpu(rlc_hdr->se0_tap_delays_ucode_size_bytes); + adev->gfx.rlc.se0_tap_delays_ucode = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->se0_tap_delays_ucode_offset_bytes); + adev->gfx.rlc.se1_tap_delays_ucode_size_bytes = le32_to_cpu(rlc_hdr->se1_tap_delays_ucode_size_bytes); + adev->gfx.rlc.se1_tap_delays_ucode = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->se1_tap_delays_ucode_offset_bytes); + adev->gfx.rlc.se2_tap_delays_ucode_size_bytes = le32_to_cpu(rlc_hdr->se2_tap_delays_ucode_size_bytes); + adev->gfx.rlc.se2_tap_delays_ucode = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->se2_tap_delays_ucode_offset_bytes); + adev->gfx.rlc.se3_tap_delays_ucode_size_bytes = le32_to_cpu(rlc_hdr->se3_tap_delays_ucode_size_bytes); + adev->gfx.rlc.se3_tap_delays_ucode = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->se3_tap_delays_ucode_offset_bytes); + + if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { + if (adev->gfx.rlc.global_tap_delays_ucode_size_bytes) { + info = &adev->firmware.ucode[AMDGPU_UCODE_ID_GLOBAL_TAP_DELAYS]; + info->ucode_id = AMDGPU_UCODE_ID_GLOBAL_TAP_DELAYS; + info->fw = adev->gfx.rlc_fw; + adev->firmware.fw_size += + ALIGN(adev->gfx.rlc.global_tap_delays_ucode_size_bytes, PAGE_SIZE); + } + + if (adev->gfx.rlc.se0_tap_delays_ucode_size_bytes) { + info = &adev->firmware.ucode[AMDGPU_UCODE_ID_SE0_TAP_DELAYS]; + info->ucode_id = AMDGPU_UCODE_ID_SE0_TAP_DELAYS; + info->fw = adev->gfx.rlc_fw; + adev->firmware.fw_size += + 
ALIGN(adev->gfx.rlc.se0_tap_delays_ucode_size_bytes, PAGE_SIZE); + } + + if (adev->gfx.rlc.se1_tap_delays_ucode_size_bytes) { + info = &adev->firmware.ucode[AMDGPU_UCODE_ID_SE1_TAP_DELAYS]; + info->ucode_id = AMDGPU_UCODE_ID_SE1_TAP_DELAYS; + info->fw = adev->gfx.rlc_fw; + adev->firmware.fw_size += + ALIGN(adev->gfx.rlc.se1_tap_delays_ucode_size_bytes, PAGE_SIZE); + } + + if (adev->gfx.rlc.se2_tap_delays_ucode_size_bytes) { + info = &adev->firmware.ucode[AMDGPU_UCODE_ID_SE2_TAP_DELAYS]; + info->ucode_id = AMDGPU_UCODE_ID_SE2_TAP_DELAYS; + info->fw = adev->gfx.rlc_fw; + adev->firmware.fw_size += + ALIGN(adev->gfx.rlc.se2_tap_delays_ucode_size_bytes, PAGE_SIZE); + } + + if (adev->gfx.rlc.se3_tap_delays_ucode_size_bytes) { + info = &adev->firmware.ucode[AMDGPU_UCODE_ID_SE3_TAP_DELAYS]; + info->ucode_id = AMDGPU_UCODE_ID_SE3_TAP_DELAYS; + info->fw = adev->gfx.rlc_fw; + adev->firmware.fw_size += + ALIGN(adev->gfx.rlc.se3_tap_delays_ucode_size_bytes, PAGE_SIZE); + } + } +} + +int amdgpu_gfx_rlc_init_microcode(struct amdgpu_device *adev, + uint16_t version_major, + uint16_t version_minor) +{ + int err; + + if (version_major < 2) { + /* only support rlc_hdr v2.x and onwards */ + dev_err(adev->dev, "unsupported rlc fw hdr\n"); + return -EINVAL; + } + + /* is_rlc_v2_1 is still used in APU code path */ + if (version_major == 2 && version_minor == 1) + adev->gfx.rlc.is_rlc_v2_1 = true; + + if (version_minor >= 0) { + err = amdgpu_gfx_rlc_init_microcode_v2_0(adev); + if (err) { + dev_err(adev->dev, "fail to init rlc v2_0 microcode\n"); + return err; + } + } + if (version_minor >= 1) + amdgpu_gfx_rlc_init_microcode_v2_1(adev); + if (version_minor >= 2) + amdgpu_gfx_rlc_init_microcode_v2_2(adev); + if (version_minor == 3) + amdgpu_gfx_rlc_init_microcode_v2_3(adev); + if (version_minor == 4) + amdgpu_gfx_rlc_init_microcode_v2_4(adev); + + return 0; +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.h index 03ac36b2c2cf..23f060db9255 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.h @@ -267,5 +267,7 @@ int amdgpu_gfx_rlc_init_csb(struct amdgpu_device *adev); int amdgpu_gfx_rlc_init_cpt(struct amdgpu_device *adev); void amdgpu_gfx_rlc_setup_cp_table(struct amdgpu_device *adev); void amdgpu_gfx_rlc_fini(struct amdgpu_device *adev); - +int amdgpu_gfx_rlc_init_microcode(struct amdgpu_device *adev, + uint16_t version_major, + uint16_t version_minor); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c index 42c1f050542f..ea5278f094c0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c @@ -21,6 +21,7 @@ * */ +#include <linux/firmware.h> #include "amdgpu.h" #include "amdgpu_sdma.h" #include "amdgpu_ras.h" @@ -150,3 +151,158 @@ int amdgpu_sdma_process_ecc_irq(struct amdgpu_device *adev, amdgpu_ras_interrupt_dispatch(adev, &ih_data); return 0; } + +static int amdgpu_sdma_init_inst_ctx(struct amdgpu_sdma_instance *sdma_inst) +{ + int err = 0; + uint16_t version_major; + const struct common_firmware_header *header = NULL; + const struct sdma_firmware_header_v1_0 *hdr; + const struct sdma_firmware_header_v2_0 *hdr_v2; + + err = amdgpu_ucode_validate(sdma_inst->fw); + if (err) + return err; + + header = (const struct common_firmware_header *) + sdma_inst->fw->data; + version_major = le16_to_cpu(header->header_version_major); + + switch (version_major) { + case 1: + hdr = (const struct sdma_firmware_header_v1_0 
*)sdma_inst->fw->data; + sdma_inst->fw_version = le32_to_cpu(hdr->header.ucode_version); + sdma_inst->feature_version = le32_to_cpu(hdr->ucode_feature_version); + break; + case 2: + hdr_v2 = (const struct sdma_firmware_header_v2_0 *)sdma_inst->fw->data; + sdma_inst->fw_version = le32_to_cpu(hdr_v2->header.ucode_version); + sdma_inst->feature_version = le32_to_cpu(hdr_v2->ucode_feature_version); + break; + default: + return -EINVAL; + } + + if (sdma_inst->feature_version >= 20) + sdma_inst->burst_nop = true; + + return 0; +} + +void amdgpu_sdma_destroy_inst_ctx(struct amdgpu_device *adev, + bool duplicate) +{ + int i; + + for (i = 0; i < adev->sdma.num_instances; i++) { + release_firmware(adev->sdma.instance[i].fw); + if (duplicate) + break; + } + + memset((void *)adev->sdma.instance, 0, + sizeof(struct amdgpu_sdma_instance) * AMDGPU_MAX_SDMA_INSTANCES); +} + +int amdgpu_sdma_init_microcode(struct amdgpu_device *adev, + char *fw_name, u32 instance, + bool duplicate) +{ + struct amdgpu_firmware_info *info = NULL; + const struct common_firmware_header *header = NULL; + int err = 0, i; + const struct sdma_firmware_header_v2_0 *sdma_hdr; + uint16_t version_major; + + err = request_firmware(&adev->sdma.instance[instance].fw, fw_name, adev->dev); + if (err) + goto out; + + header = (const struct common_firmware_header *) + adev->sdma.instance[instance].fw->data; + version_major = le16_to_cpu(header->header_version_major); + + if ((duplicate && instance) || (!duplicate && version_major > 1)) { + err = -EINVAL; + goto out; + } + + err = amdgpu_sdma_init_inst_ctx(&adev->sdma.instance[instance]); + if (err) + goto out; + + if (duplicate) { + for (i = 1; i < adev->sdma.num_instances; i++) + memcpy((void *)&adev->sdma.instance[i], + (void *)&adev->sdma.instance[0], + sizeof(struct amdgpu_sdma_instance)); + } + + if (amdgpu_sriov_vf(adev)) + return 0; + + DRM_DEBUG("psp_load == '%s'\n", + adev->firmware.load_type == AMDGPU_FW_LOAD_PSP ? 
"true" : "false"); + + if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { + switch (version_major) { + case 1: + for (i = 0; i < adev->sdma.num_instances; i++) { + if (!duplicate && (instance != i)) + continue; + else { + info = &adev->firmware.ucode[AMDGPU_UCODE_ID_SDMA0 + i]; + info->ucode_id = AMDGPU_UCODE_ID_SDMA0 + i; + info->fw = adev->sdma.instance[i].fw; + adev->firmware.fw_size += + ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE); + } + } + break; + case 2: + sdma_hdr = (const struct sdma_firmware_header_v2_0 *) + adev->sdma.instance[0].fw->data; + info = &adev->firmware.ucode[AMDGPU_UCODE_ID_SDMA_UCODE_TH0]; + info->ucode_id = AMDGPU_UCODE_ID_SDMA_UCODE_TH0; + info->fw = adev->sdma.instance[0].fw; + adev->firmware.fw_size += + ALIGN(le32_to_cpu(sdma_hdr->ctx_ucode_size_bytes), PAGE_SIZE); + info = &adev->firmware.ucode[AMDGPU_UCODE_ID_SDMA_UCODE_TH1]; + info->ucode_id = AMDGPU_UCODE_ID_SDMA_UCODE_TH1; + info->fw = adev->sdma.instance[0].fw; + adev->firmware.fw_size += + ALIGN(le32_to_cpu(sdma_hdr->ctl_ucode_size_bytes), PAGE_SIZE); + break; + default: + err = -EINVAL; + } + } + +out: + if (err) { + DRM_ERROR("SDMA: Failed to init firmware \"%s\"\n", fw_name); + amdgpu_sdma_destroy_inst_ctx(adev, duplicate); + } + return err; +} + +void amdgpu_sdma_unset_buffer_funcs_helper(struct amdgpu_device *adev) +{ + struct amdgpu_ring *sdma; + int i; + + for (i = 0; i < adev->sdma.num_instances; i++) { + if (adev->sdma.has_page_queue) { + sdma = &adev->sdma.instance[i].page; + if (adev->mman.buffer_funcs_ring == sdma) { + amdgpu_ttm_set_buffer_funcs_status(adev, false); + break; + } + } + sdma = &adev->sdma.instance[i].ring; + if (adev->mman.buffer_funcs_ring == sdma) { + amdgpu_ttm_set_buffer_funcs_status(adev, false); + break; + } + } +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h index 53ac3ebae8d6..7d99205c2e01 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h @@ -124,4 +124,10 @@ int amdgpu_sdma_process_ras_data_cb(struct amdgpu_device *adev, int amdgpu_sdma_process_ecc_irq(struct amdgpu_device *adev, struct amdgpu_irq_src *source, struct amdgpu_iv_entry *entry); +int amdgpu_sdma_init_microcode(struct amdgpu_device *adev, + char *fw_name, u32 instance, bool duplicate); +void amdgpu_sdma_destroy_inst_ctx(struct amdgpu_device *adev, + bool duplicate); +void amdgpu_sdma_unset_buffer_funcs_helper(struct amdgpu_device *adev); + #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_securedisplay.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_securedisplay.c index cc7597a15fe9..2c1d82fc4c34 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_securedisplay.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_securedisplay.c @@ -121,6 +121,7 @@ static ssize_t amdgpu_securedisplay_debugfs_write(struct file *f, const char __u switch (op) { case 1: + mutex_lock(&psp->securedisplay_context.mutex); psp_prep_securedisplay_cmd_buf(psp, &securedisplay_cmd, TA_SECUREDISPLAY_COMMAND__QUERY_TA); ret = psp_securedisplay_invoke(psp, TA_SECUREDISPLAY_COMMAND__QUERY_TA); @@ -131,8 +132,10 @@ static ssize_t amdgpu_securedisplay_debugfs_write(struct file *f, const char __u else psp_securedisplay_parse_resp_status(psp, securedisplay_cmd->status); } + mutex_unlock(&psp->securedisplay_context.mutex); break; case 2: + mutex_lock(&psp->securedisplay_context.mutex); psp_prep_securedisplay_cmd_buf(psp, &securedisplay_cmd, TA_SECUREDISPLAY_COMMAND__SEND_ROI_CRC); securedisplay_cmd->securedisplay_in_message.send_roi_crc.phy_id = 
phy_id; @@ -146,6 +149,7 @@ static ssize_t amdgpu_securedisplay_debugfs_write(struct file *f, const char __u psp_securedisplay_parse_resp_status(psp, securedisplay_cmd->status); } } + mutex_unlock(&psp->securedisplay_context.mutex); break; default: dev_err(adev->dev, "Invalid input: %s\n", str); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c index 504af1b93bfa..bac7976975bd 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: MIT /* * Copyright 2014 Advanced Micro Devices, Inc. * All Rights Reserved. @@ -258,6 +259,14 @@ int amdgpu_sync_resv(struct amdgpu_device *adev, struct amdgpu_sync *sync, return 0; } +/* Free the entry back to the slab */ +static void amdgpu_sync_entry_free(struct amdgpu_sync_entry *e) +{ + hash_del(&e->node); + dma_fence_put(e->fence); + kmem_cache_free(amdgpu_sync_slab, e); +} + /** * amdgpu_sync_peek_fence - get the next fence not signaled yet * @@ -279,9 +288,7 @@ struct dma_fence *amdgpu_sync_peek_fence(struct amdgpu_sync *sync, struct drm_sched_fence *s_fence = to_drm_sched_fence(f); if (dma_fence_is_signaled(f)) { - hash_del(&e->node); - dma_fence_put(f); - kmem_cache_free(amdgpu_sync_slab, e); + amdgpu_sync_entry_free(e); continue; } if (ring && s_fence) { @@ -315,6 +322,7 @@ struct dma_fence *amdgpu_sync_get_fence(struct amdgpu_sync *sync) struct hlist_node *tmp; struct dma_fence *f; int i; + hash_for_each_safe(sync->fences, i, tmp, e, node) { f = e->fence; @@ -353,15 +361,42 @@ int amdgpu_sync_clone(struct amdgpu_sync *source, struct amdgpu_sync *clone) if (r) return r; } else { - hash_del(&e->node); - dma_fence_put(f); - kmem_cache_free(amdgpu_sync_slab, e); + amdgpu_sync_entry_free(e); } } return 0; } +/** + * amdgpu_sync_push_to_job - push fences into job + * @sync: sync object to get the fences from + * @job: job to push the fences into + * + * Add all unsignaled fences from sync to job. 
+ */ +int amdgpu_sync_push_to_job(struct amdgpu_sync *sync, struct amdgpu_job *job) +{ + struct amdgpu_sync_entry *e; + struct hlist_node *tmp; + struct dma_fence *f; + int i, r; + + hash_for_each_safe(sync->fences, i, tmp, e, node) { + f = e->fence; + if (dma_fence_is_signaled(f)) { + amdgpu_sync_entry_free(e); + continue; + } + + dma_fence_get(f); + r = drm_sched_job_add_dependency(&job->base, f); + if (r) + return r; + } + return 0; +} + int amdgpu_sync_wait(struct amdgpu_sync *sync, bool intr) { struct amdgpu_sync_entry *e; @@ -373,9 +408,7 @@ int amdgpu_sync_wait(struct amdgpu_sync *sync, bool intr) if (r) return r; - hash_del(&e->node); - dma_fence_put(e->fence); - kmem_cache_free(amdgpu_sync_slab, e); + amdgpu_sync_entry_free(e); } return 0; @@ -392,13 +425,10 @@ void amdgpu_sync_free(struct amdgpu_sync *sync) { struct amdgpu_sync_entry *e; struct hlist_node *tmp; - unsigned i; + unsigned int i; - hash_for_each_safe(sync->fences, i, tmp, e, node) { - hash_del(&e->node); - dma_fence_put(e->fence); - kmem_cache_free(amdgpu_sync_slab, e); - } + hash_for_each_safe(sync->fences, i, tmp, e, node) + amdgpu_sync_entry_free(e); } /** diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h index 2d5c613cda10..cf1e9e858efd 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h @@ -30,6 +30,7 @@ struct dma_fence; struct dma_resv; struct amdgpu_device; struct amdgpu_ring; +struct amdgpu_job; enum amdgpu_sync_mode { AMDGPU_SYNC_ALWAYS, @@ -54,6 +55,7 @@ struct dma_fence *amdgpu_sync_peek_fence(struct amdgpu_sync *sync, struct amdgpu_ring *ring); struct dma_fence *amdgpu_sync_get_fence(struct amdgpu_sync *sync); int amdgpu_sync_clone(struct amdgpu_sync *source, struct amdgpu_sync *clone); +int amdgpu_sync_push_to_job(struct amdgpu_sync *sync, struct amdgpu_job *job); int amdgpu_sync_wait(struct amdgpu_sync *sync, bool intr); void amdgpu_sync_free(struct amdgpu_sync *sync); int amdgpu_sync_init(void); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h index 06dfcf297a8d..677ad2016976 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h @@ -127,7 +127,7 @@ TRACE_EVENT(amdgpu_bo_create, TP_fast_assign( __entry->bo = bo; - __entry->pages = bo->tbo.resource->num_pages; + __entry->pages = PFN_UP(bo->tbo.resource->size); __entry->type = bo->tbo.resource->mem_type; __entry->prefer = bo->preferred_domains; __entry->allow = bo->allowed_domains; @@ -140,8 +140,10 @@ TRACE_EVENT(amdgpu_bo_create, ); TRACE_EVENT(amdgpu_cs, - TP_PROTO(struct amdgpu_cs_parser *p, int i), - TP_ARGS(p, i), + TP_PROTO(struct amdgpu_cs_parser *p, + struct amdgpu_job *job, + struct amdgpu_ib *ib), + TP_ARGS(p, job, ib), TP_STRUCT__entry( __field(struct amdgpu_bo_list *, bo_list) __field(u32, ring) @@ -151,10 +153,10 @@ TRACE_EVENT(amdgpu_cs, TP_fast_assign( __entry->bo_list = p->bo_list; - __entry->ring = to_amdgpu_ring(p->entity->rq->sched)->idx; - __entry->dw = p->job->ibs[i].length_dw; + __entry->ring = to_amdgpu_ring(job->base.sched)->idx; + __entry->dw = ib->length_dw; __entry->fences = amdgpu_fence_count_emitted( - to_amdgpu_ring(p->entity->rq->sched)); + to_amdgpu_ring(job->base.sched)); ), TP_printk("bo_list=%p, ring=%u, dw=%u, fences=%u", __entry->bo_list, __entry->ring, __entry->dw, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index b1c455329023..0ba87ca6f318 100644 --- 
a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -189,7 +189,6 @@ static int amdgpu_ttm_map_buffer(struct ttm_buffer_object *bo, struct amdgpu_device *adev = ring->adev; unsigned offset, num_pages, num_dw, num_bytes; uint64_t src_addr, dst_addr; - struct dma_fence *fence; struct amdgpu_job *job; void *cpu_addr; uint64_t flags; @@ -229,7 +228,9 @@ static int amdgpu_ttm_map_buffer(struct ttm_buffer_object *bo, num_dw = ALIGN(adev->mman.buffer_funcs->copy_num_dw, 8); num_bytes = num_pages * 8 * AMDGPU_GPU_PAGES_IN_CPU_PAGE; - r = amdgpu_job_alloc_with_ib(adev, num_dw * 4 + num_bytes, + r = amdgpu_job_alloc_with_ib(adev, &adev->mman.entity, + AMDGPU_FENCE_OWNER_UNDEFINED, + num_dw * 4 + num_bytes, AMDGPU_IB_POOL_DELAYED, &job); if (r) return r; @@ -269,18 +270,8 @@ static int amdgpu_ttm_map_buffer(struct ttm_buffer_object *bo, } } - r = amdgpu_job_submit(job, &adev->mman.entity, - AMDGPU_FENCE_OWNER_UNDEFINED, &fence); - if (r) - goto error_free; - - dma_fence_put(fence); - - return r; - -error_free: - amdgpu_job_free(job); - return r; + dma_fence_put(amdgpu_job_submit(job)); + return 0; } /** @@ -381,7 +372,7 @@ static int amdgpu_move_blit(struct ttm_buffer_object *bo, dst.offset = 0; r = amdgpu_ttm_copy_mem_to_mem(adev, &src, &dst, - new_mem->num_pages << PAGE_SHIFT, + new_mem->size, amdgpu_bo_encrypted(abo), bo->base.resv, &fence); if (r) @@ -424,8 +415,9 @@ error: static bool amdgpu_mem_visible(struct amdgpu_device *adev, struct ttm_resource *mem) { - uint64_t mem_size = (u64)mem->num_pages << PAGE_SHIFT; + u64 mem_size = (u64)mem->size; struct amdgpu_res_cursor cursor; + u64 end; if (mem->mem_type == TTM_PL_SYSTEM || mem->mem_type == TTM_PL_TT) @@ -434,12 +426,21 @@ static bool amdgpu_mem_visible(struct amdgpu_device *adev, return false; amdgpu_res_first(mem, 0, mem_size, &cursor); + end = cursor.start + cursor.size; + while (cursor.remaining) { + amdgpu_res_next(&cursor, cursor.size); - /* ttm_resource_ioremap only supports contiguous memory */ - if (cursor.size != mem_size) - return false; + if (!cursor.remaining) + break; + + /* ttm_resource_ioremap only supports contiguous memory */ + if (end != cursor.start) + return false; + + end = cursor.start + cursor.size; + } - return cursor.start + cursor.size <= adev->gmc.visible_vram_size; + return end <= adev->gmc.visible_vram_size; } /* @@ -561,7 +562,7 @@ static int amdgpu_ttm_io_mem_reserve(struct ttm_device *bdev, struct ttm_resource *mem) { struct amdgpu_device *adev = amdgpu_ttm_adev(bdev); - size_t bus_size = (size_t)mem->num_pages << PAGE_SHIFT; + size_t bus_size = (size_t)mem->size; switch (mem->mem_type) { case TTM_PL_SYSTEM: @@ -1407,7 +1408,8 @@ static void amdgpu_ttm_vram_mm_access(struct amdgpu_device *adev, loff_t pos, } static int amdgpu_ttm_access_memory_sdma(struct ttm_buffer_object *bo, - unsigned long offset, void *buf, int len, int write) + unsigned long offset, void *buf, + int len, int write) { struct amdgpu_bo *abo = ttm_to_amdgpu_bo(bo); struct amdgpu_device *adev = amdgpu_ttm_adev(abo->tbo.bdev); @@ -1431,26 +1433,27 @@ static int amdgpu_ttm_access_memory_sdma(struct ttm_buffer_object *bo, memcpy(adev->mman.sdma_access_ptr, buf, len); num_dw = ALIGN(adev->mman.buffer_funcs->copy_num_dw, 8); - r = amdgpu_job_alloc_with_ib(adev, num_dw * 4, AMDGPU_IB_POOL_DELAYED, &job); + r = amdgpu_job_alloc_with_ib(adev, &adev->mman.entity, + AMDGPU_FENCE_OWNER_UNDEFINED, + num_dw * 4, AMDGPU_IB_POOL_DELAYED, + &job); if (r) goto out; amdgpu_res_first(abo->tbo.resource, offset, len, 
&src_mm); - src_addr = amdgpu_ttm_domain_start(adev, bo->resource->mem_type) + src_mm.start; + src_addr = amdgpu_ttm_domain_start(adev, bo->resource->mem_type) + + src_mm.start; dst_addr = amdgpu_bo_gpu_offset(adev->mman.sdma_access_bo); if (write) swap(src_addr, dst_addr); - amdgpu_emit_copy_buffer(adev, &job->ibs[0], src_addr, dst_addr, PAGE_SIZE, false); + amdgpu_emit_copy_buffer(adev, &job->ibs[0], src_addr, dst_addr, + PAGE_SIZE, false); amdgpu_ring_pad_ib(adev->mman.buffer_funcs_ring, &job->ibs[0]); WARN_ON(job->ibs[0].length_dw > num_dw); - r = amdgpu_job_submit(job, &adev->mman.entity, AMDGPU_FENCE_OWNER_UNDEFINED, &fence); - if (r) { - amdgpu_job_free(job); - goto out; - } + fence = amdgpu_job_submit(job); if (!dma_fence_wait_timeout(fence, false, adev->sdma_timeout)) r = -ETIMEDOUT; @@ -1949,7 +1952,9 @@ static int amdgpu_ttm_prepare_job(struct amdgpu_device *adev, AMDGPU_IB_POOL_DELAYED; int r; - r = amdgpu_job_alloc_with_ib(adev, num_dw * 4, pool, job); + r = amdgpu_job_alloc_with_ib(adev, &adev->mman.entity, + AMDGPU_FENCE_OWNER_UNDEFINED, + num_dw * 4, pool, job); if (r) return r; @@ -1959,17 +1964,11 @@ static int amdgpu_ttm_prepare_job(struct amdgpu_device *adev, adev->gart.bo); (*job)->vm_needs_flush = true; } - if (resv) { - r = amdgpu_sync_resv(adev, &(*job)->sync, resv, - AMDGPU_SYNC_ALWAYS, - AMDGPU_FENCE_OWNER_UNDEFINED); - if (r) { - DRM_ERROR("sync failed (%d).\n", r); - amdgpu_job_free(*job); - return r; - } - } - return 0; + if (!resv) + return 0; + + return drm_sched_job_add_resv_dependencies(&(*job)->base, resv, + DMA_RESV_USAGE_BOOKKEEP); } int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset, @@ -2014,8 +2013,7 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset, if (direct_submit) r = amdgpu_job_submit_direct(job, ring, fence); else - r = amdgpu_job_submit(job, &adev->mman.entity, - AMDGPU_FENCE_OWNER_UNDEFINED, fence); + *fence = amdgpu_job_submit(job); if (r) goto error_free; @@ -2060,16 +2058,8 @@ static int amdgpu_ttm_fill_mem(struct amdgpu_ring *ring, uint32_t src_data, amdgpu_ring_pad_ib(ring, &job->ibs[0]); WARN_ON(job->ibs[0].length_dw > num_dw); - r = amdgpu_job_submit(job, &adev->mman.entity, - AMDGPU_FENCE_OWNER_UNDEFINED, fence); - if (r) - goto error_free; - + *fence = amdgpu_job_submit(job); return 0; - -error_free: - amdgpu_job_free(job); - return r; } int amdgpu_fill_buffer(struct amdgpu_bo *bo, @@ -2285,9 +2275,9 @@ static ssize_t amdgpu_iomem_read(struct file *f, char __user *buf, if (p->mapping != adev->mman.bdev.dev_mapping) return -EPERM; - ptr = kmap(p); + ptr = kmap_local_page(p); r = copy_to_user(buf, ptr + off, bytes); - kunmap(p); + kunmap_local(ptr); if (r) return -EFAULT; @@ -2336,9 +2326,9 @@ static ssize_t amdgpu_iomem_write(struct file *f, const char __user *buf, if (p->mapping != adev->mman.bdev.dev_mapping) return -EPERM; - ptr = kmap(p); + ptr = kmap_local_page(p); r = copy_from_user(ptr + off, buf, bytes); - kunmap(p); + kunmap_local(ptr); if (r) return -EFAULT; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c index 939c8614f0e3..5cb62e6249c2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c @@ -164,70 +164,138 @@ void amdgpu_ucode_print_rlc_hdr(const struct common_firmware_header *hdr) } else if (version_major == 2) { const struct rlc_firmware_header_v2_0 *rlc_hdr = container_of(hdr, struct rlc_firmware_header_v2_0, header); + const struct rlc_firmware_header_v2_1 *rlc_hdr_v2_1 = + 
container_of(rlc_hdr, struct rlc_firmware_header_v2_1, v2_0); + const struct rlc_firmware_header_v2_2 *rlc_hdr_v2_2 = + container_of(rlc_hdr_v2_1, struct rlc_firmware_header_v2_2, v2_1); + const struct rlc_firmware_header_v2_3 *rlc_hdr_v2_3 = + container_of(rlc_hdr_v2_2, struct rlc_firmware_header_v2_3, v2_2); + const struct rlc_firmware_header_v2_4 *rlc_hdr_v2_4 = + container_of(rlc_hdr_v2_3, struct rlc_firmware_header_v2_4, v2_3); - DRM_DEBUG("ucode_feature_version: %u\n", - le32_to_cpu(rlc_hdr->ucode_feature_version)); - DRM_DEBUG("jt_offset: %u\n", le32_to_cpu(rlc_hdr->jt_offset)); - DRM_DEBUG("jt_size: %u\n", le32_to_cpu(rlc_hdr->jt_size)); - DRM_DEBUG("save_and_restore_offset: %u\n", - le32_to_cpu(rlc_hdr->save_and_restore_offset)); - DRM_DEBUG("clear_state_descriptor_offset: %u\n", - le32_to_cpu(rlc_hdr->clear_state_descriptor_offset)); - DRM_DEBUG("avail_scratch_ram_locations: %u\n", - le32_to_cpu(rlc_hdr->avail_scratch_ram_locations)); - DRM_DEBUG("reg_restore_list_size: %u\n", - le32_to_cpu(rlc_hdr->reg_restore_list_size)); - DRM_DEBUG("reg_list_format_start: %u\n", - le32_to_cpu(rlc_hdr->reg_list_format_start)); - DRM_DEBUG("reg_list_format_separate_start: %u\n", - le32_to_cpu(rlc_hdr->reg_list_format_separate_start)); - DRM_DEBUG("starting_offsets_start: %u\n", - le32_to_cpu(rlc_hdr->starting_offsets_start)); - DRM_DEBUG("reg_list_format_size_bytes: %u\n", - le32_to_cpu(rlc_hdr->reg_list_format_size_bytes)); - DRM_DEBUG("reg_list_format_array_offset_bytes: %u\n", - le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes)); - DRM_DEBUG("reg_list_size_bytes: %u\n", - le32_to_cpu(rlc_hdr->reg_list_size_bytes)); - DRM_DEBUG("reg_list_array_offset_bytes: %u\n", - le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes)); - DRM_DEBUG("reg_list_format_separate_size_bytes: %u\n", - le32_to_cpu(rlc_hdr->reg_list_format_separate_size_bytes)); - DRM_DEBUG("reg_list_format_separate_array_offset_bytes: %u\n", - le32_to_cpu(rlc_hdr->reg_list_format_separate_array_offset_bytes)); - DRM_DEBUG("reg_list_separate_size_bytes: %u\n", - le32_to_cpu(rlc_hdr->reg_list_separate_size_bytes)); - DRM_DEBUG("reg_list_separate_array_offset_bytes: %u\n", - le32_to_cpu(rlc_hdr->reg_list_separate_array_offset_bytes)); - if (version_minor == 1) { - const struct rlc_firmware_header_v2_1 *v2_1 = - container_of(rlc_hdr, struct rlc_firmware_header_v2_1, v2_0); + switch (version_minor) { + case 0: + /* rlc_hdr v2_0 */ + DRM_DEBUG("ucode_feature_version: %u\n", + le32_to_cpu(rlc_hdr->ucode_feature_version)); + DRM_DEBUG("jt_offset: %u\n", le32_to_cpu(rlc_hdr->jt_offset)); + DRM_DEBUG("jt_size: %u\n", le32_to_cpu(rlc_hdr->jt_size)); + DRM_DEBUG("save_and_restore_offset: %u\n", + le32_to_cpu(rlc_hdr->save_and_restore_offset)); + DRM_DEBUG("clear_state_descriptor_offset: %u\n", + le32_to_cpu(rlc_hdr->clear_state_descriptor_offset)); + DRM_DEBUG("avail_scratch_ram_locations: %u\n", + le32_to_cpu(rlc_hdr->avail_scratch_ram_locations)); + DRM_DEBUG("reg_restore_list_size: %u\n", + le32_to_cpu(rlc_hdr->reg_restore_list_size)); + DRM_DEBUG("reg_list_format_start: %u\n", + le32_to_cpu(rlc_hdr->reg_list_format_start)); + DRM_DEBUG("reg_list_format_separate_start: %u\n", + le32_to_cpu(rlc_hdr->reg_list_format_separate_start)); + DRM_DEBUG("starting_offsets_start: %u\n", + le32_to_cpu(rlc_hdr->starting_offsets_start)); + DRM_DEBUG("reg_list_format_size_bytes: %u\n", + le32_to_cpu(rlc_hdr->reg_list_format_size_bytes)); + DRM_DEBUG("reg_list_format_array_offset_bytes: %u\n", + le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes)); + 
DRM_DEBUG("reg_list_size_bytes: %u\n", + le32_to_cpu(rlc_hdr->reg_list_size_bytes)); + DRM_DEBUG("reg_list_array_offset_bytes: %u\n", + le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes)); + DRM_DEBUG("reg_list_format_separate_size_bytes: %u\n", + le32_to_cpu(rlc_hdr->reg_list_format_separate_size_bytes)); + DRM_DEBUG("reg_list_format_separate_array_offset_bytes: %u\n", + le32_to_cpu(rlc_hdr->reg_list_format_separate_array_offset_bytes)); + DRM_DEBUG("reg_list_separate_size_bytes: %u\n", + le32_to_cpu(rlc_hdr->reg_list_separate_size_bytes)); + DRM_DEBUG("reg_list_separate_array_offset_bytes: %u\n", + le32_to_cpu(rlc_hdr->reg_list_separate_array_offset_bytes)); + break; + case 1: + /* rlc_hdr v2_1 */ DRM_DEBUG("reg_list_format_direct_reg_list_length: %u\n", - le32_to_cpu(v2_1->reg_list_format_direct_reg_list_length)); + le32_to_cpu(rlc_hdr_v2_1->reg_list_format_direct_reg_list_length)); DRM_DEBUG("save_restore_list_cntl_ucode_ver: %u\n", - le32_to_cpu(v2_1->save_restore_list_cntl_ucode_ver)); + le32_to_cpu(rlc_hdr_v2_1->save_restore_list_cntl_ucode_ver)); DRM_DEBUG("save_restore_list_cntl_feature_ver: %u\n", - le32_to_cpu(v2_1->save_restore_list_cntl_feature_ver)); + le32_to_cpu(rlc_hdr_v2_1->save_restore_list_cntl_feature_ver)); DRM_DEBUG("save_restore_list_cntl_size_bytes %u\n", - le32_to_cpu(v2_1->save_restore_list_cntl_size_bytes)); + le32_to_cpu(rlc_hdr_v2_1->save_restore_list_cntl_size_bytes)); DRM_DEBUG("save_restore_list_cntl_offset_bytes: %u\n", - le32_to_cpu(v2_1->save_restore_list_cntl_offset_bytes)); + le32_to_cpu(rlc_hdr_v2_1->save_restore_list_cntl_offset_bytes)); DRM_DEBUG("save_restore_list_gpm_ucode_ver: %u\n", - le32_to_cpu(v2_1->save_restore_list_gpm_ucode_ver)); + le32_to_cpu(rlc_hdr_v2_1->save_restore_list_gpm_ucode_ver)); DRM_DEBUG("save_restore_list_gpm_feature_ver: %u\n", - le32_to_cpu(v2_1->save_restore_list_gpm_feature_ver)); + le32_to_cpu(rlc_hdr_v2_1->save_restore_list_gpm_feature_ver)); DRM_DEBUG("save_restore_list_gpm_size_bytes %u\n", - le32_to_cpu(v2_1->save_restore_list_gpm_size_bytes)); + le32_to_cpu(rlc_hdr_v2_1->save_restore_list_gpm_size_bytes)); DRM_DEBUG("save_restore_list_gpm_offset_bytes: %u\n", - le32_to_cpu(v2_1->save_restore_list_gpm_offset_bytes)); + le32_to_cpu(rlc_hdr_v2_1->save_restore_list_gpm_offset_bytes)); DRM_DEBUG("save_restore_list_srm_ucode_ver: %u\n", - le32_to_cpu(v2_1->save_restore_list_srm_ucode_ver)); + le32_to_cpu(rlc_hdr_v2_1->save_restore_list_srm_ucode_ver)); DRM_DEBUG("save_restore_list_srm_feature_ver: %u\n", - le32_to_cpu(v2_1->save_restore_list_srm_feature_ver)); + le32_to_cpu(rlc_hdr_v2_1->save_restore_list_srm_feature_ver)); DRM_DEBUG("save_restore_list_srm_size_bytes %u\n", - le32_to_cpu(v2_1->save_restore_list_srm_size_bytes)); + le32_to_cpu(rlc_hdr_v2_1->save_restore_list_srm_size_bytes)); DRM_DEBUG("save_restore_list_srm_offset_bytes: %u\n", - le32_to_cpu(v2_1->save_restore_list_srm_offset_bytes)); + le32_to_cpu(rlc_hdr_v2_1->save_restore_list_srm_offset_bytes)); + break; + case 2: + /* rlc_hdr v2_2 */ + DRM_DEBUG("rlc_iram_ucode_size_bytes: %u\n", + le32_to_cpu(rlc_hdr_v2_2->rlc_iram_ucode_size_bytes)); + DRM_DEBUG("rlc_iram_ucode_offset_bytes: %u\n", + le32_to_cpu(rlc_hdr_v2_2->rlc_iram_ucode_offset_bytes)); + DRM_DEBUG("rlc_dram_ucode_size_bytes: %u\n", + le32_to_cpu(rlc_hdr_v2_2->rlc_dram_ucode_size_bytes)); + DRM_DEBUG("rlc_dram_ucode_offset_bytes: %u\n", + le32_to_cpu(rlc_hdr_v2_2->rlc_dram_ucode_offset_bytes)); + break; + case 3: + /* rlc_hdr v2_3 */ + DRM_DEBUG("rlcp_ucode_version: %u\n", + 
le32_to_cpu(rlc_hdr_v2_3->rlcp_ucode_version)); + DRM_DEBUG("rlcp_ucode_feature_version: %u\n", + le32_to_cpu(rlc_hdr_v2_3->rlcp_ucode_feature_version)); + DRM_DEBUG("rlcp_ucode_size_bytes: %u\n", + le32_to_cpu(rlc_hdr_v2_3->rlcp_ucode_size_bytes)); + DRM_DEBUG("rlcp_ucode_offset_bytes: %u\n", + le32_to_cpu(rlc_hdr_v2_3->rlcp_ucode_offset_bytes)); + DRM_DEBUG("rlcv_ucode_version: %u\n", + le32_to_cpu(rlc_hdr_v2_3->rlcv_ucode_version)); + DRM_DEBUG("rlcv_ucode_feature_version: %u\n", + le32_to_cpu(rlc_hdr_v2_3->rlcv_ucode_feature_version)); + DRM_DEBUG("rlcv_ucode_size_bytes: %u\n", + le32_to_cpu(rlc_hdr_v2_3->rlcv_ucode_size_bytes)); + DRM_DEBUG("rlcv_ucode_offset_bytes: %u\n", + le32_to_cpu(rlc_hdr_v2_3->rlcv_ucode_offset_bytes)); + break; + case 4: + /* rlc_hdr v2_4 */ + DRM_DEBUG("global_tap_delays_ucode_size_bytes :%u\n", + le32_to_cpu(rlc_hdr_v2_4->global_tap_delays_ucode_size_bytes)); + DRM_DEBUG("global_tap_delays_ucode_offset_bytes: %u\n", + le32_to_cpu(rlc_hdr_v2_4->global_tap_delays_ucode_offset_bytes)); + DRM_DEBUG("se0_tap_delays_ucode_size_bytes :%u\n", + le32_to_cpu(rlc_hdr_v2_4->se0_tap_delays_ucode_size_bytes)); + DRM_DEBUG("se0_tap_delays_ucode_offset_bytes: %u\n", + le32_to_cpu(rlc_hdr_v2_4->se0_tap_delays_ucode_offset_bytes)); + DRM_DEBUG("se1_tap_delays_ucode_size_bytes :%u\n", + le32_to_cpu(rlc_hdr_v2_4->se1_tap_delays_ucode_size_bytes)); + DRM_DEBUG("se1_tap_delays_ucode_offset_bytes: %u\n", + le32_to_cpu(rlc_hdr_v2_4->se1_tap_delays_ucode_offset_bytes)); + DRM_DEBUG("se2_tap_delays_ucode_size_bytes :%u\n", + le32_to_cpu(rlc_hdr_v2_4->se2_tap_delays_ucode_size_bytes)); + DRM_DEBUG("se2_tap_delays_ucode_offset_bytes: %u\n", + le32_to_cpu(rlc_hdr_v2_4->se2_tap_delays_ucode_offset_bytes)); + DRM_DEBUG("se3_tap_delays_ucode_size_bytes :%u\n", + le32_to_cpu(rlc_hdr_v2_4->se3_tap_delays_ucode_size_bytes)); + DRM_DEBUG("se3_tap_delays_ucode_offset_bytes: %u\n", + le32_to_cpu(rlc_hdr_v2_4->se3_tap_delays_ucode_offset_bytes)); + break; + default: + DRM_ERROR("Unknown RLC v2 ucode: v2.%u\n", version_minor); + break; } } else { DRM_ERROR("Unknown RLC ucode version: %u.%u\n", version_major, version_minor); @@ -630,6 +698,7 @@ FW_VERSION_ATTR(rlc_srlg_fw_version, 0444, gfx.rlc_srlg_fw_version); FW_VERSION_ATTR(rlc_srls_fw_version, 0444, gfx.rlc_srls_fw_version); FW_VERSION_ATTR(mec_fw_version, 0444, gfx.mec_fw_version); FW_VERSION_ATTR(mec2_fw_version, 0444, gfx.mec2_fw_version); +FW_VERSION_ATTR(imu_fw_version, 0444, gfx.imu_fw_version); FW_VERSION_ATTR(sos_fw_version, 0444, psp.sos.fw_version); FW_VERSION_ATTR(asd_fw_version, 0444, psp.asd_context.bin_desc.fw_version); FW_VERSION_ATTR(ta_ras_fw_version, 0444, psp.ras_context.context.bin_desc.fw_version); @@ -651,7 +720,8 @@ static struct attribute *fw_attrs[] = { &dev_attr_ta_ras_fw_version.attr, &dev_attr_ta_xgmi_fw_version.attr, &dev_attr_smc_fw_version.attr, &dev_attr_sdma_fw_version.attr, &dev_attr_sdma2_fw_version.attr, &dev_attr_vcn_fw_version.attr, - &dev_attr_dmcu_fw_version.attr, NULL + &dev_attr_dmcu_fw_version.attr, &dev_attr_imu_fw_version.attr, + NULL }; static const struct attribute_group fw_attr_group = { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h index 96b6cf4c4d54..552e06929229 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h @@ -124,6 +124,8 @@ enum psp_fw_type { PSP_FW_TYPE_PSP_SOC_DRV, PSP_FW_TYPE_PSP_INTF_DRV, PSP_FW_TYPE_PSP_DBG_DRV, + PSP_FW_TYPE_PSP_RAS_DRV, + PSP_FW_TYPE_MAX_INDEX, }; /* 
version_major=2, version_minor=0 */ @@ -260,8 +262,12 @@ struct rlc_firmware_header_v2_2 { /* version_major=2, version_minor=3 */ struct rlc_firmware_header_v2_3 { struct rlc_firmware_header_v2_2 v2_2; + uint32_t rlcp_ucode_version; + uint32_t rlcp_ucode_feature_version; uint32_t rlcp_ucode_size_bytes; uint32_t rlcp_ucode_offset_bytes; + uint32_t rlcv_ucode_version; + uint32_t rlcv_ucode_feature_version; uint32_t rlcv_ucode_size_bytes; uint32_t rlcv_ucode_offset_bytes; }; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c index aad3c8b4c810..f76c19fc0392 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c @@ -22,6 +22,59 @@ */ #include "amdgpu.h" +#include "umc_v6_7.h" + +static int amdgpu_umc_convert_error_address(struct amdgpu_device *adev, + struct ras_err_data *err_data, uint64_t err_addr, + uint32_t ch_inst, uint32_t umc_inst) +{ + switch (adev->ip_versions[UMC_HWIP][0]) { + case IP_VERSION(6, 7, 0): + umc_v6_7_convert_error_address(adev, + err_data, err_addr, ch_inst, umc_inst); + break; + default: + dev_warn(adev->dev, + "UMC address to Physical address translation is not supported\n"); + return AMDGPU_RAS_FAIL; + } + + return AMDGPU_RAS_SUCCESS; +} + +int amdgpu_umc_page_retirement_mca(struct amdgpu_device *adev, + uint64_t err_addr, uint32_t ch_inst, uint32_t umc_inst) +{ + struct ras_err_data err_data = {0, 0, 0, NULL}; + int ret = AMDGPU_RAS_FAIL; + + err_data.err_addr = + kcalloc(adev->umc.max_ras_err_cnt_per_query, + sizeof(struct eeprom_table_record), GFP_KERNEL); + if (!err_data.err_addr) { + dev_warn(adev->dev, + "Failed to alloc memory for umc error record in MCA notifier!\n"); + return AMDGPU_RAS_FAIL; + } + + /* + * Translate UMC channel address to Physical address + */ + ret = amdgpu_umc_convert_error_address(adev, &err_data, err_addr, + ch_inst, umc_inst); + if (ret) + goto out; + + if (amdgpu_bad_page_threshold != 0) { + amdgpu_ras_add_bad_pages(adev, err_data.err_addr, + err_data.err_addr_cnt); + amdgpu_ras_save_bad_pages(adev); + } + +out: + kfree(err_data.err_addr); + return ret; +} static int amdgpu_umc_do_page_retirement(struct amdgpu_device *adev, void *ras_error_status, @@ -112,23 +165,29 @@ static int amdgpu_umc_do_page_retirement(struct amdgpu_device *adev, return AMDGPU_RAS_SUCCESS; } -int amdgpu_umc_poison_handler(struct amdgpu_device *adev, - void *ras_error_status, - bool reset) +int amdgpu_umc_poison_handler(struct amdgpu_device *adev, bool reset) { - int ret; - struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status; - struct ras_common_if head = { - .block = AMDGPU_RAS_BLOCK__UMC, - }; - struct ras_manager *obj = amdgpu_ras_find_obj(adev, &head); + int ret = AMDGPU_RAS_SUCCESS; - ret = - amdgpu_umc_do_page_retirement(adev, ras_error_status, NULL, reset); + if (!adev->gmc.xgmi.connected_to_cpu) { + struct ras_err_data err_data = {0, 0, 0, NULL}; + struct ras_common_if head = { + .block = AMDGPU_RAS_BLOCK__UMC, + }; + struct ras_manager *obj = amdgpu_ras_find_obj(adev, &head); - if (ret == AMDGPU_RAS_SUCCESS && obj) { - obj->err_data.ue_count += err_data->ue_count; - obj->err_data.ce_count += err_data->ce_count; + ret = amdgpu_umc_do_page_retirement(adev, &err_data, NULL, reset); + + if (ret == AMDGPU_RAS_SUCCESS && obj) { + obj->err_data.ue_count += err_data.ue_count; + obj->err_data.ce_count += err_data.ce_count; + } + } else if (reset) { + /* MCA poison handler is only responsible for GPU reset, + * let MCA notifier do page retirement. 
+ */ + kgd2kfd_set_sram_ecc_flag(adev->kfd.dev); + amdgpu_ras_reset_gpu(adev); } return ret; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h index 3629d8f292ef..a6951160f13a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h @@ -83,9 +83,7 @@ struct amdgpu_umc { }; int amdgpu_umc_ras_late_init(struct amdgpu_device *adev, struct ras_common_if *ras_block); -int amdgpu_umc_poison_handler(struct amdgpu_device *adev, - void *ras_error_status, - bool reset); +int amdgpu_umc_poison_handler(struct amdgpu_device *adev, bool reset); int amdgpu_umc_process_ecc_irq(struct amdgpu_device *adev, struct amdgpu_irq_src *source, struct amdgpu_iv_entry *entry); @@ -98,4 +96,6 @@ void amdgpu_umc_fill_error_record(struct ras_err_data *err_data, int amdgpu_umc_process_ras_data_cb(struct amdgpu_device *adev, void *ras_error_status, struct amdgpu_iv_entry *entry); +int amdgpu_umc_page_retirement_mca(struct amdgpu_device *adev, + uint64_t err_addr, uint32_t ch_inst, uint32_t umc_inst); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c index 6eac649499d3..e00bb654e24b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c @@ -1132,7 +1132,9 @@ static int amdgpu_uvd_send_msg(struct amdgpu_ring *ring, struct amdgpu_bo *bo, unsigned offset_idx = 0; unsigned offset[3] = { UVD_BASE_SI, 0, 0 }; - r = amdgpu_job_alloc_with_ib(adev, 64, direct ? AMDGPU_IB_POOL_DIRECT : + r = amdgpu_job_alloc_with_ib(ring->adev, &adev->uvd.entity, + AMDGPU_FENCE_OWNER_UNDEFINED, + 64, direct ? AMDGPU_IB_POOL_DIRECT : AMDGPU_IB_POOL_DELAYED, &job); if (r) return r; @@ -1175,16 +1177,13 @@ static int amdgpu_uvd_send_msg(struct amdgpu_ring *ring, struct amdgpu_bo *bo, if (r) goto err_free; } else { - r = amdgpu_sync_resv(adev, &job->sync, bo->tbo.base.resv, - AMDGPU_SYNC_ALWAYS, - AMDGPU_FENCE_OWNER_UNDEFINED); + r = drm_sched_job_add_resv_dependencies(&job->base, + bo->tbo.base.resv, + DMA_RESV_USAGE_KERNEL); if (r) goto err_free; - r = amdgpu_job_submit(job, &adev->uvd.entity, - AMDGPU_FENCE_OWNER_UNDEFINED, &f); - if (r) - goto err_free; + f = amdgpu_job_submit(job); } amdgpu_bo_reserve(bo, true); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c index 02cb3a12dd76..b239e874f2d5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c @@ -450,8 +450,10 @@ static int amdgpu_vce_get_create_msg(struct amdgpu_ring *ring, uint32_t handle, uint64_t addr; int i, r; - r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4, - AMDGPU_IB_POOL_DIRECT, &job); + r = amdgpu_job_alloc_with_ib(ring->adev, &ring->adev->vce.entity, + AMDGPU_FENCE_OWNER_UNDEFINED, + ib_size_dw * 4, AMDGPU_IB_POOL_DIRECT, + &job); if (r) return r; @@ -538,7 +540,9 @@ static int amdgpu_vce_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle, struct dma_fence *f = NULL; int i, r; - r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4, + r = amdgpu_job_alloc_with_ib(ring->adev, &ring->adev->vce.entity, + AMDGPU_FENCE_OWNER_UNDEFINED, + ib_size_dw * 4, direct ? 
AMDGPU_IB_POOL_DIRECT : AMDGPU_IB_POOL_DELAYED, &job); if (r) @@ -570,8 +574,7 @@ static int amdgpu_vce_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle, if (direct) r = amdgpu_job_submit_direct(job, ring, &f); else - r = amdgpu_job_submit(job, &ring->adev->vce.entity, - AMDGPU_FENCE_OWNER_UNDEFINED, &f); + f = amdgpu_job_submit(job); if (r) goto err; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c index f36e4f08db6d..3449145ab2bc 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c @@ -191,7 +191,7 @@ int amdgpu_vcn_sw_init(struct amdgpu_device *adev) fw_name = FIRMWARE_VCN4_0_2; if ((adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) && (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG)) - adev->vcn.indirect_sram = false; + adev->vcn.indirect_sram = true; break; case IP_VERSION(4, 0, 4): fw_name = FIRMWARE_VCN4_0_4; @@ -600,15 +600,16 @@ static int amdgpu_vcn_dec_send_msg(struct amdgpu_ring *ring, struct amdgpu_ib *ib_msg, struct dma_fence **fence) { + u64 addr = AMDGPU_GPU_PAGE_ALIGN(ib_msg->gpu_addr); struct amdgpu_device *adev = ring->adev; struct dma_fence *f = NULL; struct amdgpu_job *job; struct amdgpu_ib *ib; - uint64_t addr = AMDGPU_GPU_PAGE_ALIGN(ib_msg->gpu_addr); int i, r; - r = amdgpu_job_alloc_with_ib(adev, 64, - AMDGPU_IB_POOL_DIRECT, &job); + r = amdgpu_job_alloc_with_ib(ring->adev, NULL, NULL, + 64, AMDGPU_IB_POOL_DIRECT, + &job); if (r) goto err; @@ -787,8 +788,9 @@ static int amdgpu_vcn_dec_sw_send_msg(struct amdgpu_ring *ring, if (sq) ib_size_dw += 8; - r = amdgpu_job_alloc_with_ib(adev, ib_size_dw * 4, - AMDGPU_IB_POOL_DIRECT, &job); + r = amdgpu_job_alloc_with_ib(ring->adev, NULL, NULL, + ib_size_dw * 4, AMDGPU_IB_POOL_DIRECT, + &job); if (r) goto err; @@ -916,8 +918,9 @@ static int amdgpu_vcn_enc_get_create_msg(struct amdgpu_ring *ring, uint32_t hand if (sq) ib_size_dw += 8; - r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4, - AMDGPU_IB_POOL_DIRECT, &job); + r = amdgpu_job_alloc_with_ib(ring->adev, NULL, NULL, + ib_size_dw * 4, AMDGPU_IB_POOL_DIRECT, + &job); if (r) return r; @@ -982,8 +985,9 @@ static int amdgpu_vcn_enc_get_destroy_msg(struct amdgpu_ring *ring, uint32_t han if (sq) ib_size_dw += 8; - r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4, - AMDGPU_IB_POOL_DIRECT, &job); + r = amdgpu_job_alloc_with_ib(ring->adev, NULL, NULL, + ib_size_dw * 4, AMDGPU_IB_POOL_DIRECT, + &job); if (r) return r; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h index 80b7a6cfd026..253ea6b159df 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h @@ -161,7 +161,8 @@ #define AMDGPU_VCN_SW_RING_FLAG (1 << 9) #define AMDGPU_VCN_FW_LOGGING_FLAG (1 << 10) #define AMDGPU_VCN_SMU_VERSION_INFO_FLAG (1 << 11) -#define AMDGPU_VCN_VF_RB_SETUP_FLAG (1 << 12) +#define AMDGPU_VCN_SMU_DPM_INTERFACE_FLAG (1 << 11) +#define AMDGPU_VCN_VF_RB_SETUP_FLAG (1 << 14) #define AMDGPU_VCN_IB_FLAG_DECODE_BUFFER 0x00000001 #define AMDGPU_VCN_CMD_FLAG_MSG_BUFFER 0x00000001 @@ -171,6 +172,9 @@ #define VCN_CODEC_DISABLE_MASK_HEVC (1 << 2) #define VCN_CODEC_DISABLE_MASK_H264 (1 << 3) +#define AMDGPU_VCN_SMU_DPM_INTERFACE_DGPU (0) +#define AMDGPU_VCN_SMU_DPM_INTERFACE_APU (1) + enum fw_queue_mode { FW_QUEUE_RING_RESET = 1, FW_QUEUE_DPG_HOLD_OFF = 2, @@ -335,7 +339,9 @@ struct amdgpu_vcn4_fw_shared { struct amdgpu_fw_shared_unified_queue_struct sq; uint8_t pad1[8]; struct amdgpu_fw_shared_fw_logging fw_log; + uint8_t 
pad2[20]; struct amdgpu_fw_shared_rb_setup rb_setup; + struct amdgpu_fw_shared_smu_interface_info smu_dpm_interface; }; struct amdgpu_vcn_fwlog { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c index e4af40b9a8aa..a226a6c48fb7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c @@ -64,6 +64,10 @@ void amdgpu_virt_init_setting(struct amdgpu_device *adev) ddev->driver_features &= ~DRIVER_ATOMIC; adev->cg_flags = 0; adev->pg_flags = 0; + + /* enable mcbp for sriov asic_type before soc21 */ + amdgpu_mcbp = (adev->asic_type < CHIP_IP_DISCOVERY) ? 1 : 0; + } void amdgpu_virt_kiq_reg_write_reg_wait(struct amdgpu_device *adev, @@ -547,6 +551,7 @@ static void amdgpu_virt_populate_vf2pf_ucode_info(struct amdgpu_device *adev) POPULATE_UCODE_INFO(vf2pf_info, AMD_SRIOV_UCODE_ID_RLC_SRLS, adev->gfx.rlc_srls_fw_version); POPULATE_UCODE_INFO(vf2pf_info, AMD_SRIOV_UCODE_ID_MEC, adev->gfx.mec_fw_version); POPULATE_UCODE_INFO(vf2pf_info, AMD_SRIOV_UCODE_ID_MEC2, adev->gfx.mec2_fw_version); + POPULATE_UCODE_INFO(vf2pf_info, AMD_SRIOV_UCODE_ID_IMU, adev->gfx.imu_fw_version); POPULATE_UCODE_INFO(vf2pf_info, AMD_SRIOV_UCODE_ID_SOS, adev->psp.sos.fw_version); POPULATE_UCODE_INFO(vf2pf_info, AMD_SRIOV_UCODE_ID_ASD, adev->psp.asd_context.bin_desc.fw_version); @@ -726,6 +731,12 @@ void amdgpu_detect_virtualization(struct amdgpu_device *adev) adev->virt.caps |= AMDGPU_PASSTHROUGH_MODE; } + if (amdgpu_sriov_vf(adev) && adev->asic_type == CHIP_SIENNA_CICHLID) + /* VF MMIO access (except mailbox range) from CPU + * will be blocked during sriov runtime + */ + adev->virt.caps |= AMDGPU_VF_MMIO_ACCESS_PROTECT; + /* we have the ability to check now */ if (amdgpu_sriov_vf(adev)) { switch (adev->asic_type) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h index d94c31e68a14..2b9d806e23af 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h @@ -31,6 +31,7 @@ #define AMDGPU_SRIOV_CAPS_IS_VF (1 << 2) /* this GPU is a virtual function */ #define AMDGPU_PASSTHROUGH_MODE (1 << 3) /* thw whole GPU is pass through for VM */ #define AMDGPU_SRIOV_CAPS_RUNTIME (1 << 4) /* is out of full access mode */ +#define AMDGPU_VF_MMIO_ACCESS_PROTECT (1 << 5) /* MMIO write access is not allowed in sriov runtime */ /* flags for indirect register access path supported by rlcg for sriov */ #define AMDGPU_RLCG_GC_WRITE_LEGACY (0x8 << 28) @@ -74,6 +75,8 @@ struct amdgpu_vf_error_buffer { uint64_t data[AMDGPU_VF_ERROR_ENTRY_SIZE]; }; +enum idh_request; + /** * struct amdgpu_virt_ops - amdgpu device virt operations */ @@ -83,7 +86,8 @@ struct amdgpu_virt_ops { int (*req_init_data)(struct amdgpu_device *adev); int (*reset_gpu)(struct amdgpu_device *adev); int (*wait_reset)(struct amdgpu_device *adev); - void (*trans_msg)(struct amdgpu_device *adev, u32 req, u32 data1, u32 data2, u32 data3); + void (*trans_msg)(struct amdgpu_device *adev, enum idh_request req, + u32 data1, u32 data2, u32 data3); }; /* @@ -297,6 +301,9 @@ struct amdgpu_video_codec_info; #define amdgpu_passthrough(adev) \ ((adev)->virt.caps & AMDGPU_PASSTHROUGH_MODE) +#define amdgpu_sriov_vf_mmio_access_protection(adev) \ +((adev)->virt.caps & AMDGPU_VF_MMIO_ACCESS_PROTECT) + static inline bool is_virtual_machine(void) { #if defined(CONFIG_X86) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c index f4b5301ea2a0..9b81b6867db3 100644 --- 
a/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c @@ -498,7 +498,7 @@ static int amdgpu_vkms_sw_init(void *handle) adev_to_drm(adev)->mode_config.preferred_depth = 24; adev_to_drm(adev)->mode_config.prefer_shadow = 1; - adev_to_drm(adev)->mode_config.fb_base = adev->gmc.aper_base; + adev_to_drm(adev)->mode_config.fb_modifiers_not_supported = true; r = amdgpu_display_modeset_create_props(adev); if (r) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 59cac347baa3..2291aa14d888 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -183,10 +183,12 @@ static void amdgpu_vm_bo_evicted(struct amdgpu_vm_bo_base *vm_bo) struct amdgpu_bo *bo = vm_bo->bo; vm_bo->moved = true; + spin_lock(&vm_bo->vm->status_lock); if (bo->tbo.type == ttm_bo_type_kernel) list_move(&vm_bo->vm_status, &vm->evicted); else list_move_tail(&vm_bo->vm_status, &vm->evicted); + spin_unlock(&vm_bo->vm->status_lock); } /** * amdgpu_vm_bo_moved - vm_bo is moved @@ -198,7 +200,9 @@ static void amdgpu_vm_bo_evicted(struct amdgpu_vm_bo_base *vm_bo) */ static void amdgpu_vm_bo_moved(struct amdgpu_vm_bo_base *vm_bo) { + spin_lock(&vm_bo->vm->status_lock); list_move(&vm_bo->vm_status, &vm_bo->vm->moved); + spin_unlock(&vm_bo->vm->status_lock); } /** @@ -211,7 +215,9 @@ static void amdgpu_vm_bo_moved(struct amdgpu_vm_bo_base *vm_bo) */ static void amdgpu_vm_bo_idle(struct amdgpu_vm_bo_base *vm_bo) { + spin_lock(&vm_bo->vm->status_lock); list_move(&vm_bo->vm_status, &vm_bo->vm->idle); + spin_unlock(&vm_bo->vm->status_lock); vm_bo->moved = false; } @@ -225,9 +231,9 @@ static void amdgpu_vm_bo_idle(struct amdgpu_vm_bo_base *vm_bo) */ static void amdgpu_vm_bo_invalidated(struct amdgpu_vm_bo_base *vm_bo) { - spin_lock(&vm_bo->vm->invalidated_lock); + spin_lock(&vm_bo->vm->status_lock); list_move(&vm_bo->vm_status, &vm_bo->vm->invalidated); - spin_unlock(&vm_bo->vm->invalidated_lock); + spin_unlock(&vm_bo->vm->status_lock); } /** @@ -240,10 +246,13 @@ static void amdgpu_vm_bo_invalidated(struct amdgpu_vm_bo_base *vm_bo) */ static void amdgpu_vm_bo_relocated(struct amdgpu_vm_bo_base *vm_bo) { - if (vm_bo->bo->parent) + if (vm_bo->bo->parent) { + spin_lock(&vm_bo->vm->status_lock); list_move(&vm_bo->vm_status, &vm_bo->vm->relocated); - else + spin_unlock(&vm_bo->vm->status_lock); + } else { amdgpu_vm_bo_idle(vm_bo); + } } /** @@ -256,9 +265,9 @@ static void amdgpu_vm_bo_relocated(struct amdgpu_vm_bo_base *vm_bo) */ static void amdgpu_vm_bo_done(struct amdgpu_vm_bo_base *vm_bo) { - spin_lock(&vm_bo->vm->invalidated_lock); + spin_lock(&vm_bo->vm->status_lock); list_move(&vm_bo->vm_status, &vm_bo->vm->done); - spin_unlock(&vm_bo->vm->invalidated_lock); + spin_unlock(&vm_bo->vm->status_lock); } /** @@ -363,12 +372,20 @@ int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm, int (*validate)(void *p, struct amdgpu_bo *bo), void *param) { - struct amdgpu_vm_bo_base *bo_base, *tmp; + struct amdgpu_vm_bo_base *bo_base; + struct amdgpu_bo *shadow; + struct amdgpu_bo *bo; int r; - list_for_each_entry_safe(bo_base, tmp, &vm->evicted, vm_status) { - struct amdgpu_bo *bo = bo_base->bo; - struct amdgpu_bo *shadow = amdgpu_bo_shadowed(bo); + spin_lock(&vm->status_lock); + while (!list_empty(&vm->evicted)) { + bo_base = list_first_entry(&vm->evicted, + struct amdgpu_vm_bo_base, + vm_status); + spin_unlock(&vm->status_lock); + + bo = bo_base->bo; + shadow = amdgpu_bo_shadowed(bo); r = validate(param, bo); 
if (r) @@ -385,7 +402,9 @@ int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm, vm->update_funcs->map_table(to_amdgpu_bo_vm(bo)); amdgpu_vm_bo_relocated(bo_base); } + spin_lock(&vm->status_lock); } + spin_unlock(&vm->status_lock); amdgpu_vm_eviction_lock(vm); vm->evicting = false; @@ -406,13 +425,18 @@ int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm, */ bool amdgpu_vm_ready(struct amdgpu_vm *vm) { + bool empty; bool ret; amdgpu_vm_eviction_lock(vm); ret = !vm->evicting; amdgpu_vm_eviction_unlock(vm); - return ret && list_empty(&vm->evicted); + spin_lock(&vm->status_lock); + empty = list_empty(&vm->evicted); + spin_unlock(&vm->status_lock); + + return ret && empty; } /** @@ -680,9 +704,14 @@ int amdgpu_vm_update_pdes(struct amdgpu_device *adev, struct amdgpu_vm_update_params params; struct amdgpu_vm_bo_base *entry; bool flush_tlb_needed = false; + LIST_HEAD(relocated); int r, idx; - if (list_empty(&vm->relocated)) + spin_lock(&vm->status_lock); + list_splice_init(&vm->relocated, &relocated); + spin_unlock(&vm->status_lock); + + if (list_empty(&relocated)) return 0; if (!drm_dev_enter(adev_to_drm(adev), &idx)) @@ -697,7 +726,7 @@ int amdgpu_vm_update_pdes(struct amdgpu_device *adev, if (r) goto error; - list_for_each_entry(entry, &vm->relocated, vm_status) { + list_for_each_entry(entry, &relocated, vm_status) { /* vm_flush_needed after updating moved PDEs */ flush_tlb_needed |= entry->moved; @@ -713,9 +742,8 @@ int amdgpu_vm_update_pdes(struct amdgpu_device *adev, if (flush_tlb_needed) atomic64_inc(&vm->tlb_seq); - while (!list_empty(&vm->relocated)) { - entry = list_first_entry(&vm->relocated, - struct amdgpu_vm_bo_base, + while (!list_empty(&relocated)) { + entry = list_first_entry(&relocated, struct amdgpu_vm_bo_base, vm_status); amdgpu_vm_bo_idle(entry); } @@ -912,6 +940,7 @@ void amdgpu_vm_get_memory(struct amdgpu_vm *vm, uint64_t *vram_mem, { struct amdgpu_bo_va *bo_va, *tmp; + spin_lock(&vm->status_lock); list_for_each_entry_safe(bo_va, tmp, &vm->idle, base.vm_status) { if (!bo_va->base.bo) continue; @@ -936,7 +965,6 @@ void amdgpu_vm_get_memory(struct amdgpu_vm *vm, uint64_t *vram_mem, amdgpu_bo_get_memory(bo_va->base.bo, vram_mem, gtt_mem, cpu_mem); } - spin_lock(&vm->invalidated_lock); list_for_each_entry_safe(bo_va, tmp, &vm->invalidated, base.vm_status) { if (!bo_va->base.bo) continue; @@ -949,7 +977,7 @@ void amdgpu_vm_get_memory(struct amdgpu_vm *vm, uint64_t *vram_mem, amdgpu_bo_get_memory(bo_va->base.bo, vram_mem, gtt_mem, cpu_mem); } - spin_unlock(&vm->invalidated_lock); + spin_unlock(&vm->status_lock); } /** * amdgpu_vm_bo_update - update all BO mappings in the vm page table @@ -1278,24 +1306,29 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev, int amdgpu_vm_handle_moved(struct amdgpu_device *adev, struct amdgpu_vm *vm) { - struct amdgpu_bo_va *bo_va, *tmp; + struct amdgpu_bo_va *bo_va; struct dma_resv *resv; bool clear; int r; - list_for_each_entry_safe(bo_va, tmp, &vm->moved, base.vm_status) { + spin_lock(&vm->status_lock); + while (!list_empty(&vm->moved)) { + bo_va = list_first_entry(&vm->moved, struct amdgpu_bo_va, + base.vm_status); + spin_unlock(&vm->status_lock); + /* Per VM BOs never need to bo cleared in the page tables */ r = amdgpu_vm_bo_update(adev, bo_va, false); if (r) return r; + spin_lock(&vm->status_lock); } - spin_lock(&vm->invalidated_lock); while (!list_empty(&vm->invalidated)) { bo_va = list_first_entry(&vm->invalidated, struct amdgpu_bo_va, base.vm_status); resv = 
bo_va->base.bo->tbo.base.resv; - spin_unlock(&vm->invalidated_lock); + spin_unlock(&vm->status_lock); /* Try to reserve the BO to avoid clearing its ptes */ if (!amdgpu_vm_debug && dma_resv_trylock(resv)) @@ -1310,9 +1343,9 @@ int amdgpu_vm_handle_moved(struct amdgpu_device *adev, if (!clear) dma_resv_unlock(resv); - spin_lock(&vm->invalidated_lock); + spin_lock(&vm->status_lock); } - spin_unlock(&vm->invalidated_lock); + spin_unlock(&vm->status_lock); return 0; } @@ -1387,7 +1420,7 @@ static void amdgpu_vm_bo_insert_map(struct amdgpu_device *adev, if (bo && bo->tbo.base.resv == vm->root.bo->tbo.base.resv && !bo_va->base.moved) { - list_move(&bo_va->base.vm_status, &vm->moved); + amdgpu_vm_bo_moved(&bo_va->base); } trace_amdgpu_vm_bo_map(bo_va, mapping); } @@ -1763,9 +1796,9 @@ void amdgpu_vm_bo_del(struct amdgpu_device *adev, } } - spin_lock(&vm->invalidated_lock); + spin_lock(&vm->status_lock); list_del(&bo_va->base.vm_status); - spin_unlock(&vm->invalidated_lock); + spin_unlock(&vm->status_lock); list_for_each_entry_safe(mapping, next, &bo_va->valids, list) { list_del(&mapping->list); @@ -2019,9 +2052,11 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm) INIT_LIST_HEAD(&vm->moved); INIT_LIST_HEAD(&vm->idle); INIT_LIST_HEAD(&vm->invalidated); - spin_lock_init(&vm->invalidated_lock); + spin_lock_init(&vm->status_lock); INIT_LIST_HEAD(&vm->freed); INIT_LIST_HEAD(&vm->done); + INIT_LIST_HEAD(&vm->pt_freed); + INIT_WORK(&vm->pt_free_work, amdgpu_vm_pt_free_work); /* create scheduler entities for page table updates */ r = drm_sched_entity_init(&vm->immediate, DRM_SCHED_PRIORITY_NORMAL, @@ -2223,6 +2258,8 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm) amdgpu_amdkfd_gpuvm_destroy_cb(adev, vm); + flush_work(&vm->pt_free_work); + root = amdgpu_bo_ref(vm->root.bo); amdgpu_bo_reserve(root, true); amdgpu_vm_set_pasid(adev, vm, 0); @@ -2301,7 +2338,11 @@ void amdgpu_vm_manager_init(struct amdgpu_device *adev) */ #ifdef CONFIG_X86_64 if (amdgpu_vm_update_mode == -1) { - if (amdgpu_gmc_vram_full_visible(&adev->gmc)) + /* For asic with VF MMIO access protection + * avoid using CPU for VM table updates + */ + if (amdgpu_gmc_vram_full_visible(&adev->gmc) && + !amdgpu_sriov_vf_mmio_access_protection(adev)) adev->vm_manager.vm_update_mode = AMDGPU_VM_USE_CPU_FOR_COMPUTE; else @@ -2484,8 +2525,7 @@ bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, u32 pasid, /* Intentionally setting invalid PTE flag * combination to force a no-retry-fault */ - flags = AMDGPU_PTE_EXECUTABLE | AMDGPU_PDE_PTE | - AMDGPU_PTE_TF; + flags = AMDGPU_PTE_SNOOPED | AMDGPU_PTE_PRT; value = 0; } else if (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_NEVER) { /* Redirect the access to the dummy page */ @@ -2548,6 +2588,7 @@ void amdgpu_debugfs_vm_bo_info(struct amdgpu_vm *vm, struct seq_file *m) unsigned int total_done_objs = 0; unsigned int id = 0; + spin_lock(&vm->status_lock); seq_puts(m, "\tIdle BOs:\n"); list_for_each_entry_safe(bo_va, tmp, &vm->idle, base.vm_status) { if (!bo_va->base.bo) @@ -2585,7 +2626,6 @@ void amdgpu_debugfs_vm_bo_info(struct amdgpu_vm *vm, struct seq_file *m) id = 0; seq_puts(m, "\tInvalidated BOs:\n"); - spin_lock(&vm->invalidated_lock); list_for_each_entry_safe(bo_va, tmp, &vm->invalidated, base.vm_status) { if (!bo_va->base.bo) continue; @@ -2600,7 +2640,7 @@ void amdgpu_debugfs_vm_bo_info(struct amdgpu_vm *vm, struct seq_file *m) continue; total_done += amdgpu_bo_print_info(id++, bo_va->base.bo, m); } - spin_unlock(&vm->invalidated_lock); + 
spin_unlock(&vm->status_lock); total_done_objs = id; seq_printf(m, "\tTotal idle size: %12lld\tobjs:\t%d\n", total_idle, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h index 9ecb7f663e19..83acb7bd80fe 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h @@ -254,6 +254,9 @@ struct amdgpu_vm { bool evicting; unsigned int saved_flags; + /* Lock to protect vm_bo add/del/move on all lists of vm */ + spinlock_t status_lock; + /* BOs who needs a validation */ struct list_head evicted; @@ -268,7 +271,6 @@ struct amdgpu_vm { /* regular invalidated BOs, but not yet updated in the PT */ struct list_head invalidated; - spinlock_t invalidated_lock; /* BO mappings freed, but not yet updated in the PT */ struct list_head freed; @@ -276,6 +278,10 @@ struct amdgpu_vm { /* BOs which are invalidated, has been updated in the PTs */ struct list_head done; + /* PT BOs scheduled to free and fill with zero if vm_resv is not held */ + struct list_head pt_freed; + struct work_struct pt_free_work; + /* contains the page directory */ struct amdgpu_vm_bo_base root; struct dma_fence *last_update; @@ -471,6 +477,7 @@ int amdgpu_vm_pde_update(struct amdgpu_vm_update_params *params, int amdgpu_vm_ptes_update(struct amdgpu_vm_update_params *params, uint64_t start, uint64_t end, uint64_t dst, uint64_t flags); +void amdgpu_vm_pt_free_work(struct work_struct *work); #if defined(CONFIG_DEBUG_FS) void amdgpu_debugfs_vm_bo_info(struct amdgpu_vm *vm, struct seq_file *m); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c index 88de9f0d4728..358b91243e37 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c @@ -637,10 +637,34 @@ static void amdgpu_vm_pt_free(struct amdgpu_vm_bo_base *entry) } ttm_bo_set_bulk_move(&entry->bo->tbo, NULL); entry->bo->vm_bo = NULL; + + spin_lock(&entry->vm->status_lock); list_del(&entry->vm_status); + spin_unlock(&entry->vm->status_lock); amdgpu_bo_unref(&entry->bo); } +void amdgpu_vm_pt_free_work(struct work_struct *work) +{ + struct amdgpu_vm_bo_base *entry, *next; + struct amdgpu_vm *vm; + LIST_HEAD(pt_freed); + + vm = container_of(work, struct amdgpu_vm, pt_free_work); + + spin_lock(&vm->status_lock); + list_splice_init(&vm->pt_freed, &pt_freed); + spin_unlock(&vm->status_lock); + + /* flush_work in amdgpu_vm_fini ensures vm->root.bo is valid.
*/ + amdgpu_bo_reserve(vm->root.bo, true); + + list_for_each_entry_safe(entry, next, &pt_freed, vm_status) + amdgpu_vm_pt_free(entry); + + amdgpu_bo_unreserve(vm->root.bo); +} + /** * amdgpu_vm_pt_free_dfs - free PD/PT levels * @@ -652,11 +676,24 @@ static void amdgpu_vm_pt_free(struct amdgpu_vm_bo_base *entry) */ static void amdgpu_vm_pt_free_dfs(struct amdgpu_device *adev, struct amdgpu_vm *vm, - struct amdgpu_vm_pt_cursor *start) + struct amdgpu_vm_pt_cursor *start, + bool unlocked) { struct amdgpu_vm_pt_cursor cursor; struct amdgpu_vm_bo_base *entry; + if (unlocked) { + spin_lock(&vm->status_lock); + for_each_amdgpu_vm_pt_dfs_safe(adev, vm, start, cursor, entry) + list_move(&entry->vm_status, &vm->pt_freed); + + if (start) + list_move(&start->entry->vm_status, &vm->pt_freed); + spin_unlock(&vm->status_lock); + schedule_work(&vm->pt_free_work); + return; + } + for_each_amdgpu_vm_pt_dfs_safe(adev, vm, start, cursor, entry) amdgpu_vm_pt_free(entry); @@ -673,7 +710,7 @@ static void amdgpu_vm_pt_free_dfs(struct amdgpu_device *adev, */ void amdgpu_vm_pt_free_root(struct amdgpu_device *adev, struct amdgpu_vm *vm) { - amdgpu_vm_pt_free_dfs(adev, vm, NULL); + amdgpu_vm_pt_free_dfs(adev, vm, NULL, false); } /** @@ -966,7 +1003,8 @@ int amdgpu_vm_ptes_update(struct amdgpu_vm_update_params *params, if (cursor.entry->bo) { params->table_freed = true; amdgpu_vm_pt_free_dfs(adev, params->vm, - &cursor); + &cursor, + params->unlocked); } amdgpu_vm_pt_next(adev, &cursor); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c index 1fd3cbca20a2..59cf64216fbb 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c @@ -47,6 +47,32 @@ static int amdgpu_vm_sdma_map_table(struct amdgpu_bo_vm *table) return r; } +/* Allocate a new job for @count PTE updates */ +static int amdgpu_vm_sdma_alloc_job(struct amdgpu_vm_update_params *p, + unsigned int count) +{ + enum amdgpu_ib_pool_type pool = p->immediate ? AMDGPU_IB_POOL_IMMEDIATE + : AMDGPU_IB_POOL_DELAYED; + struct drm_sched_entity *entity = p->immediate ? &p->vm->immediate + : &p->vm->delayed; + unsigned int ndw; + int r; + + /* estimate how many dw we need */ + ndw = AMDGPU_VM_SDMA_MIN_NUM_DW; + if (p->pages_addr) + ndw += count * 2; + ndw = min(ndw, AMDGPU_VM_SDMA_MAX_NUM_DW); + + r = amdgpu_job_alloc_with_ib(p->adev, entity, AMDGPU_FENCE_OWNER_VM, + ndw * 4, pool, &p->job); + if (r) + return r; + + p->num_dw_left = ndw; + return 0; +} + /** * amdgpu_vm_sdma_prepare - prepare SDMA command submission * @@ -61,21 +87,22 @@ static int amdgpu_vm_sdma_prepare(struct amdgpu_vm_update_params *p, struct dma_resv *resv, enum amdgpu_sync_mode sync_mode) { - enum amdgpu_ib_pool_type pool = p->immediate ? 
AMDGPU_IB_POOL_IMMEDIATE - : AMDGPU_IB_POOL_DELAYED; - unsigned int ndw = AMDGPU_VM_SDMA_MIN_NUM_DW; + struct amdgpu_sync sync; int r; - r = amdgpu_job_alloc_with_ib(p->adev, ndw * 4, pool, &p->job); + r = amdgpu_vm_sdma_alloc_job(p, 0); if (r) return r; - p->num_dw_left = ndw; - if (!resv) return 0; - return amdgpu_sync_resv(p->adev, &p->job->sync, resv, sync_mode, p->vm); + amdgpu_sync_create(&sync); + r = amdgpu_sync_resv(p->adev, &sync, resv, sync_mode, p->vm); + if (!r) + r = amdgpu_sync_push_to_job(&sync, p->job); + amdgpu_sync_free(&sync); + return r; } /** @@ -91,20 +118,16 @@ static int amdgpu_vm_sdma_commit(struct amdgpu_vm_update_params *p, struct dma_fence **fence) { struct amdgpu_ib *ib = p->job->ibs; - struct drm_sched_entity *entity; struct amdgpu_ring *ring; struct dma_fence *f; - int r; - entity = p->immediate ? &p->vm->immediate : &p->vm->delayed; - ring = container_of(entity->rq->sched, struct amdgpu_ring, sched); + ring = container_of(p->vm->delayed.rq->sched, struct amdgpu_ring, + sched); WARN_ON(ib->length_dw == 0); amdgpu_ring_pad_ib(ring, ib); WARN_ON(ib->length_dw > p->num_dw_left); - r = amdgpu_job_submit(p->job, entity, AMDGPU_FENCE_OWNER_VM, &f); - if (r) - goto error; + f = amdgpu_job_submit(p->job); if (p->unlocked) { struct dma_fence *tmp = dma_fence_get(f); @@ -112,17 +135,21 @@ static int amdgpu_vm_sdma_commit(struct amdgpu_vm_update_params *p, swap(p->vm->last_unlocked, tmp); dma_fence_put(tmp); } else { - amdgpu_bo_fence(p->vm->root.bo, f, true); + dma_resv_add_fence(p->vm->root.bo->tbo.base.resv, f, + DMA_RESV_USAGE_BOOKKEEP); } - if (fence && !p->immediate) + if (fence && !p->immediate) { + /* + * Most hw generations now have a separate queue for page table + * updates, but when the queue is shared with userspace we need + * the extra CPU round trip to correctly flush the TLB. + */ + set_bit(DRM_SCHED_FENCE_DONT_PIPELINE, &f->flags); swap(*fence, f); + } dma_fence_put(f); return 0; - -error: - amdgpu_job_free(p->job); - return r; } /** @@ -202,8 +229,6 @@ static int amdgpu_vm_sdma_update(struct amdgpu_vm_update_params *p, uint64_t flags) { struct amdgpu_bo *bo = &vmbo->bo; - enum amdgpu_ib_pool_type pool = p->immediate ? 
AMDGPU_IB_POOL_IMMEDIATE - : AMDGPU_IB_POOL_DELAYED; struct dma_resv_iter cursor; unsigned int i, ndw, nptes; struct dma_fence *fence; @@ -211,12 +236,15 @@ static int amdgpu_vm_sdma_update(struct amdgpu_vm_update_params *p, int r; /* Wait for PD/PT moves to be completed */ - dma_resv_for_each_fence(&cursor, bo->tbo.base.resv, - DMA_RESV_USAGE_KERNEL, fence) { - r = amdgpu_sync_fence(&p->job->sync, fence); - if (r) + dma_resv_iter_begin(&cursor, bo->tbo.base.resv, DMA_RESV_USAGE_KERNEL); + dma_resv_for_each_fence_unlocked(&cursor, fence) { + r = drm_sched_job_add_dependency(&p->job->base, fence); + if (r) { + dma_resv_iter_end(&cursor); return r; + } } + dma_resv_iter_end(&cursor); do { ndw = p->num_dw_left; @@ -227,19 +255,9 @@ static int amdgpu_vm_sdma_update(struct amdgpu_vm_update_params *p, if (r) return r; - /* estimate how many dw we need */ - ndw = 32; - if (p->pages_addr) - ndw += count * 2; - ndw = max(ndw, AMDGPU_VM_SDMA_MIN_NUM_DW); - ndw = min(ndw, AMDGPU_VM_SDMA_MAX_NUM_DW); - - r = amdgpu_job_alloc_with_ib(p->adev, ndw * 4, pool, - &p->job); + r = amdgpu_vm_sdma_alloc_job(p, count); if (r) return r; - - p->num_dw_left = ndw; } if (!p->pages_addr) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c index 73a517bcf5c1..18c1a173d187 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c @@ -439,7 +439,7 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man, /* Allocate blocks in desired range */ vres->flags |= DRM_BUDDY_RANGE_ALLOCATION; - remaining_size = (u64)vres->base.num_pages << PAGE_SHIFT; + remaining_size = (u64)vres->base.size; mutex_lock(&mgr->lock); while (remaining_size) { @@ -498,7 +498,7 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man, LIST_HEAD(temp); trim_list = &vres->blocks; - original_size = (u64)vres->base.num_pages << PAGE_SHIFT; + original_size = (u64)vres->base.size; /* * If size value is rounded up to min_block_size, trim the last @@ -533,8 +533,8 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man, amdgpu_vram_mgr_block_size(block); start >>= PAGE_SHIFT; - if (start > vres->base.num_pages) - start -= vres->base.num_pages; + if (start > PFN_UP(vres->base.size)) + start -= PFN_UP(vres->base.size); else start = 0; vres->base.start = max(vres->base.start, start); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c index d3b483aa81f8..47159e9a0884 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c @@ -392,12 +392,20 @@ struct amdgpu_hive_info *amdgpu_get_xgmi_hive(struct amdgpu_device *adev) } /** + * Only init hive->reset_domain for non-SRIOV configurations. For SRIOV, + * the host driver decides how to reset the GPU either through FLR or chain reset. + * The guest side will get individual notifications from the host for the FLR + * if necessary.
+ */ + if (!amdgpu_sriov_vf(adev)) { + /** * Avoid recreating reset domain when hive is reconstructed for the case - * of reset the devices in the XGMI hive during probe for SRIOV + * of reset the devices in the XGMI hive during probe for passthrough GPU * See https://www.spinics.net/lists/amd-gfx/msg58836.html */ - if (adev->reset_domain->type != XGMI_HIVE) { - hive->reset_domain = amdgpu_reset_create_reset_domain(XGMI_HIVE, "amdgpu-reset-hive"); + if (adev->reset_domain->type != XGMI_HIVE) { + hive->reset_domain = + amdgpu_reset_create_reset_domain(XGMI_HIVE, "amdgpu-reset-hive"); if (!hive->reset_domain) { dev_err(adev->dev, "XGMI: failed initializing reset domain for xgmi hive\n"); ret = -ENOMEM; @@ -406,9 +414,10 @@ struct amdgpu_hive_info *amdgpu_get_xgmi_hive(struct amdgpu_device *adev) hive = NULL; goto pro_end; } - } else { - amdgpu_reset_get_reset_domain(adev->reset_domain); - hive->reset_domain = adev->reset_domain; + } else { + amdgpu_reset_get_reset_domain(adev->reset_domain); + hive->reset_domain = adev->reset_domain; + } } hive->hive_id = adev->gmc.xgmi.hive_id; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h index 552e6fb55aa8..30dcc1681b4e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h @@ -43,6 +43,7 @@ struct amdgpu_hive_info { } pstate; struct amdgpu_reset_domain *reset_domain; + uint32_t device_remove_count; }; struct amdgpu_pcs_ras_field { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h b/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h index e78e4c27b62a..6c97148ca0ed 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h @@ -70,6 +70,7 @@ enum amd_sriov_ucode_engine_id { AMD_SRIOV_UCODE_ID_RLC_SRLS, AMD_SRIOV_UCODE_ID_MEC, AMD_SRIOV_UCODE_ID_MEC2, + AMD_SRIOV_UCODE_ID_IMU, AMD_SRIOV_UCODE_ID_SOS, AMD_SRIOV_UCODE_ID_ASD, AMD_SRIOV_UCODE_ID_TA_RAS, diff --git a/drivers/gpu/drm/amd/amdgpu/atombios_encoders.c b/drivers/gpu/drm/amd/amdgpu/atombios_encoders.c index 6be9ac2b9c5b..18ae9433e463 100644 --- a/drivers/gpu/drm/amd/amdgpu/atombios_encoders.c +++ b/drivers/gpu/drm/amd/amdgpu/atombios_encoders.c @@ -2081,8 +2081,11 @@ amdgpu_atombios_encoder_get_lcd_info(struct amdgpu_encoder *encoder) } } record += fake_edid_record->ucFakeEDIDLength ? 
- fake_edid_record->ucFakeEDIDLength + 2 : - sizeof(ATOM_FAKE_EDID_PATCH_RECORD); + struct_size(fake_edid_record, + ucFakeEDIDString, + fake_edid_record->ucFakeEDIDLength) : + /* empty fake edid record must be 3 bytes long */ + sizeof(ATOM_FAKE_EDID_PATCH_RECORD) + 1; break; case LCD_PANEL_RESOLUTION_RECORD_TYPE: panel_res_record = (ATOM_PANEL_RESOLUTION_PATCH_RECORD *)record; diff --git a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c index 5647f13b98d4..cbca9866645c 100644 --- a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c +++ b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c @@ -309,14 +309,10 @@ static void cik_sdma_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq */ static void cik_sdma_gfx_stop(struct amdgpu_device *adev) { - struct amdgpu_ring *sdma0 = &adev->sdma.instance[0].ring; - struct amdgpu_ring *sdma1 = &adev->sdma.instance[1].ring; u32 rb_cntl; int i; - if ((adev->mman.buffer_funcs_ring == sdma0) || - (adev->mman.buffer_funcs_ring == sdma1)) - amdgpu_ttm_set_buffer_funcs_status(adev, false); + amdgpu_sdma_unset_buffer_funcs_helper(adev); for (i = 0; i < adev->sdma.num_instances; i++) { rb_cntl = RREG32(mmSDMA0_GFX_RB_CNTL + sdma_offsets[i]); diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c index 288fce7dc0ed..90f87b2d985b 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c @@ -21,6 +21,7 @@ * */ +#include <drm/drm_fb_helper.h> #include <drm/drm_fourcc.h> #include <drm/drm_vblank.h> @@ -2800,8 +2801,6 @@ static int dce_v10_0_sw_init(void *handle) adev_to_drm(adev)->mode_config.fb_modifiers_not_supported = true; - adev_to_drm(adev)->mode_config.fb_base = adev->gmc.aper_base; - r = amdgpu_display_modeset_create_props(adev); if (r) return r; diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c index cbe5250b31cb..0352de72c886 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c @@ -21,6 +21,7 @@ * */ +#include <drm/drm_fb_helper.h> #include <drm/drm_fourcc.h> #include <drm/drm_vblank.h> @@ -2918,8 +2919,6 @@ static int dce_v11_0_sw_init(void *handle) adev_to_drm(adev)->mode_config.fb_modifiers_not_supported = true; - adev_to_drm(adev)->mode_config.fb_base = adev->gmc.aper_base; - r = amdgpu_display_modeset_create_props(adev); if (r) return r; diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c index b1c44fab074f..07bd16e82046 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c @@ -23,6 +23,7 @@ #include <linux/pci.h> +#include <drm/drm_fb_helper.h> #include <drm/drm_fourcc.h> #include <drm/drm_vblank.h> @@ -2675,7 +2676,6 @@ static int dce_v6_0_sw_init(void *handle) adev_to_drm(adev)->mode_config.preferred_depth = 24; adev_to_drm(adev)->mode_config.prefer_shadow = 1; adev_to_drm(adev)->mode_config.fb_modifiers_not_supported = true; - adev_to_drm(adev)->mode_config.fb_base = adev->gmc.aper_base; r = amdgpu_display_modeset_create_props(adev); if (r) diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c index a22b45c92792..d73df100f2b3 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c @@ -21,6 +21,7 @@ * */ +#include <drm/drm_fb_helper.h> #include <drm/drm_fourcc.h> #include <drm/drm_vblank.h> @@ -2701,8 +2702,6 @@ static int dce_v8_0_sw_init(void *handle) adev_to_drm(adev)->mode_config.fb_modifiers_not_supported = true; - 
adev_to_drm(adev)->mode_config.fb_base = adev->gmc.aper_base; - r = amdgpu_display_modeset_create_props(adev); if (r) return r; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c index e4dde41f2f68..49d34c7bbf20 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c @@ -3943,56 +3943,6 @@ static void gfx_v10_0_check_fw_write_wait(struct amdgpu_device *adev) DRM_WARN_ONCE("CP firmware version too old, please update!"); } - -static void gfx_v10_0_init_rlc_ext_microcode(struct amdgpu_device *adev) -{ - const struct rlc_firmware_header_v2_1 *rlc_hdr; - - rlc_hdr = (const struct rlc_firmware_header_v2_1 *)adev->gfx.rlc_fw->data; - adev->gfx.rlc_srlc_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_cntl_ucode_ver); - adev->gfx.rlc_srlc_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_cntl_feature_ver); - adev->gfx.rlc.save_restore_list_cntl_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_cntl_size_bytes); - adev->gfx.rlc.save_restore_list_cntl = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_cntl_offset_bytes); - adev->gfx.rlc_srlg_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_gpm_ucode_ver); - adev->gfx.rlc_srlg_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_gpm_feature_ver); - adev->gfx.rlc.save_restore_list_gpm_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_gpm_size_bytes); - adev->gfx.rlc.save_restore_list_gpm = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_gpm_offset_bytes); - adev->gfx.rlc_srls_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_srm_ucode_ver); - adev->gfx.rlc_srls_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_srm_feature_ver); - adev->gfx.rlc.save_restore_list_srm_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_srm_size_bytes); - adev->gfx.rlc.save_restore_list_srm = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_srm_offset_bytes); - adev->gfx.rlc.reg_list_format_direct_reg_list_length = - le32_to_cpu(rlc_hdr->reg_list_format_direct_reg_list_length); -} - -static void gfx_v10_0_init_rlc_iram_dram_microcode(struct amdgpu_device *adev) -{ - const struct rlc_firmware_header_v2_2 *rlc_hdr; - - rlc_hdr = (const struct rlc_firmware_header_v2_2 *)adev->gfx.rlc_fw->data; - adev->gfx.rlc.rlc_iram_ucode_size_bytes = le32_to_cpu(rlc_hdr->rlc_iram_ucode_size_bytes); - adev->gfx.rlc.rlc_iram_ucode = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->rlc_iram_ucode_offset_bytes); - adev->gfx.rlc.rlc_dram_ucode_size_bytes = le32_to_cpu(rlc_hdr->rlc_dram_ucode_size_bytes); - adev->gfx.rlc.rlc_dram_ucode = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->rlc_dram_ucode_offset_bytes); -} - -static void gfx_v10_0_init_tap_delays_microcode(struct amdgpu_device *adev) -{ - const struct rlc_firmware_header_v2_4 *rlc_hdr; - - rlc_hdr = (const struct rlc_firmware_header_v2_4 *)adev->gfx.rlc_fw->data; - adev->gfx.rlc.global_tap_delays_ucode_size_bytes = le32_to_cpu(rlc_hdr->global_tap_delays_ucode_size_bytes); - adev->gfx.rlc.global_tap_delays_ucode = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->global_tap_delays_ucode_offset_bytes); - adev->gfx.rlc.se0_tap_delays_ucode_size_bytes = le32_to_cpu(rlc_hdr->se0_tap_delays_ucode_size_bytes); - adev->gfx.rlc.se0_tap_delays_ucode = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->se0_tap_delays_ucode_offset_bytes); - adev->gfx.rlc.se1_tap_delays_ucode_size_bytes = le32_to_cpu(rlc_hdr->se1_tap_delays_ucode_size_bytes); - adev->gfx.rlc.se1_tap_delays_ucode = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->se1_tap_delays_ucode_offset_bytes); - 
adev->gfx.rlc.se2_tap_delays_ucode_size_bytes = le32_to_cpu(rlc_hdr->se2_tap_delays_ucode_size_bytes); - adev->gfx.rlc.se2_tap_delays_ucode = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->se2_tap_delays_ucode_offset_bytes); - adev->gfx.rlc.se3_tap_delays_ucode_size_bytes = le32_to_cpu(rlc_hdr->se3_tap_delays_ucode_size_bytes); - adev->gfx.rlc.se3_tap_delays_ucode = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->se3_tap_delays_ucode_offset_bytes); -} - static bool gfx_v10_0_navi10_gfxoff_should_enable(struct amdgpu_device *adev) { bool ret = false; @@ -4028,12 +3978,7 @@ static int gfx_v10_0_init_microcode(struct amdgpu_device *adev) char fw_name[40]; char *wks = ""; int err; - struct amdgpu_firmware_info *info = NULL; - const struct common_firmware_header *header = NULL; - const struct gfx_firmware_header_v1_0 *cp_hdr; const struct rlc_firmware_header_v2_0 *rlc_hdr; - unsigned int *tmp = NULL; - unsigned int i = 0; uint16_t version_major; uint16_t version_minor; @@ -4091,9 +4036,7 @@ static int gfx_v10_0_init_microcode(struct amdgpu_device *adev) err = amdgpu_ucode_validate(adev->gfx.pfp_fw); if (err) goto out; - cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data; - adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version); - adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version); + amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_PFP); snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me%s.bin", chip_name, wks); err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev); @@ -4102,9 +4045,7 @@ static int gfx_v10_0_init_microcode(struct amdgpu_device *adev) err = amdgpu_ucode_validate(adev->gfx.me_fw); if (err) goto out; - cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data; - adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version); - adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version); + amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_ME); snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce%s.bin", chip_name, wks); err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev); @@ -4113,69 +4054,27 @@ static int gfx_v10_0_init_microcode(struct amdgpu_device *adev) err = amdgpu_ucode_validate(adev->gfx.ce_fw); if (err) goto out; - cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data; - adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version); - adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version); + amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_CE); if (!amdgpu_sriov_vf(adev)) { snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name); err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev); if (err) goto out; + /* don't check this. 
There are apparently firmwares in the wild with + * incorrect size in the header + */ err = amdgpu_ucode_validate(adev->gfx.rlc_fw); + if (err) + dev_dbg(adev->dev, + "gfx10: amdgpu_ucode_validate() failed \"%s\"\n", + fw_name); rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data; version_major = le16_to_cpu(rlc_hdr->header.header_version_major); version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor); - - adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version); - adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version); - adev->gfx.rlc.save_and_restore_offset = - le32_to_cpu(rlc_hdr->save_and_restore_offset); - adev->gfx.rlc.clear_state_descriptor_offset = - le32_to_cpu(rlc_hdr->clear_state_descriptor_offset); - adev->gfx.rlc.avail_scratch_ram_locations = - le32_to_cpu(rlc_hdr->avail_scratch_ram_locations); - adev->gfx.rlc.reg_restore_list_size = - le32_to_cpu(rlc_hdr->reg_restore_list_size); - adev->gfx.rlc.reg_list_format_start = - le32_to_cpu(rlc_hdr->reg_list_format_start); - adev->gfx.rlc.reg_list_format_separate_start = - le32_to_cpu(rlc_hdr->reg_list_format_separate_start); - adev->gfx.rlc.starting_offsets_start = - le32_to_cpu(rlc_hdr->starting_offsets_start); - adev->gfx.rlc.reg_list_format_size_bytes = - le32_to_cpu(rlc_hdr->reg_list_format_size_bytes); - adev->gfx.rlc.reg_list_size_bytes = - le32_to_cpu(rlc_hdr->reg_list_size_bytes); - adev->gfx.rlc.register_list_format = - kmalloc(adev->gfx.rlc.reg_list_format_size_bytes + - adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL); - if (!adev->gfx.rlc.register_list_format) { - err = -ENOMEM; + err = amdgpu_gfx_rlc_init_microcode(adev, version_major, version_minor); + if (err) goto out; - } - - tmp = (unsigned int *)((uintptr_t)rlc_hdr + - le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes)); - for (i = 0 ; i < (rlc_hdr->reg_list_format_size_bytes >> 2); i++) - adev->gfx.rlc.register_list_format[i] = le32_to_cpu(tmp[i]); - - adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i; - - tmp = (unsigned int *)((uintptr_t)rlc_hdr + - le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes)); - for (i = 0 ; i < (rlc_hdr->reg_list_size_bytes >> 2); i++) - adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]); - - if (version_major == 2) { - if (version_minor >= 1) - gfx_v10_0_init_rlc_ext_microcode(adev); - if (version_minor >= 2) - gfx_v10_0_init_rlc_iram_dram_microcode(adev); - if (version_minor == 4) { - gfx_v10_0_init_tap_delays_microcode(adev); - } - } } snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec%s.bin", chip_name, wks); @@ -4185,9 +4084,8 @@ static int gfx_v10_0_init_microcode(struct amdgpu_device *adev) err = amdgpu_ucode_validate(adev->gfx.mec_fw); if (err) goto out; - cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data; - adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version); - adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version); + amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC1); + amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC1_JT); snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2%s.bin", chip_name, wks); err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev); @@ -4195,164 +4093,18 @@ static int gfx_v10_0_init_microcode(struct amdgpu_device *adev) err = amdgpu_ucode_validate(adev->gfx.mec2_fw); if (err) goto out; - cp_hdr = (const struct gfx_firmware_header_v1_0 *) - adev->gfx.mec2_fw->data; - adev->gfx.mec2_fw_version = - 
le32_to_cpu(cp_hdr->header.ucode_version); - adev->gfx.mec2_feature_version = - le32_to_cpu(cp_hdr->ucode_feature_version); + amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC2); + amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC2_JT); } else { err = 0; adev->gfx.mec2_fw = NULL; } - if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { - info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP]; - info->ucode_id = AMDGPU_UCODE_ID_CP_PFP; - info->fw = adev->gfx.pfp_fw; - header = (const struct common_firmware_header *)info->fw->data; - adev->firmware.fw_size += - ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE); - - info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME]; - info->ucode_id = AMDGPU_UCODE_ID_CP_ME; - info->fw = adev->gfx.me_fw; - header = (const struct common_firmware_header *)info->fw->data; - adev->firmware.fw_size += - ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE); - - info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE]; - info->ucode_id = AMDGPU_UCODE_ID_CP_CE; - info->fw = adev->gfx.ce_fw; - header = (const struct common_firmware_header *)info->fw->data; - adev->firmware.fw_size += - ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE); - - info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G]; - info->ucode_id = AMDGPU_UCODE_ID_RLC_G; - info->fw = adev->gfx.rlc_fw; - if (info->fw) { - header = (const struct common_firmware_header *)info->fw->data; - adev->firmware.fw_size += - ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE); - } - if (adev->gfx.rlc.save_restore_list_cntl_size_bytes && - adev->gfx.rlc.save_restore_list_gpm_size_bytes && - adev->gfx.rlc.save_restore_list_srm_size_bytes) { - info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL]; - info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL; - info->fw = adev->gfx.rlc_fw; - adev->firmware.fw_size += - ALIGN(adev->gfx.rlc.save_restore_list_cntl_size_bytes, PAGE_SIZE); - - info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM]; - info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM; - info->fw = adev->gfx.rlc_fw; - adev->firmware.fw_size += - ALIGN(adev->gfx.rlc.save_restore_list_gpm_size_bytes, PAGE_SIZE); - - info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM]; - info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM; - info->fw = adev->gfx.rlc_fw; - adev->firmware.fw_size += - ALIGN(adev->gfx.rlc.save_restore_list_srm_size_bytes, PAGE_SIZE); - - if (adev->gfx.rlc.rlc_iram_ucode_size_bytes && - adev->gfx.rlc.rlc_dram_ucode_size_bytes) { - info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_IRAM]; - info->ucode_id = AMDGPU_UCODE_ID_RLC_IRAM; - info->fw = adev->gfx.rlc_fw; - adev->firmware.fw_size += - ALIGN(adev->gfx.rlc.rlc_iram_ucode_size_bytes, PAGE_SIZE); - - info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_DRAM]; - info->ucode_id = AMDGPU_UCODE_ID_RLC_DRAM; - info->fw = adev->gfx.rlc_fw; - adev->firmware.fw_size += - ALIGN(adev->gfx.rlc.rlc_dram_ucode_size_bytes, PAGE_SIZE); - } - - } - - if (adev->gfx.rlc.global_tap_delays_ucode_size_bytes) { - info = &adev->firmware.ucode[AMDGPU_UCODE_ID_GLOBAL_TAP_DELAYS]; - info->ucode_id = AMDGPU_UCODE_ID_GLOBAL_TAP_DELAYS; - info->fw = adev->gfx.rlc_fw; - adev->firmware.fw_size += - ALIGN(adev->gfx.rlc.global_tap_delays_ucode_size_bytes, PAGE_SIZE); - } - - if (adev->gfx.rlc.se0_tap_delays_ucode_size_bytes) { - info = &adev->firmware.ucode[AMDGPU_UCODE_ID_SE0_TAP_DELAYS]; - info->ucode_id = AMDGPU_UCODE_ID_SE0_TAP_DELAYS; - info->fw = adev->gfx.rlc_fw; - adev->firmware.fw_size += - 
ALIGN(adev->gfx.rlc.se0_tap_delays_ucode_size_bytes, PAGE_SIZE); - } - - if (adev->gfx.rlc.se1_tap_delays_ucode_size_bytes) { - info = &adev->firmware.ucode[AMDGPU_UCODE_ID_SE1_TAP_DELAYS]; - info->ucode_id = AMDGPU_UCODE_ID_SE1_TAP_DELAYS; - info->fw = adev->gfx.rlc_fw; - adev->firmware.fw_size += - ALIGN(adev->gfx.rlc.se1_tap_delays_ucode_size_bytes, PAGE_SIZE); - } - - if (adev->gfx.rlc.se2_tap_delays_ucode_size_bytes) { - info = &adev->firmware.ucode[AMDGPU_UCODE_ID_SE2_TAP_DELAYS]; - info->ucode_id = AMDGPU_UCODE_ID_SE2_TAP_DELAYS; - info->fw = adev->gfx.rlc_fw; - adev->firmware.fw_size += - ALIGN(adev->gfx.rlc.se2_tap_delays_ucode_size_bytes, PAGE_SIZE); - } - - if (adev->gfx.rlc.se3_tap_delays_ucode_size_bytes) { - info = &adev->firmware.ucode[AMDGPU_UCODE_ID_SE3_TAP_DELAYS]; - info->ucode_id = AMDGPU_UCODE_ID_SE3_TAP_DELAYS; - info->fw = adev->gfx.rlc_fw; - adev->firmware.fw_size += - ALIGN(adev->gfx.rlc.se3_tap_delays_ucode_size_bytes, PAGE_SIZE); - } - - info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1]; - info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1; - info->fw = adev->gfx.mec_fw; - header = (const struct common_firmware_header *)info->fw->data; - cp_hdr = (const struct gfx_firmware_header_v1_0 *)info->fw->data; - adev->firmware.fw_size += - ALIGN(le32_to_cpu(header->ucode_size_bytes) - - le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE); - - info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1_JT]; - info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1_JT; - info->fw = adev->gfx.mec_fw; - adev->firmware.fw_size += - ALIGN(le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE); - - if (adev->gfx.mec2_fw) { - info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2]; - info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2; - info->fw = adev->gfx.mec2_fw; - header = (const struct common_firmware_header *)info->fw->data; - cp_hdr = (const struct gfx_firmware_header_v1_0 *)info->fw->data; - adev->firmware.fw_size += - ALIGN(le32_to_cpu(header->ucode_size_bytes) - - le32_to_cpu(cp_hdr->jt_size) * 4, - PAGE_SIZE); - info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2_JT]; - info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2_JT; - info->fw = adev->gfx.mec2_fw; - adev->firmware.fw_size += - ALIGN(le32_to_cpu(cp_hdr->jt_size) * 4, - PAGE_SIZE); - } - } - gfx_v10_0_check_fw_write_wait(adev); out: if (err) { dev_err(adev->dev, - "gfx10: Failed to load firmware \"%s\"\n", + "gfx10: Failed to init firmware \"%s\"\n", fw_name); release_firmware(adev->gfx.pfp_fw); adev->gfx.pfp_fw = NULL; @@ -4701,8 +4453,6 @@ static void gfx_v10_0_gpu_early_init(struct amdgpu_device *adev) { u32 gb_addr_config; - adev->gfx.funcs = &gfx_v10_0_gfx_funcs; - switch (adev->ip_versions[GC_HWIP][0]) { case IP_VERSION(10, 1, 10): case IP_VERSION(10, 1, 1): @@ -7159,6 +6909,8 @@ static int gfx_v10_0_kiq_init_queue(struct amdgpu_ring *ring) mutex_unlock(&adev->srbm_mutex); } else { memset((void *)mqd, 0, sizeof(*mqd)); + if (amdgpu_sriov_vf(adev) && adev->in_suspend) + amdgpu_ring_clear_ring(ring); mutex_lock(&adev->srbm_mutex); nv_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0); amdgpu_ring_init_mqd(ring); @@ -7841,6 +7593,8 @@ static int gfx_v10_0_early_init(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; + adev->gfx.funcs = &gfx_v10_0_gfx_funcs; + switch (adev->ip_versions[GC_HWIP][0]) { case IP_VERSION(10, 1, 10): case IP_VERSION(10, 1, 1): @@ -8737,7 +8491,7 @@ static void gfx_v10_0_ring_emit_ib_gfx(struct amdgpu_ring *ring, control |= ib->length_dw | (vmid << 24); - if ((amdgpu_sriov_vf(ring->adev) || amdgpu_mcbp) && (ib->flags 
& AMDGPU_IB_FLAG_PREEMPT)) { + if (amdgpu_mcbp && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) { control |= INDIRECT_BUFFER_PRE_ENB(1); if (flags & AMDGPU_IB_PREEMPTED) @@ -8912,7 +8666,7 @@ static void gfx_v10_0_ring_emit_cntxcntl(struct amdgpu_ring *ring, { uint32_t dw2 = 0; - if (amdgpu_mcbp || amdgpu_sriov_vf(ring->adev)) + if (amdgpu_mcbp) gfx_v10_0_ring_emit_ce_meta(ring, (!amdgpu_sriov_vf(ring->adev) && flags & AMDGPU_IB_PREEMPTED) ? true : false); diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c index fa718318568e..9447999a3a48 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c @@ -438,61 +438,12 @@ static void gfx_v11_0_free_microcode(struct amdgpu_device *adev) kfree(adev->gfx.rlc.register_list_format); } -static void gfx_v11_0_init_rlc_ext_microcode(struct amdgpu_device *adev) -{ - const struct rlc_firmware_header_v2_1 *rlc_hdr; - - rlc_hdr = (const struct rlc_firmware_header_v2_1 *)adev->gfx.rlc_fw->data; - adev->gfx.rlc_srlc_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_cntl_ucode_ver); - adev->gfx.rlc_srlc_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_cntl_feature_ver); - adev->gfx.rlc.save_restore_list_cntl_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_cntl_size_bytes); - adev->gfx.rlc.save_restore_list_cntl = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_cntl_offset_bytes); - adev->gfx.rlc_srlg_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_gpm_ucode_ver); - adev->gfx.rlc_srlg_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_gpm_feature_ver); - adev->gfx.rlc.save_restore_list_gpm_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_gpm_size_bytes); - adev->gfx.rlc.save_restore_list_gpm = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_gpm_offset_bytes); - adev->gfx.rlc_srls_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_srm_ucode_ver); - adev->gfx.rlc_srls_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_srm_feature_ver); - adev->gfx.rlc.save_restore_list_srm_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_srm_size_bytes); - adev->gfx.rlc.save_restore_list_srm = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_srm_offset_bytes); - adev->gfx.rlc.reg_list_format_direct_reg_list_length = - le32_to_cpu(rlc_hdr->reg_list_format_direct_reg_list_length); -} - -static void gfx_v11_0_init_rlc_iram_dram_microcode(struct amdgpu_device *adev) -{ - const struct rlc_firmware_header_v2_2 *rlc_hdr; - - rlc_hdr = (const struct rlc_firmware_header_v2_2 *)adev->gfx.rlc_fw->data; - adev->gfx.rlc.rlc_iram_ucode_size_bytes = le32_to_cpu(rlc_hdr->rlc_iram_ucode_size_bytes); - adev->gfx.rlc.rlc_iram_ucode = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->rlc_iram_ucode_offset_bytes); - adev->gfx.rlc.rlc_dram_ucode_size_bytes = le32_to_cpu(rlc_hdr->rlc_dram_ucode_size_bytes); - adev->gfx.rlc.rlc_dram_ucode = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->rlc_dram_ucode_offset_bytes); -} - -static void gfx_v11_0_init_rlcp_rlcv_microcode(struct amdgpu_device *adev) -{ - const struct rlc_firmware_header_v2_3 *rlc_hdr; - - rlc_hdr = (const struct rlc_firmware_header_v2_3 *)adev->gfx.rlc_fw->data; - adev->gfx.rlc.rlcp_ucode_size_bytes = le32_to_cpu(rlc_hdr->rlcp_ucode_size_bytes); - adev->gfx.rlc.rlcp_ucode = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->rlcp_ucode_offset_bytes); - adev->gfx.rlc.rlcv_ucode_size_bytes = le32_to_cpu(rlc_hdr->rlcv_ucode_size_bytes); - adev->gfx.rlc.rlcv_ucode = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->rlcv_ucode_offset_bytes); -} - static int 
gfx_v11_0_init_microcode(struct amdgpu_device *adev) { char fw_name[40]; char ucode_prefix[30]; int err; - struct amdgpu_firmware_info *info = NULL; - const struct common_firmware_header *header = NULL; - const struct gfx_firmware_header_v1_0 *cp_hdr; - const struct gfx_firmware_header_v2_0 *cp_hdr_v2_0; const struct rlc_firmware_header_v2_0 *rlc_hdr; - unsigned int *tmp = NULL; - unsigned int i = 0; uint16_t version_major; uint16_t version_minor; @@ -513,14 +464,11 @@ static int gfx_v11_0_init_microcode(struct amdgpu_device *adev) adev->gfx.pfp_fw->data, 2, 0); if (adev->gfx.rs64_enable) { dev_info(adev->dev, "CP RS64 enable\n"); - cp_hdr_v2_0 = (const struct gfx_firmware_header_v2_0 *)adev->gfx.pfp_fw->data; - adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr_v2_0->header.ucode_version); - adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr_v2_0->ucode_feature_version); - + amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_PFP); + amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_PFP_P0_STACK); + amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_PFP_P1_STACK); } else { - cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data; - adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version); - adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version); + amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_PFP); } snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", ucode_prefix); @@ -531,14 +479,11 @@ static int gfx_v11_0_init_microcode(struct amdgpu_device *adev) if (err) goto out; if (adev->gfx.rs64_enable) { - cp_hdr_v2_0 = (const struct gfx_firmware_header_v2_0 *)adev->gfx.me_fw->data; - adev->gfx.me_fw_version = le32_to_cpu(cp_hdr_v2_0->header.ucode_version); - adev->gfx.me_feature_version = le32_to_cpu(cp_hdr_v2_0->ucode_feature_version); - + amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_ME); + amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_ME_P0_STACK); + amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_ME_P1_STACK); } else { - cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data; - adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version); - adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version); + amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_ME); } if (!amdgpu_sriov_vf(adev)) { @@ -547,58 +492,14 @@ static int gfx_v11_0_init_microcode(struct amdgpu_device *adev) if (err) goto out; err = amdgpu_ucode_validate(adev->gfx.rlc_fw); + if (err) + goto out; rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data; version_major = le16_to_cpu(rlc_hdr->header.header_version_major); version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor); - - adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version); - adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version); - adev->gfx.rlc.save_and_restore_offset = - le32_to_cpu(rlc_hdr->save_and_restore_offset); - adev->gfx.rlc.clear_state_descriptor_offset = - le32_to_cpu(rlc_hdr->clear_state_descriptor_offset); - adev->gfx.rlc.avail_scratch_ram_locations = - le32_to_cpu(rlc_hdr->avail_scratch_ram_locations); - adev->gfx.rlc.reg_restore_list_size = - le32_to_cpu(rlc_hdr->reg_restore_list_size); - adev->gfx.rlc.reg_list_format_start = - le32_to_cpu(rlc_hdr->reg_list_format_start); - adev->gfx.rlc.reg_list_format_separate_start = - le32_to_cpu(rlc_hdr->reg_list_format_separate_start); - adev->gfx.rlc.starting_offsets_start = - 
le32_to_cpu(rlc_hdr->starting_offsets_start); - adev->gfx.rlc.reg_list_format_size_bytes = - le32_to_cpu(rlc_hdr->reg_list_format_size_bytes); - adev->gfx.rlc.reg_list_size_bytes = - le32_to_cpu(rlc_hdr->reg_list_size_bytes); - adev->gfx.rlc.register_list_format = - kmalloc(adev->gfx.rlc.reg_list_format_size_bytes + - adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL); - if (!adev->gfx.rlc.register_list_format) { - err = -ENOMEM; + err = amdgpu_gfx_rlc_init_microcode(adev, version_major, version_minor); + if (err) goto out; - } - - tmp = (unsigned int *)((uintptr_t)rlc_hdr + - le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes)); - for (i = 0 ; i < (rlc_hdr->reg_list_format_size_bytes >> 2); i++) - adev->gfx.rlc.register_list_format[i] = le32_to_cpu(tmp[i]); - - adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i; - - tmp = (unsigned int *)((uintptr_t)rlc_hdr + - le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes)); - for (i = 0 ; i < (rlc_hdr->reg_list_size_bytes >> 2); i++) - adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]); - - if (version_major == 2) { - if (version_minor >= 1) - gfx_v11_0_init_rlc_ext_microcode(adev); - if (version_minor >= 2) - gfx_v11_0_init_rlc_iram_dram_microcode(adev); - if (version_minor == 3) - gfx_v11_0_init_rlcp_rlcv_microcode(adev); - } } snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", ucode_prefix); @@ -609,190 +510,23 @@ static int gfx_v11_0_init_microcode(struct amdgpu_device *adev) if (err) goto out; if (adev->gfx.rs64_enable) { - cp_hdr_v2_0 = (const struct gfx_firmware_header_v2_0 *)adev->gfx.mec_fw->data; - adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr_v2_0->header.ucode_version); - adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr_v2_0->ucode_feature_version); - + amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_MEC); + amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_MEC_P0_STACK); + amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_MEC_P1_STACK); + amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_MEC_P2_STACK); + amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_MEC_P3_STACK); } else { - cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data; - adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version); - adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version); + amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC1); + amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC1_JT); } /* only one MEC for gfx 11.0.0. 
*/ adev->gfx.mec2_fw = NULL; - if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { - if (adev->gfx.rs64_enable) { - cp_hdr_v2_0 = (const struct gfx_firmware_header_v2_0 *)adev->gfx.pfp_fw->data; - info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_RS64_PFP]; - info->ucode_id = AMDGPU_UCODE_ID_CP_RS64_PFP; - info->fw = adev->gfx.pfp_fw; - header = (const struct common_firmware_header *)info->fw->data; - adev->firmware.fw_size += - ALIGN(le32_to_cpu(cp_hdr_v2_0->ucode_size_bytes), PAGE_SIZE); - - info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_RS64_PFP_P0_STACK]; - info->ucode_id = AMDGPU_UCODE_ID_CP_RS64_PFP_P0_STACK; - info->fw = adev->gfx.pfp_fw; - header = (const struct common_firmware_header *)info->fw->data; - adev->firmware.fw_size += - ALIGN(le32_to_cpu(cp_hdr_v2_0->data_size_bytes), PAGE_SIZE); - - info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_RS64_PFP_P1_STACK]; - info->ucode_id = AMDGPU_UCODE_ID_CP_RS64_PFP_P1_STACK; - info->fw = adev->gfx.pfp_fw; - header = (const struct common_firmware_header *)info->fw->data; - adev->firmware.fw_size += - ALIGN(le32_to_cpu(cp_hdr_v2_0->data_size_bytes), PAGE_SIZE); - - cp_hdr_v2_0 = (const struct gfx_firmware_header_v2_0 *)adev->gfx.me_fw->data; - info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_RS64_ME]; - info->ucode_id = AMDGPU_UCODE_ID_CP_RS64_ME; - info->fw = adev->gfx.me_fw; - header = (const struct common_firmware_header *)info->fw->data; - adev->firmware.fw_size += - ALIGN(le32_to_cpu(cp_hdr_v2_0->ucode_size_bytes), PAGE_SIZE); - - info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_RS64_ME_P0_STACK]; - info->ucode_id = AMDGPU_UCODE_ID_CP_RS64_ME_P0_STACK; - info->fw = adev->gfx.me_fw; - header = (const struct common_firmware_header *)info->fw->data; - adev->firmware.fw_size += - ALIGN(le32_to_cpu(cp_hdr_v2_0->data_size_bytes), PAGE_SIZE); - - info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_RS64_ME_P1_STACK]; - info->ucode_id = AMDGPU_UCODE_ID_CP_RS64_ME_P1_STACK; - info->fw = adev->gfx.me_fw; - header = (const struct common_firmware_header *)info->fw->data; - adev->firmware.fw_size += - ALIGN(le32_to_cpu(cp_hdr_v2_0->data_size_bytes), PAGE_SIZE); - - cp_hdr_v2_0 = (const struct gfx_firmware_header_v2_0 *)adev->gfx.mec_fw->data; - info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_RS64_MEC]; - info->ucode_id = AMDGPU_UCODE_ID_CP_RS64_MEC; - info->fw = adev->gfx.mec_fw; - header = (const struct common_firmware_header *)info->fw->data; - adev->firmware.fw_size += - ALIGN(le32_to_cpu(cp_hdr_v2_0->ucode_size_bytes), PAGE_SIZE); - - info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_RS64_MEC_P0_STACK]; - info->ucode_id = AMDGPU_UCODE_ID_CP_RS64_MEC_P0_STACK; - info->fw = adev->gfx.mec_fw; - header = (const struct common_firmware_header *)info->fw->data; - adev->firmware.fw_size += - ALIGN(le32_to_cpu(cp_hdr_v2_0->data_size_bytes), PAGE_SIZE); - - info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_RS64_MEC_P1_STACK]; - info->ucode_id = AMDGPU_UCODE_ID_CP_RS64_MEC_P1_STACK; - info->fw = adev->gfx.mec_fw; - header = (const struct common_firmware_header *)info->fw->data; - adev->firmware.fw_size += - ALIGN(le32_to_cpu(cp_hdr_v2_0->data_size_bytes), PAGE_SIZE); - - info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_RS64_MEC_P2_STACK]; - info->ucode_id = AMDGPU_UCODE_ID_CP_RS64_MEC_P2_STACK; - info->fw = adev->gfx.mec_fw; - header = (const struct common_firmware_header *)info->fw->data; - adev->firmware.fw_size += - ALIGN(le32_to_cpu(cp_hdr_v2_0->data_size_bytes), PAGE_SIZE); - - info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_RS64_MEC_P3_STACK]; - info->ucode_id 
= AMDGPU_UCODE_ID_CP_RS64_MEC_P3_STACK; - info->fw = adev->gfx.mec_fw; - header = (const struct common_firmware_header *)info->fw->data; - adev->firmware.fw_size += - ALIGN(le32_to_cpu(cp_hdr_v2_0->data_size_bytes), PAGE_SIZE); - } else { - info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP]; - info->ucode_id = AMDGPU_UCODE_ID_CP_PFP; - info->fw = adev->gfx.pfp_fw; - header = (const struct common_firmware_header *)info->fw->data; - adev->firmware.fw_size += - ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE); - - info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME]; - info->ucode_id = AMDGPU_UCODE_ID_CP_ME; - info->fw = adev->gfx.me_fw; - header = (const struct common_firmware_header *)info->fw->data; - adev->firmware.fw_size += - ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE); - - info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1]; - info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1; - info->fw = adev->gfx.mec_fw; - header = (const struct common_firmware_header *)info->fw->data; - cp_hdr = (const struct gfx_firmware_header_v1_0 *)info->fw->data; - adev->firmware.fw_size += - ALIGN(le32_to_cpu(header->ucode_size_bytes) - - le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE); - - info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1_JT]; - info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1_JT; - info->fw = adev->gfx.mec_fw; - adev->firmware.fw_size += - ALIGN(le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE); - } - - info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G]; - info->ucode_id = AMDGPU_UCODE_ID_RLC_G; - info->fw = adev->gfx.rlc_fw; - if (info->fw) { - header = (const struct common_firmware_header *)info->fw->data; - adev->firmware.fw_size += - ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE); - } - if (adev->gfx.rlc.save_restore_list_gpm_size_bytes && - adev->gfx.rlc.save_restore_list_srm_size_bytes) { - info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM]; - info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM; - info->fw = adev->gfx.rlc_fw; - adev->firmware.fw_size += - ALIGN(adev->gfx.rlc.save_restore_list_gpm_size_bytes, PAGE_SIZE); - - info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM]; - info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM; - info->fw = adev->gfx.rlc_fw; - adev->firmware.fw_size += - ALIGN(adev->gfx.rlc.save_restore_list_srm_size_bytes, PAGE_SIZE); - } - - if (adev->gfx.rlc.rlc_iram_ucode_size_bytes && - adev->gfx.rlc.rlc_dram_ucode_size_bytes) { - info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_IRAM]; - info->ucode_id = AMDGPU_UCODE_ID_RLC_IRAM; - info->fw = adev->gfx.rlc_fw; - adev->firmware.fw_size += - ALIGN(adev->gfx.rlc.rlc_iram_ucode_size_bytes, PAGE_SIZE); - - info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_DRAM]; - info->ucode_id = AMDGPU_UCODE_ID_RLC_DRAM; - info->fw = adev->gfx.rlc_fw; - adev->firmware.fw_size += - ALIGN(adev->gfx.rlc.rlc_dram_ucode_size_bytes, PAGE_SIZE); - } - - if (adev->gfx.rlc.rlcp_ucode_size_bytes) { - info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_P]; - info->ucode_id = AMDGPU_UCODE_ID_RLC_P; - info->fw = adev->gfx.rlc_fw; - adev->firmware.fw_size += - ALIGN(adev->gfx.rlc.rlcp_ucode_size_bytes, PAGE_SIZE); - } - - if (adev->gfx.rlc.rlcv_ucode_size_bytes) { - info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_V]; - info->ucode_id = AMDGPU_UCODE_ID_RLC_V; - info->fw = adev->gfx.rlc_fw; - adev->firmware.fw_size += - ALIGN(adev->gfx.rlc.rlcv_ucode_size_bytes, PAGE_SIZE); - } - } - out: if (err) { dev_err(adev->dev, - "gfx11: Failed to load firmware \"%s\"\n", + "gfx11: Failed to init 
firmware \"%s\"\n", fw_name); release_firmware(adev->gfx.pfp_fw); adev->gfx.pfp_fw = NULL; @@ -1109,7 +843,6 @@ static const struct amdgpu_gfx_funcs gfx_v11_0_gfx_funcs = { static int gfx_v11_0_gpu_early_init(struct amdgpu_device *adev) { - adev->gfx.funcs = &gfx_v11_0_gfx_funcs; switch (adev->ip_versions[GC_HWIP][0]) { case IP_VERSION(11, 0, 0): @@ -1837,7 +1570,7 @@ static void gfx_v11_0_init_compute_vmid(struct amdgpu_device *adev) WREG32_SOC15(GC, 0, regSH_MEM_BASES, sh_mem_bases); /* Enable trap for each kfd vmid. */ - data = RREG32(SOC15_REG_OFFSET(GC, 0, regSPI_GDBG_PER_VMID_CNTL)); + data = RREG32_SOC15(GC, 0, regSPI_GDBG_PER_VMID_CNTL); data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, TRAP_EN, 1); } soc21_grbm_select(adev, 0, 0, 0, 0); @@ -1892,7 +1625,8 @@ static void gfx_v11_0_constants_init(struct amdgpu_device *adev) u32 tmp; int i; - WREG32_FIELD15_PREREG(GC, 0, GRBM_CNTL, READ_TIMEOUT, 0xff); + if (!amdgpu_sriov_vf(adev)) + WREG32_FIELD15_PREREG(GC, 0, GRBM_CNTL, READ_TIMEOUT, 0xff); gfx_v11_0_setup_rb(adev); gfx_v11_0_get_cu_info(adev, &adev->gfx.cu_info); @@ -4270,6 +4004,8 @@ static int gfx_v11_0_kiq_init_queue(struct amdgpu_ring *ring) mutex_unlock(&adev->srbm_mutex); } else { memset((void *)mqd, 0, sizeof(*mqd)); + if (amdgpu_sriov_vf(adev) && adev->in_suspend) + amdgpu_ring_clear_ring(ring); mutex_lock(&adev->srbm_mutex); soc21_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0); amdgpu_ring_init_mqd(ring); @@ -4922,6 +4658,8 @@ static int gfx_v11_0_early_init(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; + adev->gfx.funcs = &gfx_v11_0_gfx_funcs; + adev->gfx.num_gfx_rings = GFX11_NUM_GFX_RINGS; adev->gfx.num_compute_rings = min(amdgpu_gfx_get_num_kcq(adev), AMDGPU_MAX_COMPUTE_RINGS); @@ -4939,6 +4677,26 @@ static int gfx_v11_0_early_init(void *handle) return 0; } +static int gfx_v11_0_ras_late_init(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct ras_common_if *gfx_common_if; + int ret; + + gfx_common_if = kzalloc(sizeof(struct ras_common_if), GFP_KERNEL); + if (!gfx_common_if) + return -ENOMEM; + + gfx_common_if->block = AMDGPU_RAS_BLOCK__GFX; + + ret = amdgpu_ras_feature_enable(adev, gfx_common_if, true); + if (ret) + dev_warn(adev->dev, "Failed to enable gfx11 ras feature\n"); + + kfree(gfx_common_if); + return 0; +} + static int gfx_v11_0_late_init(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; @@ -4952,6 +4710,12 @@ static int gfx_v11_0_late_init(void *handle) if (r) return r; + if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(11, 0, 3)) { + r = gfx_v11_0_ras_late_init(handle); + if (r) + return r; + } + return 0; } @@ -5240,6 +5004,8 @@ static void gfx_v11_0_update_spm_vmid(struct amdgpu_device *adev, unsigned vmid) { u32 reg, data; + amdgpu_gfx_off_ctrl(adev, false); + reg = SOC15_REG_OFFSET(GC, 0, regRLC_SPM_MC_CNTL); if (amdgpu_sriov_is_pp_one_vf(adev)) data = RREG32_NO_KIQ(reg); @@ -5253,6 +5019,8 @@ static void gfx_v11_0_update_spm_vmid(struct amdgpu_device *adev, unsigned vmid) WREG32_SOC15_NO_KIQ(GC, 0, regRLC_SPM_MC_CNTL, data); else WREG32_SOC15(GC, 0, regRLC_SPM_MC_CNTL, data); + + amdgpu_gfx_off_ctrl(adev, true); } static const struct amdgpu_rlc_funcs gfx_v11_0_rlc_funcs = { @@ -5313,6 +5081,7 @@ static int gfx_v11_0_set_powergating_state(void *handle, switch (adev->ip_versions[GC_HWIP][0]) { case IP_VERSION(11, 0, 0): case IP_VERSION(11, 0, 2): + case IP_VERSION(11, 0, 3): amdgpu_gfx_off_ctrl(adev, enable); break; case IP_VERSION(11, 0, 1): @@ 
-5338,6 +5107,7 @@ static int gfx_v11_0_set_clockgating_state(void *handle, case IP_VERSION(11, 0, 0): case IP_VERSION(11, 0, 1): case IP_VERSION(11, 0, 2): + case IP_VERSION(11, 0, 3): gfx_v11_0_update_gfx_clock_gating(adev, state == AMD_CG_STATE_GATE); break; @@ -5559,7 +5329,7 @@ static void gfx_v11_0_ring_emit_ib_gfx(struct amdgpu_ring *ring, control |= ib->length_dw | (vmid << 24); - if ((amdgpu_sriov_vf(ring->adev) || amdgpu_mcbp) && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) { + if (amdgpu_mcbp && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) { control |= INDIRECT_BUFFER_PRE_ENB(1); if (flags & AMDGPU_IB_PREEMPTED) @@ -6320,6 +6090,7 @@ static const struct amdgpu_ring_funcs gfx_v11_0_ring_funcs_gfx = { .align_mask = 0xff, .nop = PACKET3(PACKET3_NOP, 0x3FFF), .support_64bit_ptrs = true, + .secure_submission_supported = true, .vmhub = AMDGPU_GFXHUB_0, .get_rptr = gfx_v11_0_ring_get_rptr_gfx, .get_wptr = gfx_v11_0_ring_get_wptr_gfx, diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c index 7f0b18b0d4c4..d47135606e3e 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c @@ -4643,6 +4643,8 @@ static int gfx_v8_0_kiq_init_queue(struct amdgpu_ring *ring) memset((void *)mqd, 0, sizeof(struct vi_mqd_allocation)); ((struct vi_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF; ((struct vi_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF; + if (amdgpu_sriov_vf(adev) && adev->in_suspend) + amdgpu_ring_clear_ring(ring); mutex_lock(&adev->srbm_mutex); vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0); gfx_v8_0_mqd_init(ring); diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index 1d6d3a852a0b..676832da75eb 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -1091,27 +1091,6 @@ static void gfx_v9_0_free_microcode(struct amdgpu_device *adev) kfree(adev->gfx.rlc.register_list_format); } -static void gfx_v9_0_init_rlc_ext_microcode(struct amdgpu_device *adev) -{ - const struct rlc_firmware_header_v2_1 *rlc_hdr; - - rlc_hdr = (const struct rlc_firmware_header_v2_1 *)adev->gfx.rlc_fw->data; - adev->gfx.rlc_srlc_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_cntl_ucode_ver); - adev->gfx.rlc_srlc_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_cntl_feature_ver); - adev->gfx.rlc.save_restore_list_cntl_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_cntl_size_bytes); - adev->gfx.rlc.save_restore_list_cntl = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_cntl_offset_bytes); - adev->gfx.rlc_srlg_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_gpm_ucode_ver); - adev->gfx.rlc_srlg_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_gpm_feature_ver); - adev->gfx.rlc.save_restore_list_gpm_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_gpm_size_bytes); - adev->gfx.rlc.save_restore_list_gpm = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_gpm_offset_bytes); - adev->gfx.rlc_srls_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_srm_ucode_ver); - adev->gfx.rlc_srls_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_srm_feature_ver); - adev->gfx.rlc.save_restore_list_srm_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_srm_size_bytes); - adev->gfx.rlc.save_restore_list_srm = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_srm_offset_bytes); - adev->gfx.rlc.reg_list_format_direct_reg_list_length = - le32_to_cpu(rlc_hdr->reg_list_format_direct_reg_list_length); -} - static void 
gfx_v9_0_check_fw_write_wait(struct amdgpu_device *adev) { adev->gfx.me_fw_write_wait = false; @@ -1273,9 +1252,6 @@ static int gfx_v9_0_init_cp_gfx_microcode(struct amdgpu_device *adev, { char fw_name[30]; int err; - struct amdgpu_firmware_info *info = NULL; - const struct common_firmware_header *header = NULL; - const struct gfx_firmware_header_v1_0 *cp_hdr; snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name); err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev); @@ -1284,9 +1260,7 @@ static int gfx_v9_0_init_cp_gfx_microcode(struct amdgpu_device *adev, err = amdgpu_ucode_validate(adev->gfx.pfp_fw); if (err) goto out; - cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data; - adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version); - adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version); + amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_PFP); snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name); err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev); @@ -1295,9 +1269,7 @@ static int gfx_v9_0_init_cp_gfx_microcode(struct amdgpu_device *adev, err = amdgpu_ucode_validate(adev->gfx.me_fw); if (err) goto out; - cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data; - adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version); - adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version); + amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_ME); snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name); err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev); @@ -1306,37 +1278,12 @@ static int gfx_v9_0_init_cp_gfx_microcode(struct amdgpu_device *adev, err = amdgpu_ucode_validate(adev->gfx.ce_fw); if (err) goto out; - cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data; - adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version); - adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version); - - if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { - info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP]; - info->ucode_id = AMDGPU_UCODE_ID_CP_PFP; - info->fw = adev->gfx.pfp_fw; - header = (const struct common_firmware_header *)info->fw->data; - adev->firmware.fw_size += - ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE); - - info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME]; - info->ucode_id = AMDGPU_UCODE_ID_CP_ME; - info->fw = adev->gfx.me_fw; - header = (const struct common_firmware_header *)info->fw->data; - adev->firmware.fw_size += - ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE); - - info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE]; - info->ucode_id = AMDGPU_UCODE_ID_CP_CE; - info->fw = adev->gfx.ce_fw; - header = (const struct common_firmware_header *)info->fw->data; - adev->firmware.fw_size += - ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE); - } + amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_CE); out: if (err) { dev_err(adev->dev, - "gfx9: Failed to load firmware \"%s\"\n", + "gfx9: Failed to init firmware \"%s\"\n", fw_name); release_firmware(adev->gfx.pfp_fw); adev->gfx.pfp_fw = NULL; @@ -1353,11 +1300,7 @@ static int gfx_v9_0_init_rlc_microcode(struct amdgpu_device *adev, { char fw_name[30]; int err; - struct amdgpu_firmware_info *info = NULL; - const struct common_firmware_header *header = NULL; const struct rlc_firmware_header_v2_0 *rlc_hdr; - unsigned int *tmp = NULL; - unsigned int i = 0; uint16_t version_major; uint16_t version_minor; 
uint32_t smu_version; @@ -1386,92 +1329,17 @@ static int gfx_v9_0_init_rlc_microcode(struct amdgpu_device *adev, if (err) goto out; err = amdgpu_ucode_validate(adev->gfx.rlc_fw); + if (err) + goto out; rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data; version_major = le16_to_cpu(rlc_hdr->header.header_version_major); version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor); - if (version_major == 2 && version_minor == 1) - adev->gfx.rlc.is_rlc_v2_1 = true; - - adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version); - adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version); - adev->gfx.rlc.save_and_restore_offset = - le32_to_cpu(rlc_hdr->save_and_restore_offset); - adev->gfx.rlc.clear_state_descriptor_offset = - le32_to_cpu(rlc_hdr->clear_state_descriptor_offset); - adev->gfx.rlc.avail_scratch_ram_locations = - le32_to_cpu(rlc_hdr->avail_scratch_ram_locations); - adev->gfx.rlc.reg_restore_list_size = - le32_to_cpu(rlc_hdr->reg_restore_list_size); - adev->gfx.rlc.reg_list_format_start = - le32_to_cpu(rlc_hdr->reg_list_format_start); - adev->gfx.rlc.reg_list_format_separate_start = - le32_to_cpu(rlc_hdr->reg_list_format_separate_start); - adev->gfx.rlc.starting_offsets_start = - le32_to_cpu(rlc_hdr->starting_offsets_start); - adev->gfx.rlc.reg_list_format_size_bytes = - le32_to_cpu(rlc_hdr->reg_list_format_size_bytes); - adev->gfx.rlc.reg_list_size_bytes = - le32_to_cpu(rlc_hdr->reg_list_size_bytes); - adev->gfx.rlc.register_list_format = - kmalloc(adev->gfx.rlc.reg_list_format_size_bytes + - adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL); - if (!adev->gfx.rlc.register_list_format) { - err = -ENOMEM; - goto out; - } - - tmp = (unsigned int *)((uintptr_t)rlc_hdr + - le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes)); - for (i = 0 ; i < (adev->gfx.rlc.reg_list_format_size_bytes >> 2); i++) - adev->gfx.rlc.register_list_format[i] = le32_to_cpu(tmp[i]); - - adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i; - - tmp = (unsigned int *)((uintptr_t)rlc_hdr + - le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes)); - for (i = 0 ; i < (adev->gfx.rlc.reg_list_size_bytes >> 2); i++) - adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]); - - if (adev->gfx.rlc.is_rlc_v2_1) - gfx_v9_0_init_rlc_ext_microcode(adev); - - if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { - info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G]; - info->ucode_id = AMDGPU_UCODE_ID_RLC_G; - info->fw = adev->gfx.rlc_fw; - header = (const struct common_firmware_header *)info->fw->data; - adev->firmware.fw_size += - ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE); - - if (adev->gfx.rlc.is_rlc_v2_1 && - adev->gfx.rlc.save_restore_list_cntl_size_bytes && - adev->gfx.rlc.save_restore_list_gpm_size_bytes && - adev->gfx.rlc.save_restore_list_srm_size_bytes) { - info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL]; - info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL; - info->fw = adev->gfx.rlc_fw; - adev->firmware.fw_size += - ALIGN(adev->gfx.rlc.save_restore_list_cntl_size_bytes, PAGE_SIZE); - - info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM]; - info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM; - info->fw = adev->gfx.rlc_fw; - adev->firmware.fw_size += - ALIGN(adev->gfx.rlc.save_restore_list_gpm_size_bytes, PAGE_SIZE); - - info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM]; - info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM; - info->fw = 
adev->gfx.rlc_fw; - adev->firmware.fw_size += - ALIGN(adev->gfx.rlc.save_restore_list_srm_size_bytes, PAGE_SIZE); - } - } - + err = amdgpu_gfx_rlc_init_microcode(adev, version_major, version_minor); out: if (err) { dev_err(adev->dev, - "gfx9: Failed to load firmware \"%s\"\n", + "gfx9: Failed to init firmware \"%s\"\n", fw_name); release_firmware(adev->gfx.rlc_fw); adev->gfx.rlc_fw = NULL; @@ -1494,9 +1362,6 @@ static int gfx_v9_0_init_cp_compute_microcode(struct amdgpu_device *adev, { char fw_name[30]; int err; - struct amdgpu_firmware_info *info = NULL; - const struct common_firmware_header *header = NULL; - const struct gfx_firmware_header_v1_0 *cp_hdr; if (amdgpu_sriov_vf(adev) && (adev->asic_type == CHIP_ALDEBARAN)) snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_sjt_mec.bin", chip_name); @@ -1509,10 +1374,8 @@ static int gfx_v9_0_init_cp_compute_microcode(struct amdgpu_device *adev, err = amdgpu_ucode_validate(adev->gfx.mec_fw); if (err) goto out; - cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data; - adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version); - adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version); - + amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC1); + amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC1_JT); if (gfx_v9_0_load_mec2_fw_bin_support(adev)) { if (amdgpu_sriov_vf(adev) && (adev->asic_type == CHIP_ALDEBARAN)) @@ -1525,12 +1388,8 @@ static int gfx_v9_0_init_cp_compute_microcode(struct amdgpu_device *adev, err = amdgpu_ucode_validate(adev->gfx.mec2_fw); if (err) goto out; - cp_hdr = (const struct gfx_firmware_header_v1_0 *) - adev->gfx.mec2_fw->data; - adev->gfx.mec2_fw_version = - le32_to_cpu(cp_hdr->header.ucode_version); - adev->gfx.mec2_feature_version = - le32_to_cpu(cp_hdr->ucode_feature_version); + amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC2); + amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC2_JT); } else { err = 0; adev->gfx.mec2_fw = NULL; @@ -1540,49 +1399,12 @@ static int gfx_v9_0_init_cp_compute_microcode(struct amdgpu_device *adev, adev->gfx.mec2_feature_version = adev->gfx.mec_feature_version; } - if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { - info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1]; - info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1; - info->fw = adev->gfx.mec_fw; - header = (const struct common_firmware_header *)info->fw->data; - cp_hdr = (const struct gfx_firmware_header_v1_0 *)info->fw->data; - adev->firmware.fw_size += - ALIGN(le32_to_cpu(header->ucode_size_bytes) - le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE); - - info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1_JT]; - info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1_JT; - info->fw = adev->gfx.mec_fw; - adev->firmware.fw_size += - ALIGN(le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE); - - if (adev->gfx.mec2_fw) { - info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2]; - info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2; - info->fw = adev->gfx.mec2_fw; - header = (const struct common_firmware_header *)info->fw->data; - cp_hdr = (const struct gfx_firmware_header_v1_0 *)info->fw->data; - adev->firmware.fw_size += - ALIGN(le32_to_cpu(header->ucode_size_bytes) - le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE); - - /* TODO: Determine if MEC2 JT FW loading can be removed - for all GFX V9 asic and above */ - if (gfx_v9_0_load_mec2_fw_bin_support(adev)) { - info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2_JT]; - info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2_JT; - info->fw = adev->gfx.mec2_fw; - 
adev->firmware.fw_size += - ALIGN(le32_to_cpu(cp_hdr->jt_size) * 4, - PAGE_SIZE); - } - } - } - out: gfx_v9_0_check_if_need_gfxoff(adev); gfx_v9_0_check_fw_write_wait(adev); if (err) { dev_err(adev->dev, - "gfx9: Failed to load firmware \"%s\"\n", + "gfx9: Failed to init firmware \"%s\"\n", fw_name); release_firmware(adev->gfx.mec_fw); adev->gfx.mec_fw = NULL; @@ -1742,7 +1564,7 @@ static void gfx_v9_0_init_always_on_cu_mask(struct amdgpu_device *adev) mask = 1; cu_bitmap = 0; counter = 0; - gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff); + amdgpu_gfx_select_se_sh(adev, i, j, 0xffffffff); for (k = 0; k < adev->gfx.config.max_cu_per_sh; k ++) { if (cu_info->bitmap[i][j] & mask) { @@ -1761,7 +1583,7 @@ static void gfx_v9_0_init_always_on_cu_mask(struct amdgpu_device *adev) cu_info->ao_cu_bitmap[i][j] = cu_bitmap; } } - gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); + amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); mutex_unlock(&adev->grbm_idx_mutex); } @@ -1783,7 +1605,7 @@ static void gfx_v9_0_init_lbpw(struct amdgpu_device *adev) mutex_lock(&adev->grbm_idx_mutex); /* set mmRLC_LB_INIT_CU_MASK thru broadcast mode to enable all SE/SH*/ - gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); + amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); WREG32_SOC15(GC, 0, mmRLC_LB_INIT_CU_MASK, 0xffffffff); /* set mmRLC_LB_PARAMS = 0x003F_1006 */ @@ -1832,7 +1654,7 @@ static void gfx_v9_4_init_lbpw(struct amdgpu_device *adev) mutex_lock(&adev->grbm_idx_mutex); /* set mmRLC_LB_INIT_CU_MASK thru broadcast mode to enable all SE/SH*/ - gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); + amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); WREG32_SOC15(GC, 0, mmRLC_LB_INIT_CU_MASK, 0xffffffff); /* set mmRLC_LB_PARAMS = 0x003F_1006 */ @@ -2097,8 +1919,6 @@ static int gfx_v9_0_gpu_early_init(struct amdgpu_device *adev) u32 gb_addr_config; int err; - adev->gfx.funcs = &gfx_v9_0_gfx_funcs; - switch (adev->ip_versions[GC_HWIP][0]) { case IP_VERSION(9, 0, 1): adev->gfx.config.max_hw_contexts = 8; @@ -2502,13 +2322,13 @@ static void gfx_v9_0_setup_rb(struct amdgpu_device *adev) mutex_lock(&adev->grbm_idx_mutex); for (i = 0; i < adev->gfx.config.max_shader_engines; i++) { for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) { - gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff); + amdgpu_gfx_select_se_sh(adev, i, j, 0xffffffff); data = gfx_v9_0_get_rb_active_bitmap(adev); active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) * rb_bitmap_width_per_sh); } } - gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); + amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); mutex_unlock(&adev->grbm_idx_mutex); adev->gfx.config.backend_enable_mask = active_rbs; @@ -2645,14 +2465,14 @@ static void gfx_v9_0_wait_for_rlc_serdes(struct amdgpu_device *adev) mutex_lock(&adev->grbm_idx_mutex); for (i = 0; i < adev->gfx.config.max_shader_engines; i++) { for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) { - gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff); + amdgpu_gfx_select_se_sh(adev, i, j, 0xffffffff); for (k = 0; k < adev->usec_timeout; k++) { if (RREG32_SOC15(GC, 0, mmRLC_SERDES_CU_MASTER_BUSY) == 0) break; udelay(1); } if (k == adev->usec_timeout) { - gfx_v9_0_select_se_sh(adev, 0xffffffff, + amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); mutex_unlock(&adev->grbm_idx_mutex); DRM_INFO("Timeout wait for RLC serdes %u,%u\n", @@ -2661,7 +2481,7 @@ static void 
gfx_v9_0_wait_for_rlc_serdes(struct amdgpu_device *adev) } } } - gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); + amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); mutex_unlock(&adev->grbm_idx_mutex); mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK | @@ -3761,6 +3581,8 @@ static int gfx_v9_0_kiq_init_queue(struct amdgpu_ring *ring) memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation)); ((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF; ((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF; + if (amdgpu_sriov_vf(adev) && adev->in_suspend) + amdgpu_ring_clear_ring(ring); mutex_lock(&adev->srbm_mutex); soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0); gfx_v9_0_mqd_init(ring); @@ -4717,6 +4539,8 @@ static int gfx_v9_0_early_init(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; + adev->gfx.funcs = &gfx_v9_0_gfx_funcs; + if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 1) || adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 2)) adev->gfx.num_gfx_rings = 0; @@ -5607,7 +5431,7 @@ static void gfx_v9_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigne BUG_ON(offset > ring->buf_mask); BUG_ON(ring->ring[offset] != 0x55aa55aa); - cur = (ring->wptr & ring->buf_mask) - 1; + cur = (ring->wptr - 1) & ring->buf_mask; if (likely(cur > offset)) ring->ring[offset] = cur - offset; else @@ -6660,7 +6484,7 @@ static void gfx_v9_0_reset_ras_error_count(struct amdgpu_device *adev) for (i = 0; i < ARRAY_SIZE(gfx_v9_0_edc_counter_regs); i++) { for (j = 0; j < gfx_v9_0_edc_counter_regs[i].se_num; j++) { for (k = 0; k < gfx_v9_0_edc_counter_regs[i].instance; k++) { - gfx_v9_0_select_se_sh(adev, j, 0x0, k); + amdgpu_gfx_select_se_sh(adev, j, 0x0, k); RREG32(SOC15_REG_ENTRY_OFFSET(gfx_v9_0_edc_counter_regs[i])); } } @@ -6722,7 +6546,7 @@ static void gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev, for (i = 0; i < ARRAY_SIZE(gfx_v9_0_edc_counter_regs); i++) { for (j = 0; j < gfx_v9_0_edc_counter_regs[i].se_num; j++) { for (k = 0; k < gfx_v9_0_edc_counter_regs[i].instance; k++) { - gfx_v9_0_select_se_sh(adev, j, 0, k); + amdgpu_gfx_select_se_sh(adev, j, 0, k); reg_value = RREG32(SOC15_REG_ENTRY_OFFSET(gfx_v9_0_edc_counter_regs[i])); if (reg_value) @@ -6737,7 +6561,7 @@ static void gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev, err_data->ce_count += sec_count; err_data->ue_count += ded_count; - gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); + amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); mutex_unlock(&adev->grbm_idx_mutex); gfx_v9_0_query_utc_edc_status(adev, err_data); @@ -7141,7 +6965,7 @@ static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev, mask = 1; ao_bitmap = 0; counter = 0; - gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff); + amdgpu_gfx_select_se_sh(adev, i, j, 0xffffffff); gfx_v9_0_set_user_cu_inactive_bitmap( adev, disable_masks[i * adev->gfx.config.max_sh_per_se + j]); bitmap = gfx_v9_0_get_cu_active_bitmap(adev); @@ -7174,7 +6998,7 @@ static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev, cu_info->ao_cu_bitmap[i % 4][j + i / 4] = ao_bitmap; } } - gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); + amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); mutex_unlock(&adev->grbm_idx_mutex); cu_info->number = active_cu_number; diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_1.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_1.c index 8cf53e039c11..3f8676d23a5e 100644 --- 
a/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_1.c +++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_1.c @@ -397,6 +397,9 @@ static void gfxhub_v2_1_gart_disable(struct amdgpu_device *adev) ENABLE_ADVANCED_DRIVER_MODEL, 0); WREG32_SOC15(GC, 0, mmGCMC_VM_MX_L1_TLB_CNTL, tmp); + if (amdgpu_sriov_vf(adev)) + return; + /* Setup L2 cache */ WREG32_FIELD15(GC, 0, GCVM_L2_CNTL, ENABLE_L2_CACHE, 0); WREG32_SOC15(GC, 0, mmGCVM_L2_CNTL3, 0); diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v3_0_3.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v3_0_3.c index 5d3fffd4929f..716ae6f2aefe 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v3_0_3.c +++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v3_0_3.c @@ -154,6 +154,9 @@ static void gfxhub_v3_0_3_init_system_aperture_regs(struct amdgpu_device *adev) { uint64_t value; + if (amdgpu_sriov_vf(adev)) + return; + /* Disable AGP. */ WREG32_SOC15(GC, 0, regGCMC_VM_AGP_BASE, 0); WREG32_SOC15(GC, 0, regGCMC_VM_AGP_TOP, 0); diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c index f513e2c2e964..657e53708248 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c @@ -371,7 +371,9 @@ static void gmc_v10_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, * translation. Avoid this by doing the invalidation from the SDMA * itself. */ - r = amdgpu_job_alloc_with_ib(adev, 16 * 4, AMDGPU_IB_POOL_IMMEDIATE, + r = amdgpu_job_alloc_with_ib(ring->adev, &adev->mman.entity, + AMDGPU_FENCE_OWNER_UNDEFINED, + 16 * 4, AMDGPU_IB_POOL_IMMEDIATE, &job); if (r) goto error_alloc; @@ -380,10 +382,7 @@ static void gmc_v10_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, job->vm_needs_flush = true; job->ibs->ptr[job->ibs->length_dw++] = ring->funcs->nop; amdgpu_ring_pad_ib(ring, &job->ibs[0]); - r = amdgpu_job_submit(job, &adev->mman.entity, - AMDGPU_FENCE_OWNER_UNDEFINED, &fence); - if (r) - goto error_submit; + fence = amdgpu_job_submit(job); mutex_unlock(&adev->mman.gtt_window_lock); @@ -392,9 +391,6 @@ static void gmc_v10_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, return; -error_submit: - amdgpu_job_free(job); - error_alloc: mutex_unlock(&adev->mman.gtt_window_lock); DRM_ERROR("Error flushing GPU TLB using the SDMA (%d)!\n", r); diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c index 846ccb6cf07d..66dfb574cc7d 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c @@ -186,6 +186,10 @@ static void gmc_v11_0_flush_vm_hub(struct amdgpu_device *adev, uint32_t vmid, /* Use register 17 for GART */ const unsigned eng = 17; unsigned int i; + unsigned char hub_ip = 0; + + hub_ip = (vmhub == AMDGPU_GFXHUB_0) ? 
+ GC_HWIP : MMHUB_HWIP; spin_lock(&adev->gmc.invalidate_lock); /* @@ -199,8 +203,8 @@ static void gmc_v11_0_flush_vm_hub(struct amdgpu_device *adev, uint32_t vmid, if (use_semaphore) { for (i = 0; i < adev->usec_timeout; i++) { /* a read return value of 1 means semaphore acuqire */ - tmp = RREG32_NO_KIQ(hub->vm_inv_eng0_sem + - hub->eng_distance * eng); + tmp = RREG32_RLC_NO_KIQ(hub->vm_inv_eng0_sem + + hub->eng_distance * eng, hub_ip); if (tmp & 0x1) break; udelay(1); @@ -210,12 +214,12 @@ static void gmc_v11_0_flush_vm_hub(struct amdgpu_device *adev, uint32_t vmid, DRM_ERROR("Timeout waiting for sem acquire in VM flush!\n"); } - WREG32_NO_KIQ(hub->vm_inv_eng0_req + hub->eng_distance * eng, inv_req); + WREG32_RLC_NO_KIQ(hub->vm_inv_eng0_req + hub->eng_distance * eng, inv_req, hub_ip); /* Wait for ACK with a delay.*/ for (i = 0; i < adev->usec_timeout; i++) { - tmp = RREG32_NO_KIQ(hub->vm_inv_eng0_ack + - hub->eng_distance * eng); + tmp = RREG32_RLC_NO_KIQ(hub->vm_inv_eng0_ack + + hub->eng_distance * eng, hub_ip); tmp &= 1 << vmid; if (tmp) break; @@ -229,8 +233,8 @@ static void gmc_v11_0_flush_vm_hub(struct amdgpu_device *adev, uint32_t vmid, * add semaphore release after invalidation, * write with 0 means semaphore release */ - WREG32_NO_KIQ(hub->vm_inv_eng0_sem + - hub->eng_distance * eng, 0); + WREG32_RLC_NO_KIQ(hub->vm_inv_eng0_sem + + hub->eng_distance * eng, 0, hub_ip); /* Issue additional private vm invalidation to MMHUB */ if ((vmhub != AMDGPU_GFXHUB_0) && diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index 4603653916f5..67ca16a8027c 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -1103,10 +1103,13 @@ static void gmc_v9_0_get_vm_pde(struct amdgpu_device *adev, int level, *flags |= AMDGPU_PDE_BFS(0x9); } else if (level == AMDGPU_VM_PDB0) { - if (*flags & AMDGPU_PDE_PTE) + if (*flags & AMDGPU_PDE_PTE) { *flags &= ~AMDGPU_PDE_PTE; - else + if (!(*flags & AMDGPU_PTE_VALID)) + *addr |= 1 << PAGE_SHIFT; + } else { *flags |= AMDGPU_PTE_TF; + } } } diff --git a/drivers/gpu/drm/amd/amdgpu/imu_v11_0_3.c b/drivers/gpu/drm/amd/amdgpu/imu_v11_0_3.c index 536dafb57ee0..fc69c1a29e23 100644 --- a/drivers/gpu/drm/amd/amdgpu/imu_v11_0_3.c +++ b/drivers/gpu/drm/amd/amdgpu/imu_v11_0_3.c @@ -22,6 +22,7 @@ */ #include "amdgpu.h" #include "amdgpu_imu.h" +#include "imu_v11_0_3.h" #include "gc/gc_11_0_3_offset.h" #include "gc/gc_11_0_3_sh_mask.h" diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v10_1.c b/drivers/gpu/drm/amd/amdgpu/mes_v10_1.c index 067d10073a56..614394118a53 100644 --- a/drivers/gpu/drm/amd/amdgpu/mes_v10_1.c +++ b/drivers/gpu/drm/amd/amdgpu/mes_v10_1.c @@ -121,6 +121,10 @@ static int mes_v10_1_submit_pkt_and_poll_completion(struct amdgpu_mes *mes, if (r < 1) { DRM_ERROR("MES failed to response msg=%d\n", x_pkt->header.opcode); + + while (halt_if_hws_hang) + schedule(); + return -ETIMEDOUT; } @@ -415,10 +419,6 @@ static int mes_v10_1_init_microcode(struct amdgpu_device *adev, mes_hdr = (const struct mes_firmware_header_v1_0 *) adev->mes.fw[pipe]->data; - adev->mes.ucode_fw_version[pipe] = - le32_to_cpu(mes_hdr->mes_ucode_version); - adev->mes.ucode_fw_version[pipe] = - le32_to_cpu(mes_hdr->mes_ucode_data_version); adev->mes.uc_start_addr[pipe] = le32_to_cpu(mes_hdr->mes_uc_start_addr_lo) | ((uint64_t)(le32_to_cpu(mes_hdr->mes_uc_start_addr_hi)) << 32); diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c index b64cd46a159a..1395453a0662 100644 --- 
a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c @@ -98,7 +98,14 @@ static int mes_v11_0_submit_pkt_and_poll_completion(struct amdgpu_mes *mes, struct amdgpu_device *adev = mes->adev; struct amdgpu_ring *ring = &mes->ring; unsigned long flags; + signed long timeout = adev->usec_timeout; + if (amdgpu_emu_mode) { + timeout *= 100; + } else if (amdgpu_sriov_vf(adev)) { + /* Worst case in sriov where all other 15 VF timeout, each VF needs about 600ms */ + timeout = 15 * 600 * 1000; + } BUG_ON(size % 4 != 0); spin_lock_irqsave(&mes->ring_lock, flags); @@ -118,10 +125,14 @@ static int mes_v11_0_submit_pkt_and_poll_completion(struct amdgpu_mes *mes, DRM_DEBUG("MES msg=%d was emitted\n", x_pkt->header.opcode); r = amdgpu_fence_wait_polling(ring, ring->fence_drv.sync_seq, - adev->usec_timeout * (amdgpu_emu_mode ? 100 : 1)); + timeout); if (r < 1) { DRM_ERROR("MES failed to response msg=%d\n", x_pkt->header.opcode); + + while (halt_if_hws_hang) + schedule(); + return -ETIMEDOUT; } @@ -187,6 +198,19 @@ static int mes_v11_0_add_hw_queue(struct amdgpu_mes *mes, mes_add_queue_pkt.is_kfd_process = input->is_kfd_process; mes_add_queue_pkt.trap_en = 1; + /* For KFD, gds_size is re-used for queue size (needed in MES for AQL queues) */ + mes_add_queue_pkt.is_aql_queue = input->is_aql_queue; + mes_add_queue_pkt.gds_size = input->queue_size; + + if (!(((adev->mes.sched_version & AMDGPU_MES_VERSION_MASK) >= 4) && + (adev->ip_versions[GC_HWIP][0] >= IP_VERSION(11, 0, 0)) && + (adev->ip_versions[GC_HWIP][0] <= IP_VERSION(11, 0, 3)))) + mes_add_queue_pkt.trap_en = 1; + + /* For KFD, gds_size is re-used for queue size (needed in MES for AQL queues) */ + mes_add_queue_pkt.is_aql_queue = input->is_aql_queue; + mes_add_queue_pkt.gds_size = input->queue_size; + return mes_v11_0_submit_pkt_and_poll_completion(mes, &mes_add_queue_pkt, sizeof(mes_add_queue_pkt), offsetof(union MESAPI__ADD_QUEUE, api_status)); @@ -465,10 +489,6 @@ static int mes_v11_0_init_microcode(struct amdgpu_device *adev, mes_hdr = (const struct mes_firmware_header_v1_0 *) adev->mes.fw[pipe]->data; - adev->mes.ucode_fw_version[pipe] = - le32_to_cpu(mes_hdr->mes_ucode_version); - adev->mes.ucode_fw_version[pipe] = - le32_to_cpu(mes_hdr->mes_ucode_data_version); adev->mes.uc_start_addr[pipe] = le32_to_cpu(mes_hdr->mes_uc_start_addr_lo) | ((uint64_t)(le32_to_cpu(mes_hdr->mes_uc_start_addr_hi)) << 32); @@ -1143,6 +1163,42 @@ static int mes_v11_0_sw_fini(void *handle) return 0; } +static void mes_v11_0_kiq_dequeue_sched(struct amdgpu_device *adev) +{ + uint32_t data; + int i; + + mutex_lock(&adev->srbm_mutex); + soc21_grbm_select(adev, 3, AMDGPU_MES_SCHED_PIPE, 0, 0); + + /* disable the queue if it's active */ + if (RREG32_SOC15(GC, 0, regCP_HQD_ACTIVE) & 1) { + WREG32_SOC15(GC, 0, regCP_HQD_DEQUEUE_REQUEST, 1); + for (i = 0; i < adev->usec_timeout; i++) { + if (!(RREG32_SOC15(GC, 0, regCP_HQD_ACTIVE) & 1)) + break; + udelay(1); + } + } + data = RREG32_SOC15(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL); + data = REG_SET_FIELD(data, CP_HQD_PQ_DOORBELL_CONTROL, + DOORBELL_EN, 0); + data = REG_SET_FIELD(data, CP_HQD_PQ_DOORBELL_CONTROL, + DOORBELL_HIT, 1); + WREG32_SOC15(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL, data); + + WREG32_SOC15(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL, 0); + + WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_LO, 0); + WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_HI, 0); + WREG32_SOC15(GC, 0, regCP_HQD_PQ_RPTR, 0); + + soc21_grbm_select(adev, 0, 0, 0, 0); + mutex_unlock(&adev->srbm_mutex); + + adev->mes.ring.sched.ready = false; +} 
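The mes_v11_0 hunks above lengthen the packet-poll timeout for emulation and SR-IOV and park the scheduler ring before MES is disabled. As a rough sketch only (hypothetical helper name, constants taken from the hunk), the timeout selection amounts to:

	/* Illustrative sketch, not part of the patch itself. */
	static signed long mes_poll_timeout(struct amdgpu_device *adev)
	{
		signed long timeout = adev->usec_timeout;

		if (amdgpu_emu_mode)
			timeout *= 100;			/* emulation runs far slower */
		else if (amdgpu_sriov_vf(adev))
			timeout = 15 * 600 * 1000;	/* worst case: 15 other VFs, ~600ms each */

		return timeout;
	}

On timeout the submit path now also loops in schedule() while halt_if_hws_hang is set, keeping the thread parked so a hung hardware scheduler can be inspected rather than failing silently.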
+ static void mes_v11_0_kiq_setting(struct amdgpu_ring *ring) { uint32_t tmp; @@ -1194,6 +1250,9 @@ failure: static int mes_v11_0_kiq_hw_fini(struct amdgpu_device *adev) { + if (adev->mes.ring.sched.ready) + mes_v11_0_kiq_dequeue_sched(adev); + mes_v11_0_enable(adev, false); return 0; } @@ -1249,9 +1308,6 @@ failure: static int mes_v11_0_hw_fini(void *handle) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; - - adev->mes.ring.sched.ready = false; return 0; } @@ -1283,7 +1339,8 @@ static int mes_v11_0_late_init(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - if (!amdgpu_in_reset(adev)) + if (!amdgpu_in_reset(adev) && + (adev->ip_versions[GC_HWIP][0] != IP_VERSION(11, 0, 3))) amdgpu_mes_self_test(adev); return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c index 4d304f22889e..998b5d17b271 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c @@ -32,8 +32,6 @@ #include "gc/gc_10_1_0_offset.h" #include "soc15_common.h" -#define mmMM_ATC_L2_MISC_CG_Sienna_Cichlid 0x064d -#define mmMM_ATC_L2_MISC_CG_Sienna_Cichlid_BASE_IDX 0 #define mmDAGB0_CNTL_MISC2_Sienna_Cichlid 0x0070 #define mmDAGB0_CNTL_MISC2_Sienna_Cichlid_BASE_IDX 0 @@ -574,7 +572,6 @@ static void mmhub_v2_0_update_medium_grain_clock_gating(struct amdgpu_device *ad case IP_VERSION(2, 1, 0): case IP_VERSION(2, 1, 1): case IP_VERSION(2, 1, 2): - def = data = RREG32_SOC15(MMHUB, 0, mmMM_ATC_L2_MISC_CG_Sienna_Cichlid); def1 = data1 = RREG32_SOC15(MMHUB, 0, mmDAGB0_CNTL_MISC2_Sienna_Cichlid); break; default: @@ -608,8 +605,6 @@ static void mmhub_v2_0_update_medium_grain_clock_gating(struct amdgpu_device *ad case IP_VERSION(2, 1, 0): case IP_VERSION(2, 1, 1): case IP_VERSION(2, 1, 2): - if (def != data) - WREG32_SOC15(MMHUB, 0, mmMM_ATC_L2_MISC_CG_Sienna_Cichlid, data); if (def1 != data1) WREG32_SOC15(MMHUB, 0, mmDAGB0_CNTL_MISC2_Sienna_Cichlid, data1); break; @@ -634,8 +629,8 @@ static void mmhub_v2_0_update_medium_grain_light_sleep(struct amdgpu_device *ade case IP_VERSION(2, 1, 0): case IP_VERSION(2, 1, 1): case IP_VERSION(2, 1, 2): - def = data = RREG32_SOC15(MMHUB, 0, mmMM_ATC_L2_MISC_CG_Sienna_Cichlid); - break; + /* There is no ATCL2 in MMHUB for 2.1.x */ + return; default: def = data = RREG32_SOC15(MMHUB, 0, mmMM_ATC_L2_MISC_CG); break; @@ -646,18 +641,8 @@ static void mmhub_v2_0_update_medium_grain_light_sleep(struct amdgpu_device *ade else data &= ~MM_ATC_L2_MISC_CG__MEM_LS_ENABLE_MASK; - if (def != data) { - switch (adev->ip_versions[MMHUB_HWIP][0]) { - case IP_VERSION(2, 1, 0): - case IP_VERSION(2, 1, 1): - case IP_VERSION(2, 1, 2): - WREG32_SOC15(MMHUB, 0, mmMM_ATC_L2_MISC_CG_Sienna_Cichlid, data); - break; - default: - WREG32_SOC15(MMHUB, 0, mmMM_ATC_L2_MISC_CG, data); - break; - } - } + if (def != data) + WREG32_SOC15(MMHUB, 0, mmMM_ATC_L2_MISC_CG, data); } static int mmhub_v2_0_set_clockgating(struct amdgpu_device *adev, @@ -695,7 +680,10 @@ static void mmhub_v2_0_get_clockgating(struct amdgpu_device *adev, u64 *flags) case IP_VERSION(2, 1, 0): case IP_VERSION(2, 1, 1): case IP_VERSION(2, 1, 2): - data = RREG32_SOC15(MMHUB, 0, mmMM_ATC_L2_MISC_CG_Sienna_Cichlid); + /* There is no ATCL2 in MMHUB for 2.1.x. 
Keep the status + * based on DAGB + */ + data = MM_ATC_L2_MISC_CG__ENABLE_MASK; data1 = RREG32_SOC15(MMHUB, 0, mmDAGB0_CNTL_MISC2_Sienna_Cichlid); break; default: diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c index a2f04b249132..12906ba74462 100644 --- a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c +++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c @@ -290,7 +290,6 @@ flr_done: reset_context.method = AMD_RESET_METHOD_NONE; reset_context.reset_req_dev = adev; clear_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags); - clear_bit(AMDGPU_SKIP_MODE2_RESET, &reset_context.flags); amdgpu_device_gpu_recover(adev, NULL, &reset_context); } diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c b/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c index a977f0027928..e07757eea7ad 100644 --- a/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c +++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c @@ -317,7 +317,6 @@ flr_done: reset_context.method = AMD_RESET_METHOD_NONE; reset_context.reset_req_dev = adev; clear_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags); - clear_bit(AMDGPU_SKIP_MODE2_RESET, &reset_context.flags); amdgpu_device_gpu_recover(adev, NULL, &reset_context); } diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c b/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c index fd14fa9b9cd7..288c414babdf 100644 --- a/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c +++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c @@ -529,7 +529,6 @@ static void xgpu_vi_mailbox_flr_work(struct work_struct *work) reset_context.method = AMD_RESET_METHOD_NONE; reset_context.reset_req_dev = adev; clear_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags); - clear_bit(AMDGPU_SKIP_MODE2_RESET, &reset_context.flags); amdgpu_device_gpu_recover(adev, NULL, &reset_context); } diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c b/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c index b465baa26762..aa761ff3a5fa 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c @@ -380,6 +380,7 @@ static void nbio_v2_3_enable_aspm(struct amdgpu_device *adev, WREG32_PCIE(smnPCIE_LC_CNTL, data); } +#ifdef CONFIG_PCIEASPM static void nbio_v2_3_program_ltr(struct amdgpu_device *adev) { uint32_t def, data; @@ -401,9 +402,11 @@ static void nbio_v2_3_program_ltr(struct amdgpu_device *adev) if (def != data) WREG32_PCIE(smnBIF_CFG_DEV0_EPF0_DEVICE_CNTL2, data); } +#endif static void nbio_v2_3_program_aspm(struct amdgpu_device *adev) { +#ifdef CONFIG_PCIEASPM uint32_t def, data; def = data = RREG32_PCIE(smnPCIE_LC_CNTL); @@ -459,7 +462,10 @@ static void nbio_v2_3_program_aspm(struct amdgpu_device *adev) if (def != data) WREG32_PCIE(smnPCIE_LC_CNTL6, data); - nbio_v2_3_program_ltr(adev); + /* Don't bother about LTR if LTR is not enabled + * in the path */ + if (adev->pdev->ltr_path) + nbio_v2_3_program_ltr(adev); def = data = RREG32_SOC15(NBIO, 0, mmRCC_BIF_STRAP3); data |= 0x5DE0 << RCC_BIF_STRAP3__STRAP_VLINK_ASPM_IDLE_TIMER__SHIFT; @@ -483,6 +489,7 @@ static void nbio_v2_3_program_aspm(struct amdgpu_device *adev) data &= ~PCIE_LC_CNTL3__LC_DSC_DONT_ENTER_L23_AFTER_PME_ACK_MASK; if (def != data) WREG32_PCIE(smnPCIE_LC_CNTL3, data); +#endif } static void nbio_v2_3_apply_lc_spc_mode_wa(struct amdgpu_device *adev) diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c b/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c index f7f6ddebd3e4..37615a77287b 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c @@ -282,6 +282,7 @@ static void nbio_v6_1_init_registers(struct amdgpu_device *adev) 
mmBIF_BX_DEV0_EPF0_VF0_HDP_MEM_COHERENCY_FLUSH_CNTL) << 2; } +#ifdef CONFIG_PCIEASPM static void nbio_v6_1_program_ltr(struct amdgpu_device *adev) { uint32_t def, data; @@ -303,9 +304,11 @@ static void nbio_v6_1_program_ltr(struct amdgpu_device *adev) if (def != data) WREG32_PCIE(smnBIF_CFG_DEV0_EPF0_DEVICE_CNTL2, data); } +#endif static void nbio_v6_1_program_aspm(struct amdgpu_device *adev) { +#ifdef CONFIG_PCIEASPM uint32_t def, data; def = data = RREG32_PCIE(smnPCIE_LC_CNTL); @@ -361,7 +364,10 @@ static void nbio_v6_1_program_aspm(struct amdgpu_device *adev) if (def != data) WREG32_PCIE(smnPCIE_LC_CNTL6, data); - nbio_v6_1_program_ltr(adev); + /* Don't bother about LTR if LTR is not enabled + * in the path */ + if (adev->pdev->ltr_path) + nbio_v6_1_program_ltr(adev); def = data = RREG32_PCIE(smnRCC_BIF_STRAP3); data |= 0x5DE0 << RCC_BIF_STRAP3__STRAP_VLINK_ASPM_IDLE_TIMER__SHIFT; @@ -385,6 +391,7 @@ static void nbio_v6_1_program_aspm(struct amdgpu_device *adev) data &= ~PCIE_LC_CNTL3__LC_DSC_DONT_ENTER_L23_AFTER_PME_ACK_MASK; if (def != data) WREG32_PCIE(smnPCIE_LC_CNTL3, data); +#endif } const struct amdgpu_nbio_funcs nbio_v6_1_funcs = { diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c b/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c index 11848d1e238b..19455a725939 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c @@ -673,6 +673,7 @@ struct amdgpu_nbio_ras nbio_v7_4_ras = { }; +#ifdef CONFIG_PCIEASPM static void nbio_v7_4_program_ltr(struct amdgpu_device *adev) { uint32_t def, data; @@ -694,9 +695,11 @@ static void nbio_v7_4_program_ltr(struct amdgpu_device *adev) if (def != data) WREG32_PCIE(smnBIF_CFG_DEV0_EPF0_DEVICE_CNTL2, data); } +#endif static void nbio_v7_4_program_aspm(struct amdgpu_device *adev) { +#ifdef CONFIG_PCIEASPM uint32_t def, data; if (adev->ip_versions[NBIO_HWIP][0] == IP_VERSION(7, 4, 4)) @@ -755,7 +758,10 @@ static void nbio_v7_4_program_aspm(struct amdgpu_device *adev) if (def != data) WREG32_PCIE(smnPCIE_LC_CNTL6, data); - nbio_v7_4_program_ltr(adev); + /* Don't bother about LTR if LTR is not enabled + * in the path */ + if (adev->pdev->ltr_path) + nbio_v7_4_program_ltr(adev); def = data = RREG32_PCIE(smnRCC_BIF_STRAP3); data |= 0x5DE0 << RCC_BIF_STRAP3__STRAP_VLINK_ASPM_IDLE_TIMER__SHIFT; @@ -779,6 +785,7 @@ static void nbio_v7_4_program_aspm(struct amdgpu_device *adev) data &= ~PCIE_LC_CNTL3__LC_DSC_DONT_ENTER_L23_AFTER_PME_ACK_MASK; if (def != data) WREG32_PCIE(smnPCIE_LC_CNTL3, data); +#endif } const struct amdgpu_nbio_funcs nbio_v7_4_funcs = { diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_7.c b/drivers/gpu/drm/amd/amdgpu/nbio_v7_7.c index f30bc826a878..def89379b51a 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_7.c +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_7.c @@ -28,6 +28,14 @@ #include "nbio/nbio_7_7_0_sh_mask.h" #include <uapi/linux/kfd_ioctl.h> +static void nbio_v7_7_remap_hdp_registers(struct amdgpu_device *adev) +{ + WREG32_SOC15(NBIO, 0, regBIF_BX0_REMAP_HDP_MEM_FLUSH_CNTL, + adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL); + WREG32_SOC15(NBIO, 0, regBIF_BX0_REMAP_HDP_REG_FLUSH_CNTL, + adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_REG_FLUSH_CNTL); +} + static u32 nbio_v7_7_get_rev_id(struct amdgpu_device *adev) { u32 tmp; @@ -336,4 +344,5 @@ const struct amdgpu_nbio_funcs nbio_v7_7_funcs = { .get_clockgating_state = nbio_v7_7_get_clockgating_state, .ih_control = nbio_v7_7_ih_control, .init_registers = nbio_v7_7_init_registers, + .remap_hdp_registers = nbio_v7_7_remap_hdp_registers, 
}; diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c index 5b5b1ef0c2b1..21d822b1d589 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c +++ b/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c @@ -224,6 +224,12 @@ static int psp_v13_0_bootloader_load_dbg_drv(struct psp_context *psp) return psp_v13_0_bootloader_load_component(psp, &psp->dbg_drv, PSP_BL__LOAD_DBGDRV); } +static int psp_v13_0_bootloader_load_ras_drv(struct psp_context *psp) +{ + return psp_v13_0_bootloader_load_component(psp, &psp->ras_drv, PSP_BL__LOAD_RASDRV); +} + + static int psp_v13_0_bootloader_load_sos(struct psp_context *psp) { int ret; @@ -720,6 +726,7 @@ static const struct psp_funcs psp_v13_0_funcs = { .bootloader_load_soc_drv = psp_v13_0_bootloader_load_soc_drv, .bootloader_load_intf_drv = psp_v13_0_bootloader_load_intf_drv, .bootloader_load_dbg_drv = psp_v13_0_bootloader_load_dbg_drv, + .bootloader_load_ras_drv = psp_v13_0_bootloader_load_ras_drv, .bootloader_load_sos = psp_v13_0_bootloader_load_sos, .ring_init = psp_v13_0_ring_init, .ring_create = psp_v13_0_ring_create, diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c index 6bdffdc1c0b9..c52d246a1d96 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c @@ -342,14 +342,10 @@ static void sdma_v2_4_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 se */ static void sdma_v2_4_gfx_stop(struct amdgpu_device *adev) { - struct amdgpu_ring *sdma0 = &adev->sdma.instance[0].ring; - struct amdgpu_ring *sdma1 = &adev->sdma.instance[1].ring; u32 rb_cntl, ib_cntl; int i; - if ((adev->mman.buffer_funcs_ring == sdma0) || - (adev->mman.buffer_funcs_ring == sdma1)) - amdgpu_ttm_set_buffer_funcs_status(adev, false); + amdgpu_sdma_unset_buffer_funcs_helper(adev); for (i = 0; i < adev->sdma.num_instances; i++) { rb_cntl = RREG32(mmSDMA0_GFX_RB_CNTL + sdma_offsets[i]); diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c index 2584fa3cb13e..486d9b5c1b9e 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c @@ -516,14 +516,10 @@ static void sdma_v3_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 se */ static void sdma_v3_0_gfx_stop(struct amdgpu_device *adev) { - struct amdgpu_ring *sdma0 = &adev->sdma.instance[0].ring; - struct amdgpu_ring *sdma1 = &adev->sdma.instance[1].ring; u32 rb_cntl, ib_cntl; int i; - if ((adev->mman.buffer_funcs_ring == sdma0) || - (adev->mman.buffer_funcs_ring == sdma1)) - amdgpu_ttm_set_buffer_funcs_status(adev, false); + amdgpu_sdma_unset_buffer_funcs_helper(adev); for (i = 0; i < adev->sdma.num_instances; i++) { rb_cntl = RREG32(mmSDMA0_GFX_RB_CNTL + sdma_offsets[i]); diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c index 0cf9d3b486b2..1122bd4eae98 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c @@ -561,44 +561,6 @@ static void sdma_v4_0_setup_ulv(struct amdgpu_device *adev) } } -static int sdma_v4_0_init_inst_ctx(struct amdgpu_sdma_instance *sdma_inst) -{ - int err = 0; - const struct sdma_firmware_header_v1_0 *hdr; - - err = amdgpu_ucode_validate(sdma_inst->fw); - if (err) - return err; - - hdr = (const struct sdma_firmware_header_v1_0 *)sdma_inst->fw->data; - sdma_inst->fw_version = le32_to_cpu(hdr->header.ucode_version); - sdma_inst->feature_version = le32_to_cpu(hdr->ucode_feature_version); - - if (sdma_inst->feature_version >= 20) - 
sdma_inst->burst_nop = true; - - return 0; -} - -static void sdma_v4_0_destroy_inst_ctx(struct amdgpu_device *adev) -{ - int i; - - for (i = 0; i < adev->sdma.num_instances; i++) { - release_firmware(adev->sdma.instance[i].fw); - adev->sdma.instance[i].fw = NULL; - - /* arcturus shares the same FW memory across - all SDMA isntances */ - if (adev->ip_versions[SDMA0_HWIP][0] == IP_VERSION(4, 2, 2) || - adev->ip_versions[SDMA0_HWIP][0] == IP_VERSION(4, 4, 0)) - break; - } - - memset((void *)adev->sdma.instance, 0, - sizeof(struct amdgpu_sdma_instance) * AMDGPU_MAX_SDMA_INSTANCES); -} - /** * sdma_v4_0_init_microcode - load ucode images from disk * @@ -615,9 +577,7 @@ static int sdma_v4_0_init_microcode(struct amdgpu_device *adev) { const char *chip_name; char fw_name[30]; - int err = 0, i; - struct amdgpu_firmware_info *info = NULL; - const struct common_firmware_header *header = NULL; + int ret, i; DRM_DEBUG("\n"); @@ -656,58 +616,25 @@ static int sdma_v4_0_init_microcode(struct amdgpu_device *adev) BUG(); } - snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_sdma.bin", chip_name); - - err = request_firmware(&adev->sdma.instance[0].fw, fw_name, adev->dev); - if (err) - goto out; - - err = sdma_v4_0_init_inst_ctx(&adev->sdma.instance[0]); - if (err) - goto out; - - for (i = 1; i < adev->sdma.num_instances; i++) { + for (i = 0; i < adev->sdma.num_instances; i++) { + if (i == 0) + snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_sdma.bin", chip_name); + else + snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_sdma%d.bin", chip_name, i); if (adev->ip_versions[SDMA0_HWIP][0] == IP_VERSION(4, 2, 2) || adev->ip_versions[SDMA0_HWIP][0] == IP_VERSION(4, 4, 0)) { /* Acturus & Aldebaran will leverage the same FW memory for every SDMA instance */ - memcpy((void *)&adev->sdma.instance[i], - (void *)&adev->sdma.instance[0], - sizeof(struct amdgpu_sdma_instance)); - } - else { - snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_sdma%d.bin", chip_name, i); - - err = request_firmware(&adev->sdma.instance[i].fw, fw_name, adev->dev); - if (err) - goto out; - - err = sdma_v4_0_init_inst_ctx(&adev->sdma.instance[i]); - if (err) - goto out; - } - } - - DRM_DEBUG("psp_load == '%s'\n", - adev->firmware.load_type == AMDGPU_FW_LOAD_PSP ? 
"true" : "false"); - - if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { - for (i = 0; i < adev->sdma.num_instances; i++) { - info = &adev->firmware.ucode[AMDGPU_UCODE_ID_SDMA0 + i]; - info->ucode_id = AMDGPU_UCODE_ID_SDMA0 + i; - info->fw = adev->sdma.instance[i].fw; - header = (const struct common_firmware_header *)info->fw->data; - adev->firmware.fw_size += - ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE); + ret = amdgpu_sdma_init_microcode(adev, fw_name, 0, true); + break; + } else { + ret = amdgpu_sdma_init_microcode(adev, fw_name, i, false); + if (ret) + return ret; } } -out: - if (err) { - DRM_ERROR("sdma_v4_0: Failed to load firmware \"%s\"\n", fw_name); - sdma_v4_0_destroy_inst_ctx(adev); - } - return err; + return ret; } /** @@ -988,18 +915,12 @@ static void sdma_v4_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 se */ static void sdma_v4_0_gfx_stop(struct amdgpu_device *adev) { - struct amdgpu_ring *sdma[AMDGPU_MAX_SDMA_INSTANCES]; u32 rb_cntl, ib_cntl; - int i, unset = 0; - - for (i = 0; i < adev->sdma.num_instances; i++) { - sdma[i] = &adev->sdma.instance[i].ring; + int i; - if ((adev->mman.buffer_funcs_ring == sdma[i]) && unset != 1) { - amdgpu_ttm_set_buffer_funcs_status(adev, false); - unset = 1; - } + amdgpu_sdma_unset_buffer_funcs_helper(adev); + for (i = 0; i < adev->sdma.num_instances; i++) { rb_cntl = RREG32_SDMA(i, mmSDMA0_GFX_RB_CNTL); rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_ENABLE, 0); WREG32_SDMA(i, mmSDMA0_GFX_RB_CNTL, rb_cntl); @@ -1030,20 +951,12 @@ static void sdma_v4_0_rlc_stop(struct amdgpu_device *adev) */ static void sdma_v4_0_page_stop(struct amdgpu_device *adev) { - struct amdgpu_ring *sdma[AMDGPU_MAX_SDMA_INSTANCES]; u32 rb_cntl, ib_cntl; int i; - bool unset = false; - for (i = 0; i < adev->sdma.num_instances; i++) { - sdma[i] = &adev->sdma.instance[i].page; - - if ((adev->mman.buffer_funcs_ring == sdma[i]) && - (!unset)) { - amdgpu_ttm_set_buffer_funcs_status(adev, false); - unset = true; - } + amdgpu_sdma_unset_buffer_funcs_helper(adev); + for (i = 0; i < adev->sdma.num_instances; i++) { rb_cntl = RREG32_SDMA(i, mmSDMA0_PAGE_RB_CNTL); rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_PAGE_RB_CNTL, RB_ENABLE, 0); @@ -1995,7 +1908,11 @@ static int sdma_v4_0_sw_fini(void *handle) amdgpu_ring_fini(&adev->sdma.instance[i].page); } - sdma_v4_0_destroy_inst_ctx(adev); + if (adev->ip_versions[SDMA0_HWIP][0] == IP_VERSION(4, 2, 0) || + adev->ip_versions[SDMA0_HWIP][0] == IP_VERSION(4, 4, 0)) + amdgpu_sdma_destroy_inst_ctx(adev, true); + else + amdgpu_sdma_destroy_inst_ctx(adev, false); return 0; } @@ -2018,8 +1935,11 @@ static int sdma_v4_0_hw_fini(void *handle) struct amdgpu_device *adev = (struct amdgpu_device *)handle; int i; - if (amdgpu_sriov_vf(adev)) + if (amdgpu_sriov_vf(adev)) { + /* disable the scheduler for SDMA */ + amdgpu_sdma_unset_buffer_funcs_helper(adev); return 0; + } for (i = 0; i < adev->sdma.num_instances; i++) { amdgpu_irq_put(adev, &adev->sdma.ecc_irq, diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c index a019ac92edb7..d4d9f196db83 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c @@ -240,10 +240,7 @@ static int sdma_v5_0_init_microcode(struct amdgpu_device *adev) { const char *chip_name; char fw_name[40]; - int err = 0, i; - struct amdgpu_firmware_info *info = NULL; - const struct common_firmware_header *header = NULL; - const struct sdma_firmware_header_v1_0 *hdr; + int ret, i; if (amdgpu_sriov_vf(adev) && 
(adev->ip_versions[SDMA0_HWIP][0] == IP_VERSION(5, 0, 5))) return 0; @@ -272,38 +269,12 @@ static int sdma_v5_0_init_microcode(struct amdgpu_device *adev) snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_sdma.bin", chip_name); else snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_sdma1.bin", chip_name); - err = request_firmware(&adev->sdma.instance[i].fw, fw_name, adev->dev); - if (err) - goto out; - err = amdgpu_ucode_validate(adev->sdma.instance[i].fw); - if (err) - goto out; - hdr = (const struct sdma_firmware_header_v1_0 *)adev->sdma.instance[i].fw->data; - adev->sdma.instance[i].fw_version = le32_to_cpu(hdr->header.ucode_version); - adev->sdma.instance[i].feature_version = le32_to_cpu(hdr->ucode_feature_version); - if (adev->sdma.instance[i].feature_version >= 20) - adev->sdma.instance[i].burst_nop = true; - DRM_DEBUG("psp_load == '%s'\n", - adev->firmware.load_type == AMDGPU_FW_LOAD_PSP ? "true" : "false"); - - if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { - info = &adev->firmware.ucode[AMDGPU_UCODE_ID_SDMA0 + i]; - info->ucode_id = AMDGPU_UCODE_ID_SDMA0 + i; - info->fw = adev->sdma.instance[i].fw; - header = (const struct common_firmware_header *)info->fw->data; - adev->firmware.fw_size += - ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE); - } + ret = amdgpu_sdma_init_microcode(adev, fw_name, i, false); + if (ret) + return ret; } -out: - if (err) { - DRM_ERROR("sdma_v5_0: Failed to load firmware \"%s\"\n", fw_name); - for (i = 0; i < adev->sdma.num_instances; i++) { - release_firmware(adev->sdma.instance[i].fw); - adev->sdma.instance[i].fw = NULL; - } - } - return err; + + return ret; } static unsigned sdma_v5_0_ring_init_cond_exec(struct amdgpu_ring *ring) @@ -613,14 +584,10 @@ static void sdma_v5_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 se */ static void sdma_v5_0_gfx_stop(struct amdgpu_device *adev) { - struct amdgpu_ring *sdma0 = &adev->sdma.instance[0].ring; - struct amdgpu_ring *sdma1 = &adev->sdma.instance[1].ring; u32 rb_cntl, ib_cntl; int i; - if ((adev->mman.buffer_funcs_ring == sdma0) || - (adev->mman.buffer_funcs_ring == sdma1)) - amdgpu_ttm_set_buffer_funcs_status(adev, false); + amdgpu_sdma_unset_buffer_funcs_helper(adev); for (i = 0; i < adev->sdma.num_instances; i++) { rb_cntl = RREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL)); @@ -1465,12 +1432,10 @@ static int sdma_v5_0_sw_fini(void *handle) struct amdgpu_device *adev = (struct amdgpu_device *)handle; int i; - for (i = 0; i < adev->sdma.num_instances; i++) { - release_firmware(adev->sdma.instance[i].fw); - adev->sdma.instance[i].fw = NULL; - + for (i = 0; i < adev->sdma.num_instances; i++) amdgpu_ring_fini(&adev->sdma.instance[i].ring); - } + + amdgpu_sdma_destroy_inst_ctx(adev, false); return 0; } @@ -1491,8 +1456,11 @@ static int sdma_v5_0_hw_fini(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - if (amdgpu_sriov_vf(adev)) + if (amdgpu_sriov_vf(adev)) { + /* disable the scheduler for SDMA */ + amdgpu_sdma_unset_buffer_funcs_helper(adev); return 0; + } sdma_v5_0_ctx_switch_enable(adev, false); sdma_v5_0_enable(adev, false); diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c index 95689ef4be10..809eca54fc61 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c @@ -89,33 +89,6 @@ static u32 sdma_v5_2_get_reg_offset(struct amdgpu_device *adev, u32 instance, u3 return base + internal_offset; } -static int sdma_v5_2_init_inst_ctx(struct 
amdgpu_sdma_instance *sdma_inst) -{ - int err = 0; - const struct sdma_firmware_header_v1_0 *hdr; - - err = amdgpu_ucode_validate(sdma_inst->fw); - if (err) - return err; - - hdr = (const struct sdma_firmware_header_v1_0 *)sdma_inst->fw->data; - sdma_inst->fw_version = le32_to_cpu(hdr->header.ucode_version); - sdma_inst->feature_version = le32_to_cpu(hdr->ucode_feature_version); - - if (sdma_inst->feature_version >= 20) - sdma_inst->burst_nop = true; - - return 0; -} - -static void sdma_v5_2_destroy_inst_ctx(struct amdgpu_device *adev) -{ - release_firmware(adev->sdma.instance[0].fw); - - memset((void *)adev->sdma.instance, 0, - sizeof(struct amdgpu_sdma_instance) * AMDGPU_MAX_SDMA_INSTANCES); -} - /** * sdma_v5_2_init_microcode - load ucode images from disk * @@ -132,9 +105,6 @@ static int sdma_v5_2_init_microcode(struct amdgpu_device *adev) { const char *chip_name; char fw_name[40]; - int err = 0, i; - struct amdgpu_firmware_info *info = NULL; - const struct common_firmware_header *header = NULL; DRM_DEBUG("\n"); @@ -169,42 +139,7 @@ static int sdma_v5_2_init_microcode(struct amdgpu_device *adev) snprintf(fw_name, sizeof(fw_name), "amdgpu/%s.bin", chip_name); - err = request_firmware(&adev->sdma.instance[0].fw, fw_name, adev->dev); - if (err) - goto out; - - err = sdma_v5_2_init_inst_ctx(&adev->sdma.instance[0]); - if (err) - goto out; - - for (i = 1; i < adev->sdma.num_instances; i++) - memcpy((void *)&adev->sdma.instance[i], - (void *)&adev->sdma.instance[0], - sizeof(struct amdgpu_sdma_instance)); - - if (amdgpu_sriov_vf(adev) && (adev->ip_versions[SDMA0_HWIP][0] == IP_VERSION(5, 2, 0))) - return 0; - - DRM_DEBUG("psp_load == '%s'\n", - adev->firmware.load_type == AMDGPU_FW_LOAD_PSP ? "true" : "false"); - - if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { - for (i = 0; i < adev->sdma.num_instances; i++) { - info = &adev->firmware.ucode[AMDGPU_UCODE_ID_SDMA0 + i]; - info->ucode_id = AMDGPU_UCODE_ID_SDMA0 + i; - info->fw = adev->sdma.instance[i].fw; - header = (const struct common_firmware_header *)info->fw->data; - adev->firmware.fw_size += - ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE); - } - } - -out: - if (err) { - DRM_ERROR("sdma_v5_2: Failed to load firmware \"%s\"\n", fw_name); - sdma_v5_2_destroy_inst_ctx(adev); - } - return err; + return amdgpu_sdma_init_microcode(adev, fw_name, 0, true); } static unsigned sdma_v5_2_ring_init_cond_exec(struct amdgpu_ring *ring) @@ -479,18 +414,10 @@ static void sdma_v5_2_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 se */ static void sdma_v5_2_gfx_stop(struct amdgpu_device *adev) { - struct amdgpu_ring *sdma0 = &adev->sdma.instance[0].ring; - struct amdgpu_ring *sdma1 = &adev->sdma.instance[1].ring; - struct amdgpu_ring *sdma2 = &adev->sdma.instance[2].ring; - struct amdgpu_ring *sdma3 = &adev->sdma.instance[3].ring; u32 rb_cntl, ib_cntl; int i; - if ((adev->mman.buffer_funcs_ring == sdma0) || - (adev->mman.buffer_funcs_ring == sdma1) || - (adev->mman.buffer_funcs_ring == sdma2) || - (adev->mman.buffer_funcs_ring == sdma3)) - amdgpu_ttm_set_buffer_funcs_status(adev, false); + amdgpu_sdma_unset_buffer_funcs_helper(adev); for (i = 0; i < adev->sdma.num_instances; i++) { rb_cntl = RREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL)); @@ -1406,7 +1333,7 @@ static int sdma_v5_2_sw_fini(void *handle) for (i = 0; i < adev->sdma.num_instances; i++) amdgpu_ring_fini(&adev->sdma.instance[i].ring); - sdma_v5_2_destroy_inst_ctx(adev); + amdgpu_sdma_destroy_inst_ctx(adev, true); return 0; } @@ -1422,8 
+1349,11 @@ static int sdma_v5_2_hw_fini(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - if (amdgpu_sriov_vf(adev)) + if (amdgpu_sriov_vf(adev)) { + /* disable the scheduler for SDMA */ + amdgpu_sdma_unset_buffer_funcs_helper(adev); return 0; + } sdma_v5_2_ctx_switch_enable(adev, false); sdma_v5_2_enable(adev, false); diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c index 7ae572a08cb3..049c26a45d85 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c @@ -78,33 +78,6 @@ static u32 sdma_v6_0_get_reg_offset(struct amdgpu_device *adev, u32 instance, u3 return base + internal_offset; } -static int sdma_v6_0_init_inst_ctx(struct amdgpu_sdma_instance *sdma_inst) -{ - int err = 0; - const struct sdma_firmware_header_v2_0 *hdr; - - err = amdgpu_ucode_validate(sdma_inst->fw); - if (err) - return err; - - hdr = (const struct sdma_firmware_header_v2_0 *)sdma_inst->fw->data; - sdma_inst->fw_version = le32_to_cpu(hdr->header.ucode_version); - sdma_inst->feature_version = le32_to_cpu(hdr->ucode_feature_version); - - if (sdma_inst->feature_version >= 20) - sdma_inst->burst_nop = true; - - return 0; -} - -static void sdma_v6_0_destroy_inst_ctx(struct amdgpu_device *adev) -{ - release_firmware(adev->sdma.instance[0].fw); - - memset((void*)adev->sdma.instance, 0, - sizeof(struct amdgpu_sdma_instance) * AMDGPU_MAX_SDMA_INSTANCES); -} - /** * sdma_v6_0_init_microcode - load ucode images from disk * @@ -114,16 +87,10 @@ static void sdma_v6_0_destroy_inst_ctx(struct amdgpu_device *adev) * the driver (not loaded into hw). * Returns 0 on success, error on failure. */ - -// emulation only, won't work on real chip -// sdma 6.0.0 real chip need to use PSP to load firmware static int sdma_v6_0_init_microcode(struct amdgpu_device *adev) { char fw_name[30]; char ucode_prefix[30]; - int err = 0, i; - struct amdgpu_firmware_info *info = NULL; - const struct sdma_firmware_header_v2_0 *sdma_hdr; DRM_DEBUG("\n"); @@ -131,43 +98,7 @@ static int sdma_v6_0_init_microcode(struct amdgpu_device *adev) snprintf(fw_name, sizeof(fw_name), "amdgpu/%s.bin", ucode_prefix); - err = request_firmware(&adev->sdma.instance[0].fw, fw_name, adev->dev); - if (err) - goto out; - - err = sdma_v6_0_init_inst_ctx(&adev->sdma.instance[0]); - if (err) - goto out; - - for (i = 1; i < adev->sdma.num_instances; i++) { - memcpy((void*)&adev->sdma.instance[i], - (void*)&adev->sdma.instance[0], - sizeof(struct amdgpu_sdma_instance)); - } - - DRM_DEBUG("psp_load == '%s'\n", - adev->firmware.load_type == AMDGPU_FW_LOAD_PSP ? 
"true" : "false"); - - if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { - sdma_hdr = (const struct sdma_firmware_header_v2_0 *)adev->sdma.instance[0].fw->data; - info = &adev->firmware.ucode[AMDGPU_UCODE_ID_SDMA_UCODE_TH0]; - info->ucode_id = AMDGPU_UCODE_ID_SDMA_UCODE_TH0; - info->fw = adev->sdma.instance[0].fw; - adev->firmware.fw_size += - ALIGN(le32_to_cpu(sdma_hdr->ctx_ucode_size_bytes), PAGE_SIZE); - info = &adev->firmware.ucode[AMDGPU_UCODE_ID_SDMA_UCODE_TH1]; - info->ucode_id = AMDGPU_UCODE_ID_SDMA_UCODE_TH1; - info->fw = adev->sdma.instance[0].fw; - adev->firmware.fw_size += - ALIGN(le32_to_cpu(sdma_hdr->ctl_ucode_size_bytes), PAGE_SIZE); - } - -out: - if (err) { - DRM_ERROR("sdma_v6_0: Failed to load firmware \"%s\"\n", fw_name); - sdma_v6_0_destroy_inst_ctx(adev); - } - return err; + return amdgpu_sdma_init_microcode(adev, fw_name, 0, true); } static unsigned sdma_v6_0_ring_init_cond_exec(struct amdgpu_ring *ring) @@ -467,14 +398,10 @@ static void sdma_v6_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 se */ static void sdma_v6_0_gfx_stop(struct amdgpu_device *adev) { - struct amdgpu_ring *sdma0 = &adev->sdma.instance[0].ring; - struct amdgpu_ring *sdma1 = &adev->sdma.instance[1].ring; u32 rb_cntl, ib_cntl; int i; - if ((adev->mman.buffer_funcs_ring == sdma0) || - (adev->mman.buffer_funcs_ring == sdma1)) - amdgpu_ttm_set_buffer_funcs_status(adev, false); + amdgpu_sdma_unset_buffer_funcs_helper(adev); for (i = 0; i < adev->sdma.num_instances; i++) { rb_cntl = RREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_CNTL)); @@ -484,9 +411,6 @@ static void sdma_v6_0_gfx_stop(struct amdgpu_device *adev) ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_QUEUE0_IB_CNTL, IB_ENABLE, 0); WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_IB_CNTL), ib_cntl); } - - sdma0->sched.ready = false; - sdma1->sched.ready = false; } /** @@ -531,6 +455,9 @@ static void sdma_v6_0_enable(struct amdgpu_device *adev, bool enable) sdma_v6_0_rlc_stop(adev); } + if (amdgpu_sriov_vf(adev)) + return; + for (i = 0; i < adev->sdma.num_instances; i++) { f32_cntl = RREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_F32_CNTL)); f32_cntl = REG_SET_FIELD(f32_cntl, SDMA0_F32_CNTL, HALT, enable ? 
0 : 1); @@ -915,7 +842,8 @@ static int sdma_v6_0_mqd_init(struct amdgpu_device *adev, void *mqd, m->sdmax_rlcx_rb_cntl = order_base_2(prop->queue_size / 4) << SDMA0_QUEUE0_RB_CNTL__RB_SIZE__SHIFT | 1 << SDMA0_QUEUE0_RB_CNTL__RPTR_WRITEBACK_ENABLE__SHIFT | - 4 << SDMA0_QUEUE0_RB_CNTL__RPTR_WRITEBACK_TIMER__SHIFT; + 4 << SDMA0_QUEUE0_RB_CNTL__RPTR_WRITEBACK_TIMER__SHIFT | + 1 << SDMA0_QUEUE0_RB_CNTL__F32_WPTR_POLL_ENABLE__SHIFT; m->sdmax_rlcx_rb_base = lower_32_bits(prop->hqd_base_gpu_addr >> 8); m->sdmax_rlcx_rb_base_hi = upper_32_bits(prop->hqd_base_gpu_addr >> 8); @@ -1370,27 +1298,27 @@ static int sdma_v6_0_sw_fini(void *handle) for (i = 0; i < adev->sdma.num_instances; i++) amdgpu_ring_fini(&adev->sdma.instance[i].ring); - sdma_v6_0_destroy_inst_ctx(adev); + amdgpu_sdma_destroy_inst_ctx(adev, true); return 0; } static int sdma_v6_0_hw_init(void *handle) { - int r; struct amdgpu_device *adev = (struct amdgpu_device *)handle; - r = sdma_v6_0_start(adev); - - return r; + return sdma_v6_0_start(adev); } static int sdma_v6_0_hw_fini(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - if (amdgpu_sriov_vf(adev)) + if (amdgpu_sriov_vf(adev)) { + /* disable the scheduler for SDMA */ + amdgpu_sdma_unset_buffer_funcs_helper(adev); return 0; + } sdma_v6_0_ctx_switch_enable(adev, false); sdma_v6_0_enable(adev, false); @@ -1598,6 +1526,7 @@ static const struct amdgpu_ring_funcs sdma_v6_0_ring_funcs = { .align_mask = 0xf, .nop = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP), .support_64bit_ptrs = true, + .secure_submission_supported = true, .vmhub = AMDGPU_GFXHUB_0, .get_rptr = sdma_v6_0_ring_get_rptr, .get_wptr = sdma_v6_0_ring_get_wptr, diff --git a/drivers/gpu/drm/amd/amdgpu/si_dma.c b/drivers/gpu/drm/amd/amdgpu/si_dma.c index f675111ace20..abca8b529721 100644 --- a/drivers/gpu/drm/amd/amdgpu/si_dma.c +++ b/drivers/gpu/drm/amd/amdgpu/si_dma.c @@ -112,19 +112,16 @@ static void si_dma_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq, static void si_dma_stop(struct amdgpu_device *adev) { - struct amdgpu_ring *ring; u32 rb_cntl; unsigned i; + amdgpu_sdma_unset_buffer_funcs_helper(adev); + for (i = 0; i < adev->sdma.num_instances; i++) { - ring = &adev->sdma.instance[i].ring; /* dma0 */ rb_cntl = RREG32(DMA_RB_CNTL + sdma_offsets[i]); rb_cntl &= ~DMA_RB_ENABLE; WREG32(DMA_RB_CNTL + sdma_offsets[i], rb_cntl); - - if (adev->mman.buffer_funcs_ring == ring) - amdgpu_ttm_set_buffer_funcs_status(adev, false); } } diff --git a/drivers/gpu/drm/amd/amdgpu/sienna_cichlid.c b/drivers/gpu/drm/amd/amdgpu/sienna_cichlid.c index 7aa570c1ce4a..81a6d5b94987 100644 --- a/drivers/gpu/drm/amd/amdgpu/sienna_cichlid.c +++ b/drivers/gpu/drm/amd/amdgpu/sienna_cichlid.c @@ -31,12 +31,23 @@ #include "amdgpu_psp.h" #include "amdgpu_xgmi.h" +static bool sienna_cichlid_is_mode2_default(struct amdgpu_reset_control *reset_ctl) +{ +#if 0 + struct amdgpu_device *adev = (struct amdgpu_device *)reset_ctl->handle; + + if (adev->ip_versions[MP1_HWIP][0] == IP_VERSION(11, 0, 7) && + adev->pm.fw_version >= 0x3a5500 && !amdgpu_sriov_vf(adev)) + return true; +#endif + return false; +} + static struct amdgpu_reset_handler * sienna_cichlid_get_reset_handler(struct amdgpu_reset_control *reset_ctl, struct amdgpu_reset_context *reset_context) { struct amdgpu_reset_handler *handler; - struct amdgpu_device *adev = (struct amdgpu_device *)reset_ctl->handle; if (reset_context->method != AMD_RESET_METHOD_NONE) { list_for_each_entry(handler, &reset_ctl->reset_handlers, @@ -44,15 +55,13 @@ 
sienna_cichlid_get_reset_handler(struct amdgpu_reset_control *reset_ctl, if (handler->reset_method == reset_context->method) return handler; } - } else { - list_for_each_entry(handler, &reset_ctl->reset_handlers, + } + + if (sienna_cichlid_is_mode2_default(reset_ctl)) { + list_for_each_entry (handler, &reset_ctl->reset_handlers, handler_list) { - if (handler->reset_method == AMD_RESET_METHOD_MODE2 && - adev->pm.fw_version >= 0x3a5500 && - !amdgpu_sriov_vf(adev)) { - reset_context->method = AMD_RESET_METHOD_MODE2; + if (handler->reset_method == AMD_RESET_METHOD_MODE2) return handler; - } } } diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c index fde6154f2009..e3b2b6b4f1a6 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15.c +++ b/drivers/gpu/drm/amd/amdgpu/soc15.c @@ -1211,22 +1211,17 @@ static int soc15_common_sw_fini(void *handle) return 0; } -static void soc15_doorbell_range_init(struct amdgpu_device *adev) +static void soc15_sdma_doorbell_range_init(struct amdgpu_device *adev) { int i; - struct amdgpu_ring *ring; - /* sdma/ih doorbell range are programed by hypervisor */ + /* sdma doorbell range is programed by hypervisor */ if (!amdgpu_sriov_vf(adev)) { for (i = 0; i < adev->sdma.num_instances; i++) { - ring = &adev->sdma.instance[i].ring; adev->nbio.funcs->sdma_doorbell_range(adev, i, - ring->use_doorbell, ring->doorbell_index, + true, adev->doorbell_index.sdma_engine[i] << 1, adev->doorbell_index.sdma_doorbell_range); } - - adev->nbio.funcs->ih_doorbell_range(adev, adev->irq.ih.use_doorbell, - adev->irq.ih.doorbell_index); } } @@ -1251,10 +1246,11 @@ static int soc15_common_hw_init(void *handle) soc15_enable_doorbell_aperture(adev, true); /* HW doorbell routing policy: doorbell writing not * in SDMA/IH/MM/ACV range will be routed to CP. So - * we need to init SDMA/IH/MM/ACV doorbell range prior - * to CP ip block init and ring test. + * we need to init SDMA doorbell range prior + * to CP ip block init and ring test. IH already + * happens before CP. 
*/ - soc15_doorbell_range_init(adev); + soc15_sdma_doorbell_range_init(adev); return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/soc21.c b/drivers/gpu/drm/amd/amdgpu/soc21.c index a26c5723c46e..1d4013ed0d10 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc21.c +++ b/drivers/gpu/drm/amd/amdgpu/soc21.c @@ -421,7 +421,9 @@ static bool soc21_need_full_reset(struct amdgpu_device *adev) { switch (adev->ip_versions[GC_HWIP][0]) { case IP_VERSION(11, 0, 0): + return amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__UMC); case IP_VERSION(11, 0, 2): + case IP_VERSION(11, 0, 3): return false; default: return true; @@ -582,10 +584,6 @@ static int soc21_common_early_init(void *handle) AMD_PG_SUPPORT_JPEG | AMD_PG_SUPPORT_ATHUB | AMD_PG_SUPPORT_MMHUB; - if (amdgpu_sriov_vf(adev)) { - adev->cg_flags = 0; - adev->pg_flags = 0; - } adev->external_rev_id = adev->rev_id + 0x1; // TODO: need update break; case IP_VERSION(11, 0, 2): @@ -628,20 +626,21 @@ static int soc21_common_early_init(void *handle) AMD_CG_SUPPORT_JPEG_MGCG; adev->pg_flags = AMD_PG_SUPPORT_GFX_PG | + AMD_PG_SUPPORT_VCN | + AMD_PG_SUPPORT_VCN_DPG | AMD_PG_SUPPORT_JPEG; adev->external_rev_id = adev->rev_id + 0x1; break; case IP_VERSION(11, 0, 3): adev->cg_flags = AMD_CG_SUPPORT_VCN_MGCG | - AMD_CG_SUPPORT_JPEG_MGCG; + AMD_CG_SUPPORT_JPEG_MGCG | + AMD_CG_SUPPORT_GFX_CGCG | + AMD_CG_SUPPORT_GFX_CGLS | + AMD_CG_SUPPORT_REPEATER_FGCG | + AMD_CG_SUPPORT_GFX_MGCG; adev->pg_flags = AMD_PG_SUPPORT_VCN | AMD_PG_SUPPORT_VCN_DPG | AMD_PG_SUPPORT_JPEG; - if (amdgpu_sriov_vf(adev)) { - /* hypervisor control CG and PG enablement */ - adev->cg_flags = 0; - adev->pg_flags = 0; - } adev->external_rev_id = adev->rev_id + 0x20; break; default: @@ -649,6 +648,9 @@ static int soc21_common_early_init(void *handle) return -EINVAL; } + if (amdgpu_sriov_vf(adev)) + amdgpu_virt_init_setting(adev); + return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v6_1.c b/drivers/gpu/drm/amd/amdgpu/umc_v6_1.c index 939cb203f7ad..f17d297b594b 100644 --- a/drivers/gpu/drm/amd/amdgpu/umc_v6_1.c +++ b/drivers/gpu/drm/amd/amdgpu/umc_v6_1.c @@ -327,10 +327,9 @@ static void umc_v6_1_query_error_address(struct amdgpu_device *adev, return; } - /* calculate error address if ue/ce error is detected */ + /* calculate error address if ue error is detected */ if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 && - (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) == 1 || - REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, CECC) == 1)) { + REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) == 1) { err_addr = RREG64_PCIE((mc_umc_addrt0 + umc_reg_offset) * 4); /* the lowest lsb bits should be ignored */ @@ -343,10 +342,7 @@ static void umc_v6_1_query_error_address(struct amdgpu_device *adev, ADDR_OF_256B_BLOCK(channel_index) | OFFSET_IN_256B_BLOCK(err_addr); - /* we only save ue error information currently, ce is skipped */ - if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) - == 1) - amdgpu_umc_fill_error_record(err_data, err_addr, + amdgpu_umc_fill_error_record(err_data, err_addr, retired_page, channel_index, umc_inst); } diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v6_7.c b/drivers/gpu/drm/amd/amdgpu/umc_v6_7.c index bf7524f16b66..72fd963f178b 100644 --- a/drivers/gpu/drm/amd/amdgpu/umc_v6_7.c +++ b/drivers/gpu/drm/amd/amdgpu/umc_v6_7.c @@ -187,20 +187,51 @@ static void umc_v6_7_ecc_info_query_ras_error_count(struct amdgpu_device *adev, } } +void umc_v6_7_convert_error_address(struct amdgpu_device *adev, + struct 
ras_err_data *err_data, uint64_t err_addr, + uint32_t ch_inst, uint32_t umc_inst) +{ + uint32_t channel_index; + uint64_t soc_pa, retired_page, column; + + channel_index = + adev->umc.channel_idx_tbl[umc_inst * adev->umc.channel_inst_num + ch_inst]; + /* translate umc channel address to soc pa, 3 parts are included */ + soc_pa = ADDR_OF_8KB_BLOCK(err_addr) | + ADDR_OF_256B_BLOCK(channel_index) | + OFFSET_IN_256B_BLOCK(err_addr); + + /* The umc channel bits are not original values, they are hashed */ + SET_CHANNEL_HASH(channel_index, soc_pa); + + /* clear [C4 C3 C2] in soc physical address */ + soc_pa &= ~(0x7ULL << UMC_V6_7_PA_C2_BIT); + + /* loop for all possibilities of [C4 C3 C2] */ + for (column = 0; column < UMC_V6_7_NA_MAP_PA_NUM; column++) { + retired_page = soc_pa | (column << UMC_V6_7_PA_C2_BIT); + dev_info(adev->dev, "Error Address(PA): 0x%llx\n", retired_page); + amdgpu_umc_fill_error_record(err_data, err_addr, + retired_page, channel_index, umc_inst); + + /* shift R14 bit */ + retired_page ^= (0x1ULL << UMC_V6_7_PA_R14_BIT); + dev_info(adev->dev, "Error Address(PA): 0x%llx\n", retired_page); + amdgpu_umc_fill_error_record(err_data, err_addr, + retired_page, channel_index, umc_inst); + } +} + static void umc_v6_7_ecc_info_query_error_address(struct amdgpu_device *adev, struct ras_err_data *err_data, uint32_t ch_inst, uint32_t umc_inst) { - uint64_t mc_umc_status, err_addr, soc_pa, retired_page, column; - uint32_t channel_index; + uint64_t mc_umc_status, err_addr; uint32_t eccinfo_table_idx; struct amdgpu_ras *ras = amdgpu_ras_get_context(adev); eccinfo_table_idx = umc_inst * adev->umc.channel_inst_num + ch_inst; - channel_index = - adev->umc.channel_idx_tbl[umc_inst * adev->umc.channel_inst_num + ch_inst]; - mc_umc_status = ras->umc_ecc.ecc[eccinfo_table_idx].mca_umc_status; if (mc_umc_status == 0) @@ -209,42 +240,15 @@ static void umc_v6_7_ecc_info_query_error_address(struct amdgpu_device *adev, if (!err_data->err_addr) return; - /* calculate error address if ue/ce error is detected */ + /* calculate error address if ue error is detected */ if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 && - (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) == 1 || - REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, CECC) == 1)) { + REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) == 1) { err_addr = ras->umc_ecc.ecc[eccinfo_table_idx].mca_umc_addr; err_addr = REG_GET_FIELD(err_addr, MCA_UMC_UMC0_MCUMC_ADDRT0, ErrorAddr); - /* translate umc channel address to soc pa, 3 parts are included */ - soc_pa = ADDR_OF_8KB_BLOCK(err_addr) | - ADDR_OF_256B_BLOCK(channel_index) | - OFFSET_IN_256B_BLOCK(err_addr); - - /* The umc channel bits are not original values, they are hashed */ - SET_CHANNEL_HASH(channel_index, soc_pa); - - /* clear [C4 C3 C2] in soc physical address */ - soc_pa &= ~(0x7ULL << UMC_V6_7_PA_C2_BIT); - - /* we only save ue error information currently, ce is skipped */ - if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) - == 1) { - /* loop for all possibilities of [C4 C3 C2] */ - for (column = 0; column < UMC_V6_7_NA_MAP_PA_NUM; column++) { - retired_page = soc_pa | (column << UMC_V6_7_PA_C2_BIT); - dev_info(adev->dev, "Error Address(PA): 0x%llx\n", retired_page); - amdgpu_umc_fill_error_record(err_data, err_addr, - retired_page, channel_index, umc_inst); - - /* shift R14 bit */ - retired_page ^= (0x1ULL << UMC_V6_7_PA_R14_BIT); - dev_info(adev->dev, "Error Address(PA): 0x%llx\n", retired_page); - 
amdgpu_umc_fill_error_record(err_data, err_addr, - retired_page, channel_index, umc_inst); - } - } + umc_v6_7_convert_error_address(adev, err_data, err_addr, + ch_inst, umc_inst); } } @@ -452,14 +456,11 @@ static void umc_v6_7_query_ras_error_count(struct amdgpu_device *adev, static void umc_v6_7_query_error_address(struct amdgpu_device *adev, struct ras_err_data *err_data, - uint32_t umc_reg_offset, - uint32_t ch_inst, + uint32_t umc_reg_offset, uint32_t ch_inst, uint32_t umc_inst) { uint32_t mc_umc_status_addr; - uint32_t channel_index; - uint64_t mc_umc_status, mc_umc_addrt0; - uint64_t err_addr, soc_pa, retired_page, column; + uint64_t mc_umc_status = 0, mc_umc_addrt0, err_addr; mc_umc_status_addr = SOC15_REG_OFFSET(UMC, 0, regMCA_UMC_UMC0_MCUMC_STATUST0); @@ -477,45 +478,15 @@ static void umc_v6_7_query_error_address(struct amdgpu_device *adev, return; } - channel_index = - adev->umc.channel_idx_tbl[umc_inst * adev->umc.channel_inst_num + ch_inst]; - - /* calculate error address if ue/ce error is detected */ + /* calculate error address if ue error is detected */ if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 && - (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) == 1 || - REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, CECC) == 1)) { - + REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) == 1) { err_addr = RREG64_PCIE((mc_umc_addrt0 + umc_reg_offset) * 4); - err_addr = REG_GET_FIELD(err_addr, MCA_UMC_UMC0_MCUMC_ADDRT0, ErrorAddr); + err_addr = + REG_GET_FIELD(err_addr, MCA_UMC_UMC0_MCUMC_ADDRT0, ErrorAddr); - /* translate umc channel address to soc pa, 3 parts are included */ - soc_pa = ADDR_OF_8KB_BLOCK(err_addr) | - ADDR_OF_256B_BLOCK(channel_index) | - OFFSET_IN_256B_BLOCK(err_addr); - - /* The umc channel bits are not original values, they are hashed */ - SET_CHANNEL_HASH(channel_index, soc_pa); - - /* clear [C4 C3 C2] in soc physical address */ - soc_pa &= ~(0x7ULL << UMC_V6_7_PA_C2_BIT); - - /* we only save ue error information currently, ce is skipped */ - if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) - == 1) { - /* loop for all possibilities of [C4 C3 C2] */ - for (column = 0; column < UMC_V6_7_NA_MAP_PA_NUM; column++) { - retired_page = soc_pa | (column << UMC_V6_7_PA_C2_BIT); - dev_info(adev->dev, "Error Address(PA): 0x%llx\n", retired_page); - amdgpu_umc_fill_error_record(err_data, err_addr, - retired_page, channel_index, umc_inst); - - /* shift R14 bit */ - retired_page ^= (0x1ULL << UMC_V6_7_PA_R14_BIT); - dev_info(adev->dev, "Error Address(PA): 0x%llx\n", retired_page); - amdgpu_umc_fill_error_record(err_data, err_addr, - retired_page, channel_index, umc_inst); - } - } + umc_v6_7_convert_error_address(adev, err_data, err_addr, + ch_inst, umc_inst); } /* clear umc status */ @@ -540,8 +511,7 @@ static void umc_v6_7_query_ras_error_address(struct amdgpu_device *adev, ch_inst); umc_v6_7_query_error_address(adev, err_data, - umc_reg_offset, - ch_inst, + umc_reg_offset, ch_inst, umc_inst); } } diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v6_7.h b/drivers/gpu/drm/amd/amdgpu/umc_v6_7.h index fe41ed2f5945..105245d5b6e5 100644 --- a/drivers/gpu/drm/amd/amdgpu/umc_v6_7.h +++ b/drivers/gpu/drm/amd/amdgpu/umc_v6_7.h @@ -71,5 +71,7 @@ extern const uint32_t umc_v6_7_channel_idx_tbl_second[UMC_V6_7_UMC_INSTANCE_NUM][UMC_V6_7_CHANNEL_INSTANCE_NUM]; extern const uint32_t umc_v6_7_channel_idx_tbl_first[UMC_V6_7_UMC_INSTANCE_NUM][UMC_V6_7_CHANNEL_INSTANCE_NUM]; - +void umc_v6_7_convert_error_address(struct 
amdgpu_device *adev, + struct ras_err_data *err_data, uint64_t err_addr, + uint32_t ch_inst, uint32_t umc_inst); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v8_10.c b/drivers/gpu/drm/amd/amdgpu/umc_v8_10.c index 36a2053f2e8b..91235df54e22 100644 --- a/drivers/gpu/drm/amd/amdgpu/umc_v8_10.c +++ b/drivers/gpu/drm/amd/amdgpu/umc_v8_10.c @@ -101,22 +101,16 @@ static void umc_v8_10_query_correctable_error_count(struct amdgpu_device *adev, uint32_t umc_reg_offset, unsigned long *error_count) { - uint32_t ecc_err_cnt, ecc_err_cnt_addr; uint64_t mc_umc_status; uint32_t mc_umc_status_addr; /* UMC 8_10 registers */ - ecc_err_cnt_addr = - SOC15_REG_OFFSET(UMC, 0, regUMCCH0_0_GeccErrCnt); mc_umc_status_addr = SOC15_REG_OFFSET(UMC, 0, regMCA_UMC_UMC0_MCUMC_STATUST0); - ecc_err_cnt = RREG32_PCIE((ecc_err_cnt_addr + umc_reg_offset) * 4); - *error_count += - (REG_GET_FIELD(ecc_err_cnt, UMCCH0_0_GeccErrCnt, GeccErrCnt) - - UMC_V8_10_CE_CNT_INIT); - - /* Check for SRAM correctable error, MCUMC_STATUS is a 64 bit register */ + /* Rely on MCUMC_STATUS for correctable error counter + * MCUMC_STATUS is a 64 bit register + */ mc_umc_status = RREG64_PCIE((mc_umc_status_addr + umc_reg_offset) * 4); if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 && REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, CECC) == 1) @@ -214,7 +208,10 @@ static void umc_v8_10_query_error_address(struct amdgpu_device *adev, { uint64_t mc_umc_status_addr; uint64_t mc_umc_status, err_addr; - uint32_t channel_index; + uint64_t mc_umc_addrt0, na_err_addr_base; + uint64_t na_err_addr, retired_page_addr; + uint32_t channel_index, addr_lsb, col = 0; + int ret = 0; mc_umc_status_addr = SOC15_REG_OFFSET(UMC, 0, regMCA_UMC_UMC0_MCUMC_STATUST0); @@ -235,13 +232,10 @@ static void umc_v8_10_query_error_address(struct amdgpu_device *adev, umc_inst * adev->umc.channel_inst_num + ch_inst]; - /* calculate error address if ue/ce error is detected */ + /* calculate error address if ue error is detected */ if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 && REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, AddrV) == 1 && - (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) == 1 || - REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, CECC) == 1)) { - uint32_t addr_lsb; - uint64_t mc_umc_addrt0; + REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) == 1) { mc_umc_addrt0 = SOC15_REG_OFFSET(UMC, 0, regMCA_UMC_UMC0_MCUMC_ADDRT0); err_addr = RREG64_PCIE((mc_umc_addrt0 + umc_reg_offset) * 4); @@ -249,32 +243,24 @@ static void umc_v8_10_query_error_address(struct amdgpu_device *adev, /* the lowest lsb bits should be ignored */ addr_lsb = REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, AddrLsb); - err_addr &= ~((0x1ULL << addr_lsb) - 1); - - /* we only save ue error information currently, ce is skipped */ - if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) == 1) { - uint64_t na_err_addr_base = err_addr & ~(0x3ULL << UMC_V8_10_NA_C5_BIT); - uint64_t na_err_addr, retired_page_addr; - uint32_t col = 0; - int ret = 0; - - /* loop for all possibilities of [C6 C5] in normal address. */ - for (col = 0; col < UMC_V8_10_NA_COL_2BITS_POWER_OF_2_NUM; col++) { - na_err_addr = na_err_addr_base | (col << UMC_V8_10_NA_C5_BIT); - - /* Mapping normal error address to retired soc physical address. 
*/ - ret = umc_v8_10_swizzle_mode_na_to_pa(adev, channel_index, - na_err_addr, &retired_page_addr); - if (ret) { - dev_err(adev->dev, "Failed to map pa from umc na.\n"); - break; - } - dev_info(adev->dev, "Error Address(PA): 0x%llx\n", - retired_page_addr); - amdgpu_umc_fill_error_record(err_data, na_err_addr, - retired_page_addr, channel_index, umc_inst); + na_err_addr_base = err_addr & ~(0x3ULL << UMC_V8_10_NA_C5_BIT); + + /* loop for all possibilities of [C6 C5] in normal address. */ + for (col = 0; col < UMC_V8_10_NA_COL_2BITS_POWER_OF_2_NUM; col++) { + na_err_addr = na_err_addr_base | (col << UMC_V8_10_NA_C5_BIT); + + /* Mapping normal error address to retired soc physical address. */ + ret = umc_v8_10_swizzle_mode_na_to_pa(adev, channel_index, + na_err_addr, &retired_page_addr); + if (ret) { + dev_err(adev->dev, "Failed to map pa from umc na.\n"); + break; } + dev_info(adev->dev, "Error Address(PA): 0x%llx\n", + retired_page_addr); + amdgpu_umc_fill_error_record(err_data, na_err_addr, + retired_page_addr, channel_index, umc_inst); } } @@ -344,6 +330,31 @@ static void umc_v8_10_err_cnt_init(struct amdgpu_device *adev) } } +static uint32_t umc_v8_10_query_ras_poison_mode_per_channel( + struct amdgpu_device *adev, + uint32_t umc_reg_offset) +{ + uint32_t ecc_ctrl_addr, ecc_ctrl; + + ecc_ctrl_addr = + SOC15_REG_OFFSET(UMC, 0, regUMCCH0_0_GeccCtrl); + ecc_ctrl = RREG32_PCIE((ecc_ctrl_addr + + umc_reg_offset) * 4); + + return REG_GET_FIELD(ecc_ctrl, UMCCH0_0_GeccCtrl, UCFatalEn); +} + +static bool umc_v8_10_query_ras_poison_mode(struct amdgpu_device *adev) +{ + uint32_t umc_reg_offset = 0; + + /* Enabling fatal error in umc node0 instance0 channel0 will be + * considered as fatal error mode + */ + umc_reg_offset = get_umc_v8_10_reg_offset(adev, 0, 0, 0); + return !umc_v8_10_query_ras_poison_mode_per_channel(adev, umc_reg_offset); +} + const struct amdgpu_ras_block_hw_ops umc_v8_10_ras_hw_ops = { .query_ras_error_count = umc_v8_10_query_ras_error_count, .query_ras_error_address = umc_v8_10_query_ras_error_address, @@ -354,4 +365,5 @@ struct amdgpu_umc_ras umc_v8_10_ras = { .hw_ops = &umc_v8_10_ras_hw_ops, }, .err_cnt_init = umc_v8_10_err_cnt_init, + .query_ras_poison_mode = umc_v8_10_query_ras_poison_mode, }; diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v8_7.c b/drivers/gpu/drm/amd/amdgpu/umc_v8_7.c index f35253e0eaa6..b717fdaa46e4 100644 --- a/drivers/gpu/drm/amd/amdgpu/umc_v8_7.c +++ b/drivers/gpu/drm/amd/amdgpu/umc_v8_7.c @@ -108,20 +108,35 @@ static void umc_v8_7_ecc_info_query_ras_error_count(struct amdgpu_device *adev, } } +static void umc_v8_7_convert_error_address(struct amdgpu_device *adev, + struct ras_err_data *err_data, uint64_t err_addr, + uint32_t ch_inst, uint32_t umc_inst) +{ + uint64_t retired_page; + uint32_t channel_index; + + channel_index = + adev->umc.channel_idx_tbl[umc_inst * adev->umc.channel_inst_num + ch_inst]; + + /* translate umc channel address to soc pa, 3 parts are included */ + retired_page = ADDR_OF_4KB_BLOCK(err_addr) | + ADDR_OF_256B_BLOCK(channel_index) | + OFFSET_IN_256B_BLOCK(err_addr); + + amdgpu_umc_fill_error_record(err_data, err_addr, + retired_page, channel_index, umc_inst); +} + static void umc_v8_7_ecc_info_query_error_address(struct amdgpu_device *adev, struct ras_err_data *err_data, uint32_t ch_inst, uint32_t umc_inst) { - uint64_t mc_umc_status, err_addr, retired_page; - uint32_t channel_index; + uint64_t mc_umc_status, err_addr; uint32_t eccinfo_table_idx; struct amdgpu_ras *ras = amdgpu_ras_get_context(adev); eccinfo_table_idx = umc_inst * 
adev->umc.channel_inst_num + ch_inst; - channel_index = - adev->umc.channel_idx_tbl[umc_inst * adev->umc.channel_inst_num + ch_inst]; - mc_umc_status = ras->umc_ecc.ecc[eccinfo_table_idx].mca_umc_status; if (mc_umc_status == 0) @@ -130,24 +145,15 @@ static void umc_v8_7_ecc_info_query_error_address(struct amdgpu_device *adev, if (!err_data->err_addr) return; - /* calculate error address if ue/ce error is detected */ + /* calculate error address if ue error is detected */ if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 && - (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) == 1 || - REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, CECC) == 1)) { + REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) == 1) { err_addr = ras->umc_ecc.ecc[eccinfo_table_idx].mca_umc_addr; err_addr = REG_GET_FIELD(err_addr, MCA_UMC_UMC0_MCUMC_ADDRT0, ErrorAddr); - /* translate umc channel address to soc pa, 3 parts are included */ - retired_page = ADDR_OF_4KB_BLOCK(err_addr) | - ADDR_OF_256B_BLOCK(channel_index) | - OFFSET_IN_256B_BLOCK(err_addr); - - /* we only save ue error information currently, ce is skipped */ - if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) - == 1) - amdgpu_umc_fill_error_record(err_data, err_addr, - retired_page, channel_index, umc_inst); + umc_v8_7_convert_error_address(adev, err_data, err_addr, + ch_inst, umc_inst); } } @@ -324,14 +330,12 @@ static void umc_v8_7_query_error_address(struct amdgpu_device *adev, uint32_t umc_inst) { uint32_t lsb, mc_umc_status_addr; - uint64_t mc_umc_status, err_addr, retired_page, mc_umc_addrt0; - uint32_t channel_index = adev->umc.channel_idx_tbl[umc_inst * adev->umc.channel_inst_num + ch_inst]; + uint64_t mc_umc_status, err_addr, mc_umc_addrt0; mc_umc_status_addr = SOC15_REG_OFFSET(UMC, 0, mmMCA_UMC_UMC0_MCUMC_STATUST0); mc_umc_addrt0 = SOC15_REG_OFFSET(UMC, 0, mmMCA_UMC_UMC0_MCUMC_ADDRT0); - mc_umc_status = RREG64_PCIE((mc_umc_status_addr + umc_reg_offset) * 4); if (mc_umc_status == 0) @@ -343,10 +347,9 @@ static void umc_v8_7_query_error_address(struct amdgpu_device *adev, return; } - /* calculate error address if ue/ce error is detected */ + /* calculate error address if ue error is detected */ if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 && - (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) == 1 || - REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, CECC) == 1)) { + REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) == 1) { err_addr = RREG64_PCIE((mc_umc_addrt0 + umc_reg_offset) * 4); /* the lowest lsb bits should be ignored */ @@ -354,16 +357,8 @@ static void umc_v8_7_query_error_address(struct amdgpu_device *adev, err_addr = REG_GET_FIELD(err_addr, MCA_UMC_UMC0_MCUMC_ADDRT0, ErrorAddr); err_addr &= ~((0x1ULL << lsb) - 1); - /* translate umc channel address to soc pa, 3 parts are included */ - retired_page = ADDR_OF_4KB_BLOCK(err_addr) | - ADDR_OF_256B_BLOCK(channel_index) | - OFFSET_IN_256B_BLOCK(err_addr); - - /* we only save ue error information currently, ce is skipped */ - if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) - == 1) - amdgpu_umc_fill_error_record(err_data, err_addr, - retired_page, channel_index, umc_inst); + umc_v8_7_convert_error_address(adev, err_data, err_addr, + ch_inst, umc_inst); } /* clear umc status */ diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c index 375c440957dc..5fe872f4bea7 100644 --- 
a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c @@ -216,8 +216,8 @@ static int uvd_v6_0_enc_get_create_msg(struct amdgpu_ring *ring, uint32_t handle uint64_t addr; int i, r; - r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4, - AMDGPU_IB_POOL_DIRECT, &job); + r = amdgpu_job_alloc_with_ib(ring->adev, NULL, NULL, ib_size_dw * 4, + AMDGPU_IB_POOL_DIRECT, &job); if (r) return r; @@ -280,8 +280,8 @@ static int uvd_v6_0_enc_get_destroy_msg(struct amdgpu_ring *ring, uint64_t addr; int i, r; - r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4, - AMDGPU_IB_POOL_DIRECT, &job); + r = amdgpu_job_alloc_with_ib(ring->adev, NULL, NULL, ib_size_dw * 4, + AMDGPU_IB_POOL_DIRECT, &job); if (r) return r; diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c index e668b3baa8c6..e407be6cb63c 100644 --- a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c @@ -213,7 +213,7 @@ static int uvd_v7_0_enc_ring_test_ring(struct amdgpu_ring *ring) * * Open up a stream for HW test */ -static int uvd_v7_0_enc_get_create_msg(struct amdgpu_ring *ring, uint32_t handle, +static int uvd_v7_0_enc_get_create_msg(struct amdgpu_ring *ring, u32 handle, struct amdgpu_bo *bo, struct dma_fence **fence) { @@ -224,8 +224,8 @@ static int uvd_v7_0_enc_get_create_msg(struct amdgpu_ring *ring, uint32_t handle uint64_t addr; int i, r; - r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4, - AMDGPU_IB_POOL_DIRECT, &job); + r = amdgpu_job_alloc_with_ib(ring->adev, NULL, NULL, ib_size_dw * 4, + AMDGPU_IB_POOL_DIRECT, &job); if (r) return r; @@ -276,7 +276,7 @@ err: * * Close up a stream for HW test or if userspace failed to do so */ -static int uvd_v7_0_enc_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle, +static int uvd_v7_0_enc_get_destroy_msg(struct amdgpu_ring *ring, u32 handle, struct amdgpu_bo *bo, struct dma_fence **fence) { @@ -287,8 +287,8 @@ static int uvd_v7_0_enc_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handl uint64_t addr; int i, r; - r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4, - AMDGPU_IB_POOL_DIRECT, &job); + r = amdgpu_job_alloc_with_ib(ring->adev, NULL, NULL, ib_size_dw * 4, + AMDGPU_IB_POOL_DIRECT, &job); if (r) return r; diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c index 39405f0db824..9c8b5fd99037 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c @@ -1761,21 +1761,23 @@ static const struct amdgpu_ring_funcs vcn_v3_0_dec_sw_ring_vm_funcs = { .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper, }; -static int vcn_v3_0_limit_sched(struct amdgpu_cs_parser *p) +static int vcn_v3_0_limit_sched(struct amdgpu_cs_parser *p, + struct amdgpu_job *job) { struct drm_gpu_scheduler **scheds; /* The create msg must be in the first IB submitted */ - if (atomic_read(&p->entity->fence_seq)) + if (atomic_read(&job->base.entity->fence_seq)) return -EINVAL; scheds = p->adev->gpu_sched[AMDGPU_HW_IP_VCN_DEC] [AMDGPU_RING_PRIO_DEFAULT].sched; - drm_sched_entity_modify_sched(p->entity, scheds, 1); + drm_sched_entity_modify_sched(job->base.entity, scheds, 1); return 0; } -static int vcn_v3_0_dec_msg(struct amdgpu_cs_parser *p, uint64_t addr) +static int vcn_v3_0_dec_msg(struct amdgpu_cs_parser *p, struct amdgpu_job *job, + uint64_t addr) { struct ttm_operation_ctx ctx = { false, false }; struct amdgpu_bo_va_mapping *map; @@ -1846,7 +1848,7 @@ static int vcn_v3_0_dec_msg(struct amdgpu_cs_parser 
*p, uint64_t addr) if (create[0] == 0x7 || create[0] == 0x10 || create[0] == 0x11) continue; - r = vcn_v3_0_limit_sched(p); + r = vcn_v3_0_limit_sched(p, job); if (r) goto out; } @@ -1860,7 +1862,7 @@ static int vcn_v3_0_ring_patch_cs_in_place(struct amdgpu_cs_parser *p, struct amdgpu_job *job, struct amdgpu_ib *ib) { - struct amdgpu_ring *ring = to_amdgpu_ring(p->entity->rq->sched); + struct amdgpu_ring *ring = amdgpu_job_ring(job); uint32_t msg_lo = 0, msg_hi = 0; unsigned i; int r; @@ -1879,7 +1881,8 @@ static int vcn_v3_0_ring_patch_cs_in_place(struct amdgpu_cs_parser *p, msg_hi = val; } else if (reg == PACKET0(p->adev->vcn.internal.cmd, 0) && val == 0) { - r = vcn_v3_0_dec_msg(p, ((u64)msg_hi) << 32 | msg_lo); + r = vcn_v3_0_dec_msg(p, job, + ((u64)msg_hi) << 32 | msg_lo); if (r) return r; } diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c index 09c89faa8c27..897a5ce9c9da 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c @@ -150,6 +150,10 @@ static int vcn_v4_0_sw_init(void *handle) fw_shared->present_flag_0 = cpu_to_le32(AMDGPU_FW_SHARED_FLAG_0_UNIFIED_QUEUE); fw_shared->sq.is_enabled = 1; + fw_shared->present_flag_0 |= cpu_to_le32(AMDGPU_VCN_SMU_DPM_INTERFACE_FLAG); + fw_shared->smu_dpm_interface.smu_interface_type = (adev->flags & AMD_IS_APU) ? + AMDGPU_VCN_SMU_DPM_INTERFACE_APU : AMDGPU_VCN_SMU_DPM_INTERFACE_DGPU; + if (amdgpu_sriov_vf(adev)) fw_shared->present_flag_0 |= cpu_to_le32(AMDGPU_VCN_VF_RB_SETUP_FLAG); @@ -1591,21 +1595,23 @@ static void vcn_v4_0_unified_ring_set_wptr(struct amdgpu_ring *ring) } } -static int vcn_v4_0_limit_sched(struct amdgpu_cs_parser *p) +static int vcn_v4_0_limit_sched(struct amdgpu_cs_parser *p, + struct amdgpu_job *job) { struct drm_gpu_scheduler **scheds; /* The create msg must be in the first IB submitted */ - if (atomic_read(&p->entity->fence_seq)) + if (atomic_read(&job->base.entity->fence_seq)) return -EINVAL; scheds = p->adev->gpu_sched[AMDGPU_HW_IP_VCN_ENC] [AMDGPU_RING_PRIO_0].sched; - drm_sched_entity_modify_sched(p->entity, scheds, 1); + drm_sched_entity_modify_sched(job->base.entity, scheds, 1); return 0; } -static int vcn_v4_0_dec_msg(struct amdgpu_cs_parser *p, uint64_t addr) +static int vcn_v4_0_dec_msg(struct amdgpu_cs_parser *p, struct amdgpu_job *job, + uint64_t addr) { struct ttm_operation_ctx ctx = { false, false }; struct amdgpu_bo_va_mapping *map; @@ -1676,7 +1682,7 @@ static int vcn_v4_0_dec_msg(struct amdgpu_cs_parser *p, uint64_t addr) if (create[0] == 0x7 || create[0] == 0x10 || create[0] == 0x11) continue; - r = vcn_v4_0_limit_sched(p); + r = vcn_v4_0_limit_sched(p, job); if (r) goto out; } @@ -1689,32 +1695,34 @@ out: #define RADEON_VCN_ENGINE_TYPE_DECODE (0x00000003) static int vcn_v4_0_ring_patch_cs_in_place(struct amdgpu_cs_parser *p, - struct amdgpu_job *job, - struct amdgpu_ib *ib) + struct amdgpu_job *job, + struct amdgpu_ib *ib) { - struct amdgpu_ring *ring = to_amdgpu_ring(p->entity->rq->sched); - struct amdgpu_vcn_decode_buffer *decode_buffer = NULL; + struct amdgpu_ring *ring = amdgpu_job_ring(job); + struct amdgpu_vcn_decode_buffer *decode_buffer; + uint64_t addr; uint32_t val; - int r = 0; /* The first instance can decode anything */ if (!ring->me) - return r; + return 0; /* unified queue ib header has 8 double words. 
*/ if (ib->length_dw < 8) - return r; + return 0; val = amdgpu_ib_get_value(ib, 6); //RADEON_VCN_ENGINE_TYPE + if (val != RADEON_VCN_ENGINE_TYPE_DECODE) + return 0; - if (val == RADEON_VCN_ENGINE_TYPE_DECODE) { - decode_buffer = (struct amdgpu_vcn_decode_buffer *)&ib->ptr[10]; + decode_buffer = (struct amdgpu_vcn_decode_buffer *)&ib->ptr[10]; - if (decode_buffer->valid_buf_flag & 0x1) - r = vcn_v4_0_dec_msg(p, ((u64)decode_buffer->msg_buffer_address_hi) << 32 | - decode_buffer->msg_buffer_address_lo); - } - return r; + if (!(decode_buffer->valid_buf_flag & 0x1)) + return 0; + + addr = ((u64)decode_buffer->msg_buffer_address_hi) << 32 | + decode_buffer->msg_buffer_address_lo; + return vcn_v4_0_dec_msg(p, job, addr); } static const struct amdgpu_ring_funcs vcn_v4_0_unified_ring_vm_funcs = { diff --git a/drivers/gpu/drm/amd/amdgpu/vega10_ih.c b/drivers/gpu/drm/amd/amdgpu/vega10_ih.c index 03b7066471f9..1e83db0c5438 100644 --- a/drivers/gpu/drm/amd/amdgpu/vega10_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/vega10_ih.c @@ -289,6 +289,10 @@ static int vega10_ih_irq_init(struct amdgpu_device *adev) } } + if (!amdgpu_sriov_vf(adev)) + adev->nbio.funcs->ih_doorbell_range(adev, adev->irq.ih.use_doorbell, + adev->irq.ih.doorbell_index); + pci_set_master(adev->pdev); /* enable interrupts */ diff --git a/drivers/gpu/drm/amd/amdgpu/vega20_ih.c b/drivers/gpu/drm/amd/amdgpu/vega20_ih.c index 2022ffbb8dba..1706081d054d 100644 --- a/drivers/gpu/drm/amd/amdgpu/vega20_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/vega20_ih.c @@ -267,7 +267,7 @@ static void vega20_ih_reroute_ih(struct amdgpu_device *adev) /* vega20 ih reroute will go through psp this * function is used for newer asics starting arcturus */ - if (adev->asic_type >= CHIP_ARCTURUS) { + if (adev->ip_versions[OSSSYS_HWIP][0] >= IP_VERSION(4, 2, 1)) { /* Reroute to IH ring 1 for VMC */ WREG32_SOC15(OSSSYS, 0, mmIH_CLIENT_CFG_INDEX, 0x12); tmp = RREG32_SOC15(OSSSYS, 0, mmIH_CLIENT_CFG_DATA); @@ -308,7 +308,7 @@ static int vega20_ih_irq_init(struct amdgpu_device *adev) adev->nbio.funcs->ih_control(adev); - if (adev->asic_type == CHIP_ARCTURUS && + if ((adev->ip_versions[OSSSYS_HWIP][0] == IP_VERSION(4, 2, 1)) && adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) { ih_chicken = RREG32_SOC15(OSSSYS, 0, mmIH_CHICKEN); if (adev->irq.ih.use_bus_addr) { @@ -321,7 +321,7 @@ static int vega20_ih_irq_init(struct amdgpu_device *adev) /* psp firmware won't program IH_CHICKEN for aldebaran * driver needs to program it properly according to * MC_SPACE type in IH_RB_CNTL */ - if (adev->asic_type == CHIP_ALDEBARAN) { + if (adev->ip_versions[OSSSYS_HWIP][0] == IP_VERSION(4, 4, 0)) { ih_chicken = RREG32_SOC15(OSSSYS, 0, mmIH_CHICKEN_ALDEBARAN); if (adev->irq.ih.use_bus_addr) { ih_chicken = REG_SET_FIELD(ih_chicken, IH_CHICKEN, @@ -340,6 +340,10 @@ static int vega20_ih_irq_init(struct amdgpu_device *adev) } } + if (!amdgpu_sriov_vf(adev)) + adev->nbio.funcs->ih_doorbell_range(adev, adev->irq.ih.use_doorbell, + adev->irq.ih.doorbell_index); + pci_set_master(adev->pdev); /* enable interrupts */
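The umc_v8_10 hunk above retires every candidate page because the two column bits [C6 C5] of the normal address cannot be recovered from the logged error address: the driver masks them off and walks all four combinations, translating each candidate through the swizzle-mode lookup before recording it. Below is a minimal stand-alone sketch of that loop; the C5 bit position and the identity swizzle_na_to_pa() helper are assumptions standing in for UMC_V8_10_NA_C5_BIT and umc_v8_10_swizzle_mode_na_to_pa().

/*
 * Sketch of the retire-all-column-candidates loop from
 * umc_v8_10_query_ras_error_address().  Bit position and the swizzle
 * callback are placeholders, not the real UMC v8.10 definitions.
 */
#include <stdint.h>
#include <stdio.h>

#define NA_C5_BIT	14	/* assumed position of column bit C5 */
#define NA_COL_NUM	4	/* 2 bits [C6 C5] -> 4 candidates */

/* identity stand-in for umc_v8_10_swizzle_mode_na_to_pa() */
static int swizzle_na_to_pa(uint64_t na, uint64_t *pa)
{
	*pa = na;
	return 0;
}

int main(void)
{
	uint64_t err_addr = 0x123456789aULL;	/* made-up logged address */
	uint64_t na_base = err_addr & ~(0x3ULL << NA_C5_BIT);
	uint64_t col, na, pa;

	/* loop over all possibilities of [C6 C5] in the normal address */
	for (col = 0; col < NA_COL_NUM; col++) {
		na = na_base | (col << NA_C5_BIT);
		if (swizzle_na_to_pa(na, &pa))
			break;
		printf("candidate NA 0x%llx -> retired PA 0x%llx\n",
		       (unsigned long long)na, (unsigned long long)pa);
	}
	return 0;
}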

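Similarly, the umc_v8_7_convert_error_address() helper factored out in the umc_v8_7 hunks composes the retired SoC physical address from three parts: the 4KB-block portion of the UMC channel address, a 256-byte block selected by the channel index, and the byte offset inside that 256-byte block. The sketch below mirrors that composition; the ADDR_OF_4KB_BLOCK / ADDR_OF_256B_BLOCK / OFFSET_IN_256B_BLOCK layouts are assumed for illustration and are not the driver's actual header definitions.

/*
 * Sketch of the three-part address composition performed by
 * umc_v8_7_convert_error_address().  Macro layouts are assumptions
 * made for illustration only.
 */
#include <stdint.h>
#include <stdio.h>

#define ADDR_OF_4KB_BLOCK(addr)		(((addr) & ~0xfffULL) << 1)	/* assumed */
#define ADDR_OF_256B_BLOCK(ch)		((uint64_t)(ch) << 8)		/* assumed */
#define OFFSET_IN_256B_BLOCK(addr)	((addr) & 0xffULL)		/* assumed */

int main(void)
{
	uint64_t err_addr = 0xabcdef123ULL;	/* made-up UMC channel address */
	uint32_t channel_index = 5;		/* made-up channel index */
	uint64_t retired_page;

	/* translate umc channel address to soc pa, 3 parts are included */
	retired_page = ADDR_OF_4KB_BLOCK(err_addr) |
		       ADDR_OF_256B_BLOCK(channel_index) |
		       OFFSET_IN_256B_BLOCK(err_addr);

	printf("retired page 0x%llx\n", (unsigned long long)retired_page);
	return 0;
}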